Skip to content

Commit 46f25a4

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pd-todatetime-unit_s-float-vs-int
2 parents 76cd0eb + fd20f7d commit 46f25a4

File tree

130 files changed

+2309
-1212
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

130 files changed

+2309
-1212
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@ check:
3232
--included-file-extensions="py" \
3333
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
3434
pandas/
35+
36+
python3 scripts/validate_unwanted_patterns.py \
37+
--validation-type="private_import_across_module" \
38+
--included-file-extensions="py" \
39+
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
40+
pandas/

asv_bench/benchmarks/groupby.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -627,49 +627,63 @@ def time_first(self):
627627

628628

629629
class TransformEngine:
630-
def setup(self):
630+
631+
param_names = ["parallel"]
632+
params = [[True, False]]
633+
634+
def setup(self, parallel):
631635
N = 10 ** 3
632636
data = DataFrame(
633637
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
634638
columns=[0, 1],
635639
)
640+
self.parallel = parallel
636641
self.grouper = data.groupby(0)
637642

638-
def time_series_numba(self):
643+
def time_series_numba(self, parallel):
639644
def function(values, index):
640645
return values * 5
641646

642-
self.grouper[1].transform(function, engine="numba")
647+
self.grouper[1].transform(
648+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
649+
)
643650

644-
def time_series_cython(self):
651+
def time_series_cython(self, parallel):
645652
def function(values):
646653
return values * 5
647654

648655
self.grouper[1].transform(function, engine="cython")
649656

650-
def time_dataframe_numba(self):
657+
def time_dataframe_numba(self, parallel):
651658
def function(values, index):
652659
return values * 5
653660

654-
self.grouper.transform(function, engine="numba")
661+
self.grouper.transform(
662+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
663+
)
655664

656-
def time_dataframe_cython(self):
665+
def time_dataframe_cython(self, parallel):
657666
def function(values):
658667
return values * 5
659668

660669
self.grouper.transform(function, engine="cython")
661670

662671

663672
class AggEngine:
664-
def setup(self):
673+
674+
param_names = ["parallel"]
675+
params = [[True, False]]
676+
677+
def setup(self, parallel):
665678
N = 10 ** 3
666679
data = DataFrame(
667680
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
668681
columns=[0, 1],
669682
)
683+
self.parallel = parallel
670684
self.grouper = data.groupby(0)
671685

672-
def time_series_numba(self):
686+
def time_series_numba(self, parallel):
673687
def function(values, index):
674688
total = 0
675689
for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
679693
total += value * 2
680694
return total
681695

682-
self.grouper[1].agg(function, engine="numba")
696+
self.grouper[1].agg(
697+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
698+
)
683699

684-
def time_series_cython(self):
700+
def time_series_cython(self, parallel):
685701
def function(values):
686702
total = 0
687703
for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):
693709

694710
self.grouper[1].agg(function, engine="cython")
695711

696-
def time_dataframe_numba(self):
712+
def time_dataframe_numba(self, parallel):
697713
def function(values, index):
698714
total = 0
699715
for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
703719
total += value * 2
704720
return total
705721

706-
self.grouper.agg(function, engine="numba")
722+
self.grouper.agg(
723+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
724+
)
707725

708-
def time_dataframe_cython(self):
726+
def time_dataframe_cython(self, parallel):
709727
def function(values):
710728
total = 0
711729
for i, value in enumerate(values):

ci/build39.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,9 @@
33

44
sudo apt-get install build-essential gcc xvfb
55
pip install --no-deps -U pip wheel setuptools
6-
pip install python-dateutil pytz pytest pytest-xdist hypothesis
6+
pip install numpy python-dateutil pytz pytest pytest-xdist hypothesis
77
pip install cython --pre # https://github.com/cython/cython/issues/3395
88

9-
git clone https://github.com/numpy/numpy
10-
cd numpy
11-
python setup.py build_ext --inplace
12-
python setup.py install
13-
cd ..
14-
rm -rf numpy
15-
169
python setup.py build_ext -inplace
1710
python -m pip install --no-build-isolation -e .
1811

ci/code_checks.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
116116
fi
117117
RET=$(($RET + $?)) ; echo $MSG "DONE"
118118

119-
MSG='Check for use of private module attribute access' ; echo $MSG
119+
MSG='Check for import of private attributes across modules' ; echo $MSG
120120
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
121+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122122
else
123-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
123+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124+
fi
125+
RET=$(($RET + $?)) ; echo $MSG "DONE"
126+
127+
MSG='Check for use of private functions across modules' ; echo $MSG
128+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
129+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
130+
else
131+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
124132
fi
125133
RET=$(($RET + $?)) ; echo $MSG "DONE"
126134

doc/source/development/extending.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ applies only to certain dtypes.
7373
Extension types
7474
---------------
7575

76-
.. versionadded:: 0.23.0
77-
7876
.. warning::
7977

8078
The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and

doc/source/getting_started/install.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,6 @@ Optional dependencies for parsing HTML
301301
One of the following combinations of libraries is needed to use the
302302
top-level :func:`~pandas.read_html` function:
303303

304-
.. versionchanged:: 0.23.0
305-
306304
* `BeautifulSoup4`_ and `html5lib`_
307305
* `BeautifulSoup4`_ and `lxml`_
308306
* `BeautifulSoup4`_ and `html5lib`_ and `lxml`_

doc/source/user_guide/advanced.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,8 +1065,6 @@ are closed on. Intervals are closed on the right side by default.
10651065
10661066
pd.interval_range(start=0, end=4, closed='neither')
10671067
1068-
.. versionadded:: 0.23.0
1069-
10701068
Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced
10711069
intervals from ``start`` to ``end`` inclusively, with ``periods`` number of elements
10721070
in the resulting ``IntervalIndex``:

doc/source/user_guide/basics.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,8 +1877,6 @@ different columns.
18771877
By indexes and values
18781878
~~~~~~~~~~~~~~~~~~~~~
18791879

1880-
.. versionadded:: 0.23.0
1881-
18821880
Strings passed as the ``by`` parameter to :meth:`DataFrame.sort_values` may
18831881
refer to either columns or index level names.
18841882

doc/source/user_guide/categorical.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,6 @@ only labels present in a given column are categories:
112112
df['B']
113113
114114
115-
.. versionadded:: 0.23.0
116-
117115
Analogously, all columns in an existing ``DataFrame`` can be batch converted using :meth:`DataFrame.astype`:
118116

119117
.. ipython:: python

doc/source/user_guide/dsintro.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,6 @@ to be inserted (for example, a ``Series`` or NumPy array), or a function
597597
of one argument to be called on the ``DataFrame``. A *copy* of the original
598598
DataFrame is returned, with the new values inserted.
599599

600-
.. versionchanged:: 0.23.0
601-
602600
Starting with Python 3.6 the order of ``**kwargs`` is preserved. This allows
603601
for *dependent* assignment, where an expression later in ``**kwargs`` can refer
604602
to a column created earlier in the same :meth:`~DataFrame.assign`.

doc/source/user_guide/io.rst

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ take full advantage of the flexibility of the date parsing API:
930930
.. ipython:: python
931931
932932
df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec,
933-
date_parser=pd.io.date_converters.parse_date_time)
933+
date_parser=pd.to_datetime)
934934
df
935935
936936
Pandas will try to call the ``date_parser`` function in three different ways. If
@@ -942,11 +942,6 @@ an exception is raised, the next one is tried:
942942
2. If #1 fails, ``date_parser`` is called with all the columns
943943
concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``).
944944

945-
3. If #2 fails, ``date_parser`` is called once for every row with one or more
946-
string arguments from the columns indicated with `parse_dates`
947-
(e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')``
948-
for the second, etc.).
949-
950945
Note that performance-wise, you should try these methods of parsing dates in order:
951946

952947
1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
@@ -958,14 +953,6 @@ Note that performance-wise, you should try these methods of parsing dates in ord
958953
For optimal performance, this should be vectorized, i.e., it should accept arrays
959954
as arguments.
960955

961-
You can explore the date parsing functionality in
962-
`date_converters.py <https://github.com/pandas-dev/pandas/blob/master/pandas/io/date_converters.py>`__
963-
and add your own. We would love to turn this module into a community supported
964-
set of date/time parsers. To get you started, ``date_converters.py`` contains
965-
functions to parse dual date and time columns, year/month/day columns,
966-
and year/month/day/hour/minute/second columns. It also contains a
967-
``generic_parser`` function so you can curry it with a function that deals with
968-
a single date rather than the entire array.
969956

970957
.. ipython:: python
971958
:suppress:
@@ -2373,8 +2360,6 @@ A few notes on the generated table schema:
23732360
then ``level_<i>`` is used.
23742361

23752362

2376-
.. versionadded:: 0.23.0
2377-
23782363
``read_json`` also accepts ``orient='table'`` as an argument. This allows for
23792364
the preservation of metadata such as dtypes and index names in a
23802365
round-trippable manner.

doc/source/user_guide/merging.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,6 @@ behavior:
175175
176176
.. warning::
177177

178-
.. versionchanged:: 0.23.0
179-
180178
The default behavior with ``join='outer'`` is to sort the other axis
181179
(columns in this case). In a future version of pandas, the default will
182180
be to not sort. We specified ``sort=False`` to opt in to the new
@@ -1198,8 +1196,6 @@ done using the following code.
11981196
Merging on a combination of columns and index levels
11991197
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12001198

1201-
.. versionadded:: 0.23
1202-
12031199
Strings passed as the ``on``, ``left_on``, and ``right_on`` parameters
12041200
may refer to either column names or index level names. This enables merging
12051201
``DataFrame`` instances on a combination of index levels and columns without

doc/source/user_guide/missing_data.rst

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,6 @@ examined :ref:`in the API <api.dataframe.missing>`.
336336
Interpolation
337337
~~~~~~~~~~~~~
338338

339-
.. versionadded:: 0.23.0
340-
341-
The ``limit_area`` keyword argument was added.
342-
343339
Both Series and DataFrame objects have :meth:`~DataFrame.interpolate`
344340
that, by default, performs linear interpolation at missing data points.
345341

@@ -507,8 +503,8 @@ By default, ``NaN`` values are filled in a ``forward`` direction. Use
507503
ser.interpolate(limit_direction='both')
508504
509505
By default, ``NaN`` values are filled whether they are inside (surrounded by)
510-
existing valid values, or outside existing valid values. Introduced in v0.23
511-
the ``limit_area`` parameter restricts filling to either inside or outside values.
506+
existing valid values, or outside existing valid values. The ``limit_area``
507+
parameter restricts filling to either inside or outside values.
512508

513509
.. ipython:: python
514510

doc/source/user_guide/reshaping.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -679,8 +679,6 @@ To choose another dtype, use the ``dtype`` argument:
679679
680680
pd.get_dummies(df, dtype=bool).dtypes
681681
682-
.. versionadded:: 0.23.0
683-
684682
685683
.. _reshaping.factorize:
686684

doc/source/user_guide/text.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,6 @@ following code will cause trouble because of the regular expression meaning of
282282
# We need to escape the special character (for >1 len patterns)
283283
dollars.str.replace(r'-\$', '-')
284284
285-
.. versionadded:: 0.23.0
286-
287285
If you do want literal replacement of a string (equivalent to
288286
:meth:`str.replace`), you can set the optional ``regex`` parameter to
289287
``False``, rather than escaping each character. In this case both ``pat``
@@ -390,8 +388,6 @@ Missing values on either side will result in missing values in the result as wel
390388
Concatenating a Series and something array-like into a Series
391389
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
392390

393-
.. versionadded:: 0.23.0
394-
395391
The parameter ``others`` can also be two-dimensional. In this case, the number or rows must match the lengths of the calling ``Series`` (or ``Index``).
396392

397393
.. ipython:: python
@@ -404,8 +400,6 @@ The parameter ``others`` can also be two-dimensional. In this case, the number o
404400
Concatenating a Series and an indexed object into a Series, with alignment
405401
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
406402

407-
.. versionadded:: 0.23.0
408-
409403
For concatenation with a ``Series`` or ``DataFrame``, it is possible to align the indexes before concatenation by setting
410404
the ``join``-keyword.
411405

doc/source/user_guide/timedeltas.rst

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ parsing, and attributes.
1818
Parsing
1919
-------
2020

21-
You can construct a ``Timedelta`` scalar through various arguments:
21+
You can construct a ``Timedelta`` scalar through various arguments, including `ISO 8601 Duration`_ strings.
2222

2323
.. ipython:: python
2424
@@ -53,10 +53,6 @@ You can construct a ``Timedelta`` scalar through various arguments:
5353
pd.Timedelta('P0DT0H1M0S')
5454
pd.Timedelta('P0DT0H0M0.000000123S')
5555
56-
.. versionadded:: 0.23.0
57-
58-
Added constructor for `ISO 8601 Duration`_ strings
59-
6056
:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.
6157

6258
.. ipython:: python
@@ -387,8 +383,6 @@ The ``freq`` parameter can passed a variety of :ref:`frequency aliases <timeseri
387383
pd.timedelta_range(start='1 days', periods=5, freq='2D5H')
388384
389385
390-
.. versionadded:: 0.23.0
391-
392386
Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced
393387
timedeltas from ``start`` to ``end`` inclusively, with ``periods`` number of elements
394388
in the resulting ``TimedeltaIndex``:

doc/source/user_guide/timeseries.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -461,8 +461,6 @@ of those specified will not be generated:
461461
462462
pd.bdate_range(start=start, periods=20)
463463
464-
.. versionadded:: 0.23.0
465-
466464
Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced
467465
dates from ``start`` to ``end`` inclusively, with ``periods`` number of elements in the
468466
resulting ``DatetimeIndex``:
@@ -643,8 +641,6 @@ Slicing with string indexing also honors UTC offset.
643641
Slice vs. exact match
644642
~~~~~~~~~~~~~~~~~~~~~
645643

646-
.. versionchanged:: 0.20.0
647-
648644
The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match.
649645

650646
Consider a ``Series`` object with a minute resolution index:

0 commit comments

Comments
 (0)