Skip to content

Commit 2b705a8

Browse files
fixed merge conflicts
2 parents 35cc2b3 + 0db2286 commit 2b705a8

File tree

243 files changed

+5359
-3721
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

243 files changed

+5359
-3721
lines changed

asv_bench/benchmarks/arithmetic.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
6767
self.ser.add(self.ser, fill_value=4)
6868

6969

70-
class MixedFrameWithSeriesAxis0:
70+
class MixedFrameWithSeriesAxis:
7171
params = [
7272
[
7373
"eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
7878
"gt",
7979
"add",
8080
"sub",
81-
"div",
81+
"truediv",
8282
"floordiv",
8383
"mul",
8484
"pow",
@@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
8787
param_names = ["opname"]
8888

8989
def setup(self, opname):
90-
arr = np.arange(10 ** 6).reshape(100, -1)
90+
arr = np.arange(10 ** 6).reshape(1000, -1)
9191
df = DataFrame(arr)
9292
df["C"] = 1.0
9393
self.df = df
9494
self.ser = df[0]
95+
self.row = df.iloc[0]
9596

9697
def time_frame_op_with_series_axis0(self, opname):
9798
getattr(self.df, opname)(self.ser, axis=0)
9899

100+
def time_frame_op_with_series_axis1(self, opname):
101+
getattr(operator, opname)(self.df, self.ser)
102+
99103

100104
class Ops:
101105

asv_bench/benchmarks/frame_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def setup(self):
564564

565565
def time_frame_get_dtype_counts(self):
566566
with warnings.catch_warnings(record=True):
567-
self.df._data.get_dtype_counts()
567+
self.df.dtypes.value_counts()
568568

569569
def time_info(self):
570570
self.df.info()

asv_bench/benchmarks/groupby.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,4 +626,96 @@ def time_first(self):
626626
self.df_nans.groupby("key").transform("first")
627627

628628

629+
class TransformEngine:
630+
def setup(self):
631+
N = 10 ** 3
632+
data = DataFrame(
633+
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
634+
columns=[0, 1],
635+
)
636+
self.grouper = data.groupby(0)
637+
638+
def time_series_numba(self):
639+
def function(values, index):
640+
return values * 5
641+
642+
self.grouper[1].transform(function, engine="numba")
643+
644+
def time_series_cython(self):
645+
def function(values):
646+
return values * 5
647+
648+
self.grouper[1].transform(function, engine="cython")
649+
650+
def time_dataframe_numba(self):
651+
def function(values, index):
652+
return values * 5
653+
654+
self.grouper.transform(function, engine="numba")
655+
656+
def time_dataframe_cython(self):
657+
def function(values):
658+
return values * 5
659+
660+
self.grouper.transform(function, engine="cython")
661+
662+
663+
class AggEngine:
664+
def setup(self):
665+
N = 10 ** 3
666+
data = DataFrame(
667+
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
668+
columns=[0, 1],
669+
)
670+
self.grouper = data.groupby(0)
671+
672+
def time_series_numba(self):
673+
def function(values, index):
674+
total = 0
675+
for i, value in enumerate(values):
676+
if i % 2:
677+
total += value + 5
678+
else:
679+
total += value * 2
680+
return total
681+
682+
self.grouper[1].agg(function, engine="numba")
683+
684+
def time_series_cython(self):
685+
def function(values):
686+
total = 0
687+
for i, value in enumerate(values):
688+
if i % 2:
689+
total += value + 5
690+
else:
691+
total += value * 2
692+
return total
693+
694+
self.grouper[1].agg(function, engine="cython")
695+
696+
def time_dataframe_numba(self):
697+
def function(values, index):
698+
total = 0
699+
for i, value in enumerate(values):
700+
if i % 2:
701+
total += value + 5
702+
else:
703+
total += value * 2
704+
return total
705+
706+
self.grouper.agg(function, engine="numba")
707+
708+
def time_dataframe_cython(self):
709+
def function(values):
710+
total = 0
711+
for i, value in enumerate(values):
712+
if i % 2:
713+
total += value + 5
714+
else:
715+
total += value * 2
716+
return total
717+
718+
self.grouper.agg(function, engine="cython")
719+
720+
629721
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/stat_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
if op == "mad" and dtype == "Int64" and axis == 1:
15-
# GH-33036
14+
if op == "mad" and dtype == "Int64":
15+
# GH-33036, GH#33600
1616
raise NotImplementedError
1717
values = np.random.randn(100000, 4)
1818
if dtype == "Int64":

ci/code_checks.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
150150
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
151151
invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
152152
RET=$(($RET + $?)) ; echo $MSG "DONE"
153-
invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests
153+
invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
154+
RET=$(($RET + $?)) ; echo $MSG "DONE"
155+
156+
# No direct imports from conftest
157+
invgrep -R --include="*.py*" -E "conftest import" pandas/tests
158+
RET=$(($RET + $?)) ; echo $MSG "DONE"
159+
invgrep -R --include="*.py*" -E "import conftest" pandas/tests
154160
RET=$(($RET + $?)) ; echo $MSG "DONE"
155161

156162
MSG='Check for use of exec' ; echo $MSG

ci/deps/azure-36-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies:
2121
- numexpr=2.6.2
2222
- numpy=1.13.3
2323
- openpyxl=2.5.7
24-
- pytables=3.4.2
24+
- pytables=3.4.3
2525
- python-dateutil=2.7.3
2626
- pytz=2017.2
2727
- scipy=0.19.0

doc/source/getting_started/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ data set, a sliding window of the data or grouped by categories. The latter is a
398398
<div class="card-body">
399399

400400
Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
401-
from long to wide format. With aggregations built-in, a pivot table is created with a sinlge command.
401+
from long to wide format. With aggregations built-in, a pivot table is created with a single command.
402402

403403
.. image:: ../_static/schemas/07_melt.svg
404404
:align: center

doc/source/getting_started/install.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref
262262
Jinja2 Conditional formatting with DataFrame.style
263263
PyQt4 Clipboard I/O
264264
PyQt5 Clipboard I/O
265-
PyTables 3.4.2 HDF5-based reading / writing
265+
PyTables 3.4.3 HDF5-based reading / writing
266266
SQLAlchemy 1.1.4 SQL support for databases other than sqlite
267267
SciPy 0.19.0 Miscellaneous statistical functions
268268
XLsxWriter 0.9.8 Excel writing
@@ -279,7 +279,7 @@ psycopg2 PostgreSQL engine for sqlalchemy
279279
pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing
280280
pymysql 0.7.11 MySQL engine for sqlalchemy
281281
pyreadstat SPSS files (.sav) reading
282-
pytables 3.4.2 HDF5 reading / writing
282+
pytables 3.4.3 HDF5 reading / writing
283283
pyxlsb 1.0.6 Reading for xlsb files
284284
qtpy Clipboard I/O
285285
s3fs 0.3.0 Amazon S3 access

doc/source/getting_started/intro_tutorials/03_subset_data.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<div class="card-body">
2424
<p class="card-text">
2525

26-
This tutorial uses the titanic data set, stored as CSV. The data
26+
This tutorial uses the Titanic data set, stored as CSV. The data
2727
consists of the following data columns:
2828

2929
- PassengerId: Id of every passenger.
@@ -72,7 +72,7 @@ How do I select specific columns from a ``DataFrame``?
7272
<ul class="task-bullet">
7373
<li>
7474

75-
I’m interested in the age of the titanic passengers.
75+
I’m interested in the age of the Titanic passengers.
7676

7777
.. ipython:: python
7878
@@ -111,7 +111,7 @@ the number of rows is returned.
111111
<ul class="task-bullet">
112112
<li>
113113

114-
I’m interested in the age and sex of the titanic passengers.
114+
I’m interested in the age and sex of the Titanic passengers.
115115

116116
.. ipython:: python
117117
@@ -198,7 +198,7 @@ can be used to filter the ``DataFrame`` by putting it in between the
198198
selection brackets ``[]``. Only rows for which the value is ``True``
199199
will be selected.
200200

201-
We now from before that the original titanic ``DataFrame`` consists of
201+
We know from before that the original Titanic ``DataFrame`` consists of
202202
891 rows. Let’s have a look at the number of rows which satisfy the
203203
condition by checking the ``shape`` attribute of the resulting
204204
``DataFrame`` ``above_35``:
@@ -212,7 +212,7 @@ condition by checking the ``shape`` attribute of the resulting
212212
<ul class="task-bullet">
213213
<li>
214214

215-
I’m interested in the titanic passengers from cabin class 2 and 3.
215+
I’m interested in the Titanic passengers from cabin class 2 and 3.
216216

217217
.. ipython:: python
218218

doc/source/getting_started/intro_tutorials/10_text_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ names in the ``Name`` column. By using pandas string methods, the
199199
200200
Next, we need to get the corresponding location, preferably the index
201201
label, in the table for which the name length is the largest. The
202-
:meth:`~Series.idxmax`` method does exactly that. It is not a string method and is
202+
:meth:`~Series.idxmax` method does exactly that. It is not a string method and is
203203
applied to integers, so no ``str`` is used.
204204

205205
.. ipython:: python

0 commit comments

Comments
 (0)