Skip to content

Commit 075eca1

Browse files
authored
DEPR: allow options for using bottleneck/numexpr (#16157)
* DEPR: allow options for using bottleneck/numexpr; deprecate pd.computation.expressions.set_use_numexpr() * DEPR: pandas.types.concat.union_categoricals in favor of pandas.api.types.union_categoricals; closes #16140
1 parent 669973a commit 075eca1

File tree

17 files changed

+215
-94
lines changed

17 files changed

+215
-94
lines changed

doc/source/basics.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ Accelerated operations
9393
----------------------
9494

9595
pandas has support for accelerating certain types of binary numerical and boolean operations using
96-
the ``numexpr`` library (starting in 0.11.0) and the ``bottleneck`` libraries.
96+
the ``numexpr`` library and the ``bottleneck`` libraries.
9797

9898
These libraries are especially useful when dealing with large data sets, and provide large
9999
speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is
@@ -114,6 +114,15 @@ Here is a sample (using 100 column x 100,000 row ``DataFrames``):
114114
You are highly encouraged to install both libraries. See the section
115115
:ref:`Recommended Dependencies <install.recommended_dependencies>` for more installation info.
116116

117+
Both libraries are enabled by default; you can control this by setting the options:
118+
119+
.. versionadded:: 0.20.0
120+
121+
.. code-block:: python
122+
123+
pd.set_option('compute.use_bottleneck', False)
124+
pd.set_option('compute.use_numexpr', False)
125+
117126
.. _basics.binop:
118127

119128
Flexible binary operations

doc/source/options.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,10 @@ mode.use_inf_as_null False True means treat None, NaN, -IN
425425
INF as null (old way), False means
426426
None and NaN are null, but INF, -INF
427427
are not null (new way).
428+
compute.use_bottleneck True Use the bottleneck library to accelerate
429+
computation if it is installed
430+
compute.use_numexpr True Use the numexpr library to accelerate
431+
computation if it is installed
428432
=================================== ============ ==================================
429433

430434

@@ -538,4 +542,4 @@ Only ``'display.max_rows'`` are serialized and published.
538542
.. ipython:: python
539543
:suppress:
540544
541-
pd.reset_option('display.html.table_schema')
545+
pd.reset_option('display.html.table_schema')

doc/source/whatsnew/v0.20.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ Other Enhancements
521521
- The ``display.show_dimensions`` option can now also be used to specify
522522
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
523523
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
524+
- Added options to turn the use of ``bottleneck`` and ``numexpr`` on or off, see :ref:`here <basics.accelerate>` (:issue:`16157`)
524525

525526

526527
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
@@ -1217,7 +1218,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul
12171218

12181219
"pandas.lib", "pandas._libs.lib", "X"
12191220
"pandas.tslib", "pandas._libs.tslib", "X"
1220-
"pandas.computation", "pandas.core.computation", ""
1221+
"pandas.computation", "pandas.core.computation", "X"
12211222
"pandas.msgpack", "pandas.io.msgpack", ""
12221223
"pandas.index", "pandas._libs.index", ""
12231224
"pandas.algos", "pandas._libs.algos", ""

pandas/computation/__init__.py

Whitespace-only changes.

pandas/computation/expressions.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import warnings
2+
3+
4+
def set_use_numexpr(v=True):
    """
    Deprecated shim that forwards to the ``compute.use_numexpr`` option.

    Emits a ``FutureWarning`` and then sets the option to ``v``.

    Parameters
    ----------
    v : bool, default True
        Value to store in the ``compute.use_numexpr`` option.
    """
    # The option is changed with set_option; get_option only reads it,
    # so the message must point users at set_option.
    warnings.warn("pandas.computation.expressions.set_use_numexpr is "
                  "deprecated and will be removed in a future version.\n"
                  "you can toggle usage of numexpr via "
                  "pandas.set_option('compute.use_numexpr', value)",
                  FutureWarning, stacklevel=2)
    # imported inside the function, as in the original shim
    from pandas import set_option
    set_option('compute.use_numexpr', v)

pandas/core/computation/expressions.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
from pandas.core.common import _values_from_object
1212
from pandas.core.computation import _NUMEXPR_INSTALLED
13+
from pandas.core.config import get_option
1314

1415
if _NUMEXPR_INSTALLED:
1516
import numexpr as ne
@@ -156,7 +157,7 @@ def _where_numexpr(cond, a, b, raise_on_error=False):
156157

157158

158159
# turn myself on
159-
set_use_numexpr(True)
160+
set_use_numexpr(get_option('compute.use_numexpr'))
160161

161162

162163
def _has_bool_dtype(x):

pandas/core/config_init.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,41 @@
1515
from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory,
1616
is_one_of_factory, get_default_val,
1717
is_callable)
18-
from pandas.io.formats.format import detect_console_encoding
18+
from pandas.io.formats.console import detect_console_encoding
1919

20+
# compute
21+
22+
# Help text for the ``use_bottleneck`` option; wording kept parallel to
# the numexpr option's help text ("accelerate computation").
use_bottleneck_doc = """
: bool
    Use the bottleneck library to accelerate computation if it is installed,
    the default is True
    Valid values: False,True
"""
28+
29+
30+
def use_bottleneck_cb(key):
    """
    Option callback: read the current value of option ``key`` and hand it
    to ``nanops.set_use_bottleneck``.
    """
    # imported at call time, as in the original
    # (presumably to avoid an import cycle -- TODO confirm)
    from pandas.core import nanops
    apply_setting = nanops.set_use_bottleneck
    apply_setting(cf.get_option(key))
33+
34+
35+
use_numexpr_doc = """
36+
: bool
37+
Use the numexpr library to accelerate computation if it is installed,
38+
the default is True
39+
Valid values: False,True
40+
"""
41+
42+
43+
def use_numexpr_cb(key):
    """
    Option callback: read the current value of option ``key`` and hand it
    to ``expressions.set_use_numexpr``.
    """
    # imported at call time, as in the original
    # (presumably to avoid an import cycle -- TODO confirm)
    from pandas.core.computation import expressions
    apply_setting = expressions.set_use_numexpr
    apply_setting(cf.get_option(key))
46+
47+
48+
with cf.config_prefix('compute'):
49+
cf.register_option('use_bottleneck', True, use_bottleneck_doc,
50+
validator=is_bool, cb=use_bottleneck_cb)
51+
cf.register_option('use_numexpr', True, use_numexpr_doc,
52+
validator=is_bool, cb=use_numexpr_cb)
2053
#
2154
# options from the "display" namespace
2255

pandas/core/frame.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
import pandas.core.nanops as nanops
9292
import pandas.core.ops as ops
9393
import pandas.io.formats.format as fmt
94+
import pandas.io.formats.console as console
9495
from pandas.io.formats.printing import pprint_thing
9596
import pandas.plotting._core as gfx
9697

@@ -513,7 +514,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):
513514
GH3541, GH3573
514515
"""
515516

516-
width, height = fmt.get_console_size()
517+
width, height = console.get_console_size()
517518
max_columns = get_option("display.max_columns")
518519
nb_columns = len(self.columns)
519520

@@ -577,7 +578,7 @@ def __unicode__(self):
577578
max_cols = get_option("display.max_columns")
578579
show_dimensions = get_option("display.show_dimensions")
579580
if get_option("display.expand_frame_repr"):
580-
width, _ = fmt.get_console_size()
581+
width, _ = console.get_console_size()
581582
else:
582583
width = None
583584
self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,

pandas/core/indexes/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ def _format_data(self):
837837
"""
838838
Return the formatted data as a unicode string
839839
"""
840-
from pandas.io.formats.format import get_console_size, _get_adjustment
840+
from pandas.io.formats.console import get_console_size
841+
from pandas.io.formats.format import _get_adjustment
841842
display_width, _ = get_console_size()
842843
if display_width is None:
843844
display_width = get_option('display.width') or 80

pandas/core/nanops.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
11
import itertools
22
import functools
3-
import numpy as np
43
import operator
54

6-
try:
7-
import bottleneck as bn
8-
_USE_BOTTLENECK = True
9-
except ImportError: # pragma: no cover
10-
_USE_BOTTLENECK = False
11-
5+
import numpy as np
126
from pandas import compat
137
from pandas._libs import tslib, algos, lib
148
from pandas.core.dtypes.common import (
@@ -23,9 +17,27 @@
2317
is_int_or_datetime_dtype, is_any_int_dtype)
2418
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
2519
from pandas.core.dtypes.missing import isnull, notnull
26-
20+
from pandas.core.config import get_option
2721
from pandas.core.common import _values_from_object
2822

23+
try:
24+
import bottleneck as bn
25+
_BOTTLENECK_INSTALLED = True
26+
except ImportError: # pragma: no cover
27+
_BOTTLENECK_INSTALLED = False
28+
29+
_USE_BOTTLENECK = False
30+
31+
32+
def set_use_bottleneck(v=True):
    """
    Set the module-level ``_USE_BOTTLENECK`` flag to ``v``.

    The flag is only updated when ``_BOTTLENECK_INSTALLED`` is true;
    otherwise the call leaves it unchanged.
    """
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        # bottleneck could not be imported: leave the flag untouched
        return
    _USE_BOTTLENECK = v
37+
38+
39+
set_use_bottleneck(get_option('compute.use_bottleneck'))
40+
2941

3042
class disallow(object):
3143

0 commit comments

Comments
 (0)