Skip to content

MAINT: Enforce string type for where parameter #15798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,7 @@ Removal of prior version deprecations/changes
- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`)
- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`)
- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`)
- Where clauses in ``pytables`` are only accepted as strings and expression types, not as other data types (:issue:`12027`)
- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`)

.. _whatsnew_0200.performance:
Expand Down
89 changes: 29 additions & 60 deletions pandas/computation/pytables.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
""" manage PyTables query interface via Expressions """

import ast
import warnings
from functools import partial
from datetime import datetime, timedelta
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -452,6 +450,32 @@ def _rewrite_membership_op(self, node, left, right):
return self.visit(node.op), node.op, left, right


def _validate_where(w):
"""
Validate that the where statement is of the right type.

The type may either be a string, an Expr, or a list-like of Exprs.
Only the type of ``w`` itself is checked here; the elements of a
list-like are not inspected by this function.

Parameters
----------
w : String term expression, Expr, or list-like of Exprs.

Returns
-------
where : The original where clause, unchanged, if the check was successful.

Raises
------
TypeError : ``w`` is not a string, not an Expr, and not list-like.

Notes
-----
NOTE(review): whether a mapping such as a dict is rejected depends on how
``is_list_like`` treats mappings -- TODO confirm before relying on it.
"""

if not (isinstance(w, (Expr, string_types)) or is_list_like(w)):
raise TypeError("where must be passed as a string, Expr, "
"or list-like of Exprs")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could return w here
makes it a bit more natural to use it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Done.

return w


class Expr(expr.Expr):

""" hold a pytables like expression, comprised of possibly multiple 'terms'
Expand Down Expand Up @@ -481,11 +505,9 @@ class Expr(expr.Expr):
"major_axis>=20130101"
"""

def __init__(self, where, op=None, value=None, queryables=None,
encoding=None, scope_level=0):
def __init__(self, where, queryables=None, encoding=None, scope_level=0):

# try to be back compat
where = self.parse_back_compat(where, op, value)
where = _validate_where(where)

self.encoding = encoding
self.condition = None
Expand All @@ -505,7 +527,7 @@ def __init__(self, where, op=None, value=None, queryables=None,
if isinstance(w, Expr):
local_dict = w.env.scope
else:
w = self.parse_back_compat(w)
w = _validate_where(w)
where[idx] = w
where = ' & ' .join(["(%s)" % w for w in where]) # noqa

Expand All @@ -519,59 +541,6 @@ def __init__(self, where, op=None, value=None, queryables=None,
encoding=encoding)
self.terms = self.parse()

def parse_back_compat(self, w, op=None, value=None):
""" allow backward compatibility for passed arguments

Turns the deprecated dict, tuple and separate ``op`` / ``value`` forms of
a where clause into a single string by formatting the pieces together.

Parameters
----------
w : string, dict or tuple
A dict is read through its 'field', 'op' and 'value' keys.
A 2-tuple is unpacked as (field, value) with ``op`` set to '==';
a 3-tuple is unpacked as (field, op, value).
op : string, optional
Appended to ``w`` with ``"{0}{1}".format``.
value : object, optional
Appended to ``w`` after ``op``. datetime, np.datetime64, timedelta and
np.timedelta64 values are wrapped in single quotes first; for a list or
tuple each element is converted and the result is a list.

Returns
-------
w : the (possibly rewritten) where clause.

Raises
------
TypeError : ``w`` is not a string while op/value are passed, or ``op`` or
``value`` is an Expr.

Warns
-----
FutureWarning : for the dict form, the tuple form, and when multiple
values are passed.
"""

# deprecated dict form: {'field': ..., 'op': ..., 'value': ...}
if isinstance(w, dict):
w, op, value = w.get('field'), w.get('op'), w.get('value')
if not isinstance(w, string_types):
raise TypeError(
"where must be passed as a string if op/value are passed")
warnings.warn("passing a dict to Expr is deprecated, "
"pass the where as a single string",
FutureWarning, stacklevel=10)
# deprecated tuple form: (field, value) or (field, op, value)
if isinstance(w, tuple):
if len(w) == 2:
w, value = w
# a 2-tuple carries no operator, so equality is used
op = '=='
elif len(w) == 3:
w, op, value = w
warnings.warn("passing a tuple into Expr is deprecated, "
"pass the where as a single string",
FutureWarning, stacklevel=10)

if op is not None:
if not isinstance(w, string_types):
raise TypeError(
"where must be passed as a string if op/value are passed")

if isinstance(op, Expr):
raise TypeError("invalid op passed, must be a string")
w = "{0}{1}".format(w, op)
if value is not None:
if isinstance(value, Expr):
raise TypeError("invalid value passed, must be a string")

# stringify with quotes these values
def convert(v):
if isinstance(v, (datetime, np.datetime64,
timedelta, np.timedelta64)):
return "'{0}'".format(v)
return v

if isinstance(value, (list, tuple)):
value = [convert(v) for v in value]
else:
value = convert(value)

w = "{0}{1}".format(w, value)

warnings.warn("passing multiple values to Expr is deprecated, "
"pass the where as a single string",
FutureWarning, stacklevel=10)

return w

def __unicode__(self):
if self.terms is not None:
return pprint_thing(self.terms)
Expand Down
53 changes: 0 additions & 53 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2585,59 +2585,6 @@ def test_term_compat(self):
expected = wp.loc[:, :, ['A', 'B']]
assert_panel_equal(result, expected)

def test_backwards_compat_without_term_object(self):
# Exercises where clauses given as plain strings and as
# (field, op, value) tuples instead of term/Expr objects.
with ensure_clean_store(self.path) as store:

wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
major_axis=date_range('1/1/2000', periods=5),
minor_axis=['A', 'B', 'C', 'D'])
store.append('wp', wp)
# the select runs with warnings recorded rather than raised;
# the where list mixes a string term with a 3-tuple term
with catch_warnings(record=True):
result = store.select('wp', [('major_axis>20000102'),
('minor_axis', '=', ['A', 'B'])])
expected = wp.loc[:,
wp.major_axis > Timestamp('20000102'),
['A', 'B']]
assert_panel_equal(result, expected)

# note: ('major_axis>20000103') is a parenthesized string, not a tuple
store.remove('wp', ('major_axis>20000103'))
result = store.select('wp')
expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :]
assert_panel_equal(result, expected)

with ensure_clean_store(self.path) as store:

wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
major_axis=date_range('1/1/2000', periods=5),
minor_axis=['A', 'B', 'C', 'D'])
store.append('wp', wp)

# stringified datetimes
# (tuple terms whose value is a datetime.datetime, or a list of them)
with catch_warnings(record=True):
result = store.select('wp',
[('major_axis',
'>',
datetime.datetime(2000, 1, 2))])
expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
assert_panel_equal(result, expected)
with catch_warnings(record=True):
result = store.select('wp',
[('major_axis',
'>',
datetime.datetime(2000, 1, 2, 0, 0))])
expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
assert_panel_equal(result, expected)
with catch_warnings(record=True):
result = store.select('wp',
[('major_axis',
'=',
[datetime.datetime(2000, 1, 2, 0, 0),
datetime.datetime(2000, 1, 3, 0, 0)])]
)
expected = wp.loc[:, [Timestamp('20000102'),
Timestamp('20000103')]]
assert_panel_equal(result, expected)

def test_same_name_scoping(self):

with ensure_clean_store(self.path) as store:
Expand Down