From a4612085e40dfded55cf00a05b01f789a8bcf784 Mon Sep 17 00:00:00 2001
From: Shawn Heide <heidesw@uw.edu>
Date: Mon, 25 Jul 2016 16:13:16 -0700
Subject: [PATCH] BUG: fix categories in HDFStore not filtering correctly
 (#13322)

---
 doc/source/whatsnew/v0.19.0.txt  |  1 +
 pandas/computation/pytables.py   |  5 +++++
 pandas/io/tests/test_pytables.py | 30 ++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 11d2fab464d1f..24c38b5c660a9 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -788,3 +788,4 @@ Bug Fixes
 - Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`)
 
 - Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`)
+- Bug in ``pd.read_hdf()`` where incorrect results were returned when the ``HDFStore`` contained a ``DataFrame`` with a categorical column and the query did not match any value (:issue:`13792`)
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
index e375716b0d606..a4dd03a0fa7ee 100644
--- a/pandas/computation/pytables.py
+++ b/pandas/computation/pytables.py
@@ -198,6 +198,11 @@ def stringify(value):
         elif meta == u('category'):
             metadata = com._values_from_object(self.metadata)
             result = metadata.searchsorted(v, side='left')
+            # searchsorted yields an insertion index even when v is absent
+            # (0, another category's position, or len(metadata)); fall
+            # back to the -1 sentinel for any v that is not in metadata
+            if v not in metadata:
+                result = -1
             return TermValue(result, result, u('integer'))
         elif kind == u('integer'):
             v = int(float(v))
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index f95e764ad4da3..e214ea5237f30 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4733,6 +4733,36 @@ def test_categorical(self):
             self.assertRaises(
                 KeyError, lambda: store.select('df3/meta/s/meta'))
 
+    def test_categorical_conversion(self):
+
+        # GH13322
+        # read_hdf must return no rows when the where clause matches no value,
+        # for non-categorical columns and for categorical columns alike.
+        obsids = ['ESP_012345_6789', 'ESP_987654_3210']
+        imgids = ['APF00006np', 'APF0001imm']
+        data = [4.3, 9.8]
+
+        # Test without categories; 'B' is not one of the stored obsids
+        df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))
+
+        # Expect an empty frame with the same columns and dtypes as df
+        expected = df.iloc[[], :]
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table', data_columns=True)
+            result = read_hdf(path, 'df', where='obsids=B')
+            tm.assert_frame_equal(result, expected)
+
+        # Test with categories; 'B' is absent and sorts before both obsids
+        df.obsids = df.obsids.astype('category')
+        df.imgids = df.imgids.astype('category')
+
+        # Expect an empty frame with the same columns and dtypes as df
+        expected = df.iloc[[], :]
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table', data_columns=True)
+            result = read_hdf(path, 'df', where='obsids=B')
+            tm.assert_frame_equal(result, expected)
+
     def test_duplicate_column_name(self):
         df = DataFrame(columns=["a", "a"], data=[[0, 0]])