HDFStore: honour min_itemsize for named index axes (GH 11364)

* validate_min_itemsize expands a generic 'index' key to the index axes
  on a copy of the mapping and returns the corrected mapping.
* create_axes names an index axis after the index itself when it is
  named, and sizes the axes only after min_itemsize has been validated.
* The call site keeps the caller's min_itemsize when
  validate_min_itemsize takes one of its early exits (which return None).
* Whitespace-only changes were dropped from the patch.

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3090,6 +3090,16 @@ def validate_min_itemsize(self, min_itemsize):
             return
 
         q = self.queryables()
+
+        # GH 11364: a generic 'index' key applies to every index axis when
+        # no axis is literally named 'index'; work on a copy so the caller's
+        # dict is left untouched and explicit per-axis sizes still win
+        if 'index' in min_itemsize and 'index' not in q:
+            min_itemsize = dict(min_itemsize)
+            index_size = min_itemsize.pop('index')
+            for a in self.index_axes:
+                min_itemsize.setdefault(a.name, index_size)
+
         for k, v in min_itemsize.items():
 
             # ok, apply generally
@@ -3099,6 +3109,7 @@ def validate_min_itemsize(self, min_itemsize):
                 raise ValueError(
                     "min_itemsize has the key [%s] which is not an axis or "
                     "data_column" % k)
+        return min_itemsize
 
     @property
     def indexables(self):
@@ -3321,7 +3332,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
         for i, a in enumerate(obj.axes):
 
             if i in axes:
-                name = obj._AXIS_NAMES[i]
+                # prefer the axis' own name (GH 11364), falling back to the
+                # generic axis name when the axis is unnamed
+                name = getattr(obj, obj._AXIS_NAMES[i]).name
+                if name is None:
+                    name = obj._AXIS_NAMES[i]
                 index_axes_map[i] = _convert_index(
                     a, self.encoding, self.format_type
                 ).set_name(name).set_axis(i)
@@ -3353,11 +3368,6 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
         ]
         j = len(self.index_axes)
 
-        # check for column conflicts
-        if validate:
-            for a in self.axes:
-                a.maybe_set_size(min_itemsize=min_itemsize)
-
         # reindex by our non_index_axes & compute data_columns
         for a in self.non_index_axes:
             obj = _reindex_axis(obj, a[0], a[1])
@@ -3456,9 +3466,6 @@ def get_blk_items(mgr, blocks):
                 )
             j += 1
 
-        # validate our min_itemsize
-        self.validate_min_itemsize(min_itemsize)
-
         # validate our metadata
         self.validate_metadata(existing_table)
 
@@ -3466,6 +3473,18 @@ def get_blk_items(mgr, blocks):
         if validate:
             self.validate(existing_table)
 
+        # validate and correct our min_itemsize (GH 11364); the early exits
+        # of validate_min_itemsize return None, so keep the caller's value
+        # in that case
+        validated = self.validate_min_itemsize(min_itemsize)
+        if validated is not None:
+            min_itemsize = validated
+
+        # check for column conflicts
+        if validate:
+            for a in self.axes:
+                a.maybe_set_size(min_itemsize=min_itemsize)
+
     def process_axes(self, obj, columns=None):
         """ process axes filters """
 
diff --git a/pandas/io/tests/test_hdf5_index_11364.py b/pandas/io/tests/test_hdf5_index_11364.py
new file mode 100644
--- /dev/null
+++ b/pandas/io/tests/test_hdf5_index_11364.py
@@ -0,0 +1,72 @@
+"""Regression tests for GH 11364.
+
+Appending chunks to a table-format HDFStore while passing ``min_itemsize``
+keyed by the index level names (or by the generic ``'index'`` key) must
+work for named and multi-level indexes.
+"""
+import os
+import shutil
+import tempfile
+
+import numpy as np
+import pandas as pd
+
+GROUP = "x"
+CHUNKSIZE = 5
+ITEMSIZE = 32
+N_ROWS = 6
+N_COLS = 3
+
+
+def _roundtrip(index_col, min_itemsize=None):
+    """Write the sample table to HDF5 chunk by chunk and read it back.
+
+    When ``min_itemsize`` is None, one entry per index level name of the
+    chunk is used.
+    """
+    # the last row only arrives with the second chunk and holds the
+    # longest V1 string, which is why min_itemsize is passed on append
+    df = pd.DataFrame({"V1": ["a", "b", "c", "d", "e", "aaaah!!!"],
+                       "W": ["c", "d", "c", "d", "c", "c"],
+                       "ZZZ": np.arange(N_ROWS)})
+    df.set_index(["V1", "W"], inplace=True)
+
+    tmpdir = tempfile.mkdtemp()
+    try:
+        table_path = os.path.join(tmpdir, "testtable.tab")
+        store_path = os.path.join(tmpdir, "tempstore.h5")
+        df.to_csv(table_path, sep="\t")
+
+        with pd.HDFStore(store_path) as store:
+            chunks = pd.read_table(table_path, sep="\t",
+                                   chunksize=CHUNKSIZE,
+                                   index_col=index_col)
+            for chunk in chunks:
+                sizes = min_itemsize
+                if sizes is None:
+                    sizes = dict((name, ITEMSIZE)
+                                 for name in chunk.index.names)
+                store.append(GROUP, chunk, format="table",
+                             min_itemsize=sizes)
+            return store.get(GROUP)
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def _check_shape(result, n_index_levels):
+    """The index levels are not counted as columns."""
+    assert result.shape == (N_ROWS, N_COLS - n_index_levels), "wrong shape"
+
+
+def test_write_read_hdf5_11364_indexcol():
+    # the generic 'index' key must be accepted for a named index
+    result = _roundtrip(index_col=0, min_itemsize={"index": ITEMSIZE})
+    _check_shape(result, 1)
+
+
+def test_write_read_hdf5_11364_1col():
+    _check_shape(_roundtrip(index_col=[0]), 1)
+
+
+def test_write_read_hdf5_11364_2col():
+    _check_shape(_roundtrip(index_col=[0, 1]), 2)