From de27eef23acf377c1eed1c117765ff4bf81a6d2f Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Fri, 10 May 2013 21:39:40 -0400
Subject: [PATCH 1/7] ENH: to_csv write multi-index columns similar to how they
 are displayed in to_string

---
 pandas/core/format.py      | 39 ++++++++++++++++++++++++++++++++++----
 pandas/tests/test_frame.py | 12 +++++++++++-
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/pandas/core/format.py b/pandas/core/format.py
index bea4b59bfaaa4..285d50373d811 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -959,9 +959,12 @@ def _save_header(self):
         index_label = self.index_label
         cols = self.cols
         header = self.header
+        has_mi_columns = isinstance(obj.columns, MultiIndex)
+        encoded_labels = []
 
         has_aliases = isinstance(header, (tuple, list, np.ndarray))
         if has_aliases or self.header:
+
             if self.index:
                 # should write something for index label
                 if index_label is not False:
@@ -994,12 +997,40 @@ def _save_header(self):
                         write_cols = header
                 else:
                     write_cols = cols
-                encoded_cols = list(write_cols)
 
-                writer.writerow(encoded_labels + encoded_cols)
+                if not has_mi_columns:
+                    encoded_labels += list(write_cols)
+
             else:
-                encoded_cols = list(cols)
-                writer.writerow(encoded_cols)
+
+                if not has_mi_columns:
+                    encoded_labels += list(cols)
+
+        # write out the mi
+        if has_mi_columns:
+            columns = obj.columns
+
+            # write out the names for each level, then ALL of the values for each level
+            for i in range(columns.nlevels):
+
+                # name is the first column
+                col_line = [ columns.names[i] ]
+
+                # skipp len labels-1
+                if self.index and isinstance(index_label,list) and len(index_label)>1:
+                    col_line.extend([ '' ] * (len(index_label)-1))
+
+                for j in range(len(columns)):
+                    col_line.append(columns.levels[i][j])
+
+                writer.writerow(col_line)
+
+            # add blanks for the columns, so that we
+            # have consistent seps
+            encoded_labels.extend([ '' ] * len(columns))
+
+        # write out the index label line
+        writer.writerow(encoded_labels)
 
     def _save(self):
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 8e48ef094c419..d8eb2748dda29 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4962,6 +4962,7 @@ def test_to_csv_multiindex(self):
         frame.index = new_index
 
         with ensure_clean(pname) as path:
+
              frame.to_csv(path, header=False)
              frame.to_csv(path, cols=['A', 'B'])
 
@@ -4973,7 +4974,7 @@ def test_to_csv_multiindex(self):
              self.assertEqual(frame.index.names, df.index.names)
              self.frame.index = old_index  # needed if setUP becomes a classmethod
 
-              # try multiindex with dates
+             # try multiindex with dates
              tsframe = self.tsframe
              old_index = tsframe.index
              new_index = [old_index, np.arange(len(old_index))]
@@ -4994,6 +4995,15 @@ def test_to_csv_multiindex(self):
              assert_almost_equal(recons.values, self.tsframe.values)
              self.tsframe.index = old_index  # needed if setUP becomes classmethod
 
+        with ensure_clean(pname) as path:
+            # column & index are mi
+            import pdb; pdb.set_trace()
+            df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+            df.to_csv(path)
+
+            result = pd.read_csv(path,header=[0,1,2,3],index_col=[0,1])
+
+
         with ensure_clean(pname) as path:
             # empty
             tsframe[:0].to_csv(path)

From cc93d614eaa3e3c46daf340a4aae58b56a0fa226 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Fri, 10 May 2013 23:47:53 -0400
Subject: [PATCH 2/7] ENH: Allow read_csv to handle multi-index in columns

     GH3571, GH1651, GH3141
---
 pandas/core/format.py           |  69 +++++++++----------
 pandas/io/parsers.py            |  53 +++++++++++----
 pandas/io/tests/test_cparser.py |   2 +-
 pandas/src/parser.pyx           | 113 ++++++++++++++++++++------------
 pandas/src/parser/tokenizer.c   |   4 +-
 pandas/src/parser/tokenizer.h   |   2 +
 pandas/tests/test_frame.py      |  14 ++--
 7 files changed, 162 insertions(+), 95 deletions(-)

diff --git a/pandas/core/format.py b/pandas/core/format.py
index 285d50373d811..2eaa17bc659c3 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -963,48 +963,49 @@ def _save_header(self):
         encoded_labels = []
 
         has_aliases = isinstance(header, (tuple, list, np.ndarray))
-        if has_aliases or self.header:
+        if not (has_aliases or self.header):
+            return
 
-            if self.index:
-                # should write something for index label
-                if index_label is not False:
-                    if index_label is None:
-                        if isinstance(obj.index, MultiIndex):
-                            index_label = []
-                            for i, name in enumerate(obj.index.names):
-                                if name is None:
-                                    name = ''
-                                index_label.append(name)
+        if self.index:
+            # should write something for index label
+            if index_label is not False:
+                if index_label is None:
+                    if isinstance(obj.index, MultiIndex):
+                        index_label = []
+                        for i, name in enumerate(obj.index.names):
+                            if name is None:
+                                name = ''
+                            index_label.append(name)
+                    else:
+                        index_label = obj.index.name
+                        if index_label is None:
+                            index_label = ['']
                         else:
-                            index_label = obj.index.name
-                            if index_label is None:
-                                index_label = ['']
-                            else:
-                                index_label = [index_label]
-                    elif not isinstance(index_label, (list, tuple, np.ndarray)):
-                        # given a string for a DF with Index
-                        index_label = [index_label]
+                            index_label = [index_label]
+                elif not isinstance(index_label, (list, tuple, np.ndarray)):
+                    # given a string for a DF with Index
+                    index_label = [index_label]
 
-                    encoded_labels = list(index_label)
-                else:
-                    encoded_labels = []
+                encoded_labels = list(index_label)
+            else:
+                encoded_labels = []
 
-                if has_aliases:
-                    if len(header) != len(cols):
-                        raise ValueError(('Writing %d cols but got %d aliases'
-                                          % (len(cols), len(header))))
-                    else:
-                        write_cols = header
+            if has_aliases:
+                if len(header) != len(cols):
+                    raise ValueError(('Writing %d cols but got %d aliases'
+                                      % (len(cols), len(header))))
                 else:
-                    write_cols = cols
+                    write_cols = header
+            else:
+                write_cols = cols
 
-                if not has_mi_columns:
-                    encoded_labels += list(write_cols)
+            if not has_mi_columns:
+                encoded_labels += list(write_cols)
 
-            else:
+        else:
 
-                if not has_mi_columns:
-                    encoded_labels += list(cols)
+            if not has_mi_columns:
+                encoded_labels += list(cols)
 
         # write out the mi
         if has_mi_columns:
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 044b25041afd9..dca3dfb5e5cec 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -677,10 +677,8 @@ def read(self, nrows=None):
         if self.options.get('as_recarray'):
             return ret
 
-        index, columns, col_dict = ret
-
         # May alter columns / col_dict
-        # index, columns, col_dict = self._create_index(col_dict, columns)
+        index, columns, col_dict = self._create_index(ret)
 
         df = DataFrame(col_dict, columns=columns, index=index)
 
@@ -688,8 +686,9 @@ def read(self, nrows=None):
             return df[df.columns[0]]
         return df
 
-    def _create_index(self, col_dict, columns):
-        pass
+    def _create_index(self, ret):
+        index, columns, col_dict = ret
+        return index, columns, col_dict
 
     def get_chunk(self, size=None):
         if size is None:
@@ -709,6 +708,7 @@ def __init__(self, kwds):
 
         self.index_col = kwds.pop('index_col', None)
         self.index_names = None
+        self.col_names = None
 
         self.parse_dates = kwds.pop('parse_dates', False)
         self.date_parser = kwds.pop('date_parser', None)
@@ -942,7 +942,32 @@ def __init__(self, src, **kwds):
         if self._reader.header is None:
             self.names = None
         else:
-            self.names = list(self._reader.header)
+            if len(self._reader.header) > 1:
+                # the names are the tuples of the header that are not the index cols
+                # 0 is the name of the index, assuming index_col is a list of column
+                # numbers 
+                if (self._reader.leading_cols == 0 and
+                    _is_index_col(self.index_col)):
+                    ic = self.index_col
+                    if not isinstance(ic, (list,tuple,np.ndarray)):
+                        ic = [ ic ]
+                    sic = set(ic)
+
+                    header = list(self._reader.header)
+                    index_names = header.pop(-1) 
+                    self.index_names = [ index_names[i] for i in ic ]
+                    field_count = len(header[0])
+
+                    def extract(r):
+                        return tuple([ r[i] for i in range(field_count) if i not in sic ])
+
+                    self.names = ic + zip(*[ extract(r) for r in header ])
+                    self.col_names = [ r[0] if len(r[0]) else None for r in header ]
+                    passed_names = True
+                else:
+                    raise Exception("must have an index_col when have a multi-index specified")
+            else:
+                self.names = list(self._reader.header[0])
 
         if self.names is None:
             if self.prefix:
@@ -958,12 +983,14 @@ def __init__(self, src, **kwds):
 
         if not self._has_complex_date_col:
             if (self._reader.leading_cols == 0 and
-                    _is_index_col(self.index_col)):
+                _is_index_col(self.index_col)):
 
                 self._name_processed = True
-                (self.index_names, self.names,
-                 self.index_col) = _clean_index_names(self.names,
-                                                      self.index_col)
+                (index_names, self.names, 
+                 self.index_col) = _clean_index_names(self.names, self.index_col)
+
+                if self.index_names is None:
+                    self.index_names = index_names
 
             if self._reader.header is None and not passed_names:
                 self.index_names = [None] * len(self.index_names)
@@ -1051,6 +1078,10 @@ def read(self, nrows=None):
             names, data = self._do_date_conversions(names, data)
             index = self._make_index(data, alldata, names)
 
+        # possibly create a column mi here
+        if all([ isinstance(c,tuple) for c in names]):
+            names = MultiIndex.from_tuples(names,names=self.col_names)
+
         return index, names, data
 
     def _filter_usecols(self, names):
@@ -1061,7 +1092,7 @@ def _filter_usecols(self, names):
         return names
 
     def _get_index_names(self):
-        names = list(self._reader.header)
+        names = list(self._reader.header[0])
         idx_names = None
 
         if self._reader.leading_cols == 0 and self.index_col is not None:
diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py
index b352b189a74b8..0c5b168ee8de5 100644
--- a/pandas/io/tests/test_cparser.py
+++ b/pandas/io/tests/test_cparser.py
@@ -179,7 +179,7 @@ def test_header_not_enough_lines(self):
         reader = TextReader(StringIO(data), delimiter=',', header=2,
                             as_recarray=True)
         header = reader.header
-        expected = ['a', 'b', 'c']
+        expected = [['a', 'b', 'c']]
         self.assertEquals(header, expected)
 
         recs = reader.read()
diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
index 694a769641b0d..97e31515bec78 100644
--- a/pandas/src/parser.pyx
+++ b/pandas/src/parser.pyx
@@ -143,6 +143,8 @@ cdef extern from "parser/tokenizer.h":
         char thousands
 
         int header # Boolean: 1: has header, 0: no header
+        int header_start # header row start
+        int header_end # header row end
 
         void *skipset
         int skip_footer
@@ -242,7 +244,7 @@ cdef class TextReader:
         object na_values, true_values, false_values
         object memory_map
         object as_recarray
-        object header, names
+        object header, names, header_start, header_end
         object low_memory
         object skiprows
         object compact_ints, use_unsigned
@@ -256,6 +258,8 @@ cdef class TextReader:
                   delimiter=b',',
 
                   header=0,
+                  header_start=0,
+                  header_end=0,
                   names=None,
 
                   memory_map=False,
@@ -435,11 +439,28 @@ cdef class TextReader:
         # TODO: no header vs. header is not the first row
         if header is None:
             # sentinel value
+            self.parser.header_start = -1
+            self.parser.header_end = -1
             self.parser.header = -1
             self.parser_start = 0
+            self.header = []
         else:
-            self.parser.header = header
-            self.parser_start = header + 1
+            if isinstance(header, list) and len(header):
+                # need to artifically skip the final line
+                # which is still a header line
+                header.append(header[-1]+1)
+
+                self.parser.header_start = header[0]
+                self.parser.header_end = header[-1]
+                self.parser.header = header[0]
+                self.parser_start = header[-1] + 1
+                self.header = header
+            else:
+                self.parser.header_start = header
+                self.parser.header_end = header
+                self.parser.header = header
+                self.parser_start = header + 1
+                self.header = [ header ]
 
         self.names = names
         self.header, self.table_width = self._get_header()
@@ -534,8 +555,10 @@ cdef class TextReader:
                           ' got %s type' % type(source))
 
     cdef _get_header(self):
+        # header is now a list of lists, so field_count should use header[0]
+
         cdef:
-            size_t i, start, data_line, field_count, passed_count
+            size_t i, start, data_line, field_count, passed_count, hr
             char *word
             object name
             int status
@@ -544,49 +567,53 @@ cdef class TextReader:
 
         header = []
 
-        if self.parser.header >= 0:
-            # Header is in the file
+        if self.parser.header_start >= 0:
 
-            if self.parser.lines < self.parser.header + 1:
-                self._tokenize_rows(self.parser.header + 2)
-
-            # e.g., if header=3 and file only has 2 lines
-            if self.parser.lines < self.parser.header + 1:
-                raise CParserError('Passed header=%d but only %d lines in file'
-                                   % (self.parser.header, self.parser.lines))
+            # Header is in the file
+            for hr in self.header:
 
-            field_count = self.parser.line_fields[self.parser.header]
-            start = self.parser.line_start[self.parser.header]
+                this_header = []
 
-            # TODO: Py3 vs. Py2
-            counts = {}
-            for i in range(field_count):
-                word = self.parser.words[start + i]
+                if self.parser.lines < hr + 1:
+                    self._tokenize_rows(hr + 2)
 
-                if self.c_encoding == NULL and not PY3:
-                    name = PyBytes_FromString(word)
-                else:
-                    if self.c_encoding == NULL or self.c_encoding == b'utf-8':
-                        name = PyUnicode_FromString(word)
-                    else:
-                        name = PyUnicode_Decode(word, strlen(word),
-                                                self.c_encoding, errors)
+                # e.g., if header=3 and file only has 2 lines
+                if self.parser.lines < hr + 1:
+                    raise CParserError('Passed header=%d but only %d lines in file'
+                                       % (self.parser.header, self.parser.lines))
 
-                if name == '':
-                    name = 'Unnamed: %d' % i
+                field_count = self.parser.line_fields[hr]
+                start = self.parser.line_start[hr]
 
+                # TODO: Py3 vs. Py2
+                counts = {}
+                for i in range(field_count):
+                    word = self.parser.words[start + i]
 
-                count = counts.get(name, 0)
-                if count > 0 and self.mangle_dupe_cols:
-                    header.append('%s.%d' % (name, count))
-                else:
-                    header.append(name)
-                counts[name] = count + 1
+                    if self.c_encoding == NULL and not PY3:
+                        name = PyBytes_FromString(word)
+                    else:
+                        if self.c_encoding == NULL or self.c_encoding == b'utf-8':
+                            name = PyUnicode_FromString(word)
+                        else:
+                            name = PyUnicode_Decode(word, strlen(word),
+                                                    self.c_encoding, errors)
+
+                    if name == '':
+                        name = 'Unnamed: %d' % i
+
+                    count = counts.get(name, 0)
+                    if count > 0 and self.mangle_dupe_cols:
+                        this_header.append('%s.%d' % (name, count))
+                    else:
+                        this_header.append(name)
+                    counts[name] = count + 1
 
-            data_line = self.parser.header + 1
+                data_line = hr + 1
+                header.append(this_header)
 
             if self.names is not None:
-                header = self.names
+                header = [ self.names ]
 
         elif self.names is not None:
             # Enforce this unless usecols
@@ -597,11 +624,11 @@ cdef class TextReader:
             if self.parser.lines < 1:
                 self._tokenize_rows(1)
 
-            header = self.names
+            header = [ self.names ]
             data_line = 0
 
             if self.parser.lines < 1:
-                field_count = len(header)
+                field_count = len(header[0])
             else:
                 field_count = self.parser.line_fields[data_line]
         else:
@@ -613,7 +640,7 @@ cdef class TextReader:
 
         # Corner case, not enough lines in the file
         if self.parser.lines < data_line + 1:
-            field_count = len(header)
+            field_count = len(header[0])
         else: # not self.has_usecols:
 
             field_count = self.parser.line_fields[data_line]
@@ -622,7 +649,7 @@ cdef class TextReader:
             if self.names is not None:
                 field_count = max(field_count, len(self.names))
 
-            passed_count = len(header)
+            passed_count = len(header[0])
 
             # if passed_count > field_count:
             #     raise CParserError('Column names have %d fields, '
@@ -1038,10 +1065,10 @@ cdef class TextReader:
             if self.header is not None:
                 j = i - self.leading_cols
                 # hack for #2442
-                if j == len(self.header):
+                if j == len(self.header[0]):
                     return j
                 else:
-                    return self.header[j]
+                    return self.header[0][j]
             else:
                 return None
 
diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
index 09cddd07e1c1d..81fda37acbb71 100644
--- a/pandas/src/parser/tokenizer.c
+++ b/pandas/src/parser/tokenizer.c
@@ -463,7 +463,7 @@ static int end_line(parser_t *self) {
 
     /* printf("Line: %d, Fields: %d, Ex-fields: %d\n", self->lines, fields, ex_fields); */
 
-    if (!(self->lines <= self->header + 1)
+    if (!(self->lines <= self->header_end + 1)
         && (self->expected_fields < 0 && fields > ex_fields)) {
         // increment file line count
         self->file_lines++;
@@ -498,7 +498,7 @@ static int end_line(parser_t *self) {
     }
     else {
         /* missing trailing delimiters */
-        if ((self->lines >= self->header + 1) && fields < ex_fields) {
+        if ((self->lines >= self->header_end + 1) && fields < ex_fields) {
 
             /* Might overrun the buffer when closing fields */
             if (make_stream_space(self, ex_fields - fields) < 0) {
diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h
index 566e89ae5f9a7..5ba1b99a29d39 100644
--- a/pandas/src/parser/tokenizer.h
+++ b/pandas/src/parser/tokenizer.h
@@ -195,6 +195,8 @@ typedef struct parser_t {
     char thousands;
 
     int header; // Boolean: 1: has header, 0: no header
+    int header_start; // header row start
+    int header_end;   // header row end
 
     void *skipset;
     int skip_footer;
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index d8eb2748dda29..c19de854de130 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4996,13 +4996,19 @@ def test_to_csv_multiindex(self):
              self.tsframe.index = old_index  # needed if setUP becomes classmethod
 
         with ensure_clean(pname) as path:
-            # column & index are mi
-            import pdb; pdb.set_trace()
+            # GH3571, GH1651, GH3141
+
+            # column & index are multi-iindex
             df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
             df.to_csv(path)
+            result = read_csv(path,header=[0,1,2,3],index_col=[0,1])
+            assert_frame_equal(df,result)
 
-            result = pd.read_csv(path,header=[0,1,2,3],index_col=[0,1])
-
+            # column is mi
+            df = mkdf(5,3,r_idx_nlevels=1,c_idx_nlevels=4)
+            df.to_csv(path)
+            result = read_csv(path,header=[0,1,2,3],index_col=0)
+            assert_frame_equal(df,result)
 
         with ensure_clean(pname) as path:
             # empty

From c64555b006fe545d6e4542667ddf31ff91275274 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Sat, 11 May 2013 09:31:54 -0400
Subject: [PATCH 3/7] TST: more test cases

ENH: catching some invalid option combinations

BUG: fix as_recarray

DOC: io.rst updated
---
 doc/source/io.rst               | 20 +++++++++++++++++++-
 pandas/io/parsers.py            | 22 +++++++++++++++++++---
 pandas/io/tests/test_parsers.py | 33 +++++++++++++++++++++++++++++++++
 pandas/src/parser.pyx           |  3 +++
 4 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index f15f758c42b18..5c0567b21dbf4 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -57,7 +57,10 @@ They can take a number of arguments:
     specified, data types will be inferred.
   - ``header``: row number to use as the column names, and the start of the
     data.  Defaults to 0 if no ``names`` passed, otherwise ``None``. Explicitly
-    pass ``header=0`` to be able to replace existing names.
+    pass ``header=0`` to be able to replace existing names. The header can be
+    a list of integers that specify row locations for a multi-index on the columns,
+    e.g. ``[0,1,3]``. Intervening rows that are not specified will be skipped
+    (e.g. row 2 in this example is skipped).
   - ``skiprows``: A collection of numbers for rows in the file to skip. Can
     also be an integer to skip the first ``n`` rows
   - ``index_col``: column number, column name, or list of column numbers/names,
@@ -253,6 +256,21 @@ If the header is in a row other than the first, pass the row number to
     data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9'
     pd.read_csv(StringIO(data), header=1)
 
+.. _io.multi_index_columns:
+
+Specifying a multi-index columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By specifying a list of row locations for the ``header`` argument, you
+can read in a multi-index for the columns. Specifying non-consecutive
+rows will skip the intervening rows. The ``index_col`` must also be
+specified.
+
+.. ipython:: python
+
+    data = 'C0,C_l0_g0,C_l0_g1\nC1,C_l1_g0,C_l1_g1\nR0,,\nR_l0_g0,R0C0,R0C1\nR_l0_g1,R1C0,R1C1\nR_l0_g2,R2C0,R2C1\n'
+    pd.read_csv(StringIO(data), header=[0,1], index_col=[0])
+
 .. _io.usecols:
 
 Filtering columns (``usecols``)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index dca3dfb5e5cec..f380da680eca6 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -52,9 +52,11 @@ class DateConversionError(Exception):
 dialect : string or csv.Dialect instance, default None
     If None defaults to Excel dialect. Ignored if sep longer than 1 char
     See csv.Dialect documentation for more details
-header : int, default 0 if names parameter not specified, otherwise None
+header : int, default 0 if names parameter not specified,
     Row to use for the column labels of the parsed DataFrame. Specify None if
-    there is no header row.
+    there is no header row. Can be a list of integers that specify row
+    locations for a multi-index on the columns, e.g. [0,1,3]. Intervening
+    rows that are not specified are skipped (e.g. row 2 in this example).
 skiprows : list-like or integer
     Row numbers to skip (0-indexed) or number of rows to skip (int)
     at the start of the file
@@ -531,6 +533,16 @@ def __init__(self, f, engine='python', **kwds):
         if kwds.get('header', 'infer') == 'infer':
             kwds['header'] = 0 if kwds.get('names') is None else None
 
+        # validate header options for mi
+        h = kwds['header']
+        if isinstance(h,(list,tuple,np.ndarray)):
+            if kwds.get('index_col') is None:
+                raise Exception("must have an index_col when have a "
+                                "multi-index header is specified")
+            if kwds.get('as_recarray'):
+                raise Exception("cannot specify as_recarray when "
+                                "specifying a multi-index header")
+
         self.orig_options = kwds
 
         # miscellanea
@@ -965,7 +977,8 @@ def extract(r):
                     self.col_names = [ r[0] if len(r[0]) else None for r in header ]
                     passed_names = True
                 else:
-                    raise Exception("must have an index_col when have a multi-index specified")
+                    raise Exception("must have an index_col when have a multi-index "
+                                    "header is specified")
             else:
                 self.names = list(self._reader.header[0])
 
@@ -1381,6 +1394,9 @@ def _infer_columns(self):
         names = self.names
 
         if self.header is not None:
+            if isinstance(self.header,(list,tuple,np.ndarray)):
+                raise Exception("PythonParser does not support a multi-index header")
+
             if len(self.buf) > 0:
                 line = self.buf[0]
             else:
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 38a31c042d120..bd55f9c74922c 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -20,6 +20,7 @@
                                TextFileReader, TextParser)
 from pandas.util.testing import (assert_almost_equal,
                                  assert_series_equal,
+                                 makeCustomDataframe as mkdf,
                                  network,
                                  ensure_clean)
 import pandas.util.testing as tm
@@ -994,6 +995,38 @@ def test_header_not_first_line(self):
         expected = self.read_csv(StringIO(data2), header=0, index_col=0)
         tm.assert_frame_equal(df, expected)
 
+    def test_header_multi_index(self):
+        expected = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+
+        data = """\
+C0,,C_l0_g0,C_l0_g1,C_l0_g2
+
+C1,,C_l1_g0,C_l1_g1,C_l1_g2
+C2,,C_l2_g0,C_l2_g1,C_l2_g2
+C3,,C_l3_g0,C_l3_g1,C_l3_g2
+R0,R1,,,
+R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
+R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
+R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
+R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
+R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
+"""
+
+        # python-engine
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+                          index_col=[0,1], engine='python')
+
+        # must specify index_col
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3])
+
+        # no as_recarray
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+                          index_col=[0,1], as_recarray=True)
+
+        # skipping lines in the header
+        df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1])
+        tm.assert_frame_equal(df, expected)
+
     def test_pass_names_with_index(self):
         lines = self.data1.split('\n')
         no_header = '\n'.join(lines[1:])
diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
index 97e31515bec78..01b600c975cb9 100644
--- a/pandas/src/parser.pyx
+++ b/pandas/src/parser.pyx
@@ -1789,6 +1789,9 @@ def _to_structured_array(dict columns, object names):
 
     if names is None:
         names = ['%d' % i for i in range(len(columns))]
+    else:
+        # single line header
+        names = names[0]
 
     dt = np.dtype([(str(name), columns[i].dtype)
                    for i, name in enumerate(names)])

From d6573f536e055c498563901048e4736c01fc50b2 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 13 May 2013 08:34:29 -0400
Subject: [PATCH 4/7] ENH/CLN: refactor to support PythonParser as well as
 CParser

---
 pandas/io/parsers.py            | 153 +++++++++++++++++++++-----------
 pandas/io/tests/test_parsers.py |   7 +-
 pandas/tests/test_frame.py      |  17 ++--
 3 files changed, 108 insertions(+), 69 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f380da680eca6..e2f4a59b24c87 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -755,7 +755,42 @@ def _should_parse_dates(self, i):
             else:
                 return (j in self.parse_dates) or (name in self.parse_dates)
 
-    def _make_index(self, data, alldata, columns):
+
+    def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names=False):
+        """ extract and return the names, index_names, col_names
+            header is a list-of-lists returned from the parsers """
+        if len(header) < 2:
+            return header[0], index_names, col_names, passed_names
+
+        # the names are the tuples of the header that are not the index cols
+        # 0 is the name of the index, assuming index_col is a list of column
+        # numbers 
+        ic = self.index_col
+        if not isinstance(ic, (list,tuple,np.ndarray)):
+            ic = [ ic ]
+        sic = set(ic)
+
+        orig_header = list(header)
+        index_names = header.pop(-1) 
+        index_names = [ index_names[i] for i in ic ]
+        field_count = len(header[0])
+
+        def extract(r):
+            return tuple([ r[i] for i in range(field_count) if i not in sic ])
+
+        names = ic + zip(*[ extract(r) for r in header ])
+        col_names = [ r[0] if len(r[0]) else None for r in header ]
+        passed_names = True
+
+        return names, index_names, col_names, passed_names
+
+    def _maybe_make_multi_index_columns(self, columns, col_names=None):
+        # possibly create a column mi here
+        if len(columns) and not isinstance(columns, MultiIndex) and all([ isinstance(c,tuple) for c in columns]):
+            columns = MultiIndex.from_tuples(columns,names=col_names)
+        return columns
+
+    def _make_index(self, data, alldata, columns, indexnamerow=False):
         if not _is_index_col(self.index_col) or len(self.index_col) == 0:
             index = None
 
@@ -772,7 +807,15 @@ def _make_index(self, data, alldata, columns):
             index = self._get_complex_date_index(data, columns)
             index = self._agg_index(index, try_parse_dates=False)
 
-        return index
+        # add names for the index 
+        if indexnamerow:
+            coffset = len(indexnamerow) - len(columns)
+            index.names = indexnamerow[:coffset]
+
+        # maybe create a mi on the columns
+        columns = self._maybe_make_multi_index_columns(columns, self.col_names)
+
+        return index, columns
 
     _implicit_index = False
 
@@ -955,27 +998,11 @@ def __init__(self, src, **kwds):
             self.names = None
         else:
             if len(self._reader.header) > 1:
-                # the names are the tuples of the header that are not the index cols
-                # 0 is the name of the index, assuming index_col is a list of column
-                # numbers 
+                # we have a multi index in the columns
                 if (self._reader.leading_cols == 0 and
                     _is_index_col(self.index_col)):
-                    ic = self.index_col
-                    if not isinstance(ic, (list,tuple,np.ndarray)):
-                        ic = [ ic ]
-                    sic = set(ic)
-
-                    header = list(self._reader.header)
-                    index_names = header.pop(-1) 
-                    self.index_names = [ index_names[i] for i in ic ]
-                    field_count = len(header[0])
-
-                    def extract(r):
-                        return tuple([ r[i] for i in range(field_count) if i not in sic ])
-
-                    self.names = ic + zip(*[ extract(r) for r in header ])
-                    self.col_names = [ r[0] if len(r[0]) else None for r in header ]
-                    passed_names = True
+                    self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns(
+                        self._reader.header, self.index_names, self.col_names, passed_names)
                 else:
                     raise Exception("must have an index_col when have a multi-index "
                                     "header is specified")
@@ -1089,11 +1116,10 @@ def read(self, nrows=None):
             data = dict((k, v) for k, (i, v) in zip(names, data))
 
             names, data = self._do_date_conversions(names, data)
-            index = self._make_index(data, alldata, names)
+            index, names = self._make_index(data, alldata, names)
 
-        # possibly create a column mi here
-        if all([ isinstance(c,tuple) for c in names]):
-            names = MultiIndex.from_tuples(names,names=self.col_names)
+        # maybe create a mi on the columns
+        names = self._maybe_make_multi_index_columns(names, self.col_names)
 
         return index, names, data
 
@@ -1252,6 +1278,13 @@ def __init__(self, f, **kwds):
             self.data = f
         self.columns = self._infer_columns()
 
+        # we are processing a multi index column
+        if len(self.columns) > 1:
+            self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns(
+                self.columns, self.index_names, self.col_names)
+        else:
+            self.columns = self.columns[0]
+
         # get popped off for index
         self.orig_names = list(self.columns)
 
@@ -1259,9 +1292,11 @@ def __init__(self, f, **kwds):
         # multiple date column thing turning into a real spaghetti factory
 
         if not self._has_complex_date_col:
-            (self.index_names,
+            (index_names,
              self.orig_names, _) = self._get_index_name(self.columns)
             self._name_processed = True
+            if self.index_names is None:
+                self.index_names = index_names
         self._first_chunk = True
 
     def _make_reader(self, f):
@@ -1365,10 +1400,7 @@ def read(self, rows=None):
         columns, data = self._do_date_conversions(self.columns, data)
 
         data = self._convert_data(data)
-        index = self._make_index(data, alldata, columns)
-        if indexnamerow:
-            coffset = len(indexnamerow) - len(columns)
-            index.names = indexnamerow[:coffset]
+        index, columns = self._make_index(data, alldata, columns, indexnamerow)
 
         return index, columns, data
 
@@ -1394,39 +1426,52 @@ def _infer_columns(self):
         names = self.names
 
         if self.header is not None:
-            if isinstance(self.header,(list,tuple,np.ndarray)):
-                raise Exception("PythonParser does not support a multi-index header")
+            header = self.header
 
-            if len(self.buf) > 0:
-                line = self.buf[0]
+            # we have a mi columns, so read and extra line
+            if isinstance(header,(list,tuple,np.ndarray)):
+                header = list(header) + [header[-1]+1]
             else:
-                line = self._next_line()
-
-            while self.pos <= self.header:
-                line = self._next_line()
+                header = [ header ]
 
             columns = []
-            for i, c in enumerate(line):
-                if c == '':
-                    columns.append('Unnamed: %d' % i)
+            for hr in header:
+
+                if len(self.buf) > 0:
+                    line = self.buf[0]
                 else:
-                    columns.append(c)
+                    line = self._next_line()
 
-            if self.mangle_dupe_cols:
-                counts = {}
-                for i, col in enumerate(columns):
-                    cur_count = counts.get(col, 0)
-                    if cur_count > 0:
-                        columns[i] = '%s.%d' % (col, cur_count)
-                    counts[col] = cur_count + 1
+                while self.pos <= hr:
+                    line = self._next_line()
+
+                this_columns = []
+                for i, c in enumerate(line):
+                    if c == '':
+                        this_columns.append('Unnamed: %d' % i)
+                    else:
+                        this_columns.append(c)
+
+                if self.mangle_dupe_cols:
+                    counts = {}
+                    for i, col in enumerate(this_columns):
+                        cur_count = counts.get(col, 0)
+                        if cur_count > 0:
+                            this_columns[i] = '%s.%d' % (col, cur_count)
+                        counts[col] = cur_count + 1
+
+                columns.append(this_columns)
 
             self._clear_buffer()
 
             if names is not None:
-                if len(names) != len(columns):
+                if len(names) != len(columns[0]):
                     raise Exception('Number of passed names did not match '
                                     'number of header fields in the file')
-                columns = names
+                if len(columns) > 1:
+                    raise Exception('Cannot pass names with multi-index columns')
+                columns = [ names ]
+
         else:
             if len(self.buf) > 0:
                 line = self.buf[0]
@@ -1436,11 +1481,11 @@ def _infer_columns(self):
             ncols = len(line)
             if not names:
                 if self.prefix:
-                    columns = ['X%d' % i for i in range(ncols)]
+                    columns = [ ['X%d' % i for i in range(ncols)] ]
                 else:
-                    columns = range(ncols)
+                    columns = [ range(ncols) ]
             else:
-                columns = names
+                columns = [ names ]
 
         return columns
 
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index bd55f9c74922c..b9e773a916d4c 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -1012,9 +1012,10 @@ def test_header_multi_index(self):
 R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
 """
 
-        # python-engine
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
-                          index_col=[0,1], engine='python')
+        # basic test with both engines
+        for engine in ['c','python']:
+            df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1], engine=engine)
+            tm.assert_frame_equal(df, expected)
 
         # must specify index_col
         self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3])
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index c19de854de130..616fdd5ca2549 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4755,9 +4755,13 @@ def test_to_csv_moar(self):
         def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None,
                      dupe_col=False):
 
+               header = 0
+               if cnlvl:
+                    header = range(cnlvl)
+
                with ensure_clean(path) as path:
                     df.to_csv(path,encoding='utf8',chunksize=chunksize)
-                    recons = DataFrame.from_csv(path,parse_dates=False)
+                    recons = DataFrame.from_csv(path,header=header,parse_dates=False)
 
                def _to_uni(x):
                    if not isinstance(x,unicode):
@@ -4773,16 +4777,6 @@ def _to_uni(x):
                    recons.index = ix
                    recons = recons.iloc[:,rnlvl-1:]
 
-               if cnlvl:
-                   def stuple_to_tuple(x):
-                       import re
-                       x = x.split(",")
-                       x = map(lambda x: re.sub("[\'\"\s\(\)]","",x),x)
-                       return x
-
-                   cols=MultiIndex.from_tuples(map(stuple_to_tuple,recons.columns))
-                   recons.columns = cols
-
                type_map = dict(i='i',f='f',s='O',u='O',dt='O',p='O')
                if r_dtype:
                     if r_dtype == 'u': # unicode
@@ -4827,7 +4821,6 @@ def stuple_to_tuple(x):
 
                assert_frame_equal(df, recons,check_names=False,check_less_precise=True)
 
-
         N = 100
         chunksize=1000
 

From b0dadc5c3d1407e913f797d554241780f2d8a830 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 13 May 2013 12:55:06 -0400
Subject: [PATCH 5/7] BUG: unnamed columns in a multi-index will be named like:
 Unnamed: 2_level_0, so they are not duplicated

ENH: add options ``multi_index_columns_compat`` both to to_csv and read_csv (default is False),

    to force (when True) the previous behavior of creating a list of tuples (when writing), and
    reading as a list of tuples (and NOT as a MultiIndex)

DOC: add compat flags to io.rst
---
 doc/source/io.rst          |  7 +++++++
 pandas/core/format.py      | 11 +++++++----
 pandas/core/frame.py       |  9 +++++++--
 pandas/io/parsers.py       | 24 +++++++++++++++++++-----
 pandas/src/parser.pyx      | 16 ++++++++++++----
 pandas/tests/test_frame.py | 18 +++++++++++++++++-
 6 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 5c0567b21dbf4..ef223f64d43c8 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -115,6 +115,10 @@ They can take a number of arguments:
   - ``error_bad_lines``: if False then any lines causing an error will be skipped :ref:`bad lines <io.bad_lines>`
   - ``usecols``: a subset of columns to return, results in much faster parsing 
     time and lower memory usage.
+  - ``mangle_dup_columns``: boolean, default True, then duplicate columns will be specified 
+    as 'X.0'...'X.N', rather than 'X'...'X'
+  - ``multi_index_columns_compat``: boolean, default False, leave a list of tuples on columns
+    as is (default is to convert to a Multi Index on the columns)
 
 .. ipython:: python
    :suppress:
@@ -271,6 +275,9 @@ specified.
     data = 'C0,C_l0_g0,C_l0_g1\nC1,C_l1_g0,C_l1_g1\nR0,,\nR_l0_g0,R0C0,R0C1\nR_l0_g1,R1C0,R1C1\nR_l0_g2,R2C0,R2C1\n'
     pd.read_csv(StringIO(data), header=[0,1], index_col=[0])
 
+You can pass ``multi_index_columns_compat=True`` to preserve the pre-0.12 behavior of
+not converting a list of tuples in the columns to a Multi Index.
+
 .. _io.usecols:
 
 Filtering columns (``usecols``)
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 2eaa17bc659c3..e7ac540343d84 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -772,9 +772,10 @@ def grouper(x):
 class CSVFormatter(object):
 
     def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
-               cols=None, header=True, index=True, index_label=None,
-               mode='w', nanRep=None, encoding=None, quoting=None,
-               line_terminator='\n', chunksize=None, engine=None):
+                 cols=None, header=True, index=True, index_label=None,
+                 mode='w', nanRep=None, encoding=None, quoting=None,
+                 line_terminator='\n', chunksize=None, engine=None,
+                 multi_index_columns_compat=False):
 
         self.engine = engine  # remove for 0.12
 
@@ -803,6 +804,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
             msg= "columns.is_unique == False not supported with engine='python'"
             raise NotImplementedError(msg)
 
+        self.multi_index_columns_compat=multi_index_columns_compat
         if cols is not None:
             if isinstance(cols,Index):
                 cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
@@ -959,7 +961,8 @@ def _save_header(self):
         index_label = self.index_label
         cols = self.cols
         header = self.header
-        has_mi_columns = isinstance(obj.columns, MultiIndex)
+        has_mi_columns = isinstance(obj.columns, MultiIndex
+                                    ) and not self.multi_index_columns_compat
         encoded_labels = []
 
         has_aliases = isinstance(header, (tuple, list, np.ndarray))
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 39742557ccc56..bb7416b23aab4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1391,7 +1391,8 @@ def to_panel(self):
     def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                cols=None, header=True, index=True, index_label=None,
                mode='w', nanRep=None, encoding=None, quoting=None,
-               line_terminator='\n', chunksize=None,**kwds):
+               line_terminator='\n', chunksize=None,
+               multi_index_columns_compat=False, **kwds):
         """
         Write DataFrame to a comma-separated values (csv) file
 
@@ -1429,6 +1430,9 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
         quoting : optional constant from csv module
             defaults to csv.QUOTE_MINIMAL
         chunksize : rows to write at a time
+        multi_index_columns_compat : boolean, default False
+            write multi_index columns as a list of tuples (if True)
+            or new (expanded format)m if False)
         """
         if nanRep is not None:  # pragma: no cover
             import warnings
@@ -1445,7 +1449,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                                          float_format=float_format, cols=cols,
                                          header=header, index=index,
                                          index_label=index_label,mode=mode,
-                                         chunksize=chunksize,engine=kwds.get("engine") )
+                                         chunksize=chunksize,engine=kwds.get("engine"),
+                                         multi_index_columns_compat=multi_index_columns_compat)
             formatter.save()
 
     def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index e2f4a59b24c87..78a941218c1d6 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -127,6 +127,11 @@ class DateConversionError(Exception):
 usecols : array-like
     Return a subset of the columns.
     Results in much faster parsing time and lower memory usage.
+mangle_dup_columns: boolean, default True
+    Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'
+multi_index_columns_compat: boolean, default False
+    Leave a list of tuples on columns as is (default is to convert to
+    a Multi Index on the columns)
 
 Returns
 -------
@@ -294,6 +299,7 @@ def _read(filepath_or_buffer, kwds):
     'squeeze': False,
     'compression': None,
     'mangle_dupe_cols': True,
+    'multi_index_columns_compat':False,
 }
 
 
@@ -380,7 +386,8 @@ def parser_f(filepath_or_buffer,
                  verbose=False,
                  encoding=None,
                  squeeze=False,
-                 mangle_dupe_cols=True
+                 mangle_dupe_cols=True,
+                 multi_index_columns_compat=False,
                  ):
 
         # Alias sep -> delimiter.
@@ -438,7 +445,7 @@ def parser_f(filepath_or_buffer,
                     error_bad_lines=error_bad_lines,
                     low_memory=low_memory,
                     buffer_lines=buffer_lines,
-                    mangle_dupe_cols=mangle_dupe_cols
+                    mangle_dupe_cols=mangle_dupe_cols,
             )
 
         return _read(filepath_or_buffer, kwds)
@@ -730,6 +737,7 @@ def __init__(self, kwds):
         self.na_values = kwds.get('na_values')
         self.true_values = kwds.get('true_values')
         self.false_values = kwds.get('false_values')
+        self.multi_index_columns_compat = kwds.get('multi_index_columns_compat',False)
 
         self._date_conv = _make_date_converter(date_parser=self.date_parser,
                                                dayfirst=self.dayfirst)
@@ -786,7 +794,8 @@ def extract(r):
 
     def _maybe_make_multi_index_columns(self, columns, col_names=None):
         # possibly create a column mi here
-        if len(columns) and not isinstance(columns, MultiIndex) and all([ isinstance(c,tuple) for c in columns]):
+        if not self.multi_index_columns_compat and len(columns) and not isinstance(
+            columns, MultiIndex) and all([ isinstance(c,tuple) for c in columns]):
             columns = MultiIndex.from_tuples(columns,names=col_names)
         return columns
 
@@ -1430,12 +1439,14 @@ def _infer_columns(self):
 
             # we have a mi columns, so read and extra line
             if isinstance(header,(list,tuple,np.ndarray)):
+                have_mi_columns = True
                 header = list(header) + [header[-1]+1]
             else:
+                have_mi_columns = False
                 header = [ header ]
 
             columns = []
-            for hr in header:
+            for level, hr in enumerate(header):
 
                 if len(self.buf) > 0:
                     line = self.buf[0]
@@ -1448,7 +1459,10 @@ def _infer_columns(self):
                 this_columns = []
                 for i, c in enumerate(line):
                     if c == '':
-                        this_columns.append('Unnamed: %d' % i)
+                        if have_mi_columns:
+                            this_columns.append('Unnamed: %d_level_%d' % (i,level))
+                        else:
+                            this_columns.append('Unnamed: %d' % i)
                     else:
                         this_columns.append(c)
 
diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
index 01b600c975cb9..62e9d39cd792d 100644
--- a/pandas/src/parser.pyx
+++ b/pandas/src/parser.pyx
@@ -232,7 +232,7 @@ cdef class TextReader:
     cdef:
         parser_t *parser
         object file_handle
-        bint factorize, na_filter, verbose, has_usecols
+        bint factorize, na_filter, verbose, has_usecols, has_mi_columns
         int parser_start
         list clocks
         char *c_encoding
@@ -252,6 +252,7 @@ cdef class TextReader:
         object encoding
         object compression
         object mangle_dupe_cols
+        object multi_index_columns_compat
         set noconvert, usecols
 
     def __cinit__(self, source,
@@ -304,12 +305,14 @@ cdef class TextReader:
                   skiprows=None,
                   skip_footer=0,
                   verbose=False,
-                  mangle_dupe_cols=True):
+                  mangle_dupe_cols=True,
+                  multi_index_columns_compat=False):
 
         self.parser = parser_new()
         self.parser.chunksize = tokenize_chunksize
 
         self.mangle_dupe_cols=mangle_dupe_cols
+        self.multi_index_columns_compat=multi_index_columns_compat
 
         # For timekeeping
         self.clocks = []
@@ -437,6 +440,7 @@ cdef class TextReader:
         self.leading_cols = 0
 
         # TODO: no header vs. header is not the first row
+        self.has_mi_columns = 0
         if header is None:
             # sentinel value
             self.parser.header_start = -1
@@ -454,6 +458,7 @@ cdef class TextReader:
                 self.parser.header_end = header[-1]
                 self.parser.header = header[0]
                 self.parser_start = header[-1] + 1
+                self.has_mi_columns = 1
                 self.header = header
             else:
                 self.parser.header_start = header
@@ -570,7 +575,7 @@ cdef class TextReader:
         if self.parser.header_start >= 0:
 
             # Header is in the file
-            for hr in self.header:
+            for level, hr in enumerate(self.header):
 
                 this_header = []
 
@@ -600,7 +605,10 @@ cdef class TextReader:
                                                     self.c_encoding, errors)
 
                     if name == '':
-                        name = 'Unnamed: %d' % i
+                        if self.has_mi_columns:
+                            name = 'Unnamed: %d_level_%d' % (i,level)
+                        else:
+                            name = 'Unnamed: %d' % i
 
                     count = counts.get(name, 0)
                     if count > 0 and self.mangle_dupe_cols:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 616fdd5ca2549..101bdc76ba443 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4991,7 +4991,7 @@ def test_to_csv_multiindex(self):
         with ensure_clean(pname) as path:
             # GH3571, GH1651, GH3141
 
-            # column & index are multi-iindex
+            # column & index are multi-index
             df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
             df.to_csv(path)
             result = read_csv(path,header=[0,1,2,3],index_col=[0,1])
@@ -5003,6 +5003,22 @@ def test_to_csv_multiindex(self):
             result = read_csv(path,header=[0,1,2,3],index_col=0)
             assert_frame_equal(df,result)
 
+            # dup column names?
+            df = mkdf(5,3,r_idx_nlevels=3,c_idx_nlevels=4)
+            df.to_csv(path)
+            result = read_csv(path,header=[0,1,2,3],index_col=[0,1])
+            result.columns = ['R2','A','B','C']
+            new_result = result.reset_index().set_index(['R0','R1','R2'])
+            new_result.columns = df.columns
+            assert_frame_equal(df,new_result)
+
+            # column & index are multi-index (compatibility)
+            df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+            df.to_csv(path,multi_index_columns_compat=True)
+            result = read_csv(path,header=0,index_col=[0,1],multi_index_columns_compat=True)
+            result.columns = df.columns
+            assert_frame_equal(df,result)
+
         with ensure_clean(pname) as path:
             # empty
             tsframe[:0].to_csv(path)

From a9a89f89e13cf006f6b58da1747aa65f86f74cfb Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Tue, 14 May 2013 16:57:28 -0400
Subject: [PATCH 6/7] DOC: updated releasenotes, v0.11.1 whatsnew, io.rst

CLN: changed formatting option: multi_index_columns_compat -> tupleize_cols

BUG: incorrectly writing sparse levels for the multi_index

DOC: slight docs changes

TST: added tests/fixes for disallowed options in to_csv (cols=not None,index=False)

TST: from_csv not accepting tupleize_cols

ENH: allow index=False in to_csv with a multi_index column

     allow reading of a multi_index column with index_col=None

DOC: updates to examples in io.rst and v0.11.1.rst

TST: disallow names, usecols, non-numeric in index_cols

BUG: raise on too many rows in the header if multi_index of columns
---
 RELEASE.rst                     | 15 +++++
 doc/source/io.rst               | 59 +++++++++++++-------
 doc/source/v0.11.1.txt          | 37 +++++++++++++
 pandas/core/format.py           | 31 +++++++----
 pandas/core/frame.py            | 15 +++--
 pandas/io/parsers.py            | 97 +++++++++++++++++++++------------
 pandas/io/tests/test_parsers.py | 26 ++++++---
 pandas/src/parser.pyx           |  9 +--
 pandas/tests/test_frame.py      | 76 +++++++++++++++++++++-----
 9 files changed, 265 insertions(+), 100 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index acb4f429e81b0..74bafd419af54 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -34,6 +34,15 @@ pandas 0.11.1
     courtesy of @cpcloud. (GH3477_)
   - Support for reading Amazon S3 files. (GH3504_)
   - Added module for reading and writing Stata files: pandas.io.stata (GH1512_)
+  - Added support for writing in ``to_csv`` and reading in ``read_csv``,
+    multi-index columns. The ``header`` option in ``read_csv`` now accepts a
+    list of the rows from which to read the index. Added the option,
+    ``tupleize_cols`` to provide compatibility for the pre 0.11.1 behavior of
+    writing and reading multi-index columns via a list of tuples. The default in
+    0.11.1 is to write lists of tuples and *not* interpret list of tuples as a 
+    multi-index column.  
+    Note: The default value will change in 0.12 to make the default *to* write and
+    read multi-index columns in the new format. (GH3571_, GH1651_, GH3141_)
 
 **Improvements to existing features**
 
@@ -180,6 +189,7 @@ pandas 0.11.1
 .. _GH3596: https://github.com/pydata/pandas/issues/3596
 .. _GH3617: https://github.com/pydata/pandas/issues/3617
 .. _GH3435: https://github.com/pydata/pandas/issues/3435
+<<<<<<< HEAD
 .. _GH3611: https://github.com/pydata/pandas/issues/3611
 .. _GH3062: https://github.com/pydata/pandas/issues/3062
 .. _GH3624: https://github.com/pydata/pandas/issues/3624
@@ -187,6 +197,11 @@ pandas 0.11.1
 .. _GH3601: https://github.com/pydata/pandas/issues/3601
 .. _GH3631: https://github.com/pydata/pandas/issues/3631
 .. _GH1512: https://github.com/pydata/pandas/issues/1512
+=======
+.. _GH3571: https://github.com/pydata/pandas/issues/3571
+.. _GH1651: https://github.com/pydata/pandas/issues/1651
+.. _GH3141: https://github.com/pydata/pandas/issues/3141
+>>>>>>> DOC: updated releasenotes, v0.11.1 whatsnew, io.rst
 
 
 pandas 0.11.0
diff --git a/doc/source/io.rst b/doc/source/io.rst
index ef223f64d43c8..42ea4a2ca5d53 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -115,10 +115,10 @@ They can take a number of arguments:
   - ``error_bad_lines``: if False then any lines causing an error will be skipped :ref:`bad lines <io.bad_lines>`
   - ``usecols``: a subset of columns to return, results in much faster parsing 
     time and lower memory usage.
-  - ``mangle_dup_columns``: boolean, default True, then duplicate columns will be specified 
+  - ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified 
     as 'X.0'...'X.N', rather than 'X'...'X'
-  - ``multi_index_columns_compat``: boolean, default False, leave a list of tuples on columns
-    as is (default is to convert to a Multi Index on the columns)
+  - ``tupleize_cols``: boolean, default True, if False, convert a list of tuples
+    to a multi-index of columns, otherwise, leave the column index as a list of tuples
 
 .. ipython:: python
    :suppress:
@@ -260,24 +260,6 @@ If the header is in a row other than the first, pass the row number to
     data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9'
     pd.read_csv(StringIO(data), header=1)
 
-.. _io.multi_index_columns:
-
-Specifying a multi-index columns
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-By specifying list of row locations for the ``header`` argument, you
-can read in a multi-index for the columns. Specifying non-consecutive
-rows will skip the interveaing rows. The ``index_col`` must also be
-specified.
-
-.. ipython:: python
-
-    data = 'C0,C_l0_g0,C_l0_g1\nC1,C_l1_g0,C_l1_g1\nR0,,\nR_l0_g0,R0C0,R0C1\nR_l0_g1,R1C0,R1C1\nR_l0_g2,R2C0,R2C1\n'
-    pd.read_csv(StringIO(data), header=[0,1], index_col=[0])
-
-You can pass ``multi_index_columns_compat=True`` to preserve the pre-0.12 behavior of
-not converting a list of tuples in the columns to a Multi Index.
-
 .. _io.usecols:
 
 Filtering columns (``usecols``)
@@ -787,6 +769,36 @@ column numbers to turn multiple columns into a ``MultiIndex``:
    df
    df.ix[1978]
 
+.. _io.multi_index_columns:
+
+Specifying a multi-index columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By specifying a list of row locations for the ``header`` argument, you
+can read in a multi-index for the columns. Specifying non-consecutive
+rows will skip the intervening rows.
+
+.. ipython:: python
+
+   from pandas.util.testing import makeCustomDataframe as mkdf
+   df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+   df.to_csv('mi.csv',tupleize_cols=False)
+   print open('mi.csv').read()
+   pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
+
+Note: The default behavior in 0.11.1 remains unchanged (``tupleize_cols=True``),
+but starting with 0.12, the default *to* write and read multi-index columns will be in the new 
+format (``tupleize_cols=False``)
+
+Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
+with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will be *lost*.
+
+.. ipython:: python
+   :suppress:
+
+   import os
+   os.remove('mi.csv')
+
 .. _io.sniff:
 
 Automatically "sniffing" the delimiter
@@ -870,6 +882,8 @@ function takes a number of arguments. Only the first is required.
   - ``sep`` : Field delimiter for the output file (default ",")
   - ``encoding``: a string representing the encoding to use if the contents are
     non-ascii, for python versions prior to 3
+  - ``tupleize_cols``: boolean, default True, if True, write multi-index columns as a list of tuples,
+    otherwise (False) write in an expanded line format suitable for ``read_csv``
 
 Writing a formatted string
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -901,6 +915,9 @@ The Series object also has a ``to_string`` method, but with only the ``buf``,
 which, if set to ``True``, will additionally output the length of the Series.
 
 
+HTML
+----
+
 Reading HTML format
 ~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
index aed95188db26e..a724ce96a7381 100644
--- a/doc/source/v0.11.1.txt
+++ b/doc/source/v0.11.1.txt
@@ -73,6 +73,7 @@ Enhancements
       an index with a different frequency than the existing, or attempting
       to append an index with a different name than the existing
     - support datelike columns with a timezone as data_columns (GH2852_)
+
   - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is
     a list or tuple.
   - Added module for reading and writing Stata files: pandas.io.stata (GH1512_)
@@ -80,6 +81,39 @@ Enhancements
     ``Series`` with object dtype. See the examples section in the regular docs
     :ref:`Replacing via String Expression <missing_data.replace_expression>`
 
+  - Multi-index column support for reading and writing csvs
+
+    - The ``header`` option in ``read_csv`` now accepts a
+      list of the rows from which to read the multi-index columns.
+
+    - The ``tupleize_cols`` option can now be specified in both ``to_csv`` and
+      ``read_csv``, to provide compatibility for the pre 0.11.1 behavior of
+      writing and reading multi-index columns via a list of tuples. The default in
+      0.11.1 is to write lists of tuples and *not* interpret list of tuples as a 
+      multi-index column.  
+
+      Note: The default behavior in 0.11.1 remains unchanged, but starting with 0.12,
+      the default for writing and reading multi-index columns will be the new
+      format. (GH3571_, GH1651_, GH3141_)
+
+    - If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
+      with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will
+      be *lost*.
+
+    .. ipython:: python
+
+       from pandas.util.testing import makeCustomDataframe as mkdf
+       df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+       df.to_csv('mi.csv',tupleize_cols=False)
+       print open('mi.csv').read()
+       pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
+
+    .. ipython:: python
+       :suppress:
+
+       import os
+       os.remove('mi.csv')
+
 See the `full release notes
 <https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
 on GitHub for a complete list.
@@ -96,3 +130,6 @@ on GitHub for a complete list.
 .. _GH1512: https://github.com/pydata/pandas/issues/1512
 .. _GH2285: https://github.com/pydata/pandas/issues/2285
 .. _GH3631: https://github.com/pydata/pandas/issues/3631
+.. _GH3571: https://github.com/pydata/pandas/issues/3571
+.. _GH1651: https://github.com/pydata/pandas/issues/1651
+.. _GH3141: https://github.com/pydata/pandas/issues/3141
diff --git a/pandas/core/format.py b/pandas/core/format.py
index e7ac540343d84..cd4364edc6662 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -775,7 +775,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
                  cols=None, header=True, index=True, index_label=None,
                  mode='w', nanRep=None, encoding=None, quoting=None,
                  line_terminator='\n', chunksize=None, engine=None,
-                 multi_index_columns_compat=False):
+                 tupleize_cols=True):
 
         self.engine = engine  # remove for 0.12
 
@@ -804,7 +804,15 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
             msg= "columns.is_unique == False not supported with engine='python'"
             raise NotImplementedError(msg)
 
-        self.multi_index_columns_compat=multi_index_columns_compat
+        self.tupleize_cols = tupleize_cols
+        self.has_mi_columns = isinstance(obj.columns, MultiIndex
+                                         ) and not self.tupleize_cols
+
+        # validate mi options
+        if self.has_mi_columns:
+            if cols is not None:
+                raise Exception("cannot specify cols with a multi_index on the columns")
+
         if cols is not None:
             if isinstance(cols,Index):
                 cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
@@ -960,9 +968,8 @@ def _save_header(self):
         obj = self.obj
         index_label = self.index_label
         cols = self.cols
+        has_mi_columns = self.has_mi_columns
         header = self.header
-        has_mi_columns = isinstance(obj.columns, MultiIndex
-                                    ) and not self.multi_index_columns_compat
         encoded_labels = []
 
         has_aliases = isinstance(header, (tuple, list, np.ndarray))
@@ -1017,15 +1024,17 @@ def _save_header(self):
             # write out the names for each level, then ALL of the values for each level
             for i in range(columns.nlevels):
 
-                # name is the first column
-                col_line = [ columns.names[i] ]
+                # we need at least 1 index column to write our col names
+                col_line = []
+                if self.index:
+
+                    # name is the first column
+                    col_line.append( columns.names[i] )
 
-                # skipp len labels-1
-                if self.index and isinstance(index_label,list) and len(index_label)>1:
-                    col_line.extend([ '' ] * (len(index_label)-1))
+                    if isinstance(index_label,list) and len(index_label)>1:
+                        col_line.extend([ '' ] * (len(index_label)-1))
 
-                for j in range(len(columns)):
-                    col_line.append(columns.levels[i][j])
+                col_line.extend(columns.get_level_values(i))
 
                 writer.writerow(col_line)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bb7416b23aab4..d91d21db3ec1b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1250,7 +1250,7 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
 
     @classmethod
     def from_csv(cls, path, header=0, sep=',', index_col=0,
-                 parse_dates=True, encoding=None):
+                 parse_dates=True, encoding=None, tupleize_cols=False):
         """
         Read delimited file into DataFrame
 
@@ -1266,6 +1266,9 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
             is used. Different default from read_table
         parse_dates : boolean, default True
             Parse dates. Different default from read_table
+        tupleize_cols : boolean, default False
+            read multi_index columns as a list of tuples (if True)
+            or in the new (expanded) format (if False)
 
         Notes
         -----
@@ -1280,7 +1283,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
         from pandas.io.parsers import read_table
         return read_table(path, header=header, sep=sep,
                           parse_dates=parse_dates, index_col=index_col,
-                          encoding=encoding)
+                          encoding=encoding,tupleize_cols=False)
 
     @classmethod
     def from_dta(dta, path, parse_dates=True, convert_categoricals=True, encoding=None, index_col=None):
@@ -1392,7 +1395,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                cols=None, header=True, index=True, index_label=None,
                mode='w', nanRep=None, encoding=None, quoting=None,
                line_terminator='\n', chunksize=None,
-               multi_index_columns_compat=False, **kwds):
+               tupleize_cols=True, **kwds):
         """
         Write DataFrame to a comma-separated values (csv) file
 
@@ -1430,9 +1433,9 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
         quoting : optional constant from csv module
             defaults to csv.QUOTE_MINIMAL
         chunksize : rows to write at a time
-        multi_index_columns_compat : boolean, default False
+        tupleize_cols : boolean, default True
             write multi_index columns as a list of tuples (if True)
-            or new (expanded format)m if False)
+            or new (expanded format) if False)
         """
         if nanRep is not None:  # pragma: no cover
             import warnings
@@ -1450,7 +1453,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                                          header=header, index=index,
                                          index_label=index_label,mode=mode,
                                          chunksize=chunksize,engine=kwds.get("engine"),
-                                         multi_index_columns_compat=multi_index_columns_compat)
+                                         tupleize_cols=tupleize_cols)
             formatter.save()
 
     def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 78a941218c1d6..8063a8d667c54 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -127,9 +127,9 @@ class DateConversionError(Exception):
 usecols : array-like
     Return a subset of the columns.
     Results in much faster parsing time and lower memory usage.
-mangle_dup_columns: boolean, default True
+mangle_dupe_cols: boolean, default True
     Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'
-multi_index_columns_compat: boolean, default False
+tupleize_cols: boolean, default False
     Leave a list of tuples on columns as is (default is to convert to
     a Multi Index on the columns)
 
@@ -299,7 +299,7 @@ def _read(filepath_or_buffer, kwds):
     'squeeze': False,
     'compression': None,
     'mangle_dupe_cols': True,
-    'multi_index_columns_compat':False,
+    'tupleize_cols':True,
 }
 
 
@@ -387,7 +387,7 @@ def parser_f(filepath_or_buffer,
                  encoding=None,
                  squeeze=False,
                  mangle_dupe_cols=True,
-                 multi_index_columns_compat=False,
+                 tupleize_cols=True,
                  ):
 
         # Alias sep -> delimiter.
@@ -446,6 +446,7 @@ def parser_f(filepath_or_buffer,
                     low_memory=low_memory,
                     buffer_lines=buffer_lines,
                     mangle_dupe_cols=mangle_dupe_cols,
+                    tupleize_cols=tupleize_cols,
             )
 
         return _read(filepath_or_buffer, kwds)
@@ -540,16 +541,6 @@ def __init__(self, f, engine='python', **kwds):
         if kwds.get('header', 'infer') == 'infer':
             kwds['header'] = 0 if kwds.get('names') is None else None
 
-        # validate header options for mi
-        h = kwds['header']
-        if isinstance(h,(list,tuple,np.ndarray)):
-            if kwds.get('index_col') is None:
-                raise Exception("must have an index_col when have a "
-                                "multi-index header is specified")
-            if kwds.get('as_recarray'):
-                raise Exception("cannot specify as_recarray when "
-                                "specifying a multi-index header")
-
         self.orig_options = kwds
 
         # miscellanea
@@ -737,11 +728,31 @@ def __init__(self, kwds):
         self.na_values = kwds.get('na_values')
         self.true_values = kwds.get('true_values')
         self.false_values = kwds.get('false_values')
-        self.multi_index_columns_compat = kwds.get('multi_index_columns_compat',False)
+        self.tupleize_cols = kwds.get('tupleize_cols',True)
 
         self._date_conv = _make_date_converter(date_parser=self.date_parser,
                                                dayfirst=self.dayfirst)
 
+        # validate header options for mi
+        self.header = kwds.get('header')
+        if isinstance(self.header,(list,tuple,np.ndarray)):
+            if kwds.get('as_recarray'):
+                raise Exception("cannot specify as_recarray when "
+                                "specifying a multi-index header")
+            if kwds.get('usecols'):
+                raise Exception("cannot specify usecols when "
+                                "specifying a multi-index header")
+            if kwds.get('names'):
+                raise Exception("cannot specify names when "
+                                "specifying a multi-index header")
+
+            # validate index_col that only contains integers
+            if self.index_col is not None:
+                if not (isinstance(self.index_col,(list,tuple,np.ndarray)) and all(
+                        [ com.is_integer(i) for i in self.index_col ]) or com.is_integer(self.index_col)):
+                    raise Exception("index_col must only contain row numbers "
+                                    "when specifying a multi-index header")
+                
         self._name_processed = False
 
     @property
@@ -774,27 +785,46 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_
         # 0 is the name of the index, assuming index_col is a list of column
         # numbers 
         ic = self.index_col
+        if ic is None:
+            ic = []
+
         if not isinstance(ic, (list,tuple,np.ndarray)):
             ic = [ ic ]
         sic = set(ic)
 
         orig_header = list(header)
+
+        # clean the index_names
         index_names = header.pop(-1) 
-        index_names = [ index_names[i] for i in ic ]
-        field_count = len(header[0])
+        (index_names, names, 
+         index_col) = _clean_index_names(index_names, self.index_col)
 
+        # extract the columns
+        field_count = len(header[0])
         def extract(r):
             return tuple([ r[i] for i in range(field_count) if i not in sic ])
+        columns = zip(*[ extract(r) for r in header ])
+        names = ic + columns
+
+        # if we find 'Unnamed' all of a single level, then our header was too long
+        for n in range(len(columns[0])):
+            if all([ 'Unnamed' in c[n] for c in columns ]):
+                raise Exception("Passed header=[%s] are too many rows for this "
+                                "multi_index of columns" % ','.join([ str(x) for x in self.header ]))
+
+        # clean the column names (if we have an index_col)
+        if len(ic):
+            col_names = [ r[0] if len(r[0]) and 'Unnamed' not in r[0] else None for r in header ]
+        else:
+            col_names = [ None ] * len(header)
 
-        names = ic + zip(*[ extract(r) for r in header ])
-        col_names = [ r[0] if len(r[0]) else None for r in header ]
         passed_names = True
 
         return names, index_names, col_names, passed_names
 
     def _maybe_make_multi_index_columns(self, columns, col_names=None):
         # possibly create a column mi here
-        if not self.multi_index_columns_compat and len(columns) and not isinstance(
+        if not self.tupleize_cols and len(columns) and not isinstance(
             columns, MultiIndex) and all([ isinstance(c,tuple) for c in columns]):
             columns = MultiIndex.from_tuples(columns,names=col_names)
         return columns
@@ -1008,13 +1038,8 @@ def __init__(self, src, **kwds):
         else:
             if len(self._reader.header) > 1:
                 # we have a multi index in the columns
-                if (self._reader.leading_cols == 0 and
-                    _is_index_col(self.index_col)):
-                    self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns(
-                        self._reader.header, self.index_names, self.col_names, passed_names)
-                else:
-                    raise Exception("must have an index_col when have a multi-index "
-                                    "header is specified")
+                self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns(
+                    self._reader.header, self.index_names, self.col_names, passed_names)
             else:
                 self.names = list(self._reader.header[0])
 
@@ -1248,7 +1273,6 @@ def __init__(self, f, **kwds):
             raise Exception("usecols not supported with engine='python'"
                             " or multicharacter separators (yet).")
 
-        self.header = kwds['header']
         self.encoding = kwds['encoding']
         self.compression = kwds['compression']
         self.skiprows = kwds['skiprows']
@@ -1466,14 +1490,15 @@ def _infer_columns(self):
                     else:
                         this_columns.append(c)
 
-                if self.mangle_dupe_cols:
-                    counts = {}
-                    for i, col in enumerate(this_columns):
-                        cur_count = counts.get(col, 0)
-                        if cur_count > 0:
-                            this_columns[i] = '%s.%d' % (col, cur_count)
-                        counts[col] = cur_count + 1
-
+                if not have_mi_columns:
+                    if self.mangle_dupe_cols:
+                        counts = {}
+                        for i, col in enumerate(this_columns):
+                            cur_count = counts.get(col, 0)
+                            if cur_count > 0:
+                                this_columns[i] = '%s.%d' % (col, cur_count)
+                            counts[col] = cur_count + 1
+        
                 columns.append(this_columns)
 
             self._clear_buffer()
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index b9e773a916d4c..be47f28749848 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -1014,20 +1014,30 @@ def test_header_multi_index(self):
 
         # basic test with both engines
         for engine in ['c','python']:
-            df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1], engine=engine)
+            df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1], tupleize_cols=False,
+                          engine=engine)
             tm.assert_frame_equal(df, expected)
 
-        # must specify index_col
-        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3])
+        # skipping lines in the header
+        df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1], tupleize_cols=False)
+        tm.assert_frame_equal(df, expected)
+
+        #### invalid options ####
 
         # no as_recarray
         self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
-                          index_col=[0,1], as_recarray=True)
-
-        # skipping lines in the header
-        df = read_csv(StringIO(data), header=[0,2,3,4],index_col=[0,1])
-        tm.assert_frame_equal(df, expected)
+                          index_col=[0,1], as_recarray=True, tupleize_cols=False)
 
+        # names
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+                          index_col=[0,1], names=['foo','bar'], tupleize_cols=False)
+        # usecols
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+                          index_col=[0,1], usecols=['foo','bar'], tupleize_cols=False)
+        # non-numeric index_col
+        self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3], 
+                          index_col=['foo','bar'], tupleize_cols=False)
+        
     def test_pass_names_with_index(self):
         lines = self.data1.split('\n')
         no_header = '\n'.join(lines[1:])
diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
index 62e9d39cd792d..46f74cbdc885d 100644
--- a/pandas/src/parser.pyx
+++ b/pandas/src/parser.pyx
@@ -252,7 +252,7 @@ cdef class TextReader:
         object encoding
         object compression
         object mangle_dupe_cols
-        object multi_index_columns_compat
+        object tupleize_cols
         set noconvert, usecols
 
     def __cinit__(self, source,
@@ -306,13 +306,13 @@ cdef class TextReader:
                   skip_footer=0,
                   verbose=False,
                   mangle_dupe_cols=True,
-                  multi_index_columns_compat=False):
+                  tupleize_cols=True):
 
         self.parser = parser_new()
         self.parser.chunksize = tokenize_chunksize
 
         self.mangle_dupe_cols=mangle_dupe_cols
-        self.multi_index_columns_compat=multi_index_columns_compat
+        self.tupleize_cols=tupleize_cols
 
         # For timekeeping
         self.clocks = []
@@ -452,6 +452,7 @@ cdef class TextReader:
             if isinstance(header, list) and len(header):
                 # need to artifically skip the final line
                 # which is still a header line
+                header = list(header)
                 header.append(header[-1]+1)
 
                 self.parser.header_start = header[0]
@@ -611,7 +612,7 @@ cdef class TextReader:
                             name = 'Unnamed: %d' % i
 
                     count = counts.get(name, 0)
-                    if count > 0 and self.mangle_dupe_cols:
+                    if count > 0 and self.mangle_dupe_cols and not self.has_mi_columns:
                         this_header.append('%s.%d' % (name, count))
                     else:
                         this_header.append(name)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 101bdc76ba443..68e69768097e7 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4755,13 +4755,15 @@ def test_to_csv_moar(self):
         def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None,
                      dupe_col=False):
 
-               header = 0
                if cnlvl:
-                    header = range(cnlvl)
-
-               with ensure_clean(path) as path:
-                    df.to_csv(path,encoding='utf8',chunksize=chunksize)
-                    recons = DataFrame.from_csv(path,header=header,parse_dates=False)
+                   header = range(cnlvl)
+                   with ensure_clean(path) as path:
+                        df.to_csv(path,encoding='utf8',chunksize=chunksize,tupleize_cols=False)
+                        recons = DataFrame.from_csv(path,header=range(cnlvl),tupleize_cols=False,parse_dates=False)
+               else:
+                   with ensure_clean(path) as path:
+                       df.to_csv(path,encoding='utf8',chunksize=chunksize)
+                       recons = DataFrame.from_csv(path,header=0,parse_dates=False)
 
                def _to_uni(x):
                    if not isinstance(x,unicode):
@@ -4991,34 +4993,80 @@ def test_to_csv_multiindex(self):
         with ensure_clean(pname) as path:
             # GH3571, GH1651, GH3141
 
+            def _make_frame(names=None):
+                if names is True:
+                    names = ['first','second']
+                return DataFrame(np.random.randint(0,10,size=(3,3)), 
+                                 columns=MultiIndex.from_tuples([('bah', 'foo'), 
+                                                                 ('bah', 'bar'), 
+                                                                 ('ban', 'baz')],
+                                                                names=names))
+
             # column & index are multi-index
             df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
-            df.to_csv(path)
-            result = read_csv(path,header=[0,1,2,3],index_col=[0,1])
+            df.to_csv(path,tupleize_cols=False)
+            result = read_csv(path,header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
             assert_frame_equal(df,result)
 
             # column is mi
             df = mkdf(5,3,r_idx_nlevels=1,c_idx_nlevels=4)
-            df.to_csv(path)
-            result = read_csv(path,header=[0,1,2,3],index_col=0)
+            df.to_csv(path,tupleize_cols=False)
+            result = read_csv(path,header=[0,1,2,3],index_col=0,tupleize_cols=False)
             assert_frame_equal(df,result)
 
             # dup column names?
             df = mkdf(5,3,r_idx_nlevels=3,c_idx_nlevels=4)
-            df.to_csv(path)
-            result = read_csv(path,header=[0,1,2,3],index_col=[0,1])
+            df.to_csv(path,tupleize_cols=False)
+            result = read_csv(path,header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
             result.columns = ['R2','A','B','C']
             new_result = result.reset_index().set_index(['R0','R1','R2'])
             new_result.columns = df.columns
             assert_frame_equal(df,new_result)
 
+            # writing with no index
+            df = _make_frame()
+            df.to_csv(path,tupleize_cols=False,index=False)
+            result = read_csv(path,header=[0,1],tupleize_cols=False)
+            assert_frame_equal(df,result)
+
+            # we lose the names here
+            df = _make_frame(True)
+            df.to_csv(path,tupleize_cols=False,index=False)
+            result = read_csv(path,header=[0,1],tupleize_cols=False)
+            self.assert_(all([ x is None for x in result.columns.names ]))
+            result.columns.names = df.columns.names
+            assert_frame_equal(df,result)
+
+            # whatsnew example
+            df = _make_frame()
+            df.to_csv(path,tupleize_cols=False)
+            result = read_csv(path,header=[0,1],index_col=[0],tupleize_cols=False)
+            assert_frame_equal(df,result)
+
+            df = _make_frame(True)
+            df.to_csv(path,tupleize_cols=False)
+            result = read_csv(path,header=[0,1],index_col=[0],tupleize_cols=False)
+            assert_frame_equal(df,result)
+
             # column & index are multi-index (compatibility)
             df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
-            df.to_csv(path,multi_index_columns_compat=True)
-            result = read_csv(path,header=0,index_col=[0,1],multi_index_columns_compat=True)
+            df.to_csv(path,tupleize_cols=True)
+            result = read_csv(path,header=0,index_col=[0,1],tupleize_cols=True)
             result.columns = df.columns
             assert_frame_equal(df,result)
 
+            # invalid options
+            df = _make_frame(True)
+            df.to_csv(path,tupleize_cols=False)
+
+            # catch invalid headers
+            for i in [3,4,5,6,7]: 
+                 self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=range(i), index_col=0)
+            self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0)
+
+            # write with cols
+            self.assertRaises(Exception, df.to_csv, path,tupleize_cols=False,cols=['foo','bar'])
+
         with ensure_clean(pname) as path:
             # empty
             tsframe[:0].to_csv(path)

From faf4d53c58bbe430942afc3775e29192318beac7 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Sat, 18 May 2013 19:12:58 -0400
Subject: [PATCH 7/7] TST: test for tupleize_cols=True,index=False

TST: better error messages on multi_index column read failure
---
 pandas/io/parsers.py       |  4 ++--
 pandas/src/parser.pyx      | 10 +++++++---
 pandas/tests/test_frame.py | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8063a8d667c54..61be871e62595 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -809,8 +809,8 @@ def extract(r):
         # if we find 'Unnamed' all of a single level, then our header was too long
         for n in range(len(columns[0])):
             if all([ 'Unnamed' in c[n] for c in columns ]):
-                raise Exception("Passed header=[%s] are too many rows for this "
-                                "multi_index of columns" % ','.join([ str(x) for x in self.header ]))
+                raise _parser.CParserError("Passed header=[%s] are too many rows for this "
+                                           "multi_index of columns" % ','.join([ str(x) for x in self.header ]))
 
         # clean the column names (if we have an index_col)
         if len(ic):
diff --git a/pandas/src/parser.pyx b/pandas/src/parser.pyx
index 46f74cbdc885d..ee92e2e60960c 100644
--- a/pandas/src/parser.pyx
+++ b/pandas/src/parser.pyx
@@ -244,7 +244,7 @@ cdef class TextReader:
         object na_values, true_values, false_values
         object memory_map
         object as_recarray
-        object header, names, header_start, header_end
+        object header, orig_header, names, header_start, header_end
         object low_memory
         object skiprows
         object compact_ints, use_unsigned
@@ -441,6 +441,7 @@ cdef class TextReader:
 
         # TODO: no header vs. header is not the first row
         self.has_mi_columns = 0
+        self.orig_header = header
         if header is None:
             # sentinel value
             self.parser.header_start = -1
@@ -585,8 +586,11 @@ cdef class TextReader:
 
                 # e.g., if header=3 and file only has 2 lines
                 if self.parser.lines < hr + 1:
-                    raise CParserError('Passed header=%d but only %d lines in file'
-                                       % (self.parser.header, self.parser.lines))
+                    msg = self.orig_header
+                    if isinstance(msg,list):
+                           msg = "[%s], len of %d," % (','.join([ str(m) for m in msg ]),len(msg))
+                    raise CParserError('Passed header=%s but only %d lines in file'
+                                       % (msg, self.parser.lines))
 
                 field_count = self.parser.line_fields[hr]
                 start = self.parser.line_start[hr]
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 68e69768097e7..fa2e8131b6916 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -5037,6 +5037,13 @@ def _make_frame(names=None):
             result.columns.names = df.columns.names
             assert_frame_equal(df,result)
 
+            # tupleize_cols=True and index=False
+            df = _make_frame(True)
+            df.to_csv(path,tupleize_cols=True,index=False)
+            result = read_csv(path,header=0,tupleize_cols=True,index_col=None)
+            result.columns = df.columns
+            assert_frame_equal(df,result)
+
             # whatsnew example
             df = _make_frame()
             df.to_csv(path,tupleize_cols=False)
@@ -5060,6 +5067,18 @@ def _make_frame(names=None):
             df.to_csv(path,tupleize_cols=False)
 
             # catch invalid headers
+            try:
+                read_csv(path,tupleize_cols=False,header=range(3),index_col=0)
+            except (Exception), detail:
+                if not str(detail).startswith('Passed header=[0,1,2] are too many rows for this multi_index of columns'):
+                    raise AssertionError("failure in read_csv header=range(3)")
+
+            try:
+                read_csv(path,tupleize_cols=False,header=range(7),index_col=0)  
+            except (Exception), detail:
+                if not str(detail).startswith('Passed header=[0,1,2,3,4,5,6], len of 7, but only 6 lines in file'):
+                    raise AssertionError("failure in read_csv header=range(7)")
+
             for i in [3,4,5,6,7]: 
                  self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=range(i), index_col=0)
             self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0)