Skip to content

Commit ced00bc

Browse files
committed
ENH: Added multicolumn/multirow support for latex
- [X] closes pandas-dev#13508 - [X] tests added / passed - [X] passes `git diff upstream/master | flake8 --diff` - [X] whatsnew entry Print names of MultiIndex columns. Added "multicolumn" and "multirow" flags to to_latex which trigger the corresponding feature. "multicolumn_format" is used to select alignment. Multirow adds clines to visually separate sections.
1 parent 2eb6d38 commit ced00bc

File tree

6 files changed

+429
-170
lines changed

6 files changed

+429
-170
lines changed

doc/source/options.rst

Lines changed: 150 additions & 145 deletions
Large diffs are not rendered by default.

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Other enhancements
181181
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
182182
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
183183
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
184+
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements (:issue:`13508`).
184185
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
185186
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
186187
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).

pandas/core/config_init.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,35 @@
239239
: bool
240240
This specifies if the to_latex method of a Dataframe uses escapes special
241241
characters.
242-
method. Valid values: False,True
242+
Valid values: False,True
243243
"""
244244

245245
pc_latex_longtable = """
246246
:bool
247247
This specifies if the to_latex method of a Dataframe uses the longtable
248248
format.
249-
method. Valid values: False,True
249+
Valid values: False,True
250+
"""
251+
252+
pc_latex_multicolumn = """
253+
: bool
254+
This specifies if the to_latex method of a Dataframe uses multicolumns
255+
to pretty-print MultiIndex columns.
256+
Valid values: False,True
257+
"""
258+
259+
pc_latex_multicolumn_format = """
260+
: string
261+
This specifies the format for multicolumn headers.
262+
Can be surrounded with '|'.
263+
Valid values: 'l', 'c', 'r', 'p{<width>}'
264+
"""
265+
266+
pc_latex_multirow = """
267+
: bool
268+
This specifies if the to_latex method of a Dataframe uses multirows
269+
to pretty-print MultiIndex rows.
270+
Valid values: False,True
250271
"""
251272

252273
style_backup = dict()
@@ -339,6 +360,12 @@ def mpl_style_cb(key):
339360
validator=is_bool)
340361
cf.register_option('latex.longtable', False, pc_latex_longtable,
341362
validator=is_bool)
363+
cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
364+
validator=is_bool)
365+
# Register the option with its own description; it was previously
# documented with the help text of 'latex.multicolumn'.
cf.register_option('latex.multicolumn_format', 'l',
                   pc_latex_multicolumn_format, validator=is_text)
367+
cf.register_option('latex.multirow', False, pc_latex_multirow,
368+
validator=is_bool)
342369

343370
cf.deprecate_option('display.line_width',
344371
msg=pc_line_width_deprecation_warning,

pandas/core/frame.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,10 +1597,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15971597
index=True, na_rep='NaN', formatters=None, float_format=None,
15981598
sparsify=None, index_names=True, bold_rows=True,
15991599
column_format=None, longtable=None, escape=None,
1600-
encoding=None, decimal='.'):
1601-
"""
1600+
encoding=None, decimal='.', multicolumn=None,
1601+
multicolumn_format=None, multirow=None):
1602+
r"""
16021603
Render a DataFrame to a tabular environment table. You can splice
1603-
this into a LaTeX document. Requires \\usepackage{booktabs}.
1604+
this into a LaTeX document. Requires \usepackage{booktabs}.
16041605
16051606
`to_latex`-specific options:
16061607
@@ -1611,27 +1612,54 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16111612
<https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3
16121613
columns
16131614
longtable : boolean, default will be read from the pandas config module
1614-
default: False
1615+
Default: False.
16151616
Use a longtable environment instead of tabular. Requires adding
1616-
a \\usepackage{longtable} to your LaTeX preamble.
1617+
a \usepackage{longtable} to your LaTeX preamble.
16171618
escape : boolean, default will be read from the pandas config module
1618-
default: True
1619+
Default: True.
16191620
When set to False prevents from escaping latex special
16201621
characters in column names.
16211622
encoding : str, default None
16221623
A string representing the encoding to use in the output file,
16231624
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
16241625
decimal : string, default '.'
1625-
Character recognized as decimal separator, e.g. ',' in Europe
1626+
Character recognized as decimal separator, e.g. ',' in Europe.
16261627
16271628
.. versionadded:: 0.18.0
16281629
1630+
multicolumn : boolean, default True
1631+
Use \multicolumn to enhance MultiIndex columns.
1632+
The default will be read from the config module.
1633+
1634+
.. versionadded:: 0.20.0
1635+
1636+
multicolumn_format : str, default 'l'
1637+
The alignment for multicolumns, similar to `column_format`.
1638+
The default will be read from the config module.
1639+
1640+
.. versionadded:: 0.20.0
1641+
1642+
multirow : boolean, default False
1643+
Use \multirow to enhance MultiIndex rows.
1644+
Requires adding a \usepackage{multirow} to your LaTeX preamble.
1645+
Will print centered labels (instead of top-aligned)
1646+
across the contained rows, separating groups via clines.
1647+
The default will be read from the pandas config module.
1648+
1649+
.. versionadded:: 0.20.0
1650+
16291651
"""
16301652
# Get defaults from the pandas config
16311653
if longtable is None:
16321654
longtable = get_option("display.latex.longtable")
16331655
if escape is None:
16341656
escape = get_option("display.latex.escape")
1657+
if multicolumn is None:
1658+
multicolumn = get_option("display.latex.multicolumn")
1659+
if multicolumn_format is None:
1660+
multicolumn_format = get_option("display.latex.multicolumn_format")
1661+
if multirow is None:
1662+
multirow = get_option("display.latex.multirow")
16351663

16361664
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
16371665
col_space=col_space, na_rep=na_rep,
@@ -1643,7 +1671,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16431671
index_names=index_names,
16441672
escape=escape, decimal=decimal)
16451673
formatter.to_latex(column_format=column_format, longtable=longtable,
1646-
encoding=encoding)
1674+
encoding=encoding, multicolumn=multicolumn,
1675+
multicolumn_format=multicolumn_format,
1676+
multirow=multirow)
16471677

16481678
if buf is None:
16491679
return formatter.buf.getvalue()

pandas/formats/format.py

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -650,13 +650,17 @@ def _join_multiline(self, *strcols):
650650
st = ed
651651
return '\n\n'.join(str_lst)
652652

653-
def to_latex(self, column_format=None, longtable=False, encoding=None):
653+
def to_latex(self, column_format=None, longtable=False, encoding=None,
654+
multicolumn=False, multicolumn_format=None, multirow=False):
654655
"""
655656
Render a DataFrame to a LaTeX tabular/longtable environment output.
656657
"""
657658

658659
latex_renderer = LatexFormatter(self, column_format=column_format,
659-
longtable=longtable)
660+
longtable=longtable,
661+
multicolumn=multicolumn,
662+
multicolumn_format=multicolumn_format,
663+
multirow=multirow)
660664

661665
if encoding is None:
662666
encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -824,11 +828,15 @@ class LatexFormatter(TableFormatter):
824828
HTMLFormatter
825829
"""
826830

827-
def __init__(self, formatter, column_format=None, longtable=False):
831+
def __init__(self, formatter, column_format=None, longtable=False,
832+
multicolumn=False, multicolumn_format=None, multirow=False):
828833
self.fmt = formatter
829834
self.frame = self.fmt.frame
830835
self.column_format = column_format
831836
self.longtable = longtable
837+
self.multicolumn = multicolumn
838+
self.multicolumn_format = multicolumn_format
839+
self.multirow = multirow
832840

833841
def write_result(self, buf):
834842
"""
@@ -850,14 +858,21 @@ def get_col_type(dtype):
850858
else:
851859
return 'l'
852860

861+
# reestablish the MultiIndex that has been joined by _to_str_column
853862
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
854863
clevels = self.frame.columns.nlevels
855864
strcols.pop(0)
856865
name = any(self.frame.index.names)
866+
cname = any(self.frame.columns.names)
867+
lastcol = self.frame.index.nlevels - 1
857868
for i, lev in enumerate(self.frame.index.levels):
858869
lev2 = lev.format()
859870
blank = ' ' * len(lev2[0])
860-
lev3 = [blank] * clevels
871+
# display column names in last index-column
872+
if cname and i == lastcol:
873+
lev3 = [x if x else '{}' for x in self.frame.columns.names]
874+
else:
875+
lev3 = [blank] * clevels
861876
if name:
862877
lev3.append(lev.name)
863878
for level_idx, group in itertools.groupby(
@@ -885,10 +900,15 @@ def get_col_type(dtype):
885900
buf.write('\\begin{longtable}{%s}\n' % column_format)
886901
buf.write('\\toprule\n')
887902

888-
nlevels = self.frame.columns.nlevels
903+
ilevels = self.frame.index.nlevels
904+
clevels = self.frame.columns.nlevels
905+
nlevels = clevels
889906
if any(self.frame.index.names):
890907
nlevels += 1
891-
for i, row in enumerate(zip(*strcols)):
908+
strrows = list(zip(*strcols))
909+
self.clinebuf = []
910+
911+
for i, row in enumerate(strrows):
892912
if i == nlevels and self.fmt.header:
893913
buf.write('\\midrule\n') # End of header
894914
if self.longtable:
@@ -910,15 +930,94 @@ def get_col_type(dtype):
910930
if x else '{}') for x in row]
911931
else:
912932
crow = [x if x else '{}' for x in row]
933+
if i < clevels and self.fmt.header and self.multicolumn:
934+
# sum up columns to multicolumns
935+
crow = self._format_multicolumn(crow, ilevels)
936+
if i >= nlevels and self.fmt.index and self.multirow and\
937+
ilevels > 1:
938+
# sum up rows to multirows
939+
crow = self._format_multirow(crow, ilevels, i, strrows)
913940
buf.write(' & '.join(crow))
914941
buf.write(' \\\\\n')
942+
if self.multirow and i < len(strrows) - 1:
943+
self._print_cline(buf, i, len(strcols))
915944

916945
if not self.longtable:
917946
buf.write('\\bottomrule\n')
918947
buf.write('\\end{tabular}\n')
919948
else:
920949
buf.write('\\end{longtable}\n')
921950

951+
def _format_multicolumn(self, row, ilevels):
952+
"""
953+
Combine columns belonging to a group to a single multicolumn entry
954+
according to self.multicolumn_format
955+
956+
e.g.:
957+
a & & & b & c &
958+
will become
959+
\multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
960+
"""
961+
row2 = list(row[:ilevels])
962+
ncol = 1
963+
coltext = ''
964+
965+
def append_col():
966+
# write multicolumn if needed
967+
if ncol > 1:
968+
row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}'
969+
.format(ncol, self.multicolumn_format,
970+
coltext.strip()))
971+
# don't modify where not needed
972+
else:
973+
row2.append(coltext)
974+
for c in row[ilevels:]:
975+
if c.strip(): # if next col has text, write the previous
976+
if coltext:
977+
append_col()
978+
coltext = c
979+
ncol = 1
980+
else: # if not, add it to the previous multicolumn
981+
ncol += 1
982+
if coltext: # write last column name
983+
append_col()
984+
return row2
985+
986+
def _format_multirow(self, row, ilevels, i, rows):
987+
"""
988+
Check following rows, whether row should be a multirow
989+
990+
e.g.: becomes:
991+
a & 0 & \multirow{2}{*}{a} & 0 &
992+
& 1 & & 1 &
993+
b & 0 & \cline{1-2}
994+
b & 0 &
995+
"""
996+
for j in range(ilevels):
997+
if row[j].strip():
998+
nrow = 1
999+
for r in rows[i + 1:]:
1000+
if not r[j].strip():
1001+
nrow += 1
1002+
else:
1003+
break
1004+
if nrow > 1:
1005+
# overwrite non-multirow entry
1006+
row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'\
1007+
.format(nrow, row[j].strip())
1008+
# save when to end the current block with \cline
1009+
self.clinebuf.append([i + nrow - 1, j + 1])
1010+
return row
1011+
1012+
def _print_cline(self, buf, i, l):
1013+
"""
1014+
Print clines after multirow-blocks are finished
1015+
"""
1016+
for cl in self.clinebuf:
1017+
if cl[0] == i:
1018+
buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], l))
1019+
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
1020+
9221021

9231022
class HTMLFormatter(TableFormatter):
9241023

0 commit comments

Comments
 (0)