Skip to content

Commit 6bb865a

Browse files
committed
ENH: Added multicolumn/multirow support for latex
- [X] closes pandas-dev#13508 - [X] tests added / passed - [X] passes `git diff upstream/master | flake8 --diff` - [X] whatsnew entry Print names of MultiIndex columns. Added "multicolumn" and "multirow" flags to to_latex which trigger the corresponding feature. "multicolumn_format" is used to select alignment. Multirow adds clines to visually separate sections.
1 parent a1b6587 commit 6bb865a

File tree

5 files changed

+200
-11
lines changed

5 files changed

+200
-11
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ Other enhancements
141141
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
142142
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
143143
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
144+
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements (:issue:`13508`)
144145
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
145146
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
146147
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).

pandas/core/config_init.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,27 @@
249249
method. Valid values: False,True
250250
"""
251251

252+
# Description text for the boolean ``latex.multicolumn`` option.
# The stray "method." fragment (copy-pasted from a sibling description) is
# removed and the class name is spelled ``DataFrame``.
pc_latex_multicolumn = """
: bool
This specifies if the to_latex method of a DataFrame uses multicolumns
to pretty-print MultiIndex columns.
Valid values: False,True
"""
258+
259+
pc_latex_multicolumn_format = """
260+
: string
261+
This specifies the format for multicolumn headers.
262+
Can be surrounded with '|'.
263+
Valid values: 'l', 'c', 'r', 'p{<width>}'
264+
"""
265+
266+
# Description text for the boolean ``latex.multirow`` option.
# The stray "method." fragment (copy-pasted from a sibling description) is
# removed and the class name is spelled ``DataFrame``.
pc_latex_multirow = """
: bool
This specifies if the to_latex method of a DataFrame uses multirows
to pretty-print MultiIndex rows.
Valid values: False,True
"""
272+
252273
style_backup = dict()
253274

254275

@@ -338,6 +359,12 @@ def mpl_style_cb(key):
338359
validator=is_bool)
339360
cf.register_option('latex.longtable', False, pc_latex_longtable,
340361
validator=is_bool)
362+
cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
363+
validator=is_bool)
364+
# Register the alignment string used for multicolumn headers. The option
# must be documented by pc_latex_multicolumn_format; passing the boolean
# option's description (pc_latex_multicolumn) made describe_option() show
# the wrong help text and left pc_latex_multicolumn_format unused.
cf.register_option('latex.multicolumn_format', 'l',
                   pc_latex_multicolumn_format, validator=is_text)
366+
cf.register_option('latex.multirow', False, pc_latex_multirow,
367+
validator=is_bool)
341368

342369
cf.deprecate_option('display.line_width',
343370
msg=pc_line_width_deprecation_warning,

pandas/core/frame.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,10 +1577,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15771577
index=True, na_rep='NaN', formatters=None, float_format=None,
15781578
sparsify=None, index_names=True, bold_rows=True,
15791579
column_format=None, longtable=None, escape=None,
1580-
encoding=None, decimal='.'):
1581-
"""
1580+
encoding=None, decimal='.', multicolumn=None,
1581+
multicolumn_format=None, multirow=None):
1582+
r"""
15821583
Render a DataFrame to a tabular environment table. You can splice
1583-
this into a LaTeX document. Requires \\usepackage{booktabs}.
1584+
this into a LaTeX document. Requires \usepackage{booktabs}.
15841585
15851586
`to_latex`-specific options:
15861587
@@ -1593,7 +1594,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15931594
longtable : boolean, default will be read from the pandas config module
15941595
default: False
15951596
Use a longtable environment instead of tabular. Requires adding
1596-
a \\usepackage{longtable} to your LaTeX preamble.
1597+
a \usepackage{longtable} to your LaTeX preamble.
15971598
escape : boolean, default will be read from the pandas config module
15981599
default: True
15991600
When set to False prevents from escaping latex special
@@ -1606,12 +1607,37 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16061607
16071608
.. versionadded:: 0.18.0
16081609
1610+
multicolumn : boolean, default True
1611+
default will be read from the config module
1612+
Use \multicolumn to enhance MultiIndex columns.
1613+
1614+
.. versionadded:: 0.20.0
1615+
1616+
multicolumn_format : str, default 'l'
1617+
default will be read from the config module
1618+
The alignment for multicolumns, similar to column_format
1619+
1620+
.. versionadded:: 0.20.0
1621+
1622+
multirow : boolean, default False
1623+
default will be read from the pandas config module
1624+
Use \multirow to enhance MultiIndex rows. Requires adding a
1625+
\usepackage{multirow} to your LaTeX preamble.
1626+
1627+
.. versionadded:: 0.20.0
1628+
16091629
"""
16101630
# Get defaults from the pandas config
16111631
if longtable is None:
16121632
longtable = get_option("display.latex.longtable")
16131633
if escape is None:
16141634
escape = get_option("display.latex.escape")
1635+
if multicolumn is None:
1636+
multicolumn = get_option("display.latex.multicolumn")
1637+
if multicolumn_format is None:
1638+
multicolumn_format = get_option("display.latex.multicolumn_format")
1639+
if multirow is None:
1640+
multirow = get_option("display.latex.multirow")
16151641

16161642
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
16171643
col_space=col_space, na_rep=na_rep,
@@ -1623,7 +1649,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16231649
index_names=index_names,
16241650
escape=escape, decimal=decimal)
16251651
formatter.to_latex(column_format=column_format, longtable=longtable,
1626-
encoding=encoding)
1652+
encoding=encoding, multicolumn=multicolumn,
1653+
multicolumn_format=multicolumn_format,
1654+
multirow=multirow)
16271655

16281656
if buf is None:
16291657
return formatter.buf.getvalue()

pandas/formats/format.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -646,13 +646,17 @@ def _join_multiline(self, *strcols):
646646
st = ed
647647
return '\n\n'.join(str_lst)
648648

649-
def to_latex(self, column_format=None, longtable=False, encoding=None):
649+
def to_latex(self, column_format=None, longtable=False, encoding=None,
650+
multicolumn=False, multicolumn_format=None, multirow=False):
650651
"""
651652
Render a DataFrame to a LaTeX tabular/longtable environment output.
652653
"""
653654

654655
latex_renderer = LatexFormatter(self, column_format=column_format,
655-
longtable=longtable)
656+
longtable=longtable,
657+
multicolumn=multicolumn,
658+
multicolumn_format=multicolumn_format,
659+
multirow=multirow)
656660

657661
if encoding is None:
658662
encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -820,11 +824,15 @@ class LatexFormatter(TableFormatter):
820824
HTMLFormatter
821825
"""
822826

823-
def __init__(self, formatter, column_format=None, longtable=False):
827+
def __init__(self, formatter, column_format=None, longtable=False,
828+
multicolumn=False, multicolumn_format=None, multirow=False):
824829
self.fmt = formatter
825830
self.frame = self.fmt.frame
826831
self.column_format = column_format
827832
self.longtable = longtable
833+
self.multicolumn = multicolumn
834+
self.multicolumn_format = multicolumn_format
835+
self.multirow = multirow
828836

829837
def write_result(self, buf):
830838
"""
@@ -846,14 +854,21 @@ def get_col_type(dtype):
846854
else:
847855
return 'l'
848856

857+
# reestablish the MultiIndex that has been joined by _to_str_column
849858
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
850859
clevels = self.frame.columns.nlevels
851860
strcols.pop(0)
852861
name = any(self.frame.index.names)
862+
cname = any(self.frame.columns.names)
863+
lastcol = self.frame.index.nlevels - 1
853864
for i, lev in enumerate(self.frame.index.levels):
854865
lev2 = lev.format()
855866
blank = ' ' * len(lev2[0])
856-
lev3 = [blank] * clevels
867+
# display column names in last index-column
868+
if cname and i == lastcol:
869+
lev3 = [x if x else '{}' for x in self.frame.columns.names]
870+
else:
871+
lev3 = [blank] * clevels
857872
if name:
858873
lev3.append(lev.name)
859874
for level_idx, group in itertools.groupby(
@@ -873,6 +888,7 @@ def get_col_type(dtype):
873888
compat.string_types): # pragma: no cover
874889
raise AssertionError('column_format must be str or unicode, not %s'
875890
% type(column_format))
891+
multicolumn_format = self.multicolumn_format
876892

877893
if not self.longtable:
878894
buf.write('\\begin{tabular}{%s}\n' % column_format)
@@ -881,10 +897,15 @@ def get_col_type(dtype):
881897
buf.write('\\begin{longtable}{%s}\n' % column_format)
882898
buf.write('\\toprule\n')
883899

884-
nlevels = self.frame.columns.nlevels
900+
ilevels = self.frame.index.nlevels
901+
clevels = self.frame.columns.nlevels
902+
nlevels = clevels
885903
if any(self.frame.index.names):
886904
nlevels += 1
887-
for i, row in enumerate(zip(*strcols)):
905+
strrows = list(zip(*strcols))
906+
clinebuf = []
907+
908+
for i, row in enumerate(strrows):
888909
if i == nlevels and self.fmt.header:
889910
buf.write('\\midrule\n') # End of header
890911
if self.longtable:
@@ -906,8 +927,59 @@ def get_col_type(dtype):
906927
if x else '{}') for x in row]
907928
else:
908929
crow = [x if x else '{}' for x in row]
930+
if i < clevels and self.fmt.header and self.multicolumn:
931+
# sum up columns to multicolumns
932+
row2 = list(crow[:ilevels])
933+
ncol = 1
934+
coltext = ''
935+
936+
def append_col():
937+
# write multicolumn if needed
938+
if ncol > 1:
939+
row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}'
940+
.format(ncol, multicolumn_format,
941+
coltext.strip()))
942+
# don't modify where not needed
943+
else:
944+
row2.append(coltext)
945+
for c in crow[ilevels:]:
946+
if c.strip(): # if next col has text, write the previous
947+
if coltext:
948+
append_col()
949+
coltext = c
950+
ncol = 1
951+
else: # if not, add it to the previous multicolumn
952+
ncol += 1
953+
if coltext: # write last column name
954+
append_col()
955+
crow = row2
956+
if i >= nlevels and self.fmt.index and self.multirow:
957+
# perform look-ahead to determine which rows can be joined
958+
for j in range(ilevels):
959+
if crow[j].strip():
960+
nrow = 1
961+
for r in strrows[i + 1:]:
962+
if not r[j].strip():
963+
nrow += 1
964+
else:
965+
break
966+
if nrow > 1:
967+
# overwrite non-multirow entry
968+
crow[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'\
969+
.format(nrow, crow[j].strip())
970+
# save when to end the current block with \cline
971+
clinebuf.append([nrow, j + 1])
909972
buf.write(' & '.join(crow))
910973
buf.write(' \\\\\n')
974+
if self.multirow and i < len(strrows) - 1:
975+
# during main block, check if sub-block transition takes place
976+
# print \cline to distinguish \multirow-areas
977+
for cl in clinebuf:
978+
cl[0] -= 1
979+
if cl[0] == 0:
980+
# Escape the backslash explicitly: a bare '\c' in a non-raw string is an
# invalid escape sequence, which newer Python versions warn about. The
# text written to buf is unchanged.
buf.write('\\cline{{{0:d}-{1:d}}}\n'.format(cl[1], len(strcols)))
982+
clinebuf = [x for x in clinebuf if x[0]]
911983

912984
if not self.longtable:
913985
buf.write('\\bottomrule\n')

pandas/tests/formats/test_format.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3571,6 +3571,67 @@ def test_to_latex_multiindex(self):
35713571

35723572
self.assertEqual(result, expected)
35733573

3574+
def test_to_latex_multicolumnrow(self):
3575+
df = pd.DataFrame({
3576+
('c1',0):dict((x,x) for x in range(5)),
3577+
('c1',1):dict((x,x+5) for x in range(5)),
3578+
('c2',0):dict((x,x) for x in range(5)),
3579+
('c2',1):dict((x,x+5) for x in range(5)),
3580+
('c3',0):dict((x,x) for x in range(5))
3581+
})
3582+
result = df.to_latex(multicolumn=True)
3583+
expected = r"""\begin{tabular}{lrrrrr}
3584+
\toprule
3585+
{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
3586+
{} & 0 & 1 & 0 & 1 & 0 \\
3587+
\midrule
3588+
0 & 0 & 5 & 0 & 5 & 0 \\
3589+
1 & 1 & 6 & 1 & 6 & 1 \\
3590+
2 & 2 & 7 & 2 & 7 & 2 \\
3591+
3 & 3 & 8 & 3 & 8 & 3 \\
3592+
4 & 4 & 9 & 4 & 9 & 4 \\
3593+
\bottomrule
3594+
\end{tabular}
3595+
"""
3596+
self.assertEqual(result, expected)
3597+
3598+
result = df.T.to_latex(multirow=True)
3599+
expected = r"""\begin{tabular}{llrrrrr}
3600+
\toprule
3601+
& & 0 & 1 & 2 & 3 & 4 \\
3602+
\midrule
3603+
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
3604+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3605+
\cline{1-7}
3606+
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
3607+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3608+
\cline{1-7}
3609+
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
3610+
\bottomrule
3611+
\end{tabular}
3612+
"""
3613+
self.assertEqual(result, expected)
3614+
3615+
df.index = df.T.index
3616+
result = df.T.to_latex(multirow=True, multicolumn=True,
3617+
multicolumn_format='c')
3618+
expected = r"""\begin{tabular}{llrrrrr}
3619+
\toprule
3620+
& & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
3621+
& & 0 & 1 & 0 & 1 & 0 \\
3622+
\midrule
3623+
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
3624+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3625+
\cline{1-7}
3626+
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
3627+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3628+
\cline{1-7}
3629+
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
3630+
\bottomrule
3631+
\end{tabular}
3632+
"""
3633+
self.assertEqual(result, expected)
3634+
35743635
def test_to_latex_escape(self):
35753636
a = 'a'
35763637
b = 'b'

0 commit comments

Comments
 (0)