Skip to content

Commit bef053f

Browse files
committed
ENH: Added multicolumn/multirow support for latex
- [X] closes pandas-dev#13508 - [X] tests added / passed - [X] passes `git diff upstream/master | flake8 --diff` - [X] whatsnew entry. Print names of MultiIndex columns. Added "multicolumn" and "multirow" flags to `to_latex`, which trigger the corresponding features. Multirow adds `\cline` rules to visually separate sections.
1 parent 3ccb501 commit bef053f

File tree

5 files changed

+206
-10
lines changed

5 files changed

+206
-10
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ Other enhancements
108108

109109
- ``.select_dtypes()`` now allows `datetimetz` to generically select datetimes with tz (:issue:`14910`)
110110

111+
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements (:issue:`13508`)
112+
113+
- ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`)
111114

112115
.. _whatsnew_0200.api_breaking:
113116

pandas/core/config_init.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,27 @@
249249
method. Valid values: False,True
250250
"""
251251

252+
pc_latex_multicolumn = """
253+
: bool
254+
This specifies if the to_latex method of a Dataframe uses multicolumns
255+
to pretty-print MultiIndex columns.
256+
method. Valid values: False,True
257+
"""
258+
259+
pc_latex_multicolumn_format = """
260+
: string
261+
This specifies the format for multicolumn headers.
262+
Can be surrounded with '|'.
263+
Valid values: 'l', 'c', 'r', 'p{<width>}'
264+
"""
265+
266+
pc_latex_multirow = """
267+
: bool
268+
This specifies if the to_latex method of a Dataframe uses multirows
269+
to pretty-print MultiIndex rows.
270+
method. Valid values: False,True
271+
"""
272+
252273
style_backup = dict()
253274

254275

@@ -338,6 +359,12 @@ def mpl_style_cb(key):
338359
validator=is_bool)
339360
cf.register_option('latex.longtable', False, pc_latex_longtable,
340361
validator=is_bool)
362+
cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
363+
validator=is_bool)
364+
cf.register_option('latex.multicolumn_format', 'l', pc_latex_multicolumn,
365+
validator=is_text)
366+
cf.register_option('latex.multirow', False, pc_latex_multirow,
367+
validator=is_bool)
341368

342369
cf.deprecate_option('display.line_width',
343370
msg=pc_line_width_deprecation_warning,

pandas/core/frame.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,10 +1562,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15621562
index=True, na_rep='NaN', formatters=None, float_format=None,
15631563
sparsify=None, index_names=True, bold_rows=True,
15641564
column_format=None, longtable=None, escape=None,
1565-
encoding=None, decimal='.'):
1566-
"""
1565+
encoding=None, decimal='.', multicolumn=None,
1566+
multicolumn_format=None, multirow=None):
1567+
r"""
15671568
Render a DataFrame to a tabular environment table. You can splice
1568-
this into a LaTeX document. Requires \\usepackage{booktabs}.
1569+
this into a LaTeX document. Requires \usepackage{booktabs}.
15691570
15701571
`to_latex`-specific options:
15711572
@@ -1578,7 +1579,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15781579
longtable : boolean, default will be read from the pandas config module
15791580
default: False
15801581
Use a longtable environment instead of tabular. Requires adding
1581-
a \\usepackage{longtable} to your LaTeX preamble.
1582+
a \usepackage{longtable} to your LaTeX preamble.
15821583
escape : boolean, default will be read from the pandas config module
15831584
default: True
15841585
When set to False prevents from escaping latex special
@@ -1591,12 +1592,37 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15911592
15921593
.. versionadded:: 0.18.0
15931594
1595+
multicolumn : boolean, default True
1596+
default will be read from the config module
1597+
Use \multicolumn to enhance MultiIndex columns.
1598+
1599+
.. versionadded:: 0.20.0
1600+
1601+
multicolumn_format : str, default 'l'
1602+
default will be read from the config module
1603+
The alignment for multicolumns, similar to column_format
1604+
1605+
.. versionadded:: 0.20.0
1606+
1607+
multirow : boolean, default False
1608+
default will be read from the pandas config module
1609+
Use \multirow to enhance MultiIndex rows. Requires adding a
1610+
\usepackage{multirow} to your LaTeX preamble.
1611+
1612+
.. versionadded:: 0.20.0
1613+
15941614
"""
15951615
# Get defaults from the pandas config
15961616
if longtable is None:
15971617
longtable = get_option("display.latex.longtable")
15981618
if escape is None:
15991619
escape = get_option("display.latex.escape")
1620+
if multicolumn is None:
1621+
multicolumn = get_option("display.latex.multicolumn")
1622+
if multicolumn_format is None:
1623+
multicolumn_format = get_option("display.latex.multicolumn_format")
1624+
if multirow is None:
1625+
multirow = get_option("display.latex.multirow")
16001626

16011627
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
16021628
col_space=col_space, na_rep=na_rep,
@@ -1608,7 +1634,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16081634
index_names=index_names,
16091635
escape=escape, decimal=decimal)
16101636
formatter.to_latex(column_format=column_format, longtable=longtable,
1611-
encoding=encoding)
1637+
encoding=encoding, multicolumn=multicolumn,
1638+
multicolumn_format=multicolumn_format,
1639+
multirow=multirow)
16121640

16131641
if buf is None:
16141642
return formatter.buf.getvalue()

pandas/formats/format.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -646,13 +646,17 @@ def _join_multiline(self, *strcols):
646646
st = ed
647647
return '\n\n'.join(str_lst)
648648

649-
def to_latex(self, column_format=None, longtable=False, encoding=None):
649+
def to_latex(self, column_format=None, longtable=False, encoding=None,
650+
multicolumn=False, multicolumn_format=None, multirow=False):
650651
"""
651652
Render a DataFrame to a LaTeX tabular/longtable environment output.
652653
"""
653654

654655
latex_renderer = LatexFormatter(self, column_format=column_format,
655-
longtable=longtable)
656+
longtable=longtable,
657+
multicolumn=multicolumn,
658+
multicolumn_format=multicolumn_format,
659+
multirow=multirow)
656660

657661
if encoding is None:
658662
encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -820,11 +824,15 @@ class LatexFormatter(TableFormatter):
820824
HTMLFormatter
821825
"""
822826

823-
def __init__(self, formatter, column_format=None, longtable=False):
827+
def __init__(self, formatter, column_format=None, longtable=False,
828+
multicolumn=False, multicolumn_format=None, multirow=False):
824829
self.fmt = formatter
825830
self.frame = self.fmt.frame
826831
self.column_format = column_format
827832
self.longtable = longtable
833+
self.multicolumn = multicolumn
834+
self.multicolumn_format = multicolumn_format
835+
self.multirow = multirow
828836

829837
def write_result(self, buf):
830838
"""
@@ -846,14 +854,21 @@ def get_col_type(dtype):
846854
else:
847855
return 'l'
848856

857+
# reestablish the MultiIndex that has been joined by _to_str_column
849858
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
850859
clevels = self.frame.columns.nlevels
851860
strcols.pop(0)
852861
name = any(self.frame.index.names)
862+
cname = any(self.frame.columns.names)
863+
lastcol = self.frame.index.nlevels - 1
853864
for i, lev in enumerate(self.frame.index.levels):
854865
lev2 = lev.format()
855866
blank = ' ' * len(lev2[0])
856-
lev3 = [blank] * clevels
867+
# display column names in last index-column
868+
if cname and i == lastcol:
869+
lev3 = [x if x else '{}' for x in self.frame.columns.names]
870+
else:
871+
lev3 = [blank] * clevels
857872
if name:
858873
lev3.append(lev.name)
859874
for level_idx, group in itertools.groupby(
@@ -873,6 +888,13 @@ def get_col_type(dtype):
873888
compat.string_types): # pragma: no cover
874889
raise AssertionError('column_format must be str or unicode, not %s'
875890
% type(column_format))
891+
multicolumn_format = self.multicolumn_format
892+
if multicolumn_format is None:
893+
multicolumn_format = get_option("display.latex.multicolumn_format")
894+
elif not isinstance(multicolumn_format,
895+
compat.string_types): # pragma: no cover
896+
raise AssertionError('multicolumn_format must be str or unicode,'
897+
' not %s' % type(multicolumn_format))
876898

877899
if not self.longtable:
878900
buf.write('\\begin{tabular}{%s}\n' % column_format)
@@ -881,10 +903,14 @@ def get_col_type(dtype):
881903
buf.write('\\begin{longtable}{%s}\n' % column_format)
882904
buf.write('\\toprule\n')
883905

906+
ilevels = self.frame.index.nlevels
884907
nlevels = self.frame.columns.nlevels
885908
if any(self.frame.index.names):
886909
nlevels += 1
887-
for i, row in enumerate(zip(*strcols)):
910+
strrows = list(zip(*strcols))
911+
clinebuf = []
912+
913+
for i, row in enumerate(strrows):
888914
if i == nlevels and self.fmt.header:
889915
buf.write('\\midrule\n') # End of header
890916
if self.longtable:
@@ -906,8 +932,59 @@ def get_col_type(dtype):
906932
if x else '{}') for x in row]
907933
else:
908934
crow = [x if x else '{}' for x in row]
935+
if i < nlevels and self.fmt.header and self.multicolumn:
936+
# sum up columns to multicolumns
937+
row2 = list(crow[:ilevels])
938+
ncol = 1
939+
coltext = ''
940+
941+
def append_col():
942+
# write multicolumn if needed
943+
if ncol > 1:
944+
row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}'
945+
.format(ncol, multicolumn_format,
946+
coltext.strip()))
947+
# don't modify where not needed
948+
else:
949+
row2.append(coltext)
950+
for c in crow[ilevels:]:
951+
if c.strip(): # if next col has text, write the previous
952+
if coltext:
953+
append_col()
954+
coltext = c
955+
ncol = 1
956+
else: # if not, add it to the previous multicolumn
957+
ncol += 1
958+
if coltext: # write last column name
959+
append_col()
960+
crow = row2
961+
if i >= nlevels and self.fmt.index and self.multirow:
962+
# perform look-ahead to determine which rows can be joined
963+
for j in range(ilevels):
964+
if crow[j].strip():
965+
nrow = 1
966+
for r in strrows[i + 1:]:
967+
if not r[j].strip():
968+
nrow += 1
969+
else:
970+
break
971+
if nrow > 1:
972+
# overwrite non-multirow entry
973+
crow[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'\
974+
.format(nrow, crow[j].strip())
975+
# save when to end the current block with \cline
976+
clinebuf.append([nrow, j + 1])
909977
buf.write(' & '.join(crow))
910978
buf.write(' \\\\\n')
979+
if self.multirow and i < len(strrows) - 1:
980+
# during main block, check if sub-block transition takes place
981+
# print \cline to distinguish \multirow-areas
982+
for cl in clinebuf:
983+
cl[0] -= 1
984+
if cl[0] == 0:
985+
buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1],
986+
len(strcols)))
987+
clinebuf = [x for x in clinebuf if x[0]]
911988

912989
if not self.longtable:
913990
buf.write('\\bottomrule\n')

pandas/tests/formats/test_format.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3004,6 +3004,67 @@ def test_to_latex_multiindex(self):
30043004

30053005
self.assertEqual(result, expected)
30063006

3007+
def test_to_latex_multicolumnrow(self):
3008+
df = pd.DataFrame({
3009+
('c1',0):dict((x,x) for x in range(5)),
3010+
('c1',1):dict((x,x+5) for x in range(5)),
3011+
('c2',0):dict((x,x) for x in range(5)),
3012+
('c2',1):dict((x,x+5) for x in range(5)),
3013+
('c3',0):dict((x,x) for x in range(5))
3014+
})
3015+
result = df.to_latex(multicolumn=True)
3016+
expected = r"""\begin{tabular}{lrrrrr}
3017+
\toprule
3018+
{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
3019+
{} & 0 & 1 & 0 & 1 & 0 \\
3020+
\midrule
3021+
0 & 0 & 5 & 0 & 5 & 0 \\
3022+
1 & 1 & 6 & 1 & 6 & 1 \\
3023+
2 & 2 & 7 & 2 & 7 & 2 \\
3024+
3 & 3 & 8 & 3 & 8 & 3 \\
3025+
4 & 4 & 9 & 4 & 9 & 4 \\
3026+
\bottomrule
3027+
\end{tabular}
3028+
"""
3029+
self.assertEqual(result, expected)
3030+
3031+
result = df.T.to_latex(multirow=True)
3032+
expected = r"""\begin{tabular}{llrrrrr}
3033+
\toprule
3034+
& & 0 & 1 & 2 & 3 & 4 \\
3035+
\midrule
3036+
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
3037+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3038+
\cline{1-7}
3039+
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
3040+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3041+
\cline{1-7}
3042+
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
3043+
\bottomrule
3044+
\end{tabular}
3045+
"""
3046+
self.assertEqual(result, expected)
3047+
3048+
df.index = df.T.index
3049+
result = df.T.to_latex(multirow=True, multicolumn=True,
3050+
multicolumn_format='c')
3051+
expected = r"""\begin{tabular}{llrrrrr}
3052+
\toprule
3053+
& & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
3054+
& & 0 & 1 & 0 & 1 & 0 \\
3055+
\midrule
3056+
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
3057+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3058+
\cline{1-7}
3059+
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
3060+
& 1 & 5 & 6 & 7 & 8 & 9 \\
3061+
\cline{1-7}
3062+
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
3063+
\bottomrule
3064+
\end{tabular}
3065+
"""
3066+
self.assertEqual(result, expected)
3067+
30073068
def test_to_latex_escape(self):
30083069
a = 'a'
30093070
b = 'b'

0 commit comments

Comments
 (0)