Skip to content

Latex bugs #20032

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 103 additions & 72 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,16 @@ class LatexFormatter(TableFormatter):
HTMLFormatter
"""

# LaTeX special characters and their escaped replacements.  The backslash
# is not listed here: the code that applies this mapping replaces it first,
# so that the backslashes introduced below are not escaped a second time.
#
# The two control-word replacements end with a space so that a following
# letter is not read as part of the macro name; without it "a~b" would turn
# into "a\textasciitildeb", which is an undefined control sequence.  A "{}"
# terminator is avoided because '{' and '}' are keys of this very mapping
# and could be escaped again, depending on the iteration order.
ESCAPE_MAPPING = {'_': '\\_',
                  '%': '\\%',
                  '$': '\\$',
                  '#': '\\#',
                  '{': '\\{',
                  '}': '\\}',
                  '~': '\\textasciitilde ',
                  '^': '\\textasciicircum ',
                  '&': '\\&'}

def __init__(self, formatter, column_format=None, longtable=False,
multicolumn=False, multicolumn_format=None, multirow=False):
self.fmt = formatter
Expand All @@ -881,12 +891,10 @@ def __init__(self, formatter, column_format=None, longtable=False,
self.multicolumn_format = multicolumn_format
self.multirow = multirow

def write_result(self, buf):
def _build_str_cols(self):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
Builds the string representation of the columns
"""

# string representation of the columns
if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
.format(name=type(self.frame).__name__,
Expand All @@ -896,44 +904,18 @@ def write_result(self, buf):
else:
strcols = self.fmt._to_str_columns()

# reestablish the MultiIndex that has been joined by _to_str_column
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
strcols = self._rebuild_multi_index(strcols)
return strcols

def _build_col_format(self):
def get_col_type(dtype):
    """Return 'r' for a numeric dtype and 'l' for any other dtype."""
    is_numeric = issubclass(dtype.type, np.number)
    return 'r' if is_numeric else 'l'

# reestablish the MultiIndex that has been joined by _to_str_column
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
cname = any(self.frame.columns.names)
lastcol = self.frame.index.nlevels - 1
previous_lev3 = None
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
# display column names in last index-column
if cname and i == lastcol:
lev3 = [x if x else '{}' for x in self.frame.columns.names]
else:
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
current_idx_val = None
for level_idx in self.frame.index.labels[i]:
if ((previous_lev3 is None or
previous_lev3[len(lev3)].isspace()) and
lev2[level_idx] == current_idx_val):
# same index as above row and left index was the same
lev3.append(blank)
else:
# different value than above or left index different
lev3.append(lev2[level_idx])
current_idx_val = lev2[level_idx]
strcols.insert(i, lev3)
previous_lev3 = lev3

column_format = self.column_format
if column_format is None:
dtypes = self.frame.dtypes._values
Expand All @@ -945,19 +927,22 @@ def get_col_type(dtype):
compat.string_types): # pragma: no cover
raise AssertionError('column_format must be str or unicode, '
'not {typ}'.format(typ=type(column_format)))
return column_format

if not self.longtable:
buf.write('\\begin{{tabular}}{{{fmt}}}\n'
.format(fmt=column_format))
buf.write('\\toprule\n')
else:
buf.write('\\begin{{longtable}}{{{fmt}}}\n'
.format(fmt=column_format))
buf.write('\\toprule\n')
def write_result(self, buf):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some refactoring here and broke this method down into a couple of smaller ones. Ideally, pieces like _rebuild_multi_index() could also be a bit clearer, but maybe I'll tackle that while looking at some of the other LaTeX-related bugs.

"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""

ilevels = self.frame.index.nlevels
clevels = self.frame.columns.nlevels
nlevels = clevels
strcols = self._build_str_cols()
column_format = self._build_col_format()
table_type = 'longtable' if self.longtable else 'tabular'

buf.write('\\begin{{{typ}}}{{{fmt}}}\n'
.format(fmt=column_format, typ=table_type))
buf.write('\\toprule\n')

nlevels = self.frame.columns.nlevels
if any(self.frame.index.names):
nlevels += 1
strrows = list(zip(*strcols))
Expand All @@ -975,38 +960,51 @@ def get_col_type(dtype):
buf.write('\\endfoot\n\n')
buf.write('\\bottomrule\n')
buf.write('\\endlastfoot\n')
if self.fmt.kwds.get('escape', True):
# escape backslashes first
crow = [(x.replace('\\', '\\textbackslash').replace('_', '\\_')
.replace('%', '\\%').replace('$', '\\$')
.replace('#', '\\#').replace('{', '\\{')
.replace('}', '\\}').replace('~', '\\textasciitilde')
.replace('^', '\\textasciicircum').replace('&', '\\&')
if (x and x != '{}') else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
if self.bold_rows and self.fmt.index:
# bold row labels
crow = ['\\textbf{{{x}}}'.format(x=x)
if j < ilevels and x.strip() not in ['', '{}'] else x
for j, x in enumerate(crow)]
if i < clevels and self.fmt.header and self.multicolumn:
# sum up columns to multicolumns
crow = self._format_multicolumn(crow, ilevels)
if (i >= nlevels and self.fmt.index and self.multirow and
ilevels > 1):
# sum up rows to multirows
crow = self._format_multirow(crow, ilevels, i, strrows)

crow = self._build_row(i, row, strrows)
buf.write(' & '.join(crow))
buf.write(' \\\\\n')
if self.multirow and i < len(strrows) - 1:
self._print_cline(buf, i, len(strcols))

if not self.longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')
buf.write('\\end{{{typ}}}\n'.format(typ=table_type))

def _build_row(self, i, row, strrows):
    """
    Turn one row of cell strings into the cells written to the output.

    The row is escaped via ``_escape_row`` and then, depending on the
    formatter options, its index cells are wrapped in a bold macro, its
    header cells are merged by ``_format_multicolumn`` and its index cells
    are merged by ``_format_multirow``.

    Parameters
    ----------
    i : int
        Row number; compared against the number of column levels and
        index-name rows to tell header rows from body rows (presumably the
        position of ``row`` within ``strrows`` -- confirm against caller).
    row : sequence of str
        The cells of the row.
    strrows : list
        All rows of the table; only forwarded to ``_format_multirow``.

    Returns
    -------
    list of str
    """
    cells = self._escape_row(row)

    n_index_levels = self.frame.index.nlevels
    n_column_levels = self.frame.columns.nlevels
    # one more leading row is counted when any index level has a name
    n_leading_rows = n_column_levels
    if any(self.frame.index.names):
        n_leading_rows = n_column_levels + 1

    if self.bold_rows and self.fmt.index:
        # bold row labels; blank and '{}' placeholder cells stay untouched
        emboldened = []
        for position, cell in enumerate(cells):
            is_label = (position < n_index_levels and
                        cell.strip() not in ('', '{}'))
            if is_label:
                cell = '\\textbf{{{x}}}'.format(x=cell)
            emboldened.append(cell)
        cells = emboldened

    if i < n_column_levels and self.fmt.header and self.multicolumn:
        # sum up columns to multicolumns
        cells = self._format_multicolumn(cells, n_index_levels)

    wants_multirow = (self.fmt.index and self.multirow and
                      n_index_levels > 1)
    if i >= n_leading_rows and wants_multirow:
        # sum up rows to multirows
        cells = self._format_multirow(cells, n_index_levels, i, strrows)

    return cells

def _escape_row(self, row):
def null_replace(x):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add docstrings to the added methods?

if not x or x == '{}':
return '{}'
return x

def escape_item(x):
    """Return the string ``x`` with LaTeX special characters escaped."""
    # Backslashes are replaced first; done later, the backslashes that the
    # mapping below introduces would be escaped again.  The trailing space
    # terminates the control word, so that "a\\b" does not turn into the
    # undefined macro "\\textbackslashb".  A "{}" terminator cannot be used
    # because the braces would be escaped by the mapping that follows.
    x = x.replace('\\', '\\textbackslash ')
    for special, escaped in LatexFormatter.ESCAPE_MAPPING.items():
        x = x.replace(special, escaped)
    return x
return [escape_item(null_replace(x))
if self.fmt.kwds.get('escape', True)
and x and x != '{}' else null_replace(x) for x in row]

def _format_multicolumn(self, row, ilevels):
r"""
Expand Down Expand Up @@ -1083,6 +1081,39 @@ def _print_cline(self, buf, i, icol):
# remove entries that have been written to buffer
self.clinebuf = [x for x in self.clinebuf if x[0] != i]

def _rebuild_multi_index(self, strcols):
    """
    Replace the first string column by one column per index level.

    The first entry of ``strcols`` is dropped and, for every level of the
    frame's index, a freshly built column is inserted at that level's
    position.  ``strcols`` is modified in place and also returned.

    Each new column consists of one header cell per column level (the
    column names in the last index column when any is set, blanks
    otherwise), the level name when any index name is not None, and then
    one cell per row.  A row cell is blanked out when it repeats the value
    of the row above and the cell to its left is blank as well.
    """
    index = self.frame.index
    strcols.pop(0)
    left_column = None
    for position, level in enumerate(index.levels):
        formatted = level.format()
        filler = ' ' * len(formatted[0])

        # display column names in last index-column
        is_last_level = position == (index.nlevels - 1)
        if any(self.frame.columns.names) and is_last_level:
            column = [name if name else '{}'
                      for name in self.frame.columns.names]
        else:
            column = [filler] * self.frame.columns.nlevels

        if any(name is not None for name in index.names):
            if level.name:
                column.append(u'{name}'.format(name=level.name))
            else:
                # falsy names are passed through unformatted
                column.append(level.name)

        shown_value = None
        for code in index.labels[position]:
            # the neighbour cell sits at the slot about to be filled
            left_is_blank = (left_column is None or
                             left_column[len(column)].isspace())
            if left_is_blank and formatted[code] == shown_value:
                # same index as above row and left index was the same
                column.append(filler)
            else:
                # different value than above or left index different
                shown_value = formatted[code]
                column.append(shown_value)

        strcols.insert(position, column)
        left_column = column
    return strcols


class HTMLFormatter(TableFormatter):

Expand Down
35 changes: 35 additions & 0 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,3 +621,38 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
\end{tabular}
""" % tuple(list(col_names) + [idx_names_row])
assert observed == expected

def test_to_latex_multiindex_non_string(self):
    """Check ``to_latex`` on a MultiIndex built from integer-labelled columns."""
    # GH 19981
    # NOTE(review): the alignment whitespace inside this literal looks
    # collapsed -- verify it against the real ``to_latex`` output.
    expected = r"""\begin{tabular}{llr}
\toprule
& & 2 \\
{} & 1 & \\
\midrule
1 & 2 & 3 \\
& & 3 \\
\bottomrule
\end{tabular}
"""
    frame = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1])
    assert frame.to_latex() == expected

def test_to_latex_missing_rows(self):
    """Check ``to_latex`` when the index level names are '' and None."""
    # GH 18669
    # NOTE(review): the alignment whitespace inside this literal looks
    # collapsed -- verify it against the real ``to_latex`` output.
    expected = r"""\begin{tabular}{llrrrr}
\toprule
& & 0 & 1 & 2 & 3 \\
\midrule
{} & {} & & & & \\
1 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
2 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
"""
    row_index = pd.MultiIndex.from_product([[1, 2], [3, 4]],
                                           names=['', None])
    frame = pd.DataFrame(-1, index=row_index, columns=range(4))
    assert frame.to_latex() == expected