diff --git a/doc/source/io.rst b/doc/source/io.rst index fd83f1a24edab..f9da5ee6d6737 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2596,6 +2596,28 @@ table CSS classes. Note that these classes are *appended* to the existing print(df.to_html(classes=['awesome_table_class', 'even_more_awesome_class'])) +The ``render_links`` argument provides the ability to add hyperlinks to cells +that contain URLs. + +.. versionadded:: 0.24 + +.. ipython:: python + + url_df = pd.DataFrame({ + 'name': ['Python', 'Pandas'], + 'url': ['https://www.python.org/', 'http://pandas.pydata.org']}) + print(url_df.to_html(render_links=True)) + +.. ipython:: python + :suppress: + + write_html(url_df, 'render_links', render_links=True) + +HTML: + +.. raw:: html + :file: _static/render_links.html + Finally, the ``escape`` argument allows you to control whether the "<", ">" and "&" characters escaped in the resulting HTML (by default it is ``True``). So to get the HTML without escaped characters pass ``escape=False`` diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 6095865fde87c..bf74d00bf5c42 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -27,6 +27,8 @@ New features - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) - :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`) - :func:`DataFrame.read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`) +- :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame. + See the :ref:`section on writing HTML <io.html>` in the IO docs for example usage. (:issue:`2679`) .. 
_whatsnew_0240.values_api: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c83fb0d30844c..6b74fd7e06de9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2044,8 +2044,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', - bold_rows=True, classes=None, escape=True, - notebook=False, border=None, table_id=None): + bold_rows=True, classes=None, escape=True, notebook=False, + border=None, table_id=None, render_links=False): """ Render a DataFrame as an HTML table. %(shared_params)s @@ -2067,6 +2067,12 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, A css id is included in the opening `` tag if specified. .. versionadded:: 0.23.0 + + render_links : bool, default False + Convert URLs to HTML links. + + .. versionadded:: 0.24.0 + %(returns)s See Also -------- @@ -2088,7 +2094,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, max_rows=max_rows, max_cols=max_cols, show_dimensions=show_dimensions, - decimal=decimal, table_id=table_id) + decimal=decimal, table_id=table_id, + render_links=render_links) # TODO: a generic formatter wld b in DataFrameFormatter formatter.to_html(classes=classes, notebook=notebook, border=border) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 9b371d00d8072..acd89e9ec8492 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -383,7 +383,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, justify=None, float_format=None, sparsify=None, index_names=True, line_width=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', - table_id=None, **kwds): + table_id=None, render_links=False, **kwds): self.frame = frame if buf is not None: self.buf = _expand_user(_stringify_path(buf)) @@ -410,6 
+410,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, len(self.frame)) self.show_dimensions = show_dimensions self.table_id = table_id + self.render_links = render_links if justify is None: self.justify = get_option("display.colheader_justify") @@ -731,7 +732,8 @@ def to_html(self, classes=None, notebook=False, border=None): """ from pandas.io.formats.html import HTMLFormatter html_renderer = HTMLFormatter(self, classes=classes, notebook=notebook, - border=border, table_id=self.table_id) + border=border, table_id=self.table_id, + render_links=self.render_links) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 6425e655959bd..cac0c699d7046 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -15,6 +15,7 @@ import pandas.core.common as com from pandas.core.config import get_option +from pandas.io.common import _is_url from pandas.io.formats.format import ( TableFormatter, buffer_put_lines, get_level_lengths) from pandas.io.formats.printing import pprint_thing @@ -25,7 +26,7 @@ class HTMLFormatter(TableFormatter): indent_delta = 2 def __init__(self, formatter, classes=None, notebook=False, border=None, - table_id=None): + table_id=None, render_links=False): self.fmt = formatter self.classes = classes @@ -40,6 +41,7 @@ def __init__(self, formatter, classes=None, notebook=False, border=None, border = get_option('display.html.border') self.border = border self.table_id = table_id + self.render_links = render_links @property def is_truncated(self): @@ -76,9 +78,19 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): ('>', r'>')]) else: esc = {} + rs = pprint_thing(s, escape_chars=esc).strip() - self.write(u'{start}{rs}' - .format(start=start_tag, rs=rs, kind=kind), indent) + + if self.render_links and _is_url(rs): + rs_unescaped = pprint_thing(s, escape_chars={}).strip() + start_tag += 
''.format( + url=rs_unescaped) + end_a = '' + else: + end_a = '' + + self.write(u'{start}{rs}{end_a}'.format( + start=start_tag, rs=rs, end_a=end_a, kind=kind), indent) def write_tr(self, line, indent=0, indent_delta=0, header=False, align=None, tags=None, nindex_levels=0): diff --git a/pandas/tests/io/formats/data/render_links_false.html b/pandas/tests/io/formats/data/render_links_false.html new file mode 100644 index 0000000000000..6509a0e985597 --- /dev/null +++ b/pandas/tests/io/formats/data/render_links_false.html @@ -0,0 +1,24 @@ +
+ + + + + + + + + + + + + + + + + + + + + + +
foobarNone
00http://pandas.pydata.org/?q1=a&q2=bpydata.org
10www.pydata.orgpydata.org
diff --git a/pandas/tests/io/formats/data/render_links_true.html b/pandas/tests/io/formats/data/render_links_true.html new file mode 100644 index 0000000000000..e9cb5632aad1d --- /dev/null +++ b/pandas/tests/io/formats/data/render_links_true.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
foobarNone
00http://pandas.pydata.org/?q1=a&q2=bpydata.org
10www.pydata.orgpydata.org
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 627689b865148..9662b3d514cb8 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -477,3 +477,19 @@ def test_to_html_float_format_no_fixed_width(self, datapath): df = DataFrame({'x': [100.0]}) expected = expected_html(datapath, 'gh22270_expected_output') assert df.to_html(float_format='%.0f') == expected + + @pytest.mark.parametrize("render_links, file_name", [ + (True, 'render_links_true'), + (False, 'render_links_false'), + ]) + def test_to_html_render_links(self, render_links, file_name, datapath): + # GH 2679 + data = [ + [0, 'http://pandas.pydata.org/?q1=a&q2=b', 'pydata.org'], + [0, 'www.pydata.org', 'pydata.org'] + ] + df = DataFrame(data, columns=['foo', 'bar', None]) + + result = df.to_html(render_links=render_links) + expected = expected_html(datapath, file_name) + assert result == expected