From 3224b3f44f44cbbd104142bb66aff117a8a233c8 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Thu, 3 Sep 2020 23:58:11 +0800 Subject: [PATCH 1/5] BUG: extra leading space in to_string when index=False --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/io/formats/format.py | 26 +++++++++++---- pandas/tests/io/formats/test_format.py | 42 +++++++++++++++++++++--- pandas/tests/io/formats/test_to_latex.py | 22 ++++++------- 4 files changed, 69 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index c740c7b3882c9..4dad7c17fab11 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -33,6 +33,7 @@ Bug fixes - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`36051`) +- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 461ef6823918e..3c830ca0a9ff2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, + leading_space=self.index, ) def to_string(self) -> str: @@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space.get(frame.columns[i]), decimal=self.decimal, + leading_space=self.index, ) def to_html( @@ -1111,7 +1113,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, quoting: Optional[int] = None, ) -> List[str]: """ @@ -1194,7 +1196,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, ): self.values = values self.digits = digits @@ -1395,9 +1397,11 @@ def format_values_with(float_format): float_format: Optional[FloatFormatType] if self.float_format is None: if self.fixed_width: - float_format = partial( - "{value: .{digits:d}f}".format, digits=self.digits - ) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format else: @@ -1429,7 +1433,11 @@ def format_values_with(float_format): ).any() if has_small_values or (too_long and has_large_values): - float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: f"{x: d}") + if self.leading_space is False: + formatter_str = lambda x: f"{x:d}".format(x=x) + else: + formatter_str = lambda x: f"{x: d}".format(x=x) + formatter = self.formatter or formatter_str fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1bbfe4d7d74af..cb8db400cab5c 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1546,11 +1546,11 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. - expected = " x y z\n 11 33 AAA\n 22 -44 " + expected = " x y z\n11 33 AAA\n22 -44 " assert df_s == expected df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " + expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -2269,7 +2269,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = " 1\n" + " 2\n" + " 3\n" + " 4" + expected = "1\n" + "2\n" + "3\n" + "4" assert result == expected def test_unicode_name_in_footer(self): @@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): getattr(float_frame, method)(buf=object()) + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected \ No newline at end of file diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 96a9ed2b86cf4..9dfd851e91c65 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -413,7 +413,7 @@ def test_to_latex_longtable(self): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -423,8 +423,8 @@ def test_to_latex_longtable(self): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -663,8 +663,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -690,10 +690,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ From 180147ff0e974407d270dd6456d11cff5874f324 Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Fri, 4 Sep 2020 00:22:12 +0800 Subject: [PATCH 2/5] ADD: a newline at end of file --- pandas/tests/io/formats/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index cb8db400cab5c..1d49695cf0a4a 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3424,4 +3424,4 @@ def test_format_remove_leading_space_series(input_array, expected): def test_format_remove_leading_space_dataframe(input_array, expected): # GH: 24980 df = pd.DataFrame(input_array).to_string(index=False) - assert df == expected \ No newline at end of file + assert df == expected From 31fad65dadb690a8f57c1ee2a34e86bc72b0239e Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Fri, 4 Sep 2020 00:46:40 +0800 Subject: [PATCH 3/5] UPT: move to v1.2.0 whatsnew note --- doc/source/whatsnew/v1.1.2.rst | 1 - doc/source/whatsnew/v1.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 4dad7c17fab11..c740c7b3882c9 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -33,7 +33,6 @@ Bug fixes - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`36051`) -- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b07351d05defb..a6b0c1b2031fd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -212,6 +212,7 @@ Performance improvements Bug fixes ~~~~~~~~~ +- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) Categorical From af0e64f4641e5686f8ef41ee2134b931e5e10c0a Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sat, 5 Sep 2020 16:55:59 +0800 Subject: [PATCH 4/5] DOC: update docstring of leading_space --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 9ce5b93dc6f74..e19a6933f39d4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1129,7 +1129,7 @@ def format_array( space justify decimal - leading_space : bool, optional + leading_space : bool, optional, default True Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. From 2c1805536af51189a457b0e6cc8d338d32b3abbc Mon Sep 17 00:00:00 2001 From: Honfung Wong <21543236+onshek@users.noreply.github.com> Date: Sun, 6 Sep 2020 10:49:57 +0800 Subject: [PATCH 5/5] UPD: update whatsnew v1.2.0 --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2ee1f5f875824..b7f2729eed1ab 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -255,7 +255,7 @@ Conversion Strings ^^^^^^^ -- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) +- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`) - -