Skip to content

Commit 1983866

Browse files
feature #49580: support new-style float_format string in to_csv
feat(to_csv): support new-style float_format strings using str.format. Detect and process new-style format strings (e.g., "{:,.2f}") in the float_format parameter of to_csv: (1) check whether float_format is a string that matches the new-style pattern; (2) convert it to a callable (e.g., lambda x: float_format.format(x)); (3) ensure compatibility with NaN values and mixed data types; (4) improve the formatted output of floats when exporting to CSV. Example: df = pd.DataFrame([1234.56789, 9876.54321]); df.to_csv(float_format="{:,.2f}")  # now outputs formatted values like 1,234.57. Co-authored-by: Pedro Santos <[email protected]>
1 parent b64f438 commit 1983866

File tree

4 files changed

+203
-5
lines changed

4 files changed

+203
-5
lines changed

pandas/_libs/tslibs/offsets.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5108,8 +5108,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
51085108
warnings.warn(
51095109
f"\'{name}\' is deprecated and will be removed "
51105110
f"in a future version, please use "
5111-
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\'"
5112-
f" instead.",
5111+
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
5112+
f"instead.",
51135113
FutureWarning,
51145114
stacklevel=find_stack_level(),
51155115
)
@@ -5122,8 +5122,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
51225122
warnings.warn(
51235123
f"\'{name}\' is deprecated and will be removed "
51245124
f"in a future version, please use "
5125-
f"\'{_name}\'"
5126-
f" instead.",
5125+
f"\'{_name}\' "
5126+
f"instead.",
51275127
FutureWarning,
51285128
stacklevel=find_stack_level(),
51295129
)

pandas/io/formats/format.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def __init__(
455455
self.na_rep = na_rep
456456
self.formatters = self._initialize_formatters(formatters)
457457
self.justify = self._initialize_justify(justify)
458-
self.float_format = float_format
458+
self.float_format = self._validate_float_format(float_format)
459459
self.sparsify = self._initialize_sparsify(sparsify)
460460
self.show_index_names = index_names
461461
self.decimal = decimal
@@ -850,6 +850,34 @@ def _get_column_name_list(self) -> list[Hashable]:
850850
names.append("" if columns.name is None else columns.name)
851851
return names
852852

853+
def _validate_float_format(
854+
self, fmt: FloatFormatType | None
855+
) -> FloatFormatType | None:
856+
"""
857+
Validates and processes the float_format argument.
858+
Converts new-style format strings to callables.
859+
"""
860+
861+
if fmt is None:
862+
return None
863+
864+
if callable(fmt):
865+
return fmt
866+
867+
if isinstance(fmt, str):
868+
if "%" in fmt:
869+
# Keeps old-style format strings as they are (C code handles them)
870+
return fmt
871+
else:
872+
try:
873+
_ = fmt.format(1.0) # Test with an arbitrary float
874+
return lambda x: fmt.format(x)
875+
except (ValueError, KeyError, IndexError) as e:
876+
raise ValueError(f"Invalid new-style format string {fmt!r}") from e
877+
878+
# If fmt is neither None, nor callable, nor a successfully processed string,
879+
raise ValueError("float_format must be a string or callable")
880+
853881

854882
class DataFrameRenderer:
855883
"""Class for creating dataframe output in multiple formats.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import DataFrame
5+
6+
pytestmark = pytest.mark.usefixtures("benchmark")
7+
8+
# Create a single generator instance for all tests
9+
rng = np.random.default_rng(seed=42)
10+
11+
12+
def test_benchmark_old_style_format(benchmark):
    """Time ``to_csv`` on a 1000x1000 random frame with a ``%``-style format."""
    frame = DataFrame(rng.random((1000, 1000)))

    def write_csv():
        return frame.to_csv(float_format="%.6f")

    benchmark(write_csv)
15+
16+
17+
def test_benchmark_new_style_format(benchmark):
    """Time ``to_csv`` on a 1000x1000 random frame with a ``{}``-style format."""
    frame = DataFrame(rng.random((1000, 1000)))

    def write_csv():
        return frame.to_csv(float_format="{:.6f}")

    benchmark(write_csv)
20+
21+
22+
def test_benchmark_new_style_thousands(benchmark):
    """Time ``to_csv`` with a thousands-separator template.

    ``rng.random`` draws from [0, 1), where ``"{:,.2f}"`` never emits a
    comma, so the values are scaled up to make most cells contain a
    separator. That way the benchmark covers digit grouping and the CSV
    quoting of fields that contain a comma.
    """
    df = DataFrame(rng.random((1000, 1000)) * 1_000_000)
    benchmark(lambda: df.to_csv(float_format="{:,.2f}"))
25+
26+
27+
def test_benchmark_callable_format(benchmark):
    """Time ``to_csv`` on a 1000x1000 random frame with a user callable."""
    frame = DataFrame(rng.random((1000, 1000)))

    def six_decimals(x):
        return f"{x:.6f}"

    def write_csv():
        return frame.to_csv(float_format=six_decimals)

    benchmark(write_csv)

pandas/tests/io/formats/test_to_csv.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import io
22
import os
33
import sys
4+
import warnings
45
from zipfile import ZipFile
56

67
from _csv import Error
@@ -741,3 +742,143 @@ def test_to_csv_iterative_compression_buffer(compression):
741742
pd.read_csv(buffer, compression=compression, index_col=0), df
742743
)
743744
assert not buffer.closed
745+
746+
747+
def test_new_style_float_format_basic():
748+
df = DataFrame({"A": [1234.56789, 9876.54321]})
749+
result = df.to_csv(float_format="{:.2f}")
750+
expected = ",A\n0,1234.57\n1,9876.54\n"
751+
assert result == expected
752+
753+
754+
def test_new_style_float_format_thousands():
755+
df = DataFrame({"A": [1234.56789, 9876.54321]})
756+
result = df.to_csv(float_format="{:,.2f}")
757+
expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
758+
assert result == expected
759+
760+
761+
def test_new_style_scientific_format():
762+
df = DataFrame({"A": [0.000123, 0.000456]})
763+
result = df.to_csv(float_format="{:.2e}")
764+
expected = ",A\n0,1.23e-04\n1,4.56e-04\n"
765+
assert result == expected
766+
767+
768+
def test_new_style_with_nan():
769+
df = DataFrame({"A": [1.23, np.nan, 4.56]})
770+
result = df.to_csv(float_format="{:.2f}", na_rep="NA")
771+
expected = ",A\n0,1.23\n1,NA\n2,4.56\n"
772+
assert result == expected
773+
774+
775+
def test_new_style_with_mixed_types():
776+
df = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
777+
result = df.to_csv(float_format="{:.2f}")
778+
expected = ",A,B\n0,1.23,x\n1,4.56,y\n"
779+
assert result == expected
780+
781+
782+
def test_new_style_with_mixed_types_in_column():
    """A column mixing floats and text is written with the text left as-is."""
    mixed = DataFrame({"A": [1.23, "text", 4.56]})
    # Any warning emitted while writing is recorded and discarded.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        csv_text = mixed.to_csv(float_format="{:.2f}")

    assert csv_text == ",A\n0,1.23\n1,text\n2,4.56\n"
790+
791+
792+
def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ``ValueError``."""
    import re

    df = DataFrame({"A": [1.23]})
    # ``match`` is a regular expression: escape the template so that "{" and
    # "." are compared literally instead of being read as regex syntax.
    msg = re.escape("Invalid new-style format string '{:.2f'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2f")
796+
797+
798+
def test_invalid_new_style_format_specifier():
    """An unknown presentation type is rejected with ``ValueError``."""
    import re

    df = DataFrame({"A": [1.23]})
    # ``match`` is a regular expression: escape the template so that "{",
    # "}" and "." are compared literally instead of as regex syntax.
    msg = re.escape("Invalid new-style format string '{:.2z}'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2z}")
802+
803+
804+
def test_old_style_format_compatibility():
    """Old-style ``%`` templates keep producing the same output."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
809+
810+
811+
def test_callable_float_format_compatibility():
    """A user-supplied callable is still applied to every float."""

    def with_thousands(x):
        return f"{x:,.2f}"

    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    rows = [",A", '0,"1,234.57"', '1,"9,876.54"']
    assert frame.to_csv(float_format=with_thousands) == "\n".join(rows) + "\n"
816+
817+
818+
def test_no_float_format():
    """``float_format=None`` leaves the default float rendering in place."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None)
    assert csv_text == ",A\n0,1.23\n1,4.56\n"
823+
824+
825+
def test_large_numbers():
826+
df = DataFrame({"A": [1e308, 2e308]})
827+
result = df.to_csv(float_format="{:.2e}")
828+
expected = ",A\n0,1.00e+308\n1,inf\n"
829+
assert result == expected
830+
831+
832+
def test_zero_and_negative():
833+
df = DataFrame({"A": [0.0, -1.23456]})
834+
result = df.to_csv(float_format="{:+.2f}")
835+
expected = ",A\n0,+0.00\n1,-1.23\n"
836+
assert result == expected
837+
838+
839+
def test_unicode_format():
840+
df = DataFrame({"A": [1.23, 4.56]})
841+
result = df.to_csv(float_format="{:.2f}€", encoding="utf-8")
842+
expected = ",A\n0,1.23€\n1,4.56€\n"
843+
assert result == expected
844+
845+
846+
def test_empty_dataframe():
    """With no rows, only the header line is written."""
    no_rows = DataFrame({"A": []})
    csv_text = no_rows.to_csv(float_format="{:.2f}")
    assert csv_text == ",A\n"
851+
852+
853+
def test_multi_column_float():
854+
df = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
855+
result = df.to_csv(float_format="{:.2f}")
856+
expected = ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
857+
assert result == expected
858+
859+
860+
def test_invalid_float_format_type():
    """A ``float_format`` that is neither str nor callable raises ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=expected_msg):
        frame.to_csv(float_format=123)
864+
865+
866+
def test_new_style_with_inf():
867+
df = DataFrame({"A": [1.23, np.inf, -np.inf]})
868+
result = df.to_csv(float_format="{:.2f}", na_rep="NA")
869+
expected = ",A\n0,1.23\n1,inf\n2,-inf\n"
870+
assert result == expected
871+
872+
873+
def test_new_style_with_precision_edge():
874+
df = DataFrame({"A": [1.23456789]})
875+
result = df.to_csv(float_format="{:.10f}")
876+
expected = ",A\n0,1.2345678900\n"
877+
assert result == expected
878+
879+
880+
def test_new_style_with_template():
881+
df = DataFrame({"A": [1234.56789]})
882+
result = df.to_csv(float_format="Value: {:,.2f}")
883+
expected = ',A\n0,"Value: 1,234.57"\n'
884+
assert result == expected

0 commit comments

Comments
 (0)