-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
To string with encoding #28951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
To string with encoding #28951
Changes from all commits
ae76e46
3985040
8eea18e
d2c70ee
835b22d
4e30d7c
78ba34b
56fdad9
83ccffd
19801bb
d44afa7
b4b983b
acb2c6e
236db38
93e8b11
1f3e55d
b0364d2
12ddc8f
f82fe78
5ef9cec
df58c1f
496f68f
725b2ec
698d3b7
96cc810
1a35eeb
f865ac3
757a1f5
835cdb8
648fa55
3698a2a
f8917de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,17 +73,19 @@ def filepath_or_buffer(filepath_or_buffer_id, tmp_path): | |
|
||
|
||
@pytest.fixture | ||
def assert_filepath_or_buffer_equals(filepath_or_buffer, filepath_or_buffer_id): | ||
def assert_filepath_or_buffer_equals( | ||
filepath_or_buffer, filepath_or_buffer_id, encoding | ||
): | ||
""" | ||
Assertion helper for checking filepath_or_buffer. | ||
""" | ||
|
||
def _assert_filepath_or_buffer_equals(expected): | ||
if filepath_or_buffer_id == "string": | ||
with open(filepath_or_buffer) as f: | ||
with open(filepath_or_buffer, encoding=encoding) as f: | ||
result = f.read() | ||
elif filepath_or_buffer_id == "pathlike": | ||
result = filepath_or_buffer.read_text() | ||
result = filepath_or_buffer.read_text(encoding=encoding) | ||
elif filepath_or_buffer_id == "buffer": | ||
result = filepath_or_buffer.getvalue() | ||
assert result == expected | ||
|
@@ -3240,14 +3242,32 @@ def test_repr_html_ipython_config(ip): | |
|
||
|
||
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) | ||
@pytest.mark.parametrize( | ||
"encoding, data", | ||
[(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], | ||
) | ||
def test_filepath_or_buffer_arg( | ||
float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals | ||
method, | ||
filepath_or_buffer, | ||
assert_filepath_or_buffer_equals, | ||
encoding, | ||
data, | ||
filepath_or_buffer_id, | ||
): | ||
df = float_frame | ||
expected = getattr(df, method)() | ||
df = DataFrame([data]) | ||
|
||
getattr(df, method)(buf=filepath_or_buffer) | ||
assert_filepath_or_buffer_equals(expected) | ||
if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
with pytest.raises( | ||
ValueError, match="buf is not a file name and encoding is specified." | ||
): | ||
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) | ||
elif encoding == "foo": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you can remove the invalid encoding; this doesn't test any function pandas provides rather just builtin Python functionality There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @simonjayhawkins thoughts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The reason I asked for it to be added was so that the precedence of the Exceptions was checked and to confirm the encoding parameter was passed to the builtin function. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh OK my mistake - just didn't see that was asked for previously (lost in GH comments) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. so agree with #28951 (comment) but this sort of compensates. If we confirm the encoding is passed, then reading back in is only testing Python functionality. float_frame will probably work with any encoding, so maybe best to modify float_frame if encoding=="gbk". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this should work...
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 096fc6cb4..490cecb41 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -73,17 +73,19 @@ def filepath_or_buffer(filepath_or_buffer_id, tmp_path):
@pytest.fixture
-def assert_filepath_or_buffer_equals(filepath_or_buffer, filepath_or_buffer_id):
+def assert_filepath_or_buffer_equals(
+ filepath_or_buffer, filepath_or_buffer_id, encoding
+):
"""
Assertion helper for checking filepath_or_buffer.
"""
def _assert_filepath_or_buffer_equals(expected):
if filepath_or_buffer_id == "string":
- with open(filepath_or_buffer) as f:
+ with open(filepath_or_buffer, encoding=encoding) as f:
result = f.read()
elif filepath_or_buffer_id == "pathlike":
- result = filepath_or_buffer.read_text()
+ result = filepath_or_buffer.read_text(encoding=encoding)
elif filepath_or_buffer_id == "buffer":
result = filepath_or_buffer.getvalue()
assert result == expected
@@ -3250,6 +3252,8 @@ def test_filepath_or_buffer_arg(
filepath_or_buffer_id,
):
df = float_frame
+ if encoding == "gbk":
+ float_frame.iloc[0, 0] = "造成输出中文显示乱码"
if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None:
with pytest.raises( |
||
with pytest.raises(LookupError, match="unknown encoding"): | ||
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) | ||
else: | ||
expected = getattr(df, method)() | ||
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) | ||
assert_filepath_or_buffer_equals(expected) | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason for this to be a fixture instead of just a global function? This way of invoking the function seems very magical; I think easier if not a fixture There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall I make this change as a part of this PR itself? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm sorry thought it was a part of this PR. I think OK to do here but let's see what @simonjayhawkins thinks There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this is fine as is. |
||
|
||
|
||
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) | ||
|
Uh oh!
There was an error while loading. Please reload this page.