|
1 | 1 | import pytest
|
2 | 2 | import sys
|
3 | 3 | import warnings
|
4 |
| -from functools import cached_property |
| 4 | +from contextlib import nullcontext |
| 5 | +from functools import cached_property, partial |
5 | 6 | from inspect import getsourcelines, getsourcefile
|
6 | 7 |
|
7 | 8 | from numpydoc import validate
|
@@ -85,6 +86,50 @@ def test_extract_ignore_validation_comments(tmp_path, file_contents, expected):
|
85 | 86 | assert validate.extract_ignore_validation_comments(filepath) == expected
|
86 | 87 |
|
87 | 88 |
|
@pytest.mark.parametrize(
    "assumed_encoding",
    (
        pytest.param("utf-8", id="utf8_codec"),
        pytest.param("cp1252", id="cp1252_codec"),
    ),
)
@pytest.mark.parametrize(
    ("classname", "actual_encoding"),
    (
        pytest.param("MÿClass", "cp1252", id="cp1252_file"),
        pytest.param("My\u0081Class", "utf-8", id="utf8_file"),
    ),
)
def test_encodings(tmp_path, classname, actual_encoding, assumed_encoding):
    """
    Check comment extraction against matching and mismatching file codecs.

    A tiny class definition is written to disk as raw bytes using
    ``actual_encoding`` and then read back through
    ``validate.extract_ignore_validation_comments`` with ``assumed_encoding``.
    When the two codecs agree, the call must return an empty dict; when they
    disagree, a ``UnicodeDecodeError`` with a specific message is expected.
    """
    # Write raw bytes so the on-disk encoding is exactly `actual_encoding`,
    # independent of the platform's default text encoding.
    source_path = tmp_path / "ignore_comments.py"
    source_text = f"class {classname}:\n pass"
    source_path.write_bytes(source_text.encode(actual_encoding))

    codec_pair = (actual_encoding, assumed_encoding)
    if codec_pair == ("cp1252", "utf-8"):
        # The (presumed rare) case of an editor having saved the source as
        # cp1252: the ÿ in MÿClass becomes byte 0xff, which cannot start a
        # UTF-8 sequence.
        expectation = pytest.raises(
            UnicodeDecodeError,
            match="can't decode byte 0xff in position 7: invalid start byte",
        )
    elif codec_pair == ("utf-8", "cp1252"):
        # The more likely case: a UTF-8 file read with the Windows system
        # codepage, where byte 0x81 has no mapping. This is the situation
        # addressed by numpy/numpydoc#510.
        expectation = pytest.raises(
            UnicodeDecodeError,
            match="can't decode byte 0x81 in position 9: character maps to <undefined>",
        )
    else:
        # Codecs agree, so decoding must succeed without raising.
        expectation = nullcontext()

    with expectation:
        ignored = validate.extract_ignore_validation_comments(
            source_path, assumed_encoding
        )
        # Only reached when no exception is expected: the file contains no
        # ignore comments, so the mapping is empty.
        assert ignored == {}
| 131 | + |
| 132 | + |
88 | 133 | class GoodDocStrings:
|
89 | 134 | """
|
90 | 135 | Collection of good doc strings.
|
|
0 commit comments