From de225420b0b6a8cbca6873ba187a073cddcdeaca Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 16:27:16 -0400 Subject: [PATCH 01/10] Fix test_file_cli on Windows On Windows (only), a file created by NamedTemporaryFile can't be opened again while it is already open. That causes some tests to fail with PermissionError even when the code under test is correct. To fix it without making the test code longer or more complicated, we can use the tmp_path pytest fixture, which makes a temporary directory (separate for each test and each run) where we can create whatever file(s) we need, and which handles cleanup. This has the further benefit of being a more idiomatic way to work with temporary files in pytest tests. This commit makes that change for test_file_cli. --- openai/tests/test_file_cli.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py index 69ea29e2a0..7b761bd9c1 100644 --- a/openai/tests/test_file_cli.py +++ b/openai/tests/test_file_cli.py @@ -1,19 +1,18 @@ import json import subprocess import time -from tempfile import NamedTemporaryFile +from pathlib import Path STILL_PROCESSING = "File is still processing. Check back later." -def test_file_cli() -> None: +def test_file_cli(tmp_path: Path) -> None: contents = json.dumps({"prompt": "1 + 3 =", "completion": "4"}) + "\n" - with NamedTemporaryFile(suffix=".jsonl", mode="wb") as train_file: - train_file.write(contents.encode("utf-8")) - train_file.flush() - create_output = subprocess.check_output( - ["openai", "api", "files.create", "-f", train_file.name, "-p", "fine-tune"] - ) + train_file = tmp_path / "example.jsonl" + train_file.write_bytes(contents.encode("utf-8")) + create_output = subprocess.check_output( + ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"] + ) file_obj = json.loads(create_output) assert file_obj["bytes"] == len(contents) file_id: str = file_obj["id"] From b4ee76e6c542cee4dd0e3dcdec02a41365cf7d9b Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 16:43:14 -0400 Subject: [PATCH 02/10] Divide test_file_cli into stanzas per assertion For improved readability. --- openai/tests/test_file_cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py index 7b761bd9c1..15cbd13626 100644 --- a/openai/tests/test_file_cli.py +++ b/openai/tests/test_file_cli.py @@ -13,10 +13,13 @@ def test_file_cli(tmp_path: Path) -> None: create_output = subprocess.check_output( ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"] ) + file_obj = json.loads(create_output) assert file_obj["bytes"] == len(contents) + file_id: str = file_obj["id"] assert file_id.startswith("file-") + start_time = time.time() while True: delete_result = subprocess.run( From 9c8b869e6b93e2fa67cf5e4ba95f3705faa151f0 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 17:24:39 -0400 Subject: [PATCH 03/10] Fix api_key_path tests on Windows As in test_file_cli, these failed with PermissionError on Windows by trying to open a file created by NamedTemporary file while it is already open. This likewise switches to using the pytest tmp_path fixture. The api_key_file fixture is modified to request and use it, and to provide a Path object, which test case functions call write_text on instead of printing and flushing. Since this was a major change to the api_key_file fixture, I've also taken the opportunity to have it delegate patching and automatically unpatching openai.api_key_path to another pytest fixture, allowing its code to be shorter and simpler. --- openai/tests/test_util.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py index 6220ccb7f4..1f77ab0a5e 100644 --- a/openai/tests/test_util.py +++ b/openai/tests/test_util.py @@ -1,32 +1,27 @@ import json -from tempfile import NamedTemporaryFile +from pathlib import Path import pytest +from pytest import MonkeyPatch import openai from openai import util @pytest.fixture(scope="function") -def api_key_file(): - saved_path = openai.api_key_path - try: - with NamedTemporaryFile(prefix="openai-api-key", mode="wt") as tmp: - openai.api_key_path = tmp.name - yield tmp - finally: - openai.api_key_path = saved_path - - -def test_openai_api_key_path(api_key_file) -> None: - print("sk-foo", file=api_key_file) - api_key_file.flush() +def api_key_file(tmp_path: Path, monkeypatch: MonkeyPatch) -> Path: + key_file = tmp_path / "openai_api_key" + monkeypatch.setattr("openai.api_key_path", str(key_file)) + return key_file + + +def test_openai_api_key_path(api_key_file: Path) -> None: + api_key_file.write_text("sk-foo\n") assert util.default_api_key() == "sk-foo" -def test_openai_api_key_path_with_malformed_key(api_key_file) -> None: - print("malformed-api-key", file=api_key_file) - api_key_file.flush() +def test_openai_api_key_path_with_malformed_key(api_key_file: Path) -> None: + api_key_file.write_text("malformed-api-key\n") with pytest.raises(ValueError, match="Malformed API key"): util.default_api_key() From d4a0d6b5d1c45b26cb82282e622b03fce797eaeb Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 17:39:47 -0400 Subject: [PATCH 04/10] Remove unused import (It's unused because of how openai.api_key_path now gets patched.) --- openai/tests/test_util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py index 1f77ab0a5e..ca2063a9e3 100644 --- a/openai/tests/test_util.py +++ b/openai/tests/test_util.py @@ -4,7 +4,6 @@ import pytest from pytest import MonkeyPatch -import openai from openai import util From 181a553d3391ae1b3c619029e8b83360e67c0bcb Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 18:00:39 -0400 Subject: [PATCH 05/10] Fix command syntax in test_long_examples_validator This fixes one of the problems running test_long_examples_validator on Windows: the command run by subprocess.run giving a "The filename, directory name, or volume label syntax is incorrect" error. It happens because the command to run is specified as a list of one element that is a string for the shell to interpret, rather than that string itself. This ends up working on Unix-like systems, but on Windows it results in extra quotation (adjacent `"` characters) that cmd.exe doesn't understand. This changes the subprocess.run call to what was probably intended, but it may make sense to modify it further to make it more consistent with how the subprocess module is used in other tests. Note that test_long_examples_validator is not fully fixed; now it raises PermissionError on Windows (the NamedTemporaryFile issue). --- openai/tests/test_long_examples_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index 949a7cbbae..2cf730eca8 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -37,7 +37,7 @@ def test_long_examples_validator() -> None: training_data.flush() prepared_data_cmd_output = subprocess.run( - [f"openai tools fine_tunes.prepare_data -f {training_data.name}"], + f"openai tools fine_tunes.prepare_data -f {training_data.name}", stdout=subprocess.PIPE, text=True, input="y\ny\ny\ny\ny", # apply all recommendations, one at a time From 60652c8b6fb3f17882d5e0e9e7c212ff92dea079 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 18:19:22 -0400 Subject: [PATCH 06/10] Make subprocess usage more consistent This makes the subprocess.run call in test_long_examples_validator more similar to how other tests use the subprocess module, by passing a list of separate command-line arguments instead of using a shell to parse a single string. --- openai/tests/test_long_examples_validator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index 2cf730eca8..5f19ac27dc 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -37,13 +37,12 @@ def test_long_examples_validator() -> None: training_data.flush() prepared_data_cmd_output = subprocess.run( - f"openai tools fine_tunes.prepare_data -f {training_data.name}", + ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data.name], stdout=subprocess.PIPE, text=True, input="y\ny\ny\ny\ny", # apply all recommendations, one at a time stderr=subprocess.PIPE, encoding="utf-8", - shell=True, ) # validate data was prepared successfully From 37347b4e1629323536b397e38207967da442d6eb Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 18:41:50 -0400 Subject: [PATCH 07/10] Fix test_long_examples_validator PermissionError This fixes the Windows-specific PermissionError that happened because of how NamedTemporaryFile was used. As in other such tests, the pytest tmp_path fixture is now used instead. --- openai/tests/test_long_examples_validator.py | 30 ++++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index 5f19ac27dc..ef399a0199 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -1,6 +1,6 @@ import json import subprocess -from tempfile import NamedTemporaryFile +from pathlib import Path import pytest @@ -10,7 +10,7 @@ @pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS) @pytest.mark.skipif(not HAS_NUMPY, reason=NUMPY_INSTRUCTIONS) -def test_long_examples_validator() -> None: +def test_long_examples_validator(tmp_path: Path) -> None: """ Ensures that long_examples_validator() handles previously applied recommendations, namely dropped duplicates, without resulting in a KeyError. @@ -30,20 +30,20 @@ def test_long_examples_validator() -> None: {"prompt": long_prompt, "completion": long_completion}, # 2 of 2 duplicates ] - with NamedTemporaryFile(suffix=".jsonl", mode="w") as training_data: - print(training_data.name) + training_data = tmp_path / "data.jsonl" + print(training_data) # show the full path to the temporary file + with open(training_data, mode="w") as file: for prompt_completion_row in unprepared_training_data: - training_data.write(json.dumps(prompt_completion_row) + "\n") - training_data.flush() - - prepared_data_cmd_output = subprocess.run( - ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data.name], - stdout=subprocess.PIPE, - text=True, - input="y\ny\ny\ny\ny", # apply all recommendations, one at a time - stderr=subprocess.PIPE, - encoding="utf-8", - ) + print(json.dumps(prompt_completion_row), file=file) + + prepared_data_cmd_output = subprocess.run( + ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data], + stdout=subprocess.PIPE, + text=True, + input="y\ny\ny\ny\ny", # apply all recommendations, one at a time + stderr=subprocess.PIPE, + encoding="utf-8", + ) # validate data was prepared successfully assert prepared_data_cmd_output.stderr == "" From 39b6f4f43f2face257271ddf6adbe72accdbb14d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 20:56:54 -0400 Subject: [PATCH 08/10] Fix UnicodeDecodeError from subprocess in Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In test_long_examples_validator, subprocess.run was treating the child process's standard streams as UTF-8 encoded. It's usually good practice to specify UTF-8, but in this case, the child process uses a different encoding for its standard streams on Windows. On many Windows systems, this is CP-1252 (Windows-1252), and the curly apostrophe in the "After you’ve fine-tuned a model..." message triggers a UnicodeDecodeError. Although the message could be changed, the test's correcteness probably shouldn't rely on knowledge of the output besides what we specifically test. This has subprocess.run use the system default encoding for the child process's standard streams. Since the child process is itself Python, this should match in nearly all circumstances. --- openai/tests/test_long_examples_validator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index ef399a0199..bc94821f8c 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -42,7 +42,6 @@ def test_long_examples_validator(tmp_path: Path) -> None: text=True, input="y\ny\ny\ny\ny", # apply all recommendations, one at a time stderr=subprocess.PIPE, - encoding="utf-8", ) # validate data was prepared successfully From 81f418b9c05639375b2238b25133b097375ffa78 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 21:52:02 -0400 Subject: [PATCH 09/10] Make tests using temporary files more consistent - Code style (variable names, keyword argument order) - File encoding (specify as UTF-8 in all, not just some, cases) - Child process standard stream encoding (system default) (The encoding of the temporary files, and of stdin/stdout for the child processes, are separate. Even when text from the temporary files is output by a child process, it is encoded with whatever the stream's encoding is, which for standard streams is the default system encoding.) --- openai/tests/test_file_cli.py | 4 ++-- openai/tests/test_long_examples_validator.py | 12 ++++++------ openai/tests/test_util.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py index 15cbd13626..0d615396c1 100644 --- a/openai/tests/test_file_cli.py +++ b/openai/tests/test_file_cli.py @@ -8,7 +8,7 @@ def test_file_cli(tmp_path: Path) -> None: contents = json.dumps({"prompt": "1 + 3 =", "completion": "4"}) + "\n" - train_file = tmp_path / "example.jsonl" + train_file = tmp_path / "data.jsonl" train_file.write_bytes(contents.encode("utf-8")) create_output = subprocess.check_output( ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"] @@ -26,7 +26,7 @@ def test_file_cli(tmp_path: Path) -> None: ["openai", "api", "files.delete", "-i", file_id], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - encoding="utf-8", + text=True, ) if delete_result.returncode == 0: break diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index bc94821f8c..0e2905424c 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -30,18 +30,18 @@ def test_long_examples_validator(tmp_path: Path) -> None: {"prompt": long_prompt, "completion": long_completion}, # 2 of 2 duplicates ] - training_data = tmp_path / "data.jsonl" - print(training_data) # show the full path to the temporary file - with open(training_data, mode="w") as file: + train_file = tmp_path / "data.jsonl" + print(train_file) # show the full path to the temporary file + with open(train_file, mode="w", encoding="utf-8") as file: for prompt_completion_row in unprepared_training_data: print(json.dumps(prompt_completion_row), file=file) prepared_data_cmd_output = subprocess.run( - ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data], + ["openai", "tools", "fine_tunes.prepare_data", "-f", train_file], stdout=subprocess.PIPE, - text=True, - input="y\ny\ny\ny\ny", # apply all recommendations, one at a time stderr=subprocess.PIPE, + input="y\ny\ny\ny\ny", # apply all recommendations, one at a time + text=True, ) # validate data was prepared successfully diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py index ca2063a9e3..4b3d812e14 100644 --- a/openai/tests/test_util.py +++ b/openai/tests/test_util.py @@ -15,12 +15,12 @@ def api_key_file(tmp_path: Path, monkeypatch: MonkeyPatch) -> Path: def test_openai_api_key_path(api_key_file: Path) -> None: - api_key_file.write_text("sk-foo\n") + api_key_file.write_text("sk-foo\n", encoding="utf-8") assert util.default_api_key() == "sk-foo" def test_openai_api_key_path_with_malformed_key(api_key_file: Path) -> None: - api_key_file.write_text("malformed-api-key\n") + api_key_file.write_text("malformed-api-key\n", encoding="utf-8") with pytest.raises(ValueError, match="Malformed API key"): util.default_api_key() From b810d1199c37d7a17962f1dcb2a774b4c8eef8de Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 12 Jul 2023 22:58:38 -0400 Subject: [PATCH 10/10] Fix subprocess TypeError (Windows, Python 3.7) When using Python 3.7 on Windows (but not on other systems, and not on Windows with any later version of Python), subprocess.run and related functions don't support Path objects as command line arguments, due to https://github.com/python/cpython/issues/85919. This explicitly converts them to str in those calls. This change can be undone whenever this codebase drops Python 3.7 support. --- openai/tests/test_file_cli.py | 2 +- openai/tests/test_long_examples_validator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py index 0d615396c1..7fb2bf10b0 100644 --- a/openai/tests/test_file_cli.py +++ b/openai/tests/test_file_cli.py @@ -11,7 +11,7 @@ def test_file_cli(tmp_path: Path) -> None: train_file = tmp_path / "data.jsonl" train_file.write_bytes(contents.encode("utf-8")) create_output = subprocess.check_output( - ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"] + ["openai", "api", "files.create", "-f", str(train_file), "-p", "fine-tune"] ) file_obj = json.loads(create_output) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index 0e2905424c..610cea88f8 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -37,7 +37,7 @@ def test_long_examples_validator(tmp_path: Path) -> None: print(json.dumps(prompt_completion_row), file=file) prepared_data_cmd_output = subprocess.run( - ["openai", "tools", "fine_tunes.prepare_data", "-f", train_file], + ["openai", "tools", "fine_tunes.prepare_data", "-f", str(train_file)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, input="y\ny\ny\ny\ny", # apply all recommendations, one at a time