From de225420b0b6a8cbca6873ba187a073cddcdeaca Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 16:27:16 -0400
Subject: [PATCH 01/10] Fix test_file_cli on Windows

On Windows (only), a file created by NamedTemporaryFile can't be
opened again while it is already open. That causes some tests to
fail with PermissionError even when the code under test is correct.

To fix it without making the test code longer or more complicated,
we can use the tmp_path pytest fixture, which makes a temporary
directory (separate for each test and each run) where we can create
whatever file(s) we need, and which handles cleanup. This has the
further benefit of being a more idiomatic way to work with
temporary files in pytest tests.

This commit makes that change for test_file_cli.
---
 openai/tests/test_file_cli.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py
index 69ea29e2a0..7b761bd9c1 100644
--- a/openai/tests/test_file_cli.py
+++ b/openai/tests/test_file_cli.py
@@ -1,19 +1,18 @@
 import json
 import subprocess
 import time
-from tempfile import NamedTemporaryFile
+from pathlib import Path
 
 STILL_PROCESSING = "File is still processing. Check back later."
 
 
-def test_file_cli() -> None:
+def test_file_cli(tmp_path: Path) -> None:
     contents = json.dumps({"prompt": "1 + 3 =", "completion": "4"}) + "\n"
-    with NamedTemporaryFile(suffix=".jsonl", mode="wb") as train_file:
-        train_file.write(contents.encode("utf-8"))
-        train_file.flush()
-        create_output = subprocess.check_output(
-            ["openai", "api", "files.create", "-f", train_file.name, "-p", "fine-tune"]
-        )
+    train_file = tmp_path / "example.jsonl"
+    train_file.write_bytes(contents.encode("utf-8"))
+    create_output = subprocess.check_output(
+        ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"]
+    )
     file_obj = json.loads(create_output)
     assert file_obj["bytes"] == len(contents)
     file_id: str = file_obj["id"]

From b4ee76e6c542cee4dd0e3dcdec02a41365cf7d9b Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 16:43:14 -0400
Subject: [PATCH 02/10] Divide test_file_cli into stanzas per assertion

For improved readability.
---
 openai/tests/test_file_cli.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py
index 7b761bd9c1..15cbd13626 100644
--- a/openai/tests/test_file_cli.py
+++ b/openai/tests/test_file_cli.py
@@ -13,10 +13,13 @@ def test_file_cli(tmp_path: Path) -> None:
     create_output = subprocess.check_output(
         ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"]
     )
+
     file_obj = json.loads(create_output)
     assert file_obj["bytes"] == len(contents)
+
     file_id: str = file_obj["id"]
     assert file_id.startswith("file-")
+
     start_time = time.time()
     while True:
         delete_result = subprocess.run(

From 9c8b869e6b93e2fa67cf5e4ba95f3705faa151f0 Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 17:24:39 -0400
Subject: [PATCH 03/10] Fix api_key_path tests on Windows

As in test_file_cli, these failed with PermissionError on Windows
by trying to open a file created by NamedTemporary file while it is
already open. This likewise switches to using the pytest tmp_path
fixture. The api_key_file fixture is modified to request and use
it, and to provide a Path object, which test case functions call
write_text on instead of printing and flushing.

Since this was a major change to the api_key_file fixture, I've
also taken the opportunity to have it delegate patching and
automatically unpatching openai.api_key_path to another pytest
fixture, allowing its code to be shorter and simpler.
---
 openai/tests/test_util.py | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py
index 6220ccb7f4..1f77ab0a5e 100644
--- a/openai/tests/test_util.py
+++ b/openai/tests/test_util.py
@@ -1,32 +1,27 @@
 import json
-from tempfile import NamedTemporaryFile
+from pathlib import Path
 
 import pytest
+from pytest import MonkeyPatch
 
 import openai
 from openai import util
 
 
 @pytest.fixture(scope="function")
-def api_key_file():
-    saved_path = openai.api_key_path
-    try:
-        with NamedTemporaryFile(prefix="openai-api-key", mode="wt") as tmp:
-            openai.api_key_path = tmp.name
-            yield tmp
-    finally:
-        openai.api_key_path = saved_path
-
-
-def test_openai_api_key_path(api_key_file) -> None:
-    print("sk-foo", file=api_key_file)
-    api_key_file.flush()
+def api_key_file(tmp_path: Path, monkeypatch: MonkeyPatch) -> Path:
+    key_file = tmp_path / "openai_api_key"
+    monkeypatch.setattr("openai.api_key_path", str(key_file))
+    return key_file
+
+
+def test_openai_api_key_path(api_key_file: Path) -> None:
+    api_key_file.write_text("sk-foo\n")
     assert util.default_api_key() == "sk-foo"
 
 
-def test_openai_api_key_path_with_malformed_key(api_key_file) -> None:
-    print("malformed-api-key", file=api_key_file)
-    api_key_file.flush()
+def test_openai_api_key_path_with_malformed_key(api_key_file: Path) -> None:
+    api_key_file.write_text("malformed-api-key\n")
     with pytest.raises(ValueError, match="Malformed API key"):
         util.default_api_key()
 

From d4a0d6b5d1c45b26cb82282e622b03fce797eaeb Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 17:39:47 -0400
Subject: [PATCH 04/10] Remove unused import

(It's unused because of how openai.api_key_path now gets patched.)
---
 openai/tests/test_util.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py
index 1f77ab0a5e..ca2063a9e3 100644
--- a/openai/tests/test_util.py
+++ b/openai/tests/test_util.py
@@ -4,7 +4,6 @@
 import pytest
 from pytest import MonkeyPatch
 
-import openai
 from openai import util
 
 

From 181a553d3391ae1b3c619029e8b83360e67c0bcb Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 18:00:39 -0400
Subject: [PATCH 05/10] Fix command syntax in test_long_examples_validator

This fixes one of the problems running test_long_examples_validator
on Windows: the command run by subprocess.run giving a "The
filename, directory name, or volume label syntax is incorrect"
error.

It happens because the command to run is specified as a list of one
element that is a string for the shell to interpret, rather than
that string itself. This ends up working on Unix-like systems, but
on Windows it results in extra quotation (adjacent `"` characters)
that cmd.exe doesn't understand.

This changes the subprocess.run call to what was probably intended,
but it may make sense to modify it further to make it more
consistent with how the subprocess module is used in other tests.

Note that test_long_examples_validator is not fully fixed; now it
raises PermissionError on Windows (the NamedTemporaryFile issue).
---
 openai/tests/test_long_examples_validator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index 949a7cbbae..2cf730eca8 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -37,7 +37,7 @@ def test_long_examples_validator() -> None:
             training_data.flush()
 
         prepared_data_cmd_output = subprocess.run(
-            [f"openai tools fine_tunes.prepare_data -f {training_data.name}"],
+            f"openai tools fine_tunes.prepare_data -f {training_data.name}",
             stdout=subprocess.PIPE,
             text=True,
             input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time

From 60652c8b6fb3f17882d5e0e9e7c212ff92dea079 Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 18:19:22 -0400
Subject: [PATCH 06/10] Make subprocess usage more consistent

This makes the subprocess.run call in test_long_examples_validator
more similar to how other tests use the subprocess module, by
passing a list of separate command-line arguments instead of using
a shell to parse a single string.
---
 openai/tests/test_long_examples_validator.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index 2cf730eca8..5f19ac27dc 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -37,13 +37,12 @@ def test_long_examples_validator() -> None:
             training_data.flush()
 
         prepared_data_cmd_output = subprocess.run(
-            f"openai tools fine_tunes.prepare_data -f {training_data.name}",
+            ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data.name],
             stdout=subprocess.PIPE,
             text=True,
             input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
             stderr=subprocess.PIPE,
             encoding="utf-8",
-            shell=True,
         )
 
     # validate data was prepared successfully

From 37347b4e1629323536b397e38207967da442d6eb Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 18:41:50 -0400
Subject: [PATCH 07/10] Fix test_long_examples_validator PermissionError

This fixes the Windows-specific PermissionError that happened
because of how NamedTemporaryFile was used. As in other such tests,
the pytest tmp_path fixture is now used instead.
---
 openai/tests/test_long_examples_validator.py | 30 ++++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index 5f19ac27dc..ef399a0199 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -1,6 +1,6 @@
 import json
 import subprocess
-from tempfile import NamedTemporaryFile
+from pathlib import Path
 
 import pytest
 
@@ -10,7 +10,7 @@
 
 @pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS)
 @pytest.mark.skipif(not HAS_NUMPY, reason=NUMPY_INSTRUCTIONS)
-def test_long_examples_validator() -> None:
+def test_long_examples_validator(tmp_path: Path) -> None:
     """
     Ensures that long_examples_validator() handles previously applied recommendations,
     namely dropped duplicates, without resulting in a KeyError.
@@ -30,20 +30,20 @@ def test_long_examples_validator() -> None:
         {"prompt": long_prompt, "completion": long_completion},  # 2 of 2 duplicates
     ]
 
-    with NamedTemporaryFile(suffix=".jsonl", mode="w") as training_data:
-        print(training_data.name)
+    training_data = tmp_path / "data.jsonl"
+    print(training_data)  # show the full path to the temporary file
+    with open(training_data, mode="w") as file:
         for prompt_completion_row in unprepared_training_data:
-            training_data.write(json.dumps(prompt_completion_row) + "\n")
-            training_data.flush()
-
-        prepared_data_cmd_output = subprocess.run(
-            ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data.name],
-            stdout=subprocess.PIPE,
-            text=True,
-            input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
-            stderr=subprocess.PIPE,
-            encoding="utf-8",
-        )
+            print(json.dumps(prompt_completion_row), file=file)
+
+    prepared_data_cmd_output = subprocess.run(
+        ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data],
+        stdout=subprocess.PIPE,
+        text=True,
+        input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
+        stderr=subprocess.PIPE,
+        encoding="utf-8",
+    )
 
     # validate data was prepared successfully
     assert prepared_data_cmd_output.stderr == ""

From 39b6f4f43f2face257271ddf6adbe72accdbb14d Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 20:56:54 -0400
Subject: [PATCH 08/10] Fix UnicodeDecodeError from subprocess in Windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In test_long_examples_validator, subprocess.run was treating the
child process's standard streams as UTF-8 encoded. It's usually
good practice to specify UTF-8, but in this case, the child
process uses a different encoding for its standard streams on
Windows.

On many Windows systems, this is CP-1252 (Windows-1252), and the
curly apostrophe in the "After you’ve fine-tuned a model..."
message triggers a UnicodeDecodeError. Although the message could
be changed, the test's correcteness probably shouldn't rely on
knowledge of the output besides what we specifically test.

This has subprocess.run use the system default encoding for the
child process's standard streams. Since the child process is itself
Python, this should match in nearly all circumstances.
---
 openai/tests/test_long_examples_validator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index ef399a0199..bc94821f8c 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -42,7 +42,6 @@ def test_long_examples_validator(tmp_path: Path) -> None:
         text=True,
         input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
         stderr=subprocess.PIPE,
-        encoding="utf-8",
     )
 
     # validate data was prepared successfully

From 81f418b9c05639375b2238b25133b097375ffa78 Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 21:52:02 -0400
Subject: [PATCH 09/10] Make tests using temporary files more consistent

- Code style (variable names, keyword argument order)
- File encoding (specify as UTF-8 in all, not just some, cases)
- Child process standard stream encoding (system default)

(The encoding of the temporary files, and of stdin/stdout for the
child processes, are separate. Even when text from the temporary
files is output by a child process, it is encoded with whatever the
stream's encoding is, which for standard streams is the default
system encoding.)
---
 openai/tests/test_file_cli.py                |  4 ++--
 openai/tests/test_long_examples_validator.py | 12 ++++++------
 openai/tests/test_util.py                    |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py
index 15cbd13626..0d615396c1 100644
--- a/openai/tests/test_file_cli.py
+++ b/openai/tests/test_file_cli.py
@@ -8,7 +8,7 @@
 
 def test_file_cli(tmp_path: Path) -> None:
     contents = json.dumps({"prompt": "1 + 3 =", "completion": "4"}) + "\n"
-    train_file = tmp_path / "example.jsonl"
+    train_file = tmp_path / "data.jsonl"
     train_file.write_bytes(contents.encode("utf-8"))
     create_output = subprocess.check_output(
         ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"]
@@ -26,7 +26,7 @@ def test_file_cli(tmp_path: Path) -> None:
             ["openai", "api", "files.delete", "-i", file_id],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            encoding="utf-8",
+            text=True,
         )
         if delete_result.returncode == 0:
             break
diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index bc94821f8c..0e2905424c 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -30,18 +30,18 @@ def test_long_examples_validator(tmp_path: Path) -> None:
         {"prompt": long_prompt, "completion": long_completion},  # 2 of 2 duplicates
     ]
 
-    training_data = tmp_path / "data.jsonl"
-    print(training_data)  # show the full path to the temporary file
-    with open(training_data, mode="w") as file:
+    train_file = tmp_path / "data.jsonl"
+    print(train_file)  # show the full path to the temporary file
+    with open(train_file, mode="w", encoding="utf-8") as file:
         for prompt_completion_row in unprepared_training_data:
             print(json.dumps(prompt_completion_row), file=file)
 
     prepared_data_cmd_output = subprocess.run(
-        ["openai", "tools", "fine_tunes.prepare_data", "-f", training_data],
+        ["openai", "tools", "fine_tunes.prepare_data", "-f", train_file],
         stdout=subprocess.PIPE,
-        text=True,
-        input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
         stderr=subprocess.PIPE,
+        input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time
+        text=True,
     )
 
     # validate data was prepared successfully
diff --git a/openai/tests/test_util.py b/openai/tests/test_util.py
index ca2063a9e3..4b3d812e14 100644
--- a/openai/tests/test_util.py
+++ b/openai/tests/test_util.py
@@ -15,12 +15,12 @@ def api_key_file(tmp_path: Path, monkeypatch: MonkeyPatch) -> Path:
 
 
 def test_openai_api_key_path(api_key_file: Path) -> None:
-    api_key_file.write_text("sk-foo\n")
+    api_key_file.write_text("sk-foo\n", encoding="utf-8")
     assert util.default_api_key() == "sk-foo"
 
 
 def test_openai_api_key_path_with_malformed_key(api_key_file: Path) -> None:
-    api_key_file.write_text("malformed-api-key\n")
+    api_key_file.write_text("malformed-api-key\n", encoding="utf-8")
     with pytest.raises(ValueError, match="Malformed API key"):
         util.default_api_key()
 

From b810d1199c37d7a17962f1dcb2a774b4c8eef8de Mon Sep 17 00:00:00 2001
From: Eliah Kagan <degeneracypressure@gmail.com>
Date: Wed, 12 Jul 2023 22:58:38 -0400
Subject: [PATCH 10/10] Fix subprocess TypeError (Windows, Python 3.7)

When using Python 3.7 on Windows (but not on other systems, and not
on Windows with any later version of Python), subprocess.run and
related functions don't support Path objects as command line
arguments, due to https://github.com/python/cpython/issues/85919.

This explicitly converts them to str in those calls. This change
can be undone whenever this codebase drops Python 3.7 support.
---
 openai/tests/test_file_cli.py                | 2 +-
 openai/tests/test_long_examples_validator.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/openai/tests/test_file_cli.py b/openai/tests/test_file_cli.py
index 0d615396c1..7fb2bf10b0 100644
--- a/openai/tests/test_file_cli.py
+++ b/openai/tests/test_file_cli.py
@@ -11,7 +11,7 @@ def test_file_cli(tmp_path: Path) -> None:
     train_file = tmp_path / "data.jsonl"
     train_file.write_bytes(contents.encode("utf-8"))
     create_output = subprocess.check_output(
-        ["openai", "api", "files.create", "-f", train_file, "-p", "fine-tune"]
+        ["openai", "api", "files.create", "-f", str(train_file), "-p", "fine-tune"]
     )
 
     file_obj = json.loads(create_output)
diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py
index 0e2905424c..610cea88f8 100644
--- a/openai/tests/test_long_examples_validator.py
+++ b/openai/tests/test_long_examples_validator.py
@@ -37,7 +37,7 @@ def test_long_examples_validator(tmp_path: Path) -> None:
             print(json.dumps(prompt_completion_row), file=file)
 
     prepared_data_cmd_output = subprocess.run(
-        ["openai", "tools", "fine_tunes.prepare_data", "-f", train_file],
+        ["openai", "tools", "fine_tunes.prepare_data", "-f", str(train_file)],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         input="y\ny\ny\ny\ny",  # apply all recommendations, one at a time