From b1556891e602db096c5d589b88e0f9989b3fea6d Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 9 Jan 2025 20:04:32 +0800 Subject: [PATCH 1/3] fix tests Signed-off-by: youkaichao --- tests/multi_step/test_correctness_async_llm.py | 3 +-- tests/utils.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/multi_step/test_correctness_async_llm.py b/tests/multi_step/test_correctness_async_llm.py index 7203d635c2fa..8456a463adee 100644 --- a/tests/multi_step/test_correctness_async_llm.py +++ b/tests/multi_step/test_correctness_async_llm.py @@ -16,7 +16,6 @@ NUM_PROMPTS = [10] DEFAULT_SERVER_ARGS: List[str] = [ - "--disable-log-requests", "--worker-use-ray", "--gpu-memory-utilization", "0.85", @@ -110,7 +109,7 @@ async def test_multi_step( # Spin up client/server & issue completion API requests. # Default `max_wait_seconds` is 240 but was empirically - # was raised 3x to 720 *just for this test* due to + # was raised 5x to 1200 *just for this test* due to # observed timeouts in GHA CI ref_completions = await completions_with_server_args( prompts, diff --git a/tests/utils.py b/tests/utils.py index bf3d88194e4c..66d4c71d98a8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -780,7 +780,6 @@ async def completions_with_server_args( assert len(max_tokens) == len(prompts) outputs = None - max_wait_seconds = 240 * 3 # 240 is default with RemoteOpenAIServer(model_name, server_cli_args, max_wait_seconds=max_wait_seconds) as server: From 9c33962e11c7a96670dc8898ccaee2ca47f77c9e Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 9 Jan 2025 20:24:32 +0800 Subject: [PATCH 2/3] add api request timeout Signed-off-by: youkaichao --- tests/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 66d4c71d98a8..691bf163801a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -157,13 +157,19 @@ def url_root(self) -> str: def url_for(self, *parts: str) -> str: return self.url_root + "/" + "/".join(parts) - def get_client(self): + def get_client(self, **kwargs): + if "timeout" not in kwargs: + kwargs["timeout"] = 60 return openai.OpenAI( base_url=self.url_for("v1"), api_key=self.DUMMY_API_KEY, + max_retries=0, + **kwargs, ) def get_async_client(self, **kwargs): + if "timeout" not in kwargs: + kwargs["timeout"] = 60 return openai.AsyncOpenAI(base_url=self.url_for("v1"), api_key=self.DUMMY_API_KEY, max_retries=0, From 15499e9f3c24920f5403431c9c74052aa7211e52 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 9 Jan 2025 21:40:53 +0800 Subject: [PATCH 3/3] fix? Signed-off-by: youkaichao --- tests/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 691bf163801a..f4eecf19e8c6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -159,7 +159,7 @@ def url_for(self, *parts: str) -> str: def get_client(self, **kwargs): if "timeout" not in kwargs: - kwargs["timeout"] = 60 + kwargs["timeout"] = 600 return openai.OpenAI( base_url=self.url_for("v1"), api_key=self.DUMMY_API_KEY, @@ -169,7 +169,7 @@ def get_client(self, **kwargs): def get_async_client(self, **kwargs): if "timeout" not in kwargs: - kwargs["timeout"] = 60 + kwargs["timeout"] = 600 return openai.AsyncOpenAI(base_url=self.url_for("v1"), api_key=self.DUMMY_API_KEY, max_retries=0,