diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index c2f2ae6bbd..d19f910446 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.3.8"
+  ".": "1.3.9"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cb12572d1..372f3ccaa3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog

+## 1.3.9 (2023-12-12)
+
+Full Changelog: [v1.3.8...v1.3.9](https://github.com/openai/openai-python/compare/v1.3.8...v1.3.9)
+
+### Documentation
+
+* improve README timeout comment ([#964](https://github.com/openai/openai-python/issues/964)) ([3c3ed5e](https://github.com/openai/openai-python/commit/3c3ed5edd938a9333e8d2fa47cb4b44178eef89a))
+* small Improvement in the async chat response code ([#959](https://github.com/openai/openai-python/issues/959)) ([fb9d0a3](https://github.com/openai/openai-python/commit/fb9d0a358fa232043d9d5c149b6a888d50127c7b))
+* small streaming readme improvements ([#962](https://github.com/openai/openai-python/issues/962)) ([f3be2e5](https://github.com/openai/openai-python/commit/f3be2e5cc24988471e6cedb3e34bdfd3123edc63))
+
+
+### Refactors
+
+* **client:** simplify cleanup ([#966](https://github.com/openai/openai-python/issues/966)) ([5c138f4](https://github.com/openai/openai-python/commit/5c138f4a7947e5b4aae8779fae78ca51269b355a))
+* simplify internal error handling ([#968](https://github.com/openai/openai-python/issues/968)) ([d187f6b](https://github.com/openai/openai-python/commit/d187f6b6e4e646cca39c6ca35c618aa5c1bfbd61))
+
 ## 1.3.8 (2023-12-08)

 Full Changelog: [v1.3.7...v1.3.8](https://github.com/openai/openai-python/compare/v1.3.7...v1.3.8)
diff --git a/README.md b/README.md
index 471fd88ab1..f89d0bdb28 100644
--- a/README.md
+++ b/README.md
@@ -97,8 +97,7 @@ stream = client.chat.completions.create(
     stream=True,
 )
 for chunk in stream:
-    if chunk.choices[0].delta.content is not None:
-        print(chunk.choices[0].delta.content)
+    print(chunk.choices[0].delta.content or "", end="")
 ```

 The async client uses the exact same interface.

@@ -108,14 +107,18 @@
 from openai import AsyncOpenAI

 client = AsyncOpenAI()

-stream = await client.chat.completions.create(
-    model="gpt-4",
-    messages=[{"role": "user", "content": "Say this is a test"}],
-    stream=True,
-)
-async for chunk in stream:
-    if chunk.choices[0].delta.content is not None:
-        print(chunk.choices[0].delta.content)
+
+async def main():
+    stream = await client.chat.completions.create(
+        model="gpt-4",
+        messages=[{"role": "user", "content": "Say this is a test"}],
+        stream=True,
+    )
+    async for chunk in stream:
+        print(chunk.choices[0].delta.content or "", end="")
+
+
+asyncio.run(main())
 ```

@@ -359,7 +362,7 @@ from openai import OpenAI

 # Configure the default for all requests:
 client = OpenAI(
-    # default is 60s
+    # 20 seconds (default is 10 minutes)
     timeout=20.0,
 )

diff --git a/pyproject.toml b/pyproject.toml
index fab8bf4250..99d537d22e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.3.8"
+version = "1.3.9"
 description = "The official Python library for the openai API"
 readme = "README.md"
 license = "Apache-2.0"
@@ -84,7 +84,7 @@ typecheck = { chain = [
 ]}
 "typecheck:pyright" = "pyright"
 "typecheck:verify-types" = "pyright --verifytypes openai --ignoreexternal"
-"typecheck:mypy" = "mypy --enable-incomplete-feature=Unpack ."
+"typecheck:mypy" = "mypy ."

 [build-system]
 requires = ["hatchling"]
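One wrinkle in the README change above: the new async example ends with `asyncio.run(main())`, but the hunk adds no `import asyncio` line, so the snippet as committed would raise `NameError: name 'asyncio' is not defined` if pasted verbatim. A runnable version would look like this (a sketch assuming a valid `OPENAI_API_KEY` in the environment):

```python
import asyncio  # not added by the hunk above, but required by asyncio.run()

from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment


async def main() -> None:
    stream = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Say this is a test"}],
        stream=True,
    )
    async for chunk in stream:
        # delta.content is None for some chunks, hence the `or ""` fallback
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())
```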
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index d90f777cdc..0d66b3c682 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -221,13 +221,6 @@ def _client(self, value: _httpx.Client) -> None:  # type: ignore

         http_client = value

-    @override
-    def __del__(self) -> None:
-        try:
-            super().__del__()
-        except Exception:
-            pass
-

 class _AzureModuleClient(_ModuleClient, AzureOpenAI):  # type: ignore
     ...
diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py
index bbbb8a54ab..92189617b5 100644
--- a/src/openai/_base_client.py
+++ b/src/openai/_base_client.py
@@ -5,6 +5,7 @@
 import time
 import uuid
 import email
+import asyncio
 import inspect
 import logging
 import platform
@@ -672,9 +673,16 @@ def _idempotency_key(self) -> str:
         return f"stainless-python-retry-{uuid.uuid4()}"


+class SyncHttpxClientWrapper(httpx.Client):
+    def __del__(self) -> None:
+        try:
+            self.close()
+        except Exception:
+            pass
+
+
 class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
     _client: httpx.Client
-    _has_custom_http_client: bool
     _default_stream_cls: type[Stream[Any]] | None = None

     def __init__(
@@ -747,7 +755,7 @@ def __init__(
             custom_headers=custom_headers,
             _strict_response_validation=_strict_response_validation,
         )
-        self._client = http_client or httpx.Client(
+        self._client = http_client or SyncHttpxClientWrapper(
             base_url=base_url,
             # cast to a valid type because mypy doesn't understand our type narrowing
             timeout=cast(Timeout, timeout),
@@ -755,7 +763,6 @@ def __init__(
             transport=transport,
             limits=limits,
         )
-        self._has_custom_http_client = bool(http_client)

     def is_closed(self) -> bool:
         return self._client.is_closed
@@ -866,40 +873,25 @@ def _request(
         request = self._build_request(options)
         self._prepare_request(request)

-        response = None
-
         try:
             response = self._client.send(
                 request,
                 auth=self.custom_auth,
                 stream=stream or self._should_stream_response_body(request=request),
             )
-            log.debug(
-                'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
-            )
-            response.raise_for_status()
-        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
-            if retries > 0 and self._should_retry(err.response):
-                err.response.close()
+        except httpx.TimeoutException as err:
+            if retries > 0:
                 return self._retry_request(
                     options,
                     cast_to,
                     retries,
-                    err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
+                    response_headers=None,
                 )

-            # If the response is streamed then we need to explicitly read the response
-            # to completion before attempting to access the response text.
-            if not err.response.is_closed:
-                err.response.read()
-
-            raise self._make_status_error_from_response(err.response) from None
-        except httpx.TimeoutException as err:
-            if response is not None:
-                response.close()
-
+            raise APITimeoutError(request=request) from err
+        except Exception as err:
             if retries > 0:
                 return self._retry_request(
                     options,
@@ -907,25 +899,35 @@ def _request(
                     retries,
                     stream=stream,
                     stream_cls=stream_cls,
-                    response_headers=response.headers if response is not None else None,
+                    response_headers=None,
                 )

-            raise APITimeoutError(request=request) from err
-        except Exception as err:
-            if response is not None:
-                response.close()
+            raise APIConnectionError(request=request) from err

-            if retries > 0:
+        log.debug(
+            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
+            if retries > 0 and self._should_retry(err.response):
+                err.response.close()
                 return self._retry_request(
                     options,
                     cast_to,
                     retries,
+                    err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
-                    response_headers=response.headers if response is not None else None,
                 )

-            raise APIConnectionError(request=request) from err
+            # If the response is streamed then we need to explicitly read the response
+            # to completion before attempting to access the response text.
+            if not err.response.is_closed:
+                err.response.read()
+
+            raise self._make_status_error_from_response(err.response) from None

         return self._process_response(
             cast_to=cast_to,
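Taken together, the two `_request` hunks above invert the old structure: instead of one `try` block wrapping both `send()` and `raise_for_status()` (which forced a `response = None` sentinel and per-branch `close()` calls), transport failures are now handled around `send()` alone and HTTP status failures in a second, separate `try`. Stripped of the retry plumbing, the new shape is roughly this (a simplified sketch, not the library's actual method; `send_checked` is an invented name):

```python
import httpx

from openai import APIConnectionError, APITimeoutError


def send_checked(client: httpx.Client, request: httpx.Request) -> httpx.Response:
    # Step 1: transport-level failures are handled around send() alone, so
    # `response` is always bound afterwards and the old `response = None`
    # sentinel (and its close() bookkeeping) disappears.
    try:
        response = client.send(request)
    except httpx.TimeoutException as err:
        raise APITimeoutError(request=request) from err
    except Exception as err:
        raise APIConnectionError(request=request) from err

    # Step 2: HTTP-level failures (4xx/5xx) are dealt with separately; the
    # real method wraps this in its retry logic and converts the httpx error
    # into a typed status error via _make_status_error_from_response().
    response.raise_for_status()
    return response
```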
- if not err.response.is_closed: - err.response.read() - - raise self._make_status_error_from_response(err.response) from None - except httpx.TimeoutException as err: - if response is not None: - response.close() - + raise APITimeoutError(request=request) from err + except Exception as err: if retries > 0: return self._retry_request( options, @@ -907,25 +899,35 @@ def _request( retries, stream=stream, stream_cls=stream_cls, - response_headers=response.headers if response is not None else None, + response_headers=None, ) - raise APITimeoutError(request=request) from err - except Exception as err: - if response is not None: - response.close() + raise APIConnectionError(request=request) from err - if retries > 0: + log.debug( + 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + ) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + if retries > 0 and self._should_retry(err.response): + err.response.close() return self._retry_request( options, cast_to, retries, + err.response.headers, stream=stream, stream_cls=stream_cls, - response_headers=response.headers if response is not None else None, ) - raise APIConnectionError(request=request) from err + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() + + raise self._make_status_error_from_response(err.response) from None return self._process_response( cast_to=cast_to, @@ -1135,9 +1137,17 @@ def get_api_list( return self._request_api_list(model, page, opts) +class AsyncHttpxClientWrapper(httpx.AsyncClient): + def __del__(self) -> None: + try: + # TODO(someday): support non asyncio runtimes here + asyncio.get_running_loop().create_task(self.aclose()) + except Exception: + pass + + class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]): _client: httpx.AsyncClient - _has_custom_http_client: bool _default_stream_cls: type[AsyncStream[Any]] | None = None def __init__( @@ -1210,7 +1220,7 @@ def __init__( custom_headers=custom_headers, _strict_response_validation=_strict_response_validation, ) - self._client = http_client or httpx.AsyncClient( + self._client = http_client or AsyncHttpxClientWrapper( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), @@ -1218,7 +1228,6 @@ def __init__( transport=transport, limits=limits, ) - self._has_custom_http_client = bool(http_client) def is_closed(self) -> bool: return self._client.is_closed @@ -1326,40 +1335,25 @@ async def _request( request = self._build_request(options) await self._prepare_request(request) - response = None - try: response = await self._client.send( request, auth=self.custom_auth, stream=stream or self._should_stream_response_body(request=request), ) - log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase - ) - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - if retries > 0 and self._should_retry(err.response): - await err.response.aclose() + except httpx.TimeoutException as err: + if retries > 0: return await self._retry_request( options, cast_to, retries, - err.response.headers, stream=stream, stream_cls=stream_cls, + response_headers=None, ) - # If the response is streamed then we need to explicitly read the response - # to 
diff --git a/src/openai/_client.py b/src/openai/_client.py
index 8cf0fa6797..dacadf5aff 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import os
-import asyncio
 from typing import Any, Union, Mapping
 from typing_extensions import Self, override

@@ -205,16 +204,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    def __del__(self) -> None:
-        if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"):
-            # this can happen if the '__init__' method raised an error
-            return
-
-        if self._has_custom_http_client:
-            return
-
-        self.close()
-
     @override
     def _make_status_error(
         self,
@@ -415,19 +404,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    def __del__(self) -> None:
-        if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"):
-            # this can happen if the '__init__' method raised an error
-            return
-
-        if self._has_custom_http_client:
-            return
-
-        try:
-            asyncio.get_running_loop().create_task(self.close())
-        except Exception:
-            pass
-
     @override
     def _make_status_error(
         self,
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 7c90447cbc..3c646d4ffe 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless.

 __title__ = "openai"
-__version__ = "1.3.8"  # x-release-please-version
+__version__ = "1.3.9"  # x-release-please-version
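With both `__del__` overrides removed from `_client.py`, deterministic cleanup now comes from the context-manager protocol or an explicit `close()`, with the httpx wrapper finalizers as a best-effort backstop. A usage sketch (assumes `OPENAI_API_KEY` is set in the environment):

```python
from openai import OpenAI

# Explicit, deterministic cleanup: __exit__ closes the underlying httpx
# client on the way out, so no finalizer needs to fire at all.
with OpenAI() as client:
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say this is a test"}],
    )
    print(completion.choices[0].message.content)
```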
diff --git a/tests/test_client.py b/tests/test_client.py
index cd374a49db..0959185df2 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -24,7 +24,6 @@
     OpenAIError,
     APIStatusError,
     APITimeoutError,
-    APIConnectionError,
     APIResponseValidationError,
 )
 from openai._base_client import (
@@ -46,14 +45,8 @@ def _get_params(client: BaseClient[Any, Any]) -> dict[str, str]:
     return dict(url.params)


-_original_response_init = cast(Any, httpx.Response.__init__)  # type: ignore
-
-
-def _low_retry_response_init(*args: Any, **kwargs: Any) -> Any:
-    headers = cast("list[tuple[bytes, bytes]]", kwargs["headers"])
-    headers.append((b"retry-after", b"0.1"))
-
-    return _original_response_init(*args, **kwargs)
+def _low_retry_timeout(*_args: Any, **_kwargs: Any) -> float:
+    return 0.1


 def _get_open_connections(client: OpenAI | AsyncOpenAI) -> int:
@@ -591,14 +584,6 @@ def test_absolute_request_url(self, client: OpenAI) -> None:
         )
         assert request.url == "https://myapi.com/foo"

-    def test_client_del(self) -> None:
-        client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
-        assert not client.is_closed()
-
-        client.__del__()
-
-        assert client.is_closed()
-
     def test_copied_client_does_not_close_http(self) -> None:
         client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
         assert not client.is_closed()
@@ -606,9 +591,8 @@ def test_copied_client_does_not_close_http(self) -> None:
         copied = client.copy()
         assert copied is not client

-        copied.__del__()
+        del copied

-        assert not copied.is_closed()
         assert not client.is_closed()

     def test_client_context_manager(self) -> None:
@@ -687,103 +671,51 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]

-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    def test_retrying_timeout_errors_doesnt_leak(self) -> None:
-        def raise_for_status(response: httpx.Response) -> None:
-            raise httpx.TimeoutException("Test timeout error", request=response.request)
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APITimeoutError):
-                self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
-
-        assert _get_open_connections(self.client) == 0
-
-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    def test_retrying_runtime_errors_doesnt_leak(self) -> None:
-        def raise_for_status(_response: httpx.Response) -> None:
-            raise RuntimeError("Test error")
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APIConnectionError):
-                self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
-
-        assert _get_open_connections(self.client) == 0
-
-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    def test_retrying_status_errors_doesnt_leak(self) -> None:
-        def raise_for_status(response: httpx.Response) -> None:
-            response.status_code = 500
-            raise httpx.HTTPStatusError("Test 500 error", response=response, request=response.request)
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APIStatusError):
-                self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+
+        with pytest.raises(APITimeoutError):
+            self.client.post(
+                "/chat/completions",
+                body=dict(
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": "Say this is a test",
+                        }
+                    ],
+                    model="gpt-3.5-turbo",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )

         assert _get_open_connections(self.client) == 0

+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
-    def test_status_error_within_httpx(self, respx_mock: MockRouter) -> None:
-        respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+    def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))

-        def on_response(response: httpx.Response) -> None:
-            raise httpx.HTTPStatusError(
-                "Simulating an error inside httpx",
-                response=response,
-                request=response.request,
+        with pytest.raises(APIStatusError):
+            self.client.post(
+                "/chat/completions",
+                body=dict(
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": "Say this is a test",
+                        }
+                    ],
+                    model="gpt-3.5-turbo",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
             )

-        client = OpenAI(
-            base_url=base_url,
-            api_key=api_key,
-            _strict_response_validation=True,
-            http_client=httpx.Client(
-                event_hooks={
-                    "response": [on_response],
-                }
-            ),
-            max_retries=0,
-        )
-
-        with pytest.raises(APIStatusError):
-            client.post("/foo", cast_to=httpx.Response)
+        assert _get_open_connections(self.client) == 0


 class TestAsyncOpenAI:
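The rewritten tests above swap the old `httpx.Response.__init__` monkey-patching for two smaller levers: a respx route that produces the failure directly, and a patched `_calculate_retry_timeout` so retries back off for only 0.1s. The same pattern in isolation might look like this (a sketch with invented names; assumes `pytest` and `respx` are installed, and no real request is ever sent):

```python
import httpx
import pytest
import respx

from openai import APIStatusError, OpenAI

base_url = "http://127.0.0.1:4010"  # same placeholder the test suite uses


def test_500_raises_api_status_error() -> None:
    with respx.mock(base_url=base_url) as respx_mock:
        # Serve a canned 500 from the router instead of patching httpx internals.
        respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))

        client = OpenAI(base_url=base_url, api_key="sk-test", max_retries=0)
        with pytest.raises(APIStatusError):
            client.post("/chat/completions", body={}, cast_to=httpx.Response)
```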
@@ -1325,15 +1257,6 @@ def test_absolute_request_url(self, client: AsyncOpenAI) -> None:
         )
         assert request.url == "https://myapi.com/foo"

-    async def test_client_del(self) -> None:
-        client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
-        assert not client.is_closed()
-
-        client.__del__()
-
-        await asyncio.sleep(0.2)
-        assert client.is_closed()
-
     async def test_copied_client_does_not_close_http(self) -> None:
         client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
         assert not client.is_closed()
@@ -1341,10 +1264,9 @@ async def test_copied_client_does_not_close_http(self) -> None:
         copied = client.copy()
         assert copied is not client

-        copied.__del__()
+        del copied

         await asyncio.sleep(0.2)
-        assert not copied.is_closed()
         assert not client.is_closed()

     async def test_client_context_manager(self) -> None:
@@ -1427,101 +1349,48 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]

-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    async def test_retrying_timeout_errors_doesnt_leak(self) -> None:
-        def raise_for_status(response: httpx.Response) -> None:
-            raise httpx.TimeoutException("Test timeout error", request=response.request)
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APITimeoutError):
-                await self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
-
-        assert _get_open_connections(self.client) == 0
-
-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    async def test_retrying_runtime_errors_doesnt_leak(self) -> None:
-        def raise_for_status(_response: httpx.Response) -> None:
-            raise RuntimeError("Test error")
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APIConnectionError):
-                await self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
-
-        assert _get_open_connections(self.client) == 0
-
-    @mock.patch("httpx.Response.__init__", _low_retry_response_init)
-    async def test_retrying_status_errors_doesnt_leak(self) -> None:
-        def raise_for_status(response: httpx.Response) -> None:
-            response.status_code = 500
-            raise httpx.HTTPStatusError("Test 500 error", response=response, request=response.request)
-
-        with mock.patch("httpx.Response.raise_for_status", raise_for_status):
-            with pytest.raises(APIStatusError):
-                await self.client.post(
-                    "/chat/completions",
-                    body=dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
-                    ),
-                    cast_to=httpx.Response,
-                    options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
-                )
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+
+        with pytest.raises(APITimeoutError):
+            await self.client.post(
+                "/chat/completions",
+                body=dict(
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": "Say this is a test",
+                        }
+                    ],
+                    model="gpt-3.5-turbo",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )

         assert _get_open_connections(self.client) == 0

+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
-    @pytest.mark.asyncio
-    async def test_status_error_within_httpx(self, respx_mock: MockRouter) -> None:
-        respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+    async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500))

-        def on_response(response: httpx.Response) -> None:
-            raise httpx.HTTPStatusError(
-                "Simulating an error inside httpx",
-                response=response,
-                request=response.request,
+        with pytest.raises(APIStatusError):
+            await self.client.post(
+                "/chat/completions",
+                body=dict(
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": "Say this is a test",
+                        }
+                    ],
+                    model="gpt-3.5-turbo",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
             )

-        client = AsyncOpenAI(
-            base_url=base_url,
-            api_key=api_key,
-            _strict_response_validation=True,
-            http_client=httpx.AsyncClient(
-                event_hooks={
-                    "response": [on_response],
-                }
-            ),
-            max_retries=0,
-        )
-
-        with pytest.raises(APIStatusError):
-            await client.post("/foo", cast_to=httpx.Response)
+        assert _get_open_connections(self.client) == 0
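For completeness, the `del copied` change in both copy tests mirrors real usage: a copied client shares the original's httpx client, so dropping the copy must not close the transport out from under the original. Roughly (a sketch; the dummy key never sends a request):

```python
from openai import OpenAI

client = OpenAI(api_key="sk-test")  # dummy key; nothing is sent
copied = client.copy()  # the copy shares the original's httpx client

# Dropping the last reference to the copy runs any finalizers, but the
# shared transport has to stay usable for the original client.
del copied
assert not client.is_closed()
```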