From 67602a9e1e3402d113a7da28778ec3778e887866 Mon Sep 17 00:00:00 2001 From: Chad Retz Date: Thu, 17 Apr 2025 10:34:58 -0500 Subject: [PATCH] Investigate test flakes --- .github/workflows/ci.yml | 16 ++++++++++++++-- tests/__init__.py | 2 ++ tests/conftest.py | 15 ++++++++------- tests/contrib/test_opentelemetry.py | 6 +++++- tests/test_client.py | 6 +++++- tests/test_workflow.py | 7 +++++-- tests/testing/test_workflow.py | 11 ++++++++--- tests/worker/test_activity.py | 8 ++++++-- tests/worker/test_interceptor.py | 7 +++++-- tests/worker/test_replayer.py | 7 +++++-- tests/worker/test_update_with_start.py | 7 +++++-- tests/worker/test_worker.py | 7 +++++-- tests/worker/test_workflow.py | 17 ++++++++++++----- tests/worker/workflow_sandbox/test_runner.py | 8 ++++++-- 14 files changed, 91 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06a361d1c..3e8d2fb7f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: jobs: # Build and test the project build-lint-test: + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -16,7 +17,8 @@ jobs: os: [ubuntu-latest, ubuntu-arm, macos-intel, macos-arm, windows-latest] include: # On 3.9 there is a problem with import errors caused by pytests' loader that surface due - # to a bug in CPython, so we avoid using the assert rewriter. + # to a bug in CPython (https://github.com/python/cpython/issues/91351), so we avoid using + # the assert rewriter. - python: "3.9" pytestExtraArgs: "--assert=plain" - os: ubuntu-latest @@ -31,6 +33,12 @@ jobs: runsOn: ubuntu-24.04-arm64-2-core - os: macos-intel runsOn: macos-13 + # On 3.13.3 there is some issue with macOS intel where it hangs after pytest with some + # test that may have a worker that cannot properly shutdown, but it does not occur on + # other versions, platforms, etc. See https://github.com/temporalio/sdk-python/issues/834. + - os: macos-intel + python: "3.13" + pythonOverride: "3.13.2" - os: macos-arm runsOn: macos-latest runs-on: ${{ matrix.runsOn || matrix.os }} @@ -44,7 +52,7 @@ jobs: workspaces: temporalio/bridge -> target - uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python }} + python-version: ${{ matrix.pythonOverride || matrix.python }} - uses: arduino/setup-protoc@v3 with: # TODO(cretz): Can upgrade proto when https://github.com/arduino/setup-protoc/issues/99 fixed @@ -59,12 +67,15 @@ jobs: - run: poe build-develop - run: mkdir junit-xml - run: poe test ${{matrix.pytestExtraArgs}} -s --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}.xml + timeout-minutes: 10 # Time skipping doesn't yet support ARM - if: ${{ !endsWith(matrix.os, '-arm') }} run: poe test ${{matrix.pytestExtraArgs}} -s --workflow-environment time-skipping --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--time-skipping.xml + timeout-minutes: 10 # Check cloud if proper target and not on fork - if: ${{ matrix.cloudTestTarget && (github.event.pull_request.head.repo.full_name == '' || github.event.pull_request.head.repo.full_name == 'temporalio/sdk-python') }} run: poe test ${{matrix.pytestExtraArgs}} -s -k test_cloud_client --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--cloud.xml + timeout-minutes: 10 env: TEMPORAL_CLIENT_CLOUD_API_KEY: ${{ secrets.TEMPORAL_CLIENT_CLOUD_API_KEY }} TEMPORAL_CLIENT_CLOUD_API_VERSION: 2024-05-13-00 @@ -94,6 +105,7 @@ jobs: poe format [[ -z $(git status --porcelain temporalio) ]] || (git diff temporalio; echo "Protos changed"; exit 1) poe test -s + timeout-minutes: 10 # Do docs stuff (only on one host) - name: Build API docs diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..03466f8a1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +# TODO: Change back to "default" after next CLI release +DEV_SERVER_DOWNLOAD_VERSION = "v1.3.1-persistence-fix.0" diff --git a/tests/conftest.py b/tests/conftest.py index e2c542c15..6cd1f80fe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,8 @@ import pytest import pytest_asyncio +from . import DEV_SERVER_DOWNLOAD_VERSION + # If there is an integration test environment variable set, we must remove the # first path from the sys.path so we can import the wheel instead if os.getenv("TEMPORAL_INTEGRATION_TEST"): @@ -114,8 +116,7 @@ async def env(env_type: str) -> AsyncGenerator[WorkflowEnvironment, None]: "--dynamic-config-value", "system.enableDeploymentVersions=true", ], - # TODO: Remove after next CLI release - dev_server_download_version="v1.3.1-priority.0", + dev_server_download_version=DEV_SERVER_DOWNLOAD_VERSION, ) elif env_type == "time-skipping": env = await WorkflowEnvironment.start_time_skipping() @@ -139,11 +140,11 @@ async def worker( await worker.close() -# There is an issue on Windows 3.9 tests in GitHub actions where even though all -# tests pass, an unclear outer area is killing the process with a bad exit code. -# This windows-only hook forcefully kills the process as success when the exit -# code from pytest is a success. -if sys.version_info < (3, 10) and sys.platform == "win32": +# There is an issue on 3.9 tests in GitHub actions where even though all tests +# pass, an unclear outer area is killing the process with a bad exit code. This +# hook forcefully kills the process as success when the exit code from pytest +# is a success. +if sys.version_info < (3, 10): @pytest.hookimpl(hookwrapper=True, trylast=True) def pytest_cmdline_main(config): diff --git a/tests/contrib/test_opentelemetry.py b/tests/contrib/test_opentelemetry.py index 5ecf49126..e42e6b977 100644 --- a/tests/contrib/test_opentelemetry.py +++ b/tests/contrib/test_opentelemetry.py @@ -7,7 +7,6 @@ from datetime import timedelta from typing import Iterable, List, Optional -import pytest from opentelemetry.sdk.trace import ReadableSpan, TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter @@ -21,6 +20,11 @@ from temporalio.testing import WorkflowEnvironment from temporalio.worker import UnsandboxedWorkflowRunner, Worker +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + @dataclass class TracingActivityParam: diff --git a/tests/test_client.py b/tests/test_client.py index f143dada5..418d9ff53 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -8,7 +8,6 @@ import google.protobuf.any_pb2 import google.protobuf.message -import pytest from google.protobuf import json_format import temporalio.api.common.v1 @@ -106,6 +105,11 @@ KSWorkflowParams, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + async def test_start_id_reuse( client: Client, worker: ExternalWorker, env: WorkflowEnvironment diff --git a/tests/test_workflow.py b/tests/test_workflow.py index 8e4b0bca8..21dd7680e 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -2,11 +2,14 @@ import itertools from typing import Sequence -import pytest - from temporalio import workflow from temporalio.common import RawValue, VersioningBehavior +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + class GoodDefnBase: @workflow.run diff --git a/tests/testing/test_workflow.py b/tests/testing/test_workflow.py index 126269f12..014f58d62 100644 --- a/tests/testing/test_workflow.py +++ b/tests/testing/test_workflow.py @@ -6,8 +6,6 @@ from time import monotonic from typing import Any, List, Optional, Union -import pytest - from temporalio import activity, workflow from temporalio.client import ( Client, @@ -31,8 +29,14 @@ TimeoutType, ) from temporalio.testing import WorkflowEnvironment +from tests import DEV_SERVER_DOWNLOAD_VERSION from tests.helpers import new_worker +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + @workflow.defn class ReallySlowWorkflow: @@ -306,7 +310,8 @@ async def test_search_attributes_on_dev_server( float_attr, bool_attr, datetime_attr, - ] + ], + dev_server_download_version=DEV_SERVER_DOWNLOAD_VERSION, ) as env: handle = await env.client.start_workflow( "some-workflow", diff --git a/tests/worker/test_activity.py b/tests/worker/test_activity.py index e69557c8e..70ba8cfde 100644 --- a/tests/worker/test_activity.py +++ b/tests/worker/test_activity.py @@ -15,8 +15,6 @@ from datetime import datetime, timedelta, timezone from typing import Any, Callable, List, NoReturn, Optional, Sequence, Type -import pytest - from temporalio import activity, workflow from temporalio.client import ( AsyncActivityHandle, @@ -48,6 +46,12 @@ KSWorkflowParams, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + + _default_shared_state_manager = SharedStateManager.create_from_multiprocessing( multiprocessing.Manager() ) diff --git a/tests/worker/test_interceptor.py b/tests/worker/test_interceptor.py index 1392cd350..c17ce1397 100644 --- a/tests/worker/test_interceptor.py +++ b/tests/worker/test_interceptor.py @@ -3,8 +3,6 @@ from datetime import timedelta from typing import Any, Callable, List, NoReturn, Optional, Tuple, Type -import pytest - from temporalio import activity, workflow from temporalio.client import Client, WorkflowUpdateFailedError from temporalio.exceptions import ApplicationError @@ -30,6 +28,11 @@ WorkflowOutboundInterceptor, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + interceptor_traces: List[Tuple[str, Any]] = [] diff --git a/tests/worker/test_replayer.py b/tests/worker/test_replayer.py index b55e251fa..f232b82aa 100644 --- a/tests/worker/test_replayer.py +++ b/tests/worker/test_replayer.py @@ -6,8 +6,6 @@ from pathlib import Path from typing import Any, Dict, Optional, Type -import pytest - from temporalio import activity, workflow from temporalio.client import Client, WorkflowFailureError, WorkflowHistory from temporalio.exceptions import ApplicationError @@ -26,6 +24,11 @@ SignalsActivitiesTimersUpdatesTracingWorkflow, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + @activity.defn async def say_hello(name: str) -> str: diff --git a/tests/worker/test_update_with_start.py b/tests/worker/test_update_with_start.py index e399d39ee..a8131cd05 100644 --- a/tests/worker/test_update_with_start.py +++ b/tests/worker/test_update_with_start.py @@ -8,8 +8,6 @@ from typing import Any, Iterator, Mapping, Optional from unittest.mock import patch -import pytest - import temporalio.api.common.v1 import temporalio.api.workflowservice.v1 from temporalio import activity, workflow @@ -33,6 +31,11 @@ new_worker, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + @activity.defn async def activity_called_by_update() -> None: diff --git a/tests/worker/test_worker.py b/tests/worker/test_worker.py index 67ce97ae9..76eb6a238 100644 --- a/tests/worker/test_worker.py +++ b/tests/worker/test_worker.py @@ -7,8 +7,6 @@ from datetime import timedelta from typing import Any, Awaitable, Callable, Optional, Sequence -import pytest - import temporalio.api.enums.v1 import temporalio.worker._worker from temporalio import activity, workflow @@ -45,6 +43,11 @@ from temporalio.workflow import VersioningIntent from tests.helpers import assert_eventually, new_worker, worker_versioning_enabled +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + def test_load_default_worker_binary_id(): # Just run it twice and confirm it didn't change diff --git a/tests/worker/test_workflow.py b/tests/worker/test_workflow.py index f57f4b9fd..9490b0d0b 100644 --- a/tests/worker/test_workflow.py +++ b/tests/worker/test_workflow.py @@ -35,7 +35,6 @@ ) from urllib.request import urlopen -import pytest from google.protobuf.timestamp_pb2 import Timestamp from typing_extensions import Literal, Protocol, runtime_checkable @@ -111,6 +110,7 @@ WorkflowInstanceDetails, WorkflowRunner, ) +from tests import DEV_SERVER_DOWNLOAD_VERSION from tests.helpers import ( admitted_update_task, assert_eq_eventually, @@ -127,6 +127,11 @@ external_wait_cancel, ) +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + @workflow.defn class HelloWorkflow: @@ -5083,7 +5088,9 @@ async def test_workflow_replace_worker_client(client: Client, env: WorkflowEnvir # we will terminate both. We have to use a ticking workflow with only one # poller to force a quick re-poll to recognize our client change quickly (as # opposed to just waiting the minute for poll timeout). - async with await WorkflowEnvironment.start_local() as other_env: + async with await WorkflowEnvironment.start_local( + dev_server_download_version=DEV_SERVER_DOWNLOAD_VERSION + ) as other_env: # Start both workflows on different servers task_queue = f"tq-{uuid.uuid4()}" handle1 = await client.start_workflow( @@ -5964,9 +5971,9 @@ async def _do_first_completion_command_is_honored_test( result = await handle.result() except WorkflowFailureError as err: if main_workflow_returns_before_signal_completions: - assert ( - False - ), "Expected no error due to main workflow coroutine returning first" + raise RuntimeError( + "Expected no error due to main workflow coroutine returning first" + ) else: assert str(err.cause).startswith("Client should see this error") else: diff --git a/tests/worker/workflow_sandbox/test_runner.py b/tests/worker/workflow_sandbox/test_runner.py index 14b2c94c3..6d622dfa7 100644 --- a/tests/worker/workflow_sandbox/test_runner.py +++ b/tests/worker/workflow_sandbox/test_runner.py @@ -12,8 +12,6 @@ from enum import IntEnum from typing import Callable, Dict, List, Optional, Sequence, Set, Type -import pytest - from temporalio import activity, workflow from temporalio.client import Client, WorkflowFailureError, WorkflowHandle from temporalio.exceptions import ApplicationError @@ -28,6 +26,12 @@ from tests.worker.workflow_sandbox.testmodules import stateful_module from tests.worker.workflow_sandbox.testmodules.proto import SomeMessage +# Passing through because Python 3.9 has an import bug at +# https://github.com/python/cpython/issues/91351 +with workflow.unsafe.imports_passed_through(): + import pytest + + global_state = ["global orig"] # We just access os.name in here to show we _can_. It's access-restricted at # runtime only