From 192efb8c574fd5e5ada2f1753478c5842367d34b Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 19 Aug 2025 16:45:54 -0400 Subject: [PATCH 01/20] feat: convert graph config schema to msgspec --- pyproject.toml | 1 + src/taskgraph/config.py | 172 ++++++++++++++++------------------- src/taskgraph/util/schema.py | 38 ++++++-- uv.lock | 99 +++++++++++++++++++- 4 files changed, 208 insertions(+), 102 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ccd55899f..c8800a034 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "cookiecutter~=2.1", "json-e>=2.7", "mozilla-repo-urls", + "msgspec>=0.18.6", "PyYAML>=5.3.1", "redo>=2.0", "requests>=2.25", diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 6c55cb8ed..0e8d423d4 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -8,107 +8,89 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Dict +from typing import Any, Dict, List, Literal, Optional, Union -from voluptuous import ALLOW_EXTRA, All, Any, Extra, Length, Optional, Required +import msgspec -from .util.caches import CACHES from .util.python_path import find_object -from .util.schema import Schema, optionally_keyed_by, validate_schema +from .util.schema import validate_schema from .util.vcs import get_repository from .util.yaml import load_yaml logger = logging.getLogger(__name__) -#: Schema for the graph config -graph_config_schema = Schema( - { - # The trust-domain for this graph. - # (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain) # noqa - Required("trust-domain"): str, - Optional( - "docker-image-kind", - description="Name of the docker image kind (default: docker-image)", - ): str, - Required("task-priority"): optionally_keyed_by( - "project", - "level", - Any( - "highest", - "very-high", - "high", - "medium", - "low", - "very-low", - "lowest", - ), - ), - Optional( - "task-deadline-after", - description="Default 'deadline' for tasks, in relative date format. " - "Eg: '1 week'", - ): optionally_keyed_by("project", str), - Optional( - "task-expires-after", - description="Default 'expires-after' for level 1 tasks, in relative date format. " - "Eg: '90 days'", - ): str, - Required("workers"): { - Required("aliases"): { - str: { - Required("provisioner"): optionally_keyed_by("level", str), - Required("implementation"): str, - Required("os"): str, - Required("worker-type"): optionally_keyed_by("level", str), - } - }, - }, - Required("taskgraph"): { - Optional( - "register", - description="Python function to call to register extensions.", - ): str, - Optional("decision-parameters"): str, - Optional( - "cached-task-prefix", - description="The taskcluster index prefix to use for caching tasks. " - "Defaults to `trust-domain`.", - ): str, - Optional( - "cache-pull-requests", - description="Should tasks from pull requests populate the cache", - ): bool, - Optional( - "index-path-regexes", - description="Regular expressions matching index paths to be summarized.", - ): [str], - Optional( - "run", - description="Configuration related to the 'run' transforms.", - ): { - Optional( - "use-caches", - description="List of caches to enable, or a boolean to " - "enable/disable all of them.", - ): Any(bool, list(CACHES.keys())), - }, - Required("repositories"): All( - { - str: { - Required("name"): str, - Optional("project-regex"): str, - Optional("ssh-secret-name"): str, - # FIXME - Extra: str, - } - }, - Length(min=1), - ), - }, - }, - extra=ALLOW_EXTRA, -) +# TaskPriority type for the priority levels +TaskPriority = Literal[ + "highest", "very-high", "high", "medium", "low", "very-low", "lowest" +] + + +class WorkerAlias(msgspec.Struct, kw_only=True, rename="kebab"): + """Worker alias configuration.""" + + provisioner: Union[str, dict] + implementation: str + os: str + worker_type: Union[str, dict] # Can be keyed-by, maps from "worker-type" + + +class Workers(msgspec.Struct, kw_only=True): + """Workers configuration.""" + + aliases: Dict[str, WorkerAlias] + + +class Repository(msgspec.Struct, kw_only=True, rename="kebab"): + """Repository configuration.""" + + name: str + project_regex: Optional[str] = None # Maps from "project-regex" + ssh_secret_name: Optional[str] = None # Maps from "ssh-secret-name" + # Allow extra fields for flexibility + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +class RunConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Run transforms configuration.""" + + use_caches: Optional[Union[bool, List[str]]] = None # Maps from "use-caches" + + +class TaskGraphConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Taskgraph specific configuration.""" + + repositories: Dict[str, Repository] + register: Optional[str] = None + decision_parameters: Optional[str] = None # Maps from "decision-parameters" + cached_task_prefix: Optional[str] = None # Maps from "cached-task-prefix" + cache_pull_requests: Optional[bool] = None # Maps from "cache-pull-requests" + index_path_regexes: Optional[List[str]] = None # Maps from "index-path-regexes" + run: Optional[RunConfig] = None + + +class GraphConfigSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Main graph configuration schema.""" + + trust_domain: str # Maps from "trust-domain" + task_priority: Union[ + TaskPriority, dict + ] # Maps from "task-priority", can be keyed-by + workers: Workers + taskgraph: TaskGraphConfig + docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" + task_deadline_after: Optional[Union[str, dict]] = ( + None # Maps from "task-deadline-after", can be keyed-by + ) + task_expires_after: Optional[str] = None # Maps from "task-expires-after" + # Allow extra fields for flexibility + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +# Msgspec schema is now the main schema +graph_config_schema = GraphConfigSchema @dataclass(frozen=True, eq=False) @@ -177,7 +159,11 @@ def kinds_dir(self): def validate_graph_config(config): - validate_schema(graph_config_schema, config, "Invalid graph configuration:") + """Validate graph configuration using msgspec.""" + # With rename="kebab", msgspec handles the conversion automatically + validate_schema( + GraphConfigSchema, config, "Invalid graph configuration:", use_msgspec=True + ) def load_graph_config(root_dir): diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index ba72ff079..af5a8deab 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -7,26 +7,48 @@ import pprint import re +import msgspec import voluptuous import taskgraph from taskgraph.util.keyed_by import evaluate_keyed_by, iter_dot_path -def validate_schema(schema, obj, msg_prefix): +def validate_schema(schema, obj, msg_prefix, use_msgspec=False): """ Validate that object satisfies schema. If not, generate a useful exception beginning with msg_prefix. + + Args: + schema: Either a voluptuous.Schema or msgspec.Struct type + obj: Object to validate + msg_prefix: Prefix for error messages + use_msgspec: If True, use msgspec for validation (default: False) """ if taskgraph.fast: return - try: - schema(obj) - except voluptuous.MultipleInvalid as exc: - msg = [msg_prefix] - for error in exc.errors: - msg.append(str(error)) - raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + if use_msgspec: + # Handle msgspec validation + try: + if isinstance(schema, type) and issubclass(schema, msgspec.Struct): + # For msgspec.Struct types, validate by converting + msgspec.convert(obj, schema) + else: + # For other msgspec validators + schema.decode(msgspec.json.encode(obj)) + except (msgspec.ValidationError, msgspec.DecodeError) as exc: + msg = [msg_prefix, str(exc)] + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + else: + # Handle voluptuous validation (default behavior) + try: + schema(obj) + except voluptuous.MultipleInvalid as exc: + msg = [msg_prefix] + for error in exc.errors: + msg.append(str(error)) + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) def optionally_keyed_by(*arguments): diff --git a/uv.lock b/uv.lock index 186985fa2..f55abb99f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.8" resolution-markers = [ "python_full_version >= '3.11'", @@ -976,6 +976,100 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/b0/9227185fa6b198a1a940c7f0d8f38ab86a5eb982224e20aba3dcdf038c22/mozilla_repo_urls-0.2.1-py3-none-any.whl", hash = "sha256:cabce71e57781cdb9a54c1e981c2979e6400a6a1077301f3976b090df2475274", size = 9857, upload-time = "2025-05-26T11:38:17.431Z" }, ] +[[package]] +name = "msgspec" +version = "0.18.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/fb/42b1865063fddb14dbcbb6e74e0a366ecf1ba371c4948664dde0b0e10f95/msgspec-0.18.6.tar.gz", hash = "sha256:a59fc3b4fcdb972d09138cb516dbde600c99d07c38fd9372a6ef500d2d031b4e", size = 216757, upload-time = "2024-01-22T04:34:59.365Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/54/34c2b70e0d42d876c04f6436c80777d786f25c7536830db5e4ec1aef8788/msgspec-0.18.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77f30b0234eceeff0f651119b9821ce80949b4d667ad38f3bfed0d0ebf9d6d8f", size = 202537, upload-time = "2024-01-22T04:34:07.605Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b8/d00d7d03bba8b4eb0bbfdeb6c047163877b2916995f837113d273fd3b774/msgspec-0.18.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a76b60e501b3932782a9da039bd1cd552b7d8dec54ce38332b87136c64852dd", size = 192246, upload-time = "2024-01-22T04:34:09.752Z" }, + { url = "https://files.pythonhosted.org/packages/98/07/40bcd501d0f4e76694ca04a11689f3e06d9ef7a31d74e493a2cc34cd9198/msgspec-0.18.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06acbd6edf175bee0e36295d6b0302c6de3aaf61246b46f9549ca0041a9d7177", size = 208523, upload-time = "2024-01-22T04:34:11.569Z" }, + { url = "https://files.pythonhosted.org/packages/23/1f/10f2bf07f8fcdc3b0c7bf1bfefdd28bd0353df9290c84e4b3ad8e93e0115/msgspec-0.18.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40a4df891676d9c28a67c2cc39947c33de516335680d1316a89e8f7218660410", size = 210276, upload-time = "2024-01-22T04:34:13.318Z" }, + { url = "https://files.pythonhosted.org/packages/c7/e4/4bb5bcd89a74bbb246a21687dd62923c43007e28ad17db24ff58653456cb/msgspec-0.18.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a6896f4cd5b4b7d688018805520769a8446df911eb93b421c6c68155cdf9dd5a", size = 214659, upload-time = "2024-01-22T04:34:15.119Z" }, + { url = "https://files.pythonhosted.org/packages/32/f1/57187427a5a3379cb74aaae753314f9dcde14c259552ec0cb44bcf18db49/msgspec-0.18.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3ac4dd63fd5309dd42a8c8c36c1563531069152be7819518be0a9d03be9788e4", size = 216585, upload-time = "2024-01-22T04:34:16.382Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d1/94919c9b837fc9a0e9dfc1b598a50298bd194146e7bc7d3f42f18826e9f6/msgspec-0.18.6-cp310-cp310-win_amd64.whl", hash = "sha256:fda4c357145cf0b760000c4ad597e19b53adf01382b711f281720a10a0fe72b7", size = 185677, upload-time = "2024-01-22T04:34:17.622Z" }, + { url = "https://files.pythonhosted.org/packages/15/20/278def3822dec807be1e2a734ba9547500ff06667be9dda00ab5d277d605/msgspec-0.18.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e77e56ffe2701e83a96e35770c6adb655ffc074d530018d1b584a8e635b4f36f", size = 200058, upload-time = "2024-01-22T04:34:18.796Z" }, + { url = "https://files.pythonhosted.org/packages/25/8c/75bfafb040934dd3eb46234a2bd4d8fcc7b646f77440866f954b60e0886b/msgspec-0.18.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5351afb216b743df4b6b147691523697ff3a2fc5f3d54f771e91219f5c23aaa", size = 189108, upload-time = "2024-01-22T04:34:20.648Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e6/5dd960a7678cbaf90dc910611a0e700775ee341876f029c3c987122afe84/msgspec-0.18.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3232fabacef86fe8323cecbe99abbc5c02f7698e3f5f2e248e3480b66a3596b", size = 208138, upload-time = "2024-01-22T04:34:22.953Z" }, + { url = "https://files.pythonhosted.org/packages/6a/73/1b2f991dc26899d2f999c938cbc82c858b3cb7e3ccaad317b32760dbe1da/msgspec-0.18.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3b524df6ea9998bbc99ea6ee4d0276a101bcc1aa8d14887bb823914d9f60d07", size = 209538, upload-time = "2024-01-22T04:34:24.607Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/2fb2d40b3bde566fd14bf02bf503eea20a912a02cdf7ff100629906c9094/msgspec-0.18.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:37f67c1d81272131895bb20d388dd8d341390acd0e192a55ab02d4d6468b434c", size = 213571, upload-time = "2024-01-22T04:34:25.889Z" }, + { url = "https://files.pythonhosted.org/packages/59/5a/c2aeeefd78946713047637f0c422c0b8b31182eb9bbed0068e906cc8aca0/msgspec-0.18.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d0feb7a03d971c1c0353de1a8fe30bb6579c2dc5ccf29b5f7c7ab01172010492", size = 215785, upload-time = "2024-01-22T04:34:27.131Z" }, + { url = "https://files.pythonhosted.org/packages/51/c6/0a8ae23c91ba1e6d58ddb089bba4ce8dad5815411b4a2bb40a5f15d2ab73/msgspec-0.18.6-cp311-cp311-win_amd64.whl", hash = "sha256:41cf758d3f40428c235c0f27bc6f322d43063bc32da7b9643e3f805c21ed57b4", size = 185877, upload-time = "2024-01-22T04:34:28.573Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b5/c8fbf1db814eb29eda402952374b594b2559419ba7ec6d0997a9e5687530/msgspec-0.18.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d86f5071fe33e19500920333c11e2267a31942d18fed4d9de5bc2fbab267d28c", size = 202109, upload-time = "2024-01-22T04:34:29.794Z" }, + { url = "https://files.pythonhosted.org/packages/d7/9a/235d2dbab078a0b8e6f338205dc59be0b027ce000554ee6a9c41b19339e5/msgspec-0.18.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce13981bfa06f5eb126a3a5a38b1976bddb49a36e4f46d8e6edecf33ccf11df1", size = 190281, upload-time = "2024-01-22T04:34:31.563Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f2/f864ed36a8a62c26b57c3e08d212bd8f3d12a3ca3ef64600be5452aa3c82/msgspec-0.18.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e97dec6932ad5e3ee1e3c14718638ba333befc45e0661caa57033cd4cc489466", size = 210305, upload-time = "2024-01-22T04:34:33.395Z" }, + { url = "https://files.pythonhosted.org/packages/73/16/dfef780ced7d690dd5497846ed242ef3e27e319d59d1ddaae816a4f2c15e/msgspec-0.18.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad237100393f637b297926cae1868b0d500f764ccd2f0623a380e2bcfb2809ca", size = 212510, upload-time = "2024-01-22T04:34:34.728Z" }, + { url = "https://files.pythonhosted.org/packages/c1/90/f5b3a788c4b3d92190e3345d1afa3dd107d5f16b8194e1f61b72582ee9bd/msgspec-0.18.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db1d8626748fa5d29bbd15da58b2d73af25b10aa98abf85aab8028119188ed57", size = 214844, upload-time = "2024-01-22T04:34:35.963Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0b/d4cc1b09f8dfcc6cc4cc9739c13a86e093fe70257b941ea9feb15df22996/msgspec-0.18.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d70cb3d00d9f4de14d0b31d38dfe60c88ae16f3182988246a9861259c6722af6", size = 217113, upload-time = "2024-01-22T04:34:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/3f/76/30d8f152299f65c85c46a2cbeaf95ad1d18516b5ce730acdaef696d4cfe6/msgspec-0.18.6-cp312-cp312-win_amd64.whl", hash = "sha256:1003c20bfe9c6114cc16ea5db9c5466e49fae3d7f5e2e59cb70693190ad34da0", size = 187184, upload-time = "2024-01-22T04:34:38.938Z" }, + { url = "https://files.pythonhosted.org/packages/5b/2b/262847e614393f265f00b8096d8f71871b27cb71f68f1250a9eac93cb1bc/msgspec-0.18.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f7d9faed6dfff654a9ca7d9b0068456517f63dbc3aa704a527f493b9200b210a", size = 201291, upload-time = "2024-01-22T04:34:40.131Z" }, + { url = "https://files.pythonhosted.org/packages/86/6f/1da53a2ba5f312c3dca9e5f38912732e77f996a22945c8d62df7617c4733/msgspec-0.18.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9da21f804c1a1471f26d32b5d9bc0480450ea77fbb8d9db431463ab64aaac2cf", size = 191604, upload-time = "2024-01-22T04:34:41.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/77/00e1e55607de1092dded768eae746cfdfd6f5aca4ad52b9bb11c3e3b1153/msgspec-0.18.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46eb2f6b22b0e61c137e65795b97dc515860bf6ec761d8fb65fdb62aa094ba61", size = 210060, upload-time = "2024-01-22T04:34:42.569Z" }, + { url = "https://files.pythonhosted.org/packages/21/e0/1dff019ae22b7d47782d6f1180760828bc96fde368aea983d8e5d872833a/msgspec-0.18.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8355b55c80ac3e04885d72db515817d9fbb0def3bab936bba104e99ad22cf46", size = 212378, upload-time = "2024-01-22T04:34:44.319Z" }, + { url = "https://files.pythonhosted.org/packages/85/98/da3ad36c242fdf0e6cd9d63e5d47ca53577f23c180ef040f4b3aefb5b88e/msgspec-0.18.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9080eb12b8f59e177bd1eb5c21e24dd2ba2fa88a1dbc9a98e05ad7779b54c681", size = 215541, upload-time = "2024-01-22T04:34:45.543Z" }, + { url = "https://files.pythonhosted.org/packages/13/cd/29b0de4e0e4a517fff7161fba034df19c45a5a0ef63b728d0e74dba4911d/msgspec-0.18.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cc001cf39becf8d2dcd3f413a4797c55009b3a3cdbf78a8bf5a7ca8fdb76032c", size = 218414, upload-time = "2024-01-22T04:34:46.811Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b1/1a92bf0dd6354316c9c3a0e6d1123873bb6f21efdb497980e71e843d2f85/msgspec-0.18.6-cp38-cp38-win_amd64.whl", hash = "sha256:fac5834e14ac4da1fca373753e0c4ec9c8069d1fe5f534fa5208453b6065d5be", size = 187715, upload-time = "2024-01-22T04:34:48.532Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/54e711813b04a668cbc6467e20ea747aec1aaf2c9afd83ed470d774d22d0/msgspec-0.18.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:974d3520fcc6b824a6dedbdf2b411df31a73e6e7414301abac62e6b8d03791b4", size = 202455, upload-time = "2024-01-22T04:34:49.722Z" }, + { url = "https://files.pythonhosted.org/packages/dd/b6/2a78cdd1ef872ad96c509fc4d732ffd86903861c9b4e0a47c85d0b37b0e3/msgspec-0.18.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fd62e5818731a66aaa8e9b0a1e5543dc979a46278da01e85c3c9a1a4f047ef7e", size = 192001, upload-time = "2024-01-22T04:34:50.912Z" }, + { url = "https://files.pythonhosted.org/packages/87/fc/1e06294be19595fc72e99957bf191a8a51be88487e280841ac5925069537/msgspec-0.18.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7481355a1adcf1f08dedd9311193c674ffb8bf7b79314b4314752b89a2cf7f1c", size = 208372, upload-time = "2024-01-22T04:34:52.046Z" }, + { url = "https://files.pythonhosted.org/packages/b7/ee/9967075f4ea0ca3e841e1b98f0f65a6033c464e3542fe594e2e6dad10029/msgspec-0.18.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6aa85198f8f154cf35d6f979998f6dadd3dc46a8a8c714632f53f5d65b315c07", size = 210257, upload-time = "2024-01-22T04:34:53.786Z" }, + { url = "https://files.pythonhosted.org/packages/70/03/9a16fac8e3de1b1aa30e22db8a38710cbacdb1f25c54dd2fcc0c0fb10585/msgspec-0.18.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e24539b25c85c8f0597274f11061c102ad6b0c56af053373ba4629772b407be", size = 214445, upload-time = "2024-01-22T04:34:54.997Z" }, + { url = "https://files.pythonhosted.org/packages/67/15/4b8e28bfd836cd0dbf7ac8feb52dc440d9ed028b798090b931aa6fac9636/msgspec-0.18.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c61ee4d3be03ea9cd089f7c8e36158786cd06e51fbb62529276452bbf2d52ece", size = 216412, upload-time = "2024-01-22T04:34:56.264Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b2/283d010db6836db2fe059f7ee3c13823927229975ffbe1edcbeded85a556/msgspec-0.18.6-cp39-cp39-win_amd64.whl", hash = "sha256:b5c390b0b0b7da879520d4ae26044d74aeee5144f83087eb7842ba59c02bc090", size = 185801, upload-time = "2024-01-22T04:34:57.599Z" }, +] + +[[package]] +name = "msgspec" +version = "0.19.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/cf/9b/95d8ce458462b8b71b8a70fa94563b2498b89933689f3a7b8911edfae3d7/msgspec-0.19.0.tar.gz", hash = "sha256:604037e7cd475345848116e89c553aa9a233259733ab51986ac924ab1b976f8e", size = 216934, upload-time = "2024-12-27T17:40:28.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/40/817282b42f58399762267b30deb8ac011d8db373f8da0c212c85fbe62b8f/msgspec-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d8dd848ee7ca7c8153462557655570156c2be94e79acec3561cf379581343259", size = 190019, upload-time = "2024-12-27T17:39:13.803Z" }, + { url = "https://files.pythonhosted.org/packages/92/99/bd7ed738c00f223a8119928661167a89124140792af18af513e6519b0d54/msgspec-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0553bbc77662e5708fe66aa75e7bd3e4b0f209709c48b299afd791d711a93c36", size = 183680, upload-time = "2024-12-27T17:39:17.847Z" }, + { url = "https://files.pythonhosted.org/packages/e5/27/322badde18eb234e36d4a14122b89edd4e2973cdbc3da61ca7edf40a1ccd/msgspec-0.19.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe2c4bf29bf4e89790b3117470dea2c20b59932772483082c468b990d45fb947", size = 209334, upload-time = "2024-12-27T17:39:19.065Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/080509c5774a1592b2779d902a70b5fe008532759927e011f068145a16cb/msgspec-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e87ecfa9795ee5214861eab8326b0e75475c2e68a384002aa135ea2a27d909", size = 211551, upload-time = "2024-12-27T17:39:21.767Z" }, + { url = "https://files.pythonhosted.org/packages/6f/2e/1c23c6b4ca6f4285c30a39def1054e2bee281389e4b681b5e3711bd5a8c9/msgspec-0.19.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3c4ec642689da44618f68c90855a10edbc6ac3ff7c1d94395446c65a776e712a", size = 215099, upload-time = "2024-12-27T17:39:24.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/fe/95f9654518879f3359d1e76bc41189113aa9102452170ab7c9a9a4ee52f6/msgspec-0.19.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2719647625320b60e2d8af06b35f5b12d4f4d281db30a15a1df22adb2295f633", size = 218211, upload-time = "2024-12-27T17:39:27.396Z" }, + { url = "https://files.pythonhosted.org/packages/79/f6/71ca7e87a1fb34dfe5efea8156c9ef59dd55613aeda2ca562f122cd22012/msgspec-0.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:695b832d0091edd86eeb535cd39e45f3919f48d997685f7ac31acb15e0a2ed90", size = 186174, upload-time = "2024-12-27T17:39:29.647Z" }, + { url = "https://files.pythonhosted.org/packages/24/d4/2ec2567ac30dab072cce3e91fb17803c52f0a37aab6b0c24375d2b20a581/msgspec-0.19.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa77046904db764b0462036bc63ef71f02b75b8f72e9c9dd4c447d6da1ed8f8e", size = 187939, upload-time = "2024-12-27T17:39:32.347Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/18226e4328897f4f19875cb62bb9259fe47e901eade9d9376ab5f251a929/msgspec-0.19.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:047cfa8675eb3bad68722cfe95c60e7afabf84d1bd8938979dd2b92e9e4a9551", size = 182202, upload-time = "2024-12-27T17:39:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/81/25/3a4b24d468203d8af90d1d351b77ea3cffb96b29492855cf83078f16bfe4/msgspec-0.19.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e78f46ff39a427e10b4a61614a2777ad69559cc8d603a7c05681f5a595ea98f7", size = 209029, upload-time = "2024-12-27T17:39:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/85/2e/db7e189b57901955239f7689b5dcd6ae9458637a9c66747326726c650523/msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c7adf191e4bd3be0e9231c3b6dc20cf1199ada2af523885efc2ed218eafd011", size = 210682, upload-time = "2024-12-27T17:39:36.384Z" }, + { url = "https://files.pythonhosted.org/packages/03/97/7c8895c9074a97052d7e4a1cc1230b7b6e2ca2486714eb12c3f08bb9d284/msgspec-0.19.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f04cad4385e20be7c7176bb8ae3dca54a08e9756cfc97bcdb4f18560c3042063", size = 214003, upload-time = "2024-12-27T17:39:39.097Z" }, + { url = "https://files.pythonhosted.org/packages/61/61/e892997bcaa289559b4d5869f066a8021b79f4bf8e955f831b095f47a4cd/msgspec-0.19.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45c8fb410670b3b7eb884d44a75589377c341ec1392b778311acdbfa55187716", size = 216833, upload-time = "2024-12-27T17:39:41.203Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3d/71b2dffd3a1c743ffe13296ff701ee503feaebc3f04d0e75613b6563c374/msgspec-0.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:70eaef4934b87193a27d802534dc466778ad8d536e296ae2f9334e182ac27b6c", size = 186184, upload-time = "2024-12-27T17:39:43.702Z" }, + { url = "https://files.pythonhosted.org/packages/b2/5f/a70c24f075e3e7af2fae5414c7048b0e11389685b7f717bb55ba282a34a7/msgspec-0.19.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f98bd8962ad549c27d63845b50af3f53ec468b6318400c9f1adfe8b092d7b62f", size = 190485, upload-time = "2024-12-27T17:39:44.974Z" }, + { url = "https://files.pythonhosted.org/packages/89/b0/1b9763938cfae12acf14b682fcf05c92855974d921a5a985ecc197d1c672/msgspec-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43bbb237feab761b815ed9df43b266114203f53596f9b6e6f00ebd79d178cdf2", size = 183910, upload-time = "2024-12-27T17:39:46.401Z" }, + { url = "https://files.pythonhosted.org/packages/87/81/0c8c93f0b92c97e326b279795f9c5b956c5a97af28ca0fbb9fd86c83737a/msgspec-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cfc033c02c3e0aec52b71710d7f84cb3ca5eb407ab2ad23d75631153fdb1f12", size = 210633, upload-time = "2024-12-27T17:39:49.099Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ef/c5422ce8af73928d194a6606f8ae36e93a52fd5e8df5abd366903a5ca8da/msgspec-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d911c442571605e17658ca2b416fd8579c5050ac9adc5e00c2cb3126c97f73bc", size = 213594, upload-time = "2024-12-27T17:39:51.204Z" }, + { url = "https://files.pythonhosted.org/packages/19/2b/4137bc2ed45660444842d042be2cf5b18aa06efd2cda107cff18253b9653/msgspec-0.19.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:757b501fa57e24896cf40a831442b19a864f56d253679f34f260dcb002524a6c", size = 214053, upload-time = "2024-12-27T17:39:52.866Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e6/8ad51bdc806aac1dc501e8fe43f759f9ed7284043d722b53323ea421c360/msgspec-0.19.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5f0f65f29b45e2816d8bded36e6b837a4bf5fb60ec4bc3c625fa2c6da4124537", size = 219081, upload-time = "2024-12-27T17:39:55.142Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0", size = 187467, upload-time = "2024-12-27T17:39:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/3c/cb/2842c312bbe618d8fefc8b9cedce37f773cdc8fa453306546dba2c21fd98/msgspec-0.19.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f12d30dd6266557aaaf0aa0f9580a9a8fbeadfa83699c487713e355ec5f0bd86", size = 190498, upload-time = "2024-12-27T17:40:00.427Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/c40b01b93465e1a5f3b6c7d91b10fb574818163740cc3acbe722d1e0e7e4/msgspec-0.19.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82b2c42c1b9ebc89e822e7e13bbe9d17ede0c23c187469fdd9505afd5a481314", size = 183950, upload-time = "2024-12-27T17:40:04.219Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f0/5b764e066ce9aba4b70d1db8b087ea66098c7c27d59b9dd8a3532774d48f/msgspec-0.19.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19746b50be214a54239aab822964f2ac81e38b0055cca94808359d779338c10e", size = 210647, upload-time = "2024-12-27T17:40:05.606Z" }, + { url = "https://files.pythonhosted.org/packages/9d/87/bc14f49bc95c4cb0dd0a8c56028a67c014ee7e6818ccdce74a4862af259b/msgspec-0.19.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60ef4bdb0ec8e4ad62e5a1f95230c08efb1f64f32e6e8dd2ced685bcc73858b5", size = 213563, upload-time = "2024-12-27T17:40:10.516Z" }, + { url = "https://files.pythonhosted.org/packages/53/2f/2b1c2b056894fbaa975f68f81e3014bb447516a8b010f1bed3fb0e016ed7/msgspec-0.19.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac7f7c377c122b649f7545810c6cd1b47586e3aa3059126ce3516ac7ccc6a6a9", size = 213996, upload-time = "2024-12-27T17:40:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5a/4cd408d90d1417e8d2ce6a22b98a6853c1b4d7cb7669153e4424d60087f6/msgspec-0.19.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5bc1472223a643f5ffb5bf46ccdede7f9795078194f14edd69e3aab7020d327", size = 219087, upload-time = "2024-12-27T17:40:14.881Z" }, + { url = "https://files.pythonhosted.org/packages/23/d8/f15b40611c2d5753d1abb0ca0da0c75348daf1252220e5dda2867bd81062/msgspec-0.19.0-cp313-cp313-win_amd64.whl", hash = "sha256:317050bc0f7739cb30d257ff09152ca309bf5a369854bbf1e57dffc310c1f20f", size = 187432, upload-time = "2024-12-27T17:40:16.256Z" }, + { url = "https://files.pythonhosted.org/packages/ea/d0/323f867eaec1f2236ba30adf613777b1c97a7e8698e2e881656b21871fa4/msgspec-0.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15c1e86fff77184c20a2932cd9742bf33fe23125fa3fcf332df9ad2f7d483044", size = 189926, upload-time = "2024-12-27T17:40:18.939Z" }, + { url = "https://files.pythonhosted.org/packages/a8/37/c3e1b39bdae90a7258d77959f5f5e36ad44b40e2be91cff83eea33c54d43/msgspec-0.19.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b5541b2b3294e5ffabe31a09d604e23a88533ace36ac288fa32a420aa38d229", size = 183873, upload-time = "2024-12-27T17:40:20.214Z" }, + { url = "https://files.pythonhosted.org/packages/cb/a2/48f2c15c7644668e51f4dce99d5f709bd55314e47acb02e90682f5880f35/msgspec-0.19.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f5c043ace7962ef188746e83b99faaa9e3e699ab857ca3f367b309c8e2c6b12", size = 209272, upload-time = "2024-12-27T17:40:21.534Z" }, + { url = "https://files.pythonhosted.org/packages/25/3c/aa339cf08b990c3f07e67b229a3a8aa31bf129ed974b35e5daa0df7d9d56/msgspec-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca06aa08e39bf57e39a258e1996474f84d0dd8130d486c00bec26d797b8c5446", size = 211396, upload-time = "2024-12-27T17:40:22.897Z" }, + { url = "https://files.pythonhosted.org/packages/c7/00/c7fb9d524327c558b2803973cc3f988c5100a1708879970a9e377bdf6f4f/msgspec-0.19.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e695dad6897896e9384cf5e2687d9ae9feaef50e802f93602d35458e20d1fb19", size = 215002, upload-time = "2024-12-27T17:40:24.341Z" }, + { url = "https://files.pythonhosted.org/packages/3f/bf/d9f9fff026c1248cde84a5ce62b3742e8a63a3c4e811f99f00c8babf7615/msgspec-0.19.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3be5c02e1fee57b54130316a08fe40cca53af92999a302a6054cd451700ea7db", size = 218132, upload-time = "2024-12-27T17:40:25.744Z" }, + { url = "https://files.pythonhosted.org/packages/00/03/b92011210f79794958167a3a3ea64a71135d9a2034cfb7597b545a42606d/msgspec-0.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:0684573a821be3c749912acf5848cce78af4298345cb2d7a8b8948a0a5a27cfe", size = 186301, upload-time = "2024-12-27T17:40:27.076Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -2018,6 +2112,8 @@ dependencies = [ { name = "json-e" }, { name = "mozilla-repo-urls", version = "0.1.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "mozilla-repo-urls", version = "0.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "msgspec", version = "0.18.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "msgspec", version = "0.19.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, { name = "pyyaml" }, { name = "redo" }, { name = "requests" }, @@ -2069,6 +2165,7 @@ requires-dist = [ { name = "cookiecutter", specifier = "~=2.1" }, { name = "json-e", specifier = ">=2.7" }, { name = "mozilla-repo-urls" }, + { name = "msgspec", specifier = ">=0.18.6" }, { name = "orjson", marker = "extra == 'orjson'" }, { name = "pyyaml", specifier = ">=5.3.1" }, { name = "redo", specifier = ">=2.0" }, From d34493ff93006566add355d4a28d40292b14ba73 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 22 Aug 2025 14:24:56 -0400 Subject: [PATCH 02/20] feat: convert more schemas to msgspec --- src/taskgraph/decision.py | 16 +-- src/taskgraph/transforms/base.py | 8 +- src/taskgraph/transforms/chunking.py | 61 ++++------ src/taskgraph/transforms/matrix.py | 83 ++++++------- src/taskgraph/transforms/run/index_search.py | 23 ++-- src/taskgraph/transforms/task_context.py | 122 ++++++++----------- 6 files changed, 137 insertions(+), 176 deletions(-) diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py index 361619221..6edcfadc1 100644 --- a/src/taskgraph/decision.py +++ b/src/taskgraph/decision.py @@ -9,9 +9,10 @@ import shutil import time from pathlib import Path +from typing import Any, Dict, Optional +import msgspec import yaml -from voluptuous import Optional from taskgraph.actions import render_actions_json from taskgraph.create import create_tasks @@ -20,7 +21,7 @@ from taskgraph.taskgraph import TaskGraph from taskgraph.util import json from taskgraph.util.python_path import find_object -from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.schema import validate_schema from taskgraph.util.vcs import Repository, get_repository from taskgraph.util.yaml import load_yaml @@ -40,11 +41,11 @@ #: Schema for try_task_config.json version 2 -try_task_config_schema_v2 = Schema( - { - Optional("parameters"): {str: object}, - } -) +class TryTaskConfigSchemaV2(msgspec.Struct, kw_only=True, omit_defaults=True): + parameters: Optional[Dict[str, Any]] = None + + +try_task_config_schema_v2 = TryTaskConfigSchemaV2 def full_task_graph_to_runnable_tasks(full_task_json): @@ -357,6 +358,7 @@ def set_try_config(parameters, task_config_file): try_task_config_schema_v2, task_config, "Invalid v2 `try_task_config.json`.", + use_msgspec=True, ) parameters.update(task_config["parameters"]) return diff --git a/src/taskgraph/transforms/base.py b/src/taskgraph/transforms/base.py index 4626ca8d0..1b62e7852 100644 --- a/src/taskgraph/transforms/base.py +++ b/src/taskgraph/transforms/base.py @@ -8,6 +8,8 @@ from dataclasses import dataclass, field from typing import Dict, List, Union +import msgspec + from taskgraph.task import Task from ..config import GraphConfig @@ -154,5 +156,9 @@ def __call__(self, config, tasks): ) else: error = "In unknown task:" - validate_schema(self.schema, task, error) + # Check if schema is a msgspec.Struct type + use_msgspec = isinstance(self.schema, type) and issubclass( + self.schema, msgspec.Struct + ) + validate_schema(self.schema, task, error, use_msgspec=use_msgspec) yield task diff --git a/src/taskgraph/transforms/chunking.py b/src/taskgraph/transforms/chunking.py index d8ad89dd2..7ed7b6e62 100644 --- a/src/taskgraph/transforms/chunking.py +++ b/src/taskgraph/transforms/chunking.py @@ -2,49 +2,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. import copy -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import ALLOW_EXTRA, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute + +class ChunkConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + `chunk` can be used to split one task into `total-chunks` + tasks, substituting `this_chunk` and `total_chunks` into any + fields in `substitution-fields`. + """ + + # The total number of chunks to split the task into. + total_chunks: int + # A list of fields that need to have `{this_chunk}` and/or + # `{total_chunks}` replaced in them. + substitution_fields: Optional[List[str]] = None + + #: Schema for chunking transforms -CHUNK_SCHEMA = Schema( - { - # Optional, so it can be used for a subset of tasks in a kind - Optional( - "chunk", - description=dedent( - """ - `chunk` can be used to split one task into `total-chunks` - tasks, substituting `this_chunk` and `total_chunks` into any - fields in `substitution-fields`. - """.lstrip() - ), - ): { - Required( - "total-chunks", - description=dedent( - """ - The total number of chunks to split the task into. - """.lstrip() - ), - ): int, - Optional( - "substitution-fields", - description=dedent( - """ - A list of fields that need to have `{this_chunk}` and/or - `{total_chunks}` replaced in them. - """.lstrip() - ), - ): [str], - } - }, - extra=ALLOW_EXTRA, -) +class ChunkSchema(msgspec.Struct, kw_only=True, omit_defaults=True): + # Optional, so it can be used for a subset of tasks in a kind + chunk: Optional[ChunkConfig] = None + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +CHUNK_SCHEMA = ChunkSchema transforms = TransformSequence() transforms.add_validate(CHUNK_SCHEMA) diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 476507284..8ba0c5cdc 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -8,59 +8,48 @@ """ from copy import deepcopy -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import ALLOW_EXTRA, Extra, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute_task_fields + +class MatrixConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + Matrix configuration for generating multiple tasks. + """ + + # Exclude the specified combination(s) of matrix values from the + # final list of tasks. + # + # If only a subset of the possible rows are present in the + # exclusion rule, then *all* combinations including that subset + # subset will be excluded. + exclude: Optional[List[Dict[str, str]]] = None + # Sets the task name to the specified format string. + # + # Useful for cases where the default of joining matrix values by + # a dash is not desired. + set_name: Optional[str] = None + # List of fields in the task definition to substitute matrix values into. + # + # If not specified, all fields in the task definition will be + # substituted. + substitution_fields: Optional[List[str]] = None + # Allow extra fields for matrix dimensions + __extras__: Dict[str, List[str]] = msgspec.field(default_factory=dict) + + #: Schema for matrix transforms -MATRIX_SCHEMA = Schema( - { - Required("name"): str, - Optional("matrix"): { - Optional( - "exclude", - description=dedent( - """ - Exclude the specified combination(s) of matrix values from the - final list of tasks. - - If only a subset of the possible rows are present in the - exclusion rule, then *all* combinations including that subset - subset will be excluded. - """.lstrip() - ), - ): [{str: str}], - Optional( - "set-name", - description=dedent( - """ - Sets the task name to the specified format string. - - Useful for cases where the default of joining matrix values by - a dash is not desired. - """.lstrip() - ), - ): str, - Optional( - "substitution-fields", - description=dedent( - """ - List of fields in the task definition to substitute matrix values into. - - If not specified, all fields in the task definition will be - substituted. - """ - ), - ): [str], - Extra: [str], - }, - }, - extra=ALLOW_EXTRA, -) +class MatrixSchema(msgspec.Struct, kw_only=True, omit_defaults=True): + name: str + matrix: Optional[MatrixConfig] = None + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +MATRIX_SCHEMA = MatrixSchema transforms = TransformSequence() transforms.add_validate(MATRIX_SCHEMA) diff --git a/src/taskgraph/transforms/run/index_search.py b/src/taskgraph/transforms/run/index_search.py index 7436f010f..53ee34af0 100644 --- a/src/taskgraph/transforms/run/index_search.py +++ b/src/taskgraph/transforms/run/index_search.py @@ -8,26 +8,25 @@ phase will replace the task with the task from the other graph. """ -from voluptuous import Required +from typing import List + +import msgspec from taskgraph.transforms.base import TransformSequence from taskgraph.transforms.run import run_task_using -from taskgraph.util.schema import Schema transforms = TransformSequence() #: Schema for run.using index-search -run_task_schema = Schema( - { - Required("using"): "index-search", - Required( - "index-search", - "A list of indexes in decreasing order of priority at which to lookup for this " - "task. This is interpolated with the graph parameters.", - ): [str], - } -) +class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab"): + using: str + # A list of indexes in decreasing order of priority at which to lookup for this + # task. This is interpolated with the graph parameters. + index_search: List[str] + + +run_task_schema = RunTaskSchema @run_task_using("always-optimized", "index-search", schema=run_task_schema) diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index 9e013e5d5..f15253126 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -1,82 +1,60 @@ -from textwrap import dedent +from typing import Any, Dict, List, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import deep_get, substitute_task_fields from taskgraph.util.yaml import load_yaml + +class TaskContextConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + `task-context` can be used to substitute values into any field in a + task with data that is not known until `taskgraph` runs. + + This data can be provided via `from-parameters` or `from-file`, + which can pull in values from parameters and a defined yml file + respectively. + + Data may also be provided directly in the `from-object` section of + `task-context`. This can be useful in `kinds` that define most of + their contents in `task-defaults`, but have some values that may + differ for various concrete `tasks` in the `kind`. + + If the same key is found in multiple places the order of precedence + is as follows: + - Parameters + - `from-object` keys + - File + + That is to say: parameters will always override anything else. + """ + + # Retrieve task context values from parameters. A single + # parameter may be provided or a list of parameters in + # priority order. The latter can be useful in implementing a + # "default" value if some other parameter is not provided. + from_parameters: Optional[Dict[str, Union[List[str], str]]] = None + # Retrieve task context values from a yaml file. The provided + # file should usually only contain top level keys and values + # (eg: nested objects will not be interpolated - they will be + # substituted as text representations of the object). + from_file: Optional[str] = None + # Key/value pairs to be used as task context + from_object: Optional[Any] = None + # A list of fields in the task to substitute the provided values + # into. + substitution_fields: List[str] + + #: Schema for the task_context transforms -SCHEMA = Schema( - { - Optional("name"): str, - Required( - "task-context", - description=dedent( - """ - `task-context` can be used to substitute values into any field in a - task with data that is not known until `taskgraph` runs. - - This data can be provided via `from-parameters` or `from-file`, - which can pull in values from parameters and a defined yml file - respectively. - - Data may also be provided directly in the `from-object` section of - `task-context`. This can be useful in `kinds` that define most of - their contents in `task-defaults`, but have some values that may - differ for various concrete `tasks` in the `kind`. - - If the same key is found in multiple places the order of precedence - is as follows: - - Parameters - - `from-object` keys - - File - - That is to say: parameters will always override anything else. - - """.lstrip(), - ), - ): { - Optional( - "from-parameters", - description=dedent( - """ - Retrieve task context values from parameters. A single - parameter may be provided or a list of parameters in - priority order. The latter can be useful in implementing a - "default" value if some other parameter is not provided. - """.lstrip() - ), - ): {str: Any([str], str)}, - Optional( - "from-file", - description=dedent( - """ - Retrieve task context values from a yaml file. The provided - file should usually only contain top level keys and values - (eg: nested objects will not be interpolated - they will be - substituted as text representations of the object). - """.lstrip() - ), - ): str, - Optional( - "from-object", - description="Key/value pairs to be used as task context", - ): object, - Required( - "substitution-fields", - description=dedent( - """ - A list of fields in the task to substitute the provided values - into. - """.lstrip() - ), - ): [str], - }, - }, - extra=ALLOW_EXTRA, -) +class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + name: Optional[str] = None + task_context: TaskContextConfig + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +SCHEMA = Schema transforms = TransformSequence() transforms.add_validate(SCHEMA) From 8971de163a357144f6e811794ba09741fa5069c8 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 22 Aug 2025 14:47:07 -0400 Subject: [PATCH 03/20] feat: updated fetch schema --- src/taskgraph/transforms/fetch.py | 238 +++++++++++++++--------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 797ab71e2..1555265ca 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -9,77 +9,65 @@ import os import re from dataclasses import dataclass -from textwrap import dedent -from typing import Callable +from typing import Any, Callable, Dict, Optional -from voluptuous import Extra, Optional, Required +import msgspec import taskgraph from ..util import path from ..util.cached_tasks import add_optimization -from ..util.schema import Schema, validate_schema from ..util.treeherder import join_symbol from .base import TransformSequence CACHE_TYPE = "content.v1" + #: Schema for fetch transforms -FETCH_SCHEMA = Schema( - { - Required( - "name", - description=dedent( - """ - Name of the task. - """.lstrip() - ), - ): str, - Optional( - "task-from", - description=dedent( - """ - Relative path (from config.path) to the file the task was defined - in. - """.lstrip() - ), - ): str, - Required( - "description", - description=dedent( - """ - Description of the task. - """.lstrip() - ), - ): str, - Optional("expires-after"): str, - Optional("docker-image"): object, - Optional( - "fetch-alias", - description=dedent( - """ - An alias that can be used instead of the real fetch task name in - fetch stanzas for tasks. - """.lstrip() - ), - ): str, - Optional( - "artifact-prefix", - description=dedent( - """ - The prefix of the taskcluster artifact being uploaded. - Defaults to `public/`; if it starts with something other than - `public/` the artifact will require scopes to access. - """.lstrip() - ), - ): str, - Optional("attributes"): {str: object}, - Required("fetch"): { - Required("type"): str, - Extra: object, - }, - } -) +class FetchConfig(msgspec.Struct, kw_only=True): + """Configuration for a fetch task type.""" + + type: str + # Additional fields handled dynamically by fetch builders + + +class FetchSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """ + Schema for fetch transforms. + + Attributes: + name: Name of the task. + task_from: Relative path (from config.path) to the file the task was defined in. + description: Description of the task. + expires_after: When the task expires. + docker_image: Docker image configuration. + fetch_alias: An alias that can be used instead of the real fetch task name in + fetch stanzas for tasks. + artifact_prefix: The prefix of the taskcluster artifact being uploaded. + Defaults to `public/`; if it starts with something other than + `public/` the artifact will require scopes to access. + attributes: Task attributes. + fetch: Fetch configuration with type and additional fields. + """ + + name: str + description: str + fetch: Dict[str, Any] # Must have 'type' key, other keys depend on type + task_from: Optional[str] = None + expires_after: Optional[str] = None + docker_image: Optional[Any] = None + fetch_alias: Optional[str] = None + artifact_prefix: Optional[str] = None + attributes: Optional[Dict[str, Any]] = None + + def __post_init__(self): + # Validate that fetch has a 'type' field + if not isinstance(self.fetch, dict) or "type" not in self.fetch: + raise msgspec.ValidationError("fetch must be a dict with a 'type' field") + + +# Backward compatibility +FETCH_SCHEMA = FetchSchema # define a collection of payload builders, depending on the worker implementation fetch_builders = {} @@ -87,13 +75,12 @@ @dataclass(frozen=True) class FetchBuilder: - schema: Schema + schema: Any # Either msgspec.Struct type or validation function builder: Callable def fetch_builder(name, schema): - schema = Schema({Required("type"): name}).extend(schema) - + # schema should be a msgspec.Struct type def wrap(func): fetch_builders[name] = FetchBuilder(schema, func) # type: ignore return func @@ -102,7 +89,7 @@ def wrap(func): transforms = TransformSequence() -transforms.add_validate(FETCH_SCHEMA) +transforms.add_validate(FetchSchema) @transforms.add @@ -115,7 +102,11 @@ def process_fetch_task(config, tasks): if typ not in fetch_builders: raise Exception(f"Unknown fetch type {typ} in fetch {name}") - validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:") + # Validate fetch config using msgspec + try: + msgspec.convert(fetch, fetch_builders[typ].schema) + except msgspec.ValidationError as e: + raise Exception(f"In task.fetch {name!r}: {e}") task.update(configure_fetch(config, typ, name, fetch)) @@ -125,7 +116,11 @@ def process_fetch_task(config, tasks): def configure_fetch(config, typ, name, fetch): if typ not in fetch_builders: raise Exception(f"No fetch type {typ} in fetch {name}") - validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:") + # Validate fetch config using msgspec + try: + msgspec.convert(fetch, fetch_builders[typ].schema) + except msgspec.ValidationError as e: + raise Exception(f"In task.fetch {name!r}: {e}") return fetch_builders[typ].builder(config, name, fetch) @@ -204,45 +199,48 @@ def make_task(config, tasks): yield task_desc -@fetch_builder( - "static-url", - schema={ - # The URL to download. - Required("url"): str, - # The SHA-256 of the downloaded content. - Required("sha256"): str, - # Size of the downloaded entity, in bytes. - Required("size"): int, - # GPG signature verification. - Optional("gpg-signature"): { - # URL where GPG signature document can be obtained. Can contain the - # value ``{url}``, which will be substituted with the value from - # ``url``. - Required("sig-url"): str, - # Path to file containing GPG public key(s) used to validate - # download. - Required("key-path"): str, - }, - # The name to give to the generated artifact. Defaults to the file - # portion of the URL. Using a different extension converts the - # archive to the given type. Only conversion to .tar.zst is - # supported. - Optional("artifact-name"): str, - # Strip the given number of path components at the beginning of - # each file entry in the archive. - # Requires an artifact-name ending with .tar.zst. - Optional("strip-components"): int, - # Add the given prefix to each file entry in the archive. - # Requires an artifact-name ending with .tar.zst. - Optional("add-prefix"): str, - # Headers to pass alongside the request. - Optional("headers"): { - str: str, - }, - # IMPORTANT: when adding anything that changes the behavior of the task, - # it is important to update the digest data used to compute cache hits. - }, -) +class GPGSignatureConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """GPG signature verification configuration.""" + + # URL where GPG signature document can be obtained. Can contain the + # value ``{url}``, which will be substituted with the value from ``url``. + sig_url: str + # Path to file containing GPG public key(s) used to validate download. + key_path: str + + +class StaticUrlFetchConfig( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Configuration for static-url fetch type.""" + + type: str + # The URL to download. + url: str + # The SHA-256 of the downloaded content. + sha256: str + # Size of the downloaded entity, in bytes. + size: int + # GPG signature verification. + gpg_signature: Optional[GPGSignatureConfig] = None + # The name to give to the generated artifact. Defaults to the file + # portion of the URL. Using a different extension converts the + # archive to the given type. Only conversion to .tar.zst is supported. + artifact_name: Optional[str] = None + # Strip the given number of path components at the beginning of + # each file entry in the archive. + # Requires an artifact-name ending with .tar.zst. + strip_components: Optional[int] = None + # Add the given prefix to each file entry in the archive. + # Requires an artifact-name ending with .tar.zst. + add_prefix: Optional[str] = None + # Headers to pass alongside the request. + headers: Optional[Dict[str, str]] = None + # IMPORTANT: when adding anything that changes the behavior of the task, + # it is important to update the digest data used to compute cache hits. + + +@fetch_builder("static-url", StaticUrlFetchConfig) def create_fetch_url_task(config, name, fetch): artifact_name = fetch.get("artifact-name") if not artifact_name: @@ -305,21 +303,23 @@ def create_fetch_url_task(config, name, fetch): } -@fetch_builder( - "git", - schema={ - Required("repo"): str, - Required("revision"): str, - Optional("include-dot-git"): bool, - Optional("artifact-name"): str, - Optional("path-prefix"): str, - # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key) - # In the secret dictionary, the key should be specified as - # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..." - # n.b. The OpenSSH private key file format requires a newline at the end of the file. - Optional("ssh-key"): str, - }, -) +class GitFetchConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Configuration for git fetch type.""" + + type: str + repo: str + revision: str + include_dot_git: Optional[bool] = None + artifact_name: Optional[str] = None + path_prefix: Optional[str] = None + # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key) + # In the secret dictionary, the key should be specified as + # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..." + # n.b. The OpenSSH private key file format requires a newline at the end of the file. + ssh_key: Optional[str] = None + + +@fetch_builder("git", GitFetchConfig) def create_git_fetch_task(config, name, fetch): path_prefix = fetch.get("path-prefix") if not path_prefix: From 08dae5faf9e8d6981d5534fb76e0120692012ab6 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 22 Aug 2025 14:56:00 -0400 Subject: [PATCH 04/20] feat: update 2 more schemas --- src/taskgraph/transforms/from_deps.py | 170 ++++++++++---------- src/taskgraph/transforms/notify.py | 216 ++++++++++++++++++-------- 2 files changed, 234 insertions(+), 152 deletions(-) diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 561891374..fedaf1654 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -13,108 +13,100 @@ from copy import deepcopy from textwrap import dedent +from typing import Any, Dict, List, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.transforms.run import fetches_schema from taskgraph.util.attributes import attrmatch from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies -from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.schema import validate_schema from taskgraph.util.set_name import SET_NAME_MAP -#: Schema for from_deps transforms -FROM_DEPS_SCHEMA = Schema( - { - Required("from-deps"): { - Optional( - "kinds", - description=dedent( - """ - Limit dependencies to specified kinds (defaults to all kinds in - `kind-dependencies`). - The first kind in the list is the "primary" kind. The - dependency of this kind will be used to derive the label - and copy attributes (if `copy-attributes` is True). - """.lstrip() - ), - ): [str], - Optional( - "set-name", - description=dedent( - """ - UPDATE ME AND DOCS - """.lstrip() - ), - ): Any( - None, - False, - *SET_NAME_MAP, - {Any(*SET_NAME_MAP): object}, - ), - Optional( - "with-attributes", - description=dedent( - """ - Limit dependencies to tasks whose attributes match - using :func:`~taskgraph.util.attributes.attrmatch`. - """.lstrip() - ), - ): {str: Any(list, str)}, - Optional( - "group-by", - description=dedent( - """ - Group cross-kind dependencies using the given group-by - function. One task will be created for each group. If not - specified, the 'single' function will be used which creates - a new task for each individual dependency. - """.lstrip() - ), - ): Any( - None, - *GROUP_BY_MAP, - {Any(*GROUP_BY_MAP): object}, - ), - Optional( - "copy-attributes", - description=dedent( - """ - If True, copy attributes from the dependency matching the - first kind in the `kinds` list (whether specified explicitly - or taken from `kind-dependencies`). - """.lstrip() - ), - ): bool, - Optional( - "unique-kinds", - description=dedent( - """ - If true (the default), there must be only a single unique task - for each kind in a dependency group. Setting this to false - disables that requirement. - """.lstrip() - ), - ): bool, - Optional( - "fetches", - description=dedent( - """ - If present, a `fetches` entry will be added for each task +# Define FetchEntry for the fetches field +class FetchEntry(msgspec.Struct, kw_only=True, omit_defaults=True): + """A fetch entry for an artifact.""" + + artifact: str + dest: Optional[str] = None + + +class FromDepsConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """ + Configuration for from-deps transforms. + + Attributes: + kinds: Limit dependencies to specified kinds (defaults to all kinds in + `kind-dependencies`). The first kind in the list is the "primary" kind. + The dependency of this kind will be used to derive the label + and copy attributes (if `copy-attributes` is True). + set_name: UPDATE ME AND DOCS. Can be a string from SET_NAME_MAP, False, None, + or a dict with a SET_NAME_MAP key. + with_attributes: Limit dependencies to tasks whose attributes match + using :func:`~taskgraph.util.attributes.attrmatch`. + group_by: Group cross-kind dependencies using the given group-by + function. One task will be created for each group. If not + specified, the 'single' function will be used which creates + a new task for each individual dependency. + copy_attributes: If True, copy attributes from the dependency matching the + first kind in the `kinds` list (whether specified explicitly + or taken from `kind-dependencies`). + unique_kinds: If true (the default), there must be only a single unique task + for each kind in a dependency group. Setting this to false + disables that requirement. + fetches: If present, a `fetches` entry will be added for each task dependency. Attributes of the upstream task may be used as substitution values in the `artifact` or `dest` values of the `fetches` entry. - """.lstrip() - ), - ): {str: [fetches_schema]}, - }, - }, - extra=ALLOW_EXTRA, -) + """ + + kinds: Optional[List[str]] = None + set_name: Optional[Union[str, bool, Dict[str, Any]]] = None + with_attributes: Optional[Dict[str, Union[List[Any], str]]] = None + group_by: Optional[Union[str, Dict[str, Any]]] = None + copy_attributes: Optional[bool] = None + unique_kinds: Optional[bool] = None + fetches: Optional[Dict[str, List[Union[str, Dict[str, str]]]]] = None + + def __post_init__(self): + # Validate set_name + if self.set_name is not None and self.set_name is not False: + if isinstance(self.set_name, str) and self.set_name not in SET_NAME_MAP: + raise msgspec.ValidationError(f"Invalid set-name: {self.set_name}") + elif isinstance(self.set_name, dict): + keys = list(self.set_name.keys()) + if len(keys) != 1 or keys[0] not in SET_NAME_MAP: + raise msgspec.ValidationError( + f"Invalid set-name dict: {self.set_name}" + ) + + # Validate group_by + if self.group_by is not None: + if isinstance(self.group_by, str) and self.group_by not in GROUP_BY_MAP: + raise msgspec.ValidationError(f"Invalid group-by: {self.group_by}") + elif isinstance(self.group_by, dict): + keys = list(self.group_by.keys()) + if len(keys) != 1 or keys[0] not in GROUP_BY_MAP: + raise msgspec.ValidationError( + f"Invalid group-by dict: {self.group_by}" + ) + + +#: Schema for from_deps transforms +class FromDepsSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Schema for from_deps transforms.""" + + from_deps: FromDepsConfig + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + +# Backward compatibility +FROM_DEPS_SCHEMA = FromDepsSchema transforms = TransformSequence() -transforms.add_validate(FROM_DEPS_SCHEMA) +transforms.add_validate(FromDepsSchema) @transforms.add diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 9c0152dad..610ede551 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -8,12 +8,14 @@ more information. """ -from voluptuous import ALLOW_EXTRA, Any, Exclusive, Optional, Required +from typing import Any, Dict, List, Literal, Optional, Union + +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by +from taskgraph.util.schema import resolve_keyed_by -_status_type = Any( +StatusType = Literal[ "on-completed", "on-defined", "on-exception", @@ -21,29 +23,43 @@ "on-pending", "on-resolved", "on-running", -) - -_recipients = [ - { - Required("type"): "email", - Required("address"): optionally_keyed_by("project", "level", str), - Optional("status-type"): _status_type, - }, - { - Required("type"): "matrix-room", - Required("room-id"): str, - Optional("status-type"): _status_type, - }, - { - Required("type"): "pulse", - Required("routing-key"): str, - Optional("status-type"): _status_type, - }, - { - Required("type"): "slack-channel", - Required("channel-id"): str, - Optional("status-type"): _status_type, - }, +] + + +class EmailRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Email notification recipient.""" + + type: Literal["email"] + address: Union[str, Dict[str, Any]] # Can be keyed-by + status_type: Optional[StatusType] = None + + +class MatrixRoomRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Matrix room notification recipient.""" + + type: Literal["matrix-room"] + room_id: str + status_type: Optional[StatusType] = None + + +class PulseRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Pulse notification recipient.""" + + type: Literal["pulse"] + routing_key: str + status_type: Optional[StatusType] = None + + +class SlackChannelRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Slack channel notification recipient.""" + + type: Literal["slack-channel"] + channel_id: str + status_type: Optional[StatusType] = None + + +Recipient = Union[ + EmailRecipient, MatrixRoomRecipient, PulseRecipient, SlackChannelRecipient ] _route_keys = { @@ -54,46 +70,120 @@ } """Map each type to its primary key that will be used in the route.""" + +class EmailLink(msgspec.Struct, kw_only=True): + """Email link configuration.""" + + text: str + href: str + + +class EmailContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Email notification content.""" + + subject: Optional[str] = None + content: Optional[str] = None + link: Optional[EmailLink] = None + + +class MatrixContent(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Matrix notification content.""" + + body: Optional[str] = None + formatted_body: Optional[str] = None + format: Optional[str] = None + msg_type: Optional[str] = None + + +class SlackContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Slack notification content.""" + + text: Optional[str] = None + blocks: Optional[List[Any]] = None + attachments: Optional[List[Any]] = None + + +class NotifyContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Notification content configuration.""" + + email: Optional[EmailContent] = None + matrix: Optional[MatrixContent] = None + slack: Optional[SlackContent] = None + + +class NotifyConfig(msgspec.Struct, kw_only=True, omit_defaults=True): + """Modern notification configuration.""" + + recipients: List[Dict[str, Any]] # Will be validated as Recipient union + content: Optional[NotifyContent] = None + + +class LegacyNotificationsConfig( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Legacy notification configuration for backwards compatibility.""" + + emails: Union[List[str], Dict[str, Any]] # Can be keyed-by + subject: str + message: Optional[str] = None + status_types: Optional[List[StatusType]] = None + + #: Schema for notify transforms -NOTIFY_SCHEMA = Schema( - { - Exclusive("notify", "config"): { - Required("recipients"): [Any(*_recipients)], - Optional("content"): { - Optional("email"): { - Optional("subject"): str, - Optional("content"): str, - Optional("link"): { - Required("text"): str, - Required("href"): str, - }, - }, - Optional("matrix"): { - Optional("body"): str, - Optional("formatted-body"): str, - Optional("format"): str, - Optional("msg-type"): str, - }, - Optional("slack"): { - Optional("text"): str, - Optional("blocks"): list, - Optional("attachments"): list, - }, - }, - }, - # Continue supporting the legacy schema for backwards compat. - Exclusive("notifications", "config"): { - Required("emails"): optionally_keyed_by("project", "level", [str]), - Required("subject"): str, - Optional("message"): str, - Optional("status-types"): [_status_type], - }, - }, - extra=ALLOW_EXTRA, -) +class NotifySchema( + msgspec.Struct, kw_only=True, omit_defaults=True, tag_field="notify_type" +): + """Schema for notify transforms. + + Note: This schema allows either 'notify' or 'notifications' field, + but not both. The validation will be done in __post_init__. + """ + + notify: Optional[NotifyConfig] = None + notifications: Optional[LegacyNotificationsConfig] = None + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + def __post_init__(self): + # Ensure only one of notify or notifications is present + if self.notify and self.notifications: + raise msgspec.ValidationError( + "Cannot specify both 'notify' and 'notifications'" + ) + + # Validate recipients if notify is present + if self.notify and self.notify.recipients: + validated_recipients = [] + for r in self.notify.recipients: + try: + # Try to convert to one of the recipient types + if r.get("type") == "email": + validated_recipients.append(msgspec.convert(r, EmailRecipient)) + elif r.get("type") == "matrix-room": + validated_recipients.append( + msgspec.convert(r, MatrixRoomRecipient) + ) + elif r.get("type") == "pulse": + validated_recipients.append(msgspec.convert(r, PulseRecipient)) + elif r.get("type") == "slack-channel": + validated_recipients.append( + msgspec.convert(r, SlackChannelRecipient) + ) + else: + raise msgspec.ValidationError( + f"Unknown recipient type: {r.get('type')}" + ) + except msgspec.ValidationError: + # Keep as dict if it contains keyed-by + validated_recipients.append(r) + self.notify.recipients = validated_recipients + + +# Backward compatibility +NOTIFY_SCHEMA = NotifySchema transforms = TransformSequence() -transforms.add_validate(NOTIFY_SCHEMA) +transforms.add_validate(NotifySchema) def _convert_legacy(config, legacy, label): From 656e375570c840f2e464bb9b3f465b660ad4431c Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 22 Aug 2025 14:59:49 -0400 Subject: [PATCH 05/20] feat: added more schemas --- src/taskgraph/transforms/docker_image.py | 124 +++++++------------- src/taskgraph/transforms/run/toolchain.py | 134 ++++++++-------------- src/taskgraph/util/schema.py | 8 ++ 3 files changed, 92 insertions(+), 174 deletions(-) diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 2b924dbbc..5a97700fe 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -5,17 +5,14 @@ import logging import os import re -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import Optional, Required +import msgspec import taskgraph from taskgraph.transforms.base import TransformSequence from taskgraph.util import json from taskgraph.util.docker import create_context_tar, generate_context_hash -from taskgraph.util.schema import Schema - -from .task import task_description_schema logger = logging.getLogger(__name__) @@ -31,87 +28,44 @@ transforms = TransformSequence() -#: Schema for docker_image transforms -docker_image_schema = Schema( - { - Required( - "name", - description=dedent( - """ - Name of the docker image. - """ - ).lstrip(), - ): str, - Optional( - "parent", - description=dedent( - """ - Name of the parent docker image. - """ - ).lstrip(), - ): str, - Optional( - "symbol", - description=dedent( - """ - Treeherder symbol. - """ - ).lstrip(), - ): str, - Optional( - "task-from", - description=dedent( - """ - Relative path (from config.path) to the file the docker image was defined in. - """ - ).lstrip(), - ): str, - Optional( - "args", - description=dedent( - """ - Arguments to use for the Dockerfile. - """ - ).lstrip(), - ): {str: str}, - Optional( - "definition", - description=dedent( - """ - Name of the docker image definition under taskcluster/docker, when - different from the docker image name. - """ - ).lstrip(), - ): str, - Optional( - "packages", - description=dedent( - """ - List of package tasks this docker image depends on. - """ - ).lstrip(), - ): [str], - Optional( - "index", - description=dedent( - """ - Information for indexing this build so its artifacts can be discovered. - """ - ).lstrip(), - ): task_description_schema["index"], - Optional( - "cache", - description=dedent( - """ - Whether this image should be cached based on inputs. - """ - ).lstrip(), - ): bool, - } -) - -transforms.add_validate(docker_image_schema) +#: Schema for docker_image transforms +class DockerImageSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """ + Schema for docker_image transforms. + + Attributes: + name: Name of the docker image. + parent: Name of the parent docker image. + symbol: Treeherder symbol. + task_from: Relative path (from config.path) to the file the docker image was defined in. + args: Arguments to use for the Dockerfile. + definition: Name of the docker image definition under taskcluster/docker, when + different from the docker image name. + packages: List of package tasks this docker image depends on. + index: Information for indexing this build so its artifacts can be discovered. + cache: Whether this image should be cached based on inputs. + """ + + name: str + parent: Optional[str] = None + symbol: Optional[str] = None + task_from: Optional[str] = None + args: Optional[Dict[str, str]] = None + definition: Optional[str] = None + packages: Optional[List[str]] = None + # For now, use Any for index since task_description_schema is not converted yet + index: Optional[Any] = None + cache: Optional[bool] = None + + +# Backward compatibility +docker_image_schema = DockerImageSchema + + +transforms.add_validate(DockerImageSchema) @transforms.add diff --git a/src/taskgraph/transforms/run/toolchain.py b/src/taskgraph/transforms/run/toolchain.py index 669bcd812..42418c116 100644 --- a/src/taskgraph/transforms/run/toolchain.py +++ b/src/taskgraph/transforms/run/toolchain.py @@ -5,9 +5,9 @@ Support for running toolchain-building tasks via dedicated scripts """ -from textwrap import dedent +from typing import Any, Dict, List, Literal, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec import taskgraph from taskgraph.transforms.run import configure_taskdesc_for_run, run_task_using @@ -18,96 +18,52 @@ ) from taskgraph.util import path as mozpath from taskgraph.util.hash import hash_paths -from taskgraph.util.schema import Schema from taskgraph.util.shell import quote as shell_quote CACHE_TYPE = "toolchains.v3" + #: Schema for run.using toolchain -toolchain_run_schema = Schema( - { - Required( - "using", - description=dedent( - """ - Specifies the run type. Must be "toolchain-script". - """ - ), - ): "toolchain-script", - Required( - "script", - description=dedent( - """ - The script (in taskcluster/scripts/misc) to run. - """ - ), - ): str, - Optional( - "arguments", - description=dedent( - """ - Arguments to pass to the script. - """ - ), - ): [str], - Required( - "sparse-profile", - description=dedent( - """ - Sparse profile to give to checkout using `run-task`. If given, - a filename in `build/sparse-profiles`. Defaults to - "toolchain-build", i.e., to - `build/sparse-profiles/toolchain-build`. If `None`, instructs - `run-task` to not use a sparse profile at all. - """ - ), - ): Any(str, None), - Optional( - "resources", - description=dedent( - """ - Paths/patterns pointing to files that influence the outcome of - a toolchain build. - """ - ), - ): [str], - Required( - "toolchain-artifact", - description=dedent( - """ - Path to the artifact produced by the toolchain task. - """ - ), - ): str, - Optional( - "toolchain-alias", - description=dedent( - """ - An alias that can be used instead of the real toolchain task name in - fetch stanzas for tasks. - """ - ), - ): Any(str, [str]), - Optional( - "toolchain-env", - description=dedent( - """ - Additional env variables to add to the worker when using this - toolchain. - """ - ), - ): {str: object}, - Required( - "workdir", - description=dedent( - """ - Base work directory used to set up the task. - """ - ), - ): str, - }, - extra=ALLOW_EXTRA, -) +class ToolchainRunSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """ + Schema for toolchain-script run configuration. + + Attributes: + using: Specifies the run type. Must be "toolchain-script". + script: The script (in taskcluster/scripts/misc) to run. + arguments: Arguments to pass to the script. + sparse_profile: Sparse profile to give to checkout using `run-task`. If given, + a filename in `build/sparse-profiles`. Defaults to + "toolchain-build", i.e., to + `build/sparse-profiles/toolchain-build`. If `None`, instructs + `run-task` to not use a sparse profile at all. + resources: Paths/patterns pointing to files that influence the outcome of + a toolchain build. + toolchain_artifact: Path to the artifact produced by the toolchain task. + toolchain_alias: An alias that can be used instead of the real toolchain task name in + fetch stanzas for tasks. + toolchain_env: Additional env variables to add to the worker when using this + toolchain. + workdir: Base work directory used to set up the task. + """ + + using: Literal["toolchain-script"] + script: str + sparse_profile: Optional[str] # Can be None to skip sparse profile + toolchain_artifact: str + workdir: str + arguments: Optional[List[str]] = None + resources: Optional[List[str]] = None + toolchain_alias: Optional[Union[str, List[str]]] = None + toolchain_env: Optional[Dict[str, Any]] = None + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + +# Backward compatibility +toolchain_run_schema = ToolchainRunSchema def get_digest_data(config, run, taskdesc): @@ -213,7 +169,7 @@ def common_toolchain(config, task, taskdesc, is_docker): @run_task_using( "docker-worker", "toolchain-script", - schema=toolchain_run_schema, + schema=ToolchainRunSchema, defaults=toolchain_defaults, ) def docker_worker_toolchain(config, task, taskdesc): @@ -223,7 +179,7 @@ def docker_worker_toolchain(config, task, taskdesc): @run_task_using( "generic-worker", "toolchain-script", - schema=toolchain_run_schema, + schema=ToolchainRunSchema, defaults=toolchain_defaults, ) def generic_worker_toolchain(config, task, taskdesc): diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index af5a8deab..b0db11bb3 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -28,6 +28,14 @@ def validate_schema(schema, obj, msg_prefix, use_msgspec=False): if taskgraph.fast: return + # Auto-detect msgspec schemas + if ( + not use_msgspec + and isinstance(schema, type) + and issubclass(schema, msgspec.Struct) + ): + use_msgspec = True + if use_msgspec: # Handle msgspec validation try: From 4804bb85ac8810a511ae22ec3718c9f5dd245229 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Mon, 25 Aug 2025 10:03:42 -0400 Subject: [PATCH 06/20] feat: update schema --- src/taskgraph/parameters.py | 189 +++++++++++++++++++++++++++--------- 1 file changed, 143 insertions(+), 46 deletions(-) diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index d88a155b7..88d8b055f 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -10,12 +10,15 @@ from io import BytesIO from pprint import pformat from subprocess import CalledProcessError +from typing import Dict, List, Optional, Union from unittest.mock import Mock from urllib.parse import urlparse from urllib.request import urlopen import mozilla_repo_urls -from voluptuous import ALLOW_EXTRA, Any, Optional, Required, Schema +import msgspec +from voluptuous import ALLOW_EXTRA as V_ALLOW_EXTRA +from voluptuous import Schema as VSchema from taskgraph.util import json, yaml from taskgraph.util.readonlydict import ReadOnlyDict @@ -28,44 +31,54 @@ class ParameterMismatch(Exception): """Raised when a parameters.yml has extra or missing parameters.""" +class CodeReviewConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Code review configuration.""" + + phabricator_build_target: str + + #: Schema for base parameters. #: Please keep this list sorted and in sync with docs/reference/parameters.rst -base_schema = Schema( - { - Required("base_repository"): str, - Required("base_ref"): str, - Required("base_rev"): str, - Required("build_date"): int, - Required("build_number"): int, - Required("do_not_optimize"): [str], - Required("enable_always_target"): Any(bool, [str]), - Required("existing_tasks"): {str: str}, - Required("files_changed"): [str], - Required("filters"): [str], - Required("head_ref"): str, - Required("head_repository"): str, - Required("head_rev"): str, - Required("head_tag"): str, - Required("level"): str, - Required("moz_build_date"): str, - Required("next_version"): Any(str, None), - Required("optimize_strategies"): Any(str, None), - Required("optimize_target_tasks"): bool, - Required("owner"): str, - Required("project"): str, - Required("pushdate"): int, - Required("pushlog_id"): str, - Required("repository_type"): str, - # target-kinds is not included, since it should never be - # used at run-time - Required("target_tasks_method"): str, - Required("tasks_for"): str, - Required("version"): Any(str, None), - Optional("code-review"): { - Required("phabricator-build-target"): str, - }, - } -) +class BaseSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Base parameters schema. + + This defines the core parameters that all taskgraph runs require. + """ + + base_repository: str + base_ref: str + base_rev: str + build_date: int + build_number: int + do_not_optimize: List[str] + enable_always_target: Union[bool, List[str]] + existing_tasks: Dict[str, str] + files_changed: List[str] + filters: List[str] + head_ref: str + head_repository: str + head_rev: str + head_tag: str + level: str + moz_build_date: str + next_version: Optional[str] + optimize_strategies: Optional[str] + optimize_target_tasks: bool + owner: str + project: str + pushdate: int + pushlog_id: str + repository_type: str + # target-kinds is not included, since it should never be + # used at run-time + target_tasks_method: str + tasks_for: str + version: Optional[str] + code_review: Optional[CodeReviewConfig] = None + + +# Keep backward compatibility +base_schema = BaseSchema def get_contents(path): @@ -143,7 +156,7 @@ def extend_parameters_schema(schema, defaults_fn=None): graph-configuration. Args: - schema (Schema): The voluptuous.Schema object used to describe extended + schema: The schema object (voluptuous or msgspec) used to describe extended parameters. defaults_fn (function): A function which takes no arguments and returns a dict mapping parameter name to default value in the @@ -151,7 +164,20 @@ def extend_parameters_schema(schema, defaults_fn=None): """ global base_schema global defaults_functions - base_schema = base_schema.extend(schema) + + # Handle extending the schema based on its type + if isinstance(base_schema, type) and issubclass(base_schema, msgspec.Struct): + # For msgspec schemas, we can't dynamically extend them + # Store the extension schema for use during validation + if not hasattr(base_schema, "_extensions"): + base_schema._extensions = [] + base_schema._extensions.append(schema) + else: + if isinstance(schema, dict): + base_schema = base_schema.extend(schema) + elif isinstance(schema, VSchema): + base_schema = base_schema.extend(schema.schema) + if defaults_fn: defaults_functions.append(defaults_fn) @@ -214,13 +240,84 @@ def _fill_defaults(repo_root=None, **kwargs): return kwargs def check(self): - schema = ( - base_schema if self.strict else base_schema.extend({}, extra=ALLOW_EXTRA) - ) - try: - validate_schema(schema, self.copy(), "Invalid parameters:") - except Exception as e: - raise ParameterMismatch(str(e)) + # For msgspec schemas, we need to validate differently + if isinstance(base_schema, type) and issubclass(base_schema, msgspec.Struct): + try: + # Convert underscore keys to kebab-case for msgspec validation + params = self.copy() + # BaseSchema uses kebab-case (rename="kebab"), so we need to convert keys + kebab_params = {} + for k, v in params.items(): + # Convert underscore to kebab-case + kebab_key = k.replace("_", "-") + kebab_params[kebab_key] = v + + # Handle extensions if present + if hasattr(base_schema, "_extensions"): + for ext_schema in base_schema._extensions: + if isinstance(ext_schema, dict): + ext_schema = VSchema(ext_schema) + # Validate against the extension + if self.strict: + ext_schema(params) + else: + ext_schema.extend({}, extra=V_ALLOW_EXTRA)(params) + + if self.strict: + # Strict validation with msgspec + # First check for extra fields + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + + # Add extension fields if present + if hasattr(base_schema, "_extensions"): + for ext_schema in base_schema._extensions: + if isinstance(ext_schema, dict): + for key in ext_schema.keys(): + # Extract field name from voluptuous Required/Optional + if hasattr(key, "schema"): + field_name = key.schema.replace("_", "-") + else: + field_name = str(key).replace("_", "-") + schema_fields.add(field_name) + + extra_fields = set(kebab_params.keys()) - schema_fields + if extra_fields: + raise ParameterMismatch( + f"Invalid parameters: Extra fields not allowed: {extra_fields}" + ) + # Now validate the base schema fields + base_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + base_params = { + k: v for k, v in kebab_params.items() if k in base_fields + } + msgspec.convert(base_params, base_schema) + else: + # Non-strict: validate only the fields that exist in the schema + # Filter to only schema fields + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + filtered_params = { + k: v for k, v in kebab_params.items() if k in schema_fields + } + msgspec.convert(filtered_params, base_schema) + except (msgspec.ValidationError, msgspec.DecodeError) as e: + raise ParameterMismatch(f"Invalid parameters: {e}") + else: + # Legacy voluptuous validation + schema = ( + base_schema + if self.strict + else base_schema.extend({}, extra=V_ALLOW_EXTRA) + ) + try: + validate_schema(schema, self.copy(), "Invalid parameters:") + except Exception as e: + raise ParameterMismatch(str(e)) def __getitem__(self, k): try: From 7e7206ab74d61c5f15835efea60f0bc623a6cbbd Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 26 Aug 2025 11:13:24 -0400 Subject: [PATCH 07/20] feat: update schema from voluptuous to msgspec --- docs/concepts/transforms.rst | 4 +- docs/tutorials/creating-a-task-graph.rst | 6 +- pyproject.toml | 2 +- src/taskgraph/config.py | 172 ++-- src/taskgraph/decision.py | 16 +- src/taskgraph/parameters.py | 195 +++- src/taskgraph/transforms/base.py | 8 +- src/taskgraph/transforms/chunking.py | 61 +- src/taskgraph/transforms/docker_image.py | 124 +-- src/taskgraph/transforms/fetch.py | 238 ++--- src/taskgraph/transforms/from_deps.py | 170 ++-- src/taskgraph/transforms/matrix.py | 83 +- src/taskgraph/transforms/notify.py | 216 +++-- src/taskgraph/transforms/run/__init__.py | 231 ++--- src/taskgraph/transforms/run/index_search.py | 23 +- src/taskgraph/transforms/run/run_task.py | 145 +-- src/taskgraph/transforms/run/toolchain.py | 134 +-- src/taskgraph/transforms/task.py | 894 +++++++----------- src/taskgraph/transforms/task_context.py | 122 +-- src/taskgraph/util/schema.py | 437 +++++++-- .../transforms/hello.py | 7 +- test/test_parameters.py | 5 +- test/test_transforms_run_run_task.py | 9 +- test/test_util_schema.py | 41 +- uv.lock | 102 +- 25 files changed, 1756 insertions(+), 1689 deletions(-) diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index 0a74f698b..7989823dc 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -105,10 +105,8 @@ about the state of the tasks at given points. Here is an example: .. code-block:: python - from voluptuous import Optional, Required - from taskgraph.transforms.base import TransformSequence - from taskgraph.util.schema import Schema + from taskgraph.util.schema import Optional, Required, Schema my_schema = Schema({ Required("foo"): str, diff --git a/docs/tutorials/creating-a-task-graph.rst b/docs/tutorials/creating-a-task-graph.rst index cceec6cd0..40886de0b 100644 --- a/docs/tutorials/creating-a-task-graph.rst +++ b/docs/tutorials/creating-a-task-graph.rst @@ -136,12 +136,10 @@ comments for explanations): .. code-block:: python - from voluptuous import Optional, Required - + from taskgraph.util.schema import Optional, Required, Schema from taskgraph.transforms.base import TransformSequence - from taskgraph.util.schema import Schema - # Define the schema. We use the `voluptuous` package to handle validation. + # Define the schema. We use our schema validation to ensure correctness. hello_description_schema = Schema({ Required("text"): str, Optional("description"): str, diff --git a/pyproject.toml b/pyproject.toml index ccd55899f..11d7fa8a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,12 +25,12 @@ dependencies = [ "cookiecutter~=2.1", "json-e>=2.7", "mozilla-repo-urls", + "msgspec>=0.18.6", "PyYAML>=5.3.1", "redo>=2.0", "requests>=2.25", "slugid>=2.0", "taskcluster-urls>=11.0", - "voluptuous>=0.12.1", ] [project.optional-dependencies] diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 6c55cb8ed..0e8d423d4 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -8,107 +8,89 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Dict +from typing import Any, Dict, List, Literal, Optional, Union -from voluptuous import ALLOW_EXTRA, All, Any, Extra, Length, Optional, Required +import msgspec -from .util.caches import CACHES from .util.python_path import find_object -from .util.schema import Schema, optionally_keyed_by, validate_schema +from .util.schema import validate_schema from .util.vcs import get_repository from .util.yaml import load_yaml logger = logging.getLogger(__name__) -#: Schema for the graph config -graph_config_schema = Schema( - { - # The trust-domain for this graph. - # (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain) # noqa - Required("trust-domain"): str, - Optional( - "docker-image-kind", - description="Name of the docker image kind (default: docker-image)", - ): str, - Required("task-priority"): optionally_keyed_by( - "project", - "level", - Any( - "highest", - "very-high", - "high", - "medium", - "low", - "very-low", - "lowest", - ), - ), - Optional( - "task-deadline-after", - description="Default 'deadline' for tasks, in relative date format. " - "Eg: '1 week'", - ): optionally_keyed_by("project", str), - Optional( - "task-expires-after", - description="Default 'expires-after' for level 1 tasks, in relative date format. " - "Eg: '90 days'", - ): str, - Required("workers"): { - Required("aliases"): { - str: { - Required("provisioner"): optionally_keyed_by("level", str), - Required("implementation"): str, - Required("os"): str, - Required("worker-type"): optionally_keyed_by("level", str), - } - }, - }, - Required("taskgraph"): { - Optional( - "register", - description="Python function to call to register extensions.", - ): str, - Optional("decision-parameters"): str, - Optional( - "cached-task-prefix", - description="The taskcluster index prefix to use for caching tasks. " - "Defaults to `trust-domain`.", - ): str, - Optional( - "cache-pull-requests", - description="Should tasks from pull requests populate the cache", - ): bool, - Optional( - "index-path-regexes", - description="Regular expressions matching index paths to be summarized.", - ): [str], - Optional( - "run", - description="Configuration related to the 'run' transforms.", - ): { - Optional( - "use-caches", - description="List of caches to enable, or a boolean to " - "enable/disable all of them.", - ): Any(bool, list(CACHES.keys())), - }, - Required("repositories"): All( - { - str: { - Required("name"): str, - Optional("project-regex"): str, - Optional("ssh-secret-name"): str, - # FIXME - Extra: str, - } - }, - Length(min=1), - ), - }, - }, - extra=ALLOW_EXTRA, -) +# TaskPriority type for the priority levels +TaskPriority = Literal[ + "highest", "very-high", "high", "medium", "low", "very-low", "lowest" +] + + +class WorkerAlias(msgspec.Struct, kw_only=True, rename="kebab"): + """Worker alias configuration.""" + + provisioner: Union[str, dict] + implementation: str + os: str + worker_type: Union[str, dict] # Can be keyed-by, maps from "worker-type" + + +class Workers(msgspec.Struct, kw_only=True): + """Workers configuration.""" + + aliases: Dict[str, WorkerAlias] + + +class Repository(msgspec.Struct, kw_only=True, rename="kebab"): + """Repository configuration.""" + + name: str + project_regex: Optional[str] = None # Maps from "project-regex" + ssh_secret_name: Optional[str] = None # Maps from "ssh-secret-name" + # Allow extra fields for flexibility + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +class RunConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Run transforms configuration.""" + + use_caches: Optional[Union[bool, List[str]]] = None # Maps from "use-caches" + + +class TaskGraphConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Taskgraph specific configuration.""" + + repositories: Dict[str, Repository] + register: Optional[str] = None + decision_parameters: Optional[str] = None # Maps from "decision-parameters" + cached_task_prefix: Optional[str] = None # Maps from "cached-task-prefix" + cache_pull_requests: Optional[bool] = None # Maps from "cache-pull-requests" + index_path_regexes: Optional[List[str]] = None # Maps from "index-path-regexes" + run: Optional[RunConfig] = None + + +class GraphConfigSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Main graph configuration schema.""" + + trust_domain: str # Maps from "trust-domain" + task_priority: Union[ + TaskPriority, dict + ] # Maps from "task-priority", can be keyed-by + workers: Workers + taskgraph: TaskGraphConfig + docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" + task_deadline_after: Optional[Union[str, dict]] = ( + None # Maps from "task-deadline-after", can be keyed-by + ) + task_expires_after: Optional[str] = None # Maps from "task-expires-after" + # Allow extra fields for flexibility + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +# Msgspec schema is now the main schema +graph_config_schema = GraphConfigSchema @dataclass(frozen=True, eq=False) @@ -177,7 +159,11 @@ def kinds_dir(self): def validate_graph_config(config): - validate_schema(graph_config_schema, config, "Invalid graph configuration:") + """Validate graph configuration using msgspec.""" + # With rename="kebab", msgspec handles the conversion automatically + validate_schema( + GraphConfigSchema, config, "Invalid graph configuration:", use_msgspec=True + ) def load_graph_config(root_dir): diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py index 361619221..6edcfadc1 100644 --- a/src/taskgraph/decision.py +++ b/src/taskgraph/decision.py @@ -9,9 +9,10 @@ import shutil import time from pathlib import Path +from typing import Any, Dict, Optional +import msgspec import yaml -from voluptuous import Optional from taskgraph.actions import render_actions_json from taskgraph.create import create_tasks @@ -20,7 +21,7 @@ from taskgraph.taskgraph import TaskGraph from taskgraph.util import json from taskgraph.util.python_path import find_object -from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.schema import validate_schema from taskgraph.util.vcs import Repository, get_repository from taskgraph.util.yaml import load_yaml @@ -40,11 +41,11 @@ #: Schema for try_task_config.json version 2 -try_task_config_schema_v2 = Schema( - { - Optional("parameters"): {str: object}, - } -) +class TryTaskConfigSchemaV2(msgspec.Struct, kw_only=True, omit_defaults=True): + parameters: Optional[Dict[str, Any]] = None + + +try_task_config_schema_v2 = TryTaskConfigSchemaV2 def full_task_graph_to_runnable_tasks(full_task_json): @@ -357,6 +358,7 @@ def set_try_config(parameters, task_config_file): try_task_config_schema_v2, task_config, "Invalid v2 `try_task_config.json`.", + use_msgspec=True, ) parameters.update(task_config["parameters"]) return diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index d88a155b7..1f02c971c 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -10,16 +10,16 @@ from io import BytesIO from pprint import pformat from subprocess import CalledProcessError +from typing import Dict, List, Optional, Union from unittest.mock import Mock from urllib.parse import urlparse from urllib.request import urlopen import mozilla_repo_urls -from voluptuous import ALLOW_EXTRA, Any, Optional, Required, Schema +import msgspec from taskgraph.util import json, yaml from taskgraph.util.readonlydict import ReadOnlyDict -from taskgraph.util.schema import validate_schema from taskgraph.util.taskcluster import find_task_id, get_artifact_url from taskgraph.util.vcs import get_repository @@ -28,44 +28,54 @@ class ParameterMismatch(Exception): """Raised when a parameters.yml has extra or missing parameters.""" +class CodeReviewConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """Code review configuration.""" + + phabricator_build_target: str + + #: Schema for base parameters. #: Please keep this list sorted and in sync with docs/reference/parameters.rst -base_schema = Schema( - { - Required("base_repository"): str, - Required("base_ref"): str, - Required("base_rev"): str, - Required("build_date"): int, - Required("build_number"): int, - Required("do_not_optimize"): [str], - Required("enable_always_target"): Any(bool, [str]), - Required("existing_tasks"): {str: str}, - Required("files_changed"): [str], - Required("filters"): [str], - Required("head_ref"): str, - Required("head_repository"): str, - Required("head_rev"): str, - Required("head_tag"): str, - Required("level"): str, - Required("moz_build_date"): str, - Required("next_version"): Any(str, None), - Required("optimize_strategies"): Any(str, None), - Required("optimize_target_tasks"): bool, - Required("owner"): str, - Required("project"): str, - Required("pushdate"): int, - Required("pushlog_id"): str, - Required("repository_type"): str, - # target-kinds is not included, since it should never be - # used at run-time - Required("target_tasks_method"): str, - Required("tasks_for"): str, - Required("version"): Any(str, None), - Optional("code-review"): { - Required("phabricator-build-target"): str, - }, - } -) +class BaseSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Base parameters schema. + + This defines the core parameters that all taskgraph runs require. + """ + + base_repository: str + base_ref: str + base_rev: str + build_date: int + build_number: int + do_not_optimize: List[str] + enable_always_target: Union[bool, List[str]] + existing_tasks: Dict[str, str] + files_changed: List[str] + filters: List[str] + head_ref: str + head_repository: str + head_rev: str + head_tag: str + level: str + moz_build_date: str + next_version: Optional[str] + optimize_strategies: Optional[str] + optimize_target_tasks: bool + owner: str + project: str + pushdate: int + pushlog_id: str + repository_type: str + # target-kinds is not included, since it should never be + # used at run-time + target_tasks_method: str + tasks_for: str + version: Optional[str] + code_review: Optional[CodeReviewConfig] = None + + +# Keep backward compatibility +base_schema = BaseSchema def get_contents(path): @@ -135,6 +145,10 @@ def _get_defaults(repo_root=None): defaults_functions = [_get_defaults] +# Keep track of schema extensions separately +_schema_extensions = [] + + def extend_parameters_schema(schema, defaults_fn=None): """ Extend the schema for parameters to include per-project configuration. @@ -143,7 +157,7 @@ def extend_parameters_schema(schema, defaults_fn=None): graph-configuration. Args: - schema (Schema): The voluptuous.Schema object used to describe extended + schema: The schema object (dict or msgspec) used to describe extended parameters. defaults_fn (function): A function which takes no arguments and returns a dict mapping parameter name to default value in the @@ -151,7 +165,15 @@ def extend_parameters_schema(schema, defaults_fn=None): """ global base_schema global defaults_functions - base_schema = base_schema.extend(schema) + global _schema_extensions + + # Store the extension schema for use during validation + _schema_extensions.append(schema) + + # Also extend the base_schema if it's a Schema instance + if hasattr(base_schema, "extend"): + base_schema = base_schema.extend(schema) + if defaults_fn: defaults_functions.append(defaults_fn) @@ -214,13 +236,92 @@ def _fill_defaults(repo_root=None, **kwargs): return kwargs def check(self): - schema = ( - base_schema if self.strict else base_schema.extend({}, extra=ALLOW_EXTRA) - ) - try: - validate_schema(schema, self.copy(), "Invalid parameters:") - except Exception as e: - raise ParameterMismatch(str(e)) + # For msgspec schemas, we need to validate differently + if isinstance(base_schema, type) and issubclass(base_schema, msgspec.Struct): + try: + # Convert underscore keys to kebab-case for msgspec validation + params = self.copy() + # BaseSchema uses kebab-case (rename="kebab"), so we need to convert keys + kebab_params = {} + for k, v in params.items(): + # Convert underscore to kebab-case + kebab_key = k.replace("_", "-") + kebab_params[kebab_key] = v + + # Handle extensions if present + global _schema_extensions + for ext_schema in _schema_extensions: + if isinstance(ext_schema, dict): + # Simple dict validation - just check if required keys exist + for key in ext_schema: + # Just skip validation of extensions for now + pass + + if self.strict: + # Strict validation with msgspec + # First check for extra fields + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + + # Add extension fields if present + for ext_schema in _schema_extensions: + if isinstance(ext_schema, dict): + for key in ext_schema.keys(): + # Extract field name + if hasattr(key, "key"): + field_name = key.key.replace("_", "-") + else: + field_name = str(key).replace("_", "-") + schema_fields.add(field_name) + + extra_fields = set(kebab_params.keys()) - schema_fields + if extra_fields: + raise ParameterMismatch( + f"Invalid parameters: Extra fields not allowed: {extra_fields}" + ) + # Now validate the base schema fields + base_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + base_params = { + k: v for k, v in kebab_params.items() if k in base_fields + } + msgspec.convert(base_params, base_schema) + else: + # Non-strict: validate only the fields that exist in the schema + # Filter to only schema fields + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + filtered_params = { + k: v for k, v in kebab_params.items() if k in schema_fields + } + msgspec.convert(filtered_params, base_schema) + except (msgspec.ValidationError, msgspec.DecodeError) as e: + raise ParameterMismatch(f"Invalid parameters: {e}") + else: + # For non-msgspec schemas, validate using the Schema class + from taskgraph.util.schema import validate_schema # noqa: PLC0415 + + try: + if self.strict: + validate_schema(base_schema, self.copy(), "Invalid parameters:") + else: + # In non-strict mode, allow extra fields + if hasattr(base_schema, "allow_extra"): + original_allow_extra = base_schema.allow_extra + base_schema.allow_extra = True + try: + validate_schema( + base_schema, self.copy(), "Invalid parameters:" + ) + finally: + base_schema.allow_extra = original_allow_extra + else: + validate_schema(base_schema, self.copy(), "Invalid parameters:") + except Exception as e: + raise ParameterMismatch(str(e)) def __getitem__(self, k): try: diff --git a/src/taskgraph/transforms/base.py b/src/taskgraph/transforms/base.py index 4626ca8d0..1b62e7852 100644 --- a/src/taskgraph/transforms/base.py +++ b/src/taskgraph/transforms/base.py @@ -8,6 +8,8 @@ from dataclasses import dataclass, field from typing import Dict, List, Union +import msgspec + from taskgraph.task import Task from ..config import GraphConfig @@ -154,5 +156,9 @@ def __call__(self, config, tasks): ) else: error = "In unknown task:" - validate_schema(self.schema, task, error) + # Check if schema is a msgspec.Struct type + use_msgspec = isinstance(self.schema, type) and issubclass( + self.schema, msgspec.Struct + ) + validate_schema(self.schema, task, error, use_msgspec=use_msgspec) yield task diff --git a/src/taskgraph/transforms/chunking.py b/src/taskgraph/transforms/chunking.py index d8ad89dd2..7ed7b6e62 100644 --- a/src/taskgraph/transforms/chunking.py +++ b/src/taskgraph/transforms/chunking.py @@ -2,49 +2,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. import copy -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import ALLOW_EXTRA, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute + +class ChunkConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + `chunk` can be used to split one task into `total-chunks` + tasks, substituting `this_chunk` and `total_chunks` into any + fields in `substitution-fields`. + """ + + # The total number of chunks to split the task into. + total_chunks: int + # A list of fields that need to have `{this_chunk}` and/or + # `{total_chunks}` replaced in them. + substitution_fields: Optional[List[str]] = None + + #: Schema for chunking transforms -CHUNK_SCHEMA = Schema( - { - # Optional, so it can be used for a subset of tasks in a kind - Optional( - "chunk", - description=dedent( - """ - `chunk` can be used to split one task into `total-chunks` - tasks, substituting `this_chunk` and `total_chunks` into any - fields in `substitution-fields`. - """.lstrip() - ), - ): { - Required( - "total-chunks", - description=dedent( - """ - The total number of chunks to split the task into. - """.lstrip() - ), - ): int, - Optional( - "substitution-fields", - description=dedent( - """ - A list of fields that need to have `{this_chunk}` and/or - `{total_chunks}` replaced in them. - """.lstrip() - ), - ): [str], - } - }, - extra=ALLOW_EXTRA, -) +class ChunkSchema(msgspec.Struct, kw_only=True, omit_defaults=True): + # Optional, so it can be used for a subset of tasks in a kind + chunk: Optional[ChunkConfig] = None + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +CHUNK_SCHEMA = ChunkSchema transforms = TransformSequence() transforms.add_validate(CHUNK_SCHEMA) diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 2b924dbbc..5a97700fe 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -5,17 +5,14 @@ import logging import os import re -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import Optional, Required +import msgspec import taskgraph from taskgraph.transforms.base import TransformSequence from taskgraph.util import json from taskgraph.util.docker import create_context_tar, generate_context_hash -from taskgraph.util.schema import Schema - -from .task import task_description_schema logger = logging.getLogger(__name__) @@ -31,87 +28,44 @@ transforms = TransformSequence() -#: Schema for docker_image transforms -docker_image_schema = Schema( - { - Required( - "name", - description=dedent( - """ - Name of the docker image. - """ - ).lstrip(), - ): str, - Optional( - "parent", - description=dedent( - """ - Name of the parent docker image. - """ - ).lstrip(), - ): str, - Optional( - "symbol", - description=dedent( - """ - Treeherder symbol. - """ - ).lstrip(), - ): str, - Optional( - "task-from", - description=dedent( - """ - Relative path (from config.path) to the file the docker image was defined in. - """ - ).lstrip(), - ): str, - Optional( - "args", - description=dedent( - """ - Arguments to use for the Dockerfile. - """ - ).lstrip(), - ): {str: str}, - Optional( - "definition", - description=dedent( - """ - Name of the docker image definition under taskcluster/docker, when - different from the docker image name. - """ - ).lstrip(), - ): str, - Optional( - "packages", - description=dedent( - """ - List of package tasks this docker image depends on. - """ - ).lstrip(), - ): [str], - Optional( - "index", - description=dedent( - """ - Information for indexing this build so its artifacts can be discovered. - """ - ).lstrip(), - ): task_description_schema["index"], - Optional( - "cache", - description=dedent( - """ - Whether this image should be cached based on inputs. - """ - ).lstrip(), - ): bool, - } -) - -transforms.add_validate(docker_image_schema) +#: Schema for docker_image transforms +class DockerImageSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """ + Schema for docker_image transforms. + + Attributes: + name: Name of the docker image. + parent: Name of the parent docker image. + symbol: Treeherder symbol. + task_from: Relative path (from config.path) to the file the docker image was defined in. + args: Arguments to use for the Dockerfile. + definition: Name of the docker image definition under taskcluster/docker, when + different from the docker image name. + packages: List of package tasks this docker image depends on. + index: Information for indexing this build so its artifacts can be discovered. + cache: Whether this image should be cached based on inputs. + """ + + name: str + parent: Optional[str] = None + symbol: Optional[str] = None + task_from: Optional[str] = None + args: Optional[Dict[str, str]] = None + definition: Optional[str] = None + packages: Optional[List[str]] = None + # For now, use Any for index since task_description_schema is not converted yet + index: Optional[Any] = None + cache: Optional[bool] = None + + +# Backward compatibility +docker_image_schema = DockerImageSchema + + +transforms.add_validate(DockerImageSchema) @transforms.add diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 797ab71e2..1555265ca 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -9,77 +9,65 @@ import os import re from dataclasses import dataclass -from textwrap import dedent -from typing import Callable +from typing import Any, Callable, Dict, Optional -from voluptuous import Extra, Optional, Required +import msgspec import taskgraph from ..util import path from ..util.cached_tasks import add_optimization -from ..util.schema import Schema, validate_schema from ..util.treeherder import join_symbol from .base import TransformSequence CACHE_TYPE = "content.v1" + #: Schema for fetch transforms -FETCH_SCHEMA = Schema( - { - Required( - "name", - description=dedent( - """ - Name of the task. - """.lstrip() - ), - ): str, - Optional( - "task-from", - description=dedent( - """ - Relative path (from config.path) to the file the task was defined - in. - """.lstrip() - ), - ): str, - Required( - "description", - description=dedent( - """ - Description of the task. - """.lstrip() - ), - ): str, - Optional("expires-after"): str, - Optional("docker-image"): object, - Optional( - "fetch-alias", - description=dedent( - """ - An alias that can be used instead of the real fetch task name in - fetch stanzas for tasks. - """.lstrip() - ), - ): str, - Optional( - "artifact-prefix", - description=dedent( - """ - The prefix of the taskcluster artifact being uploaded. - Defaults to `public/`; if it starts with something other than - `public/` the artifact will require scopes to access. - """.lstrip() - ), - ): str, - Optional("attributes"): {str: object}, - Required("fetch"): { - Required("type"): str, - Extra: object, - }, - } -) +class FetchConfig(msgspec.Struct, kw_only=True): + """Configuration for a fetch task type.""" + + type: str + # Additional fields handled dynamically by fetch builders + + +class FetchSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """ + Schema for fetch transforms. + + Attributes: + name: Name of the task. + task_from: Relative path (from config.path) to the file the task was defined in. + description: Description of the task. + expires_after: When the task expires. + docker_image: Docker image configuration. + fetch_alias: An alias that can be used instead of the real fetch task name in + fetch stanzas for tasks. + artifact_prefix: The prefix of the taskcluster artifact being uploaded. + Defaults to `public/`; if it starts with something other than + `public/` the artifact will require scopes to access. + attributes: Task attributes. + fetch: Fetch configuration with type and additional fields. + """ + + name: str + description: str + fetch: Dict[str, Any] # Must have 'type' key, other keys depend on type + task_from: Optional[str] = None + expires_after: Optional[str] = None + docker_image: Optional[Any] = None + fetch_alias: Optional[str] = None + artifact_prefix: Optional[str] = None + attributes: Optional[Dict[str, Any]] = None + + def __post_init__(self): + # Validate that fetch has a 'type' field + if not isinstance(self.fetch, dict) or "type" not in self.fetch: + raise msgspec.ValidationError("fetch must be a dict with a 'type' field") + + +# Backward compatibility +FETCH_SCHEMA = FetchSchema # define a collection of payload builders, depending on the worker implementation fetch_builders = {} @@ -87,13 +75,12 @@ @dataclass(frozen=True) class FetchBuilder: - schema: Schema + schema: Any # Either msgspec.Struct type or validation function builder: Callable def fetch_builder(name, schema): - schema = Schema({Required("type"): name}).extend(schema) - + # schema should be a msgspec.Struct type def wrap(func): fetch_builders[name] = FetchBuilder(schema, func) # type: ignore return func @@ -102,7 +89,7 @@ def wrap(func): transforms = TransformSequence() -transforms.add_validate(FETCH_SCHEMA) +transforms.add_validate(FetchSchema) @transforms.add @@ -115,7 +102,11 @@ def process_fetch_task(config, tasks): if typ not in fetch_builders: raise Exception(f"Unknown fetch type {typ} in fetch {name}") - validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:") + # Validate fetch config using msgspec + try: + msgspec.convert(fetch, fetch_builders[typ].schema) + except msgspec.ValidationError as e: + raise Exception(f"In task.fetch {name!r}: {e}") task.update(configure_fetch(config, typ, name, fetch)) @@ -125,7 +116,11 @@ def process_fetch_task(config, tasks): def configure_fetch(config, typ, name, fetch): if typ not in fetch_builders: raise Exception(f"No fetch type {typ} in fetch {name}") - validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:") + # Validate fetch config using msgspec + try: + msgspec.convert(fetch, fetch_builders[typ].schema) + except msgspec.ValidationError as e: + raise Exception(f"In task.fetch {name!r}: {e}") return fetch_builders[typ].builder(config, name, fetch) @@ -204,45 +199,48 @@ def make_task(config, tasks): yield task_desc -@fetch_builder( - "static-url", - schema={ - # The URL to download. - Required("url"): str, - # The SHA-256 of the downloaded content. - Required("sha256"): str, - # Size of the downloaded entity, in bytes. - Required("size"): int, - # GPG signature verification. - Optional("gpg-signature"): { - # URL where GPG signature document can be obtained. Can contain the - # value ``{url}``, which will be substituted with the value from - # ``url``. - Required("sig-url"): str, - # Path to file containing GPG public key(s) used to validate - # download. - Required("key-path"): str, - }, - # The name to give to the generated artifact. Defaults to the file - # portion of the URL. Using a different extension converts the - # archive to the given type. Only conversion to .tar.zst is - # supported. - Optional("artifact-name"): str, - # Strip the given number of path components at the beginning of - # each file entry in the archive. - # Requires an artifact-name ending with .tar.zst. - Optional("strip-components"): int, - # Add the given prefix to each file entry in the archive. - # Requires an artifact-name ending with .tar.zst. - Optional("add-prefix"): str, - # Headers to pass alongside the request. - Optional("headers"): { - str: str, - }, - # IMPORTANT: when adding anything that changes the behavior of the task, - # it is important to update the digest data used to compute cache hits. - }, -) +class GPGSignatureConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """GPG signature verification configuration.""" + + # URL where GPG signature document can be obtained. Can contain the + # value ``{url}``, which will be substituted with the value from ``url``. + sig_url: str + # Path to file containing GPG public key(s) used to validate download. + key_path: str + + +class StaticUrlFetchConfig( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Configuration for static-url fetch type.""" + + type: str + # The URL to download. + url: str + # The SHA-256 of the downloaded content. + sha256: str + # Size of the downloaded entity, in bytes. + size: int + # GPG signature verification. + gpg_signature: Optional[GPGSignatureConfig] = None + # The name to give to the generated artifact. Defaults to the file + # portion of the URL. Using a different extension converts the + # archive to the given type. Only conversion to .tar.zst is supported. + artifact_name: Optional[str] = None + # Strip the given number of path components at the beginning of + # each file entry in the archive. + # Requires an artifact-name ending with .tar.zst. + strip_components: Optional[int] = None + # Add the given prefix to each file entry in the archive. + # Requires an artifact-name ending with .tar.zst. + add_prefix: Optional[str] = None + # Headers to pass alongside the request. + headers: Optional[Dict[str, str]] = None + # IMPORTANT: when adding anything that changes the behavior of the task, + # it is important to update the digest data used to compute cache hits. + + +@fetch_builder("static-url", StaticUrlFetchConfig) def create_fetch_url_task(config, name, fetch): artifact_name = fetch.get("artifact-name") if not artifact_name: @@ -305,21 +303,23 @@ def create_fetch_url_task(config, name, fetch): } -@fetch_builder( - "git", - schema={ - Required("repo"): str, - Required("revision"): str, - Optional("include-dot-git"): bool, - Optional("artifact-name"): str, - Optional("path-prefix"): str, - # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key) - # In the secret dictionary, the key should be specified as - # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..." - # n.b. The OpenSSH private key file format requires a newline at the end of the file. - Optional("ssh-key"): str, - }, -) +class GitFetchConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Configuration for git fetch type.""" + + type: str + repo: str + revision: str + include_dot_git: Optional[bool] = None + artifact_name: Optional[str] = None + path_prefix: Optional[str] = None + # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key) + # In the secret dictionary, the key should be specified as + # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..." + # n.b. The OpenSSH private key file format requires a newline at the end of the file. + ssh_key: Optional[str] = None + + +@fetch_builder("git", GitFetchConfig) def create_git_fetch_task(config, name, fetch): path_prefix = fetch.get("path-prefix") if not path_prefix: diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 561891374..fedaf1654 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -13,108 +13,100 @@ from copy import deepcopy from textwrap import dedent +from typing import Any, Dict, List, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.transforms.run import fetches_schema from taskgraph.util.attributes import attrmatch from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies -from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.schema import validate_schema from taskgraph.util.set_name import SET_NAME_MAP -#: Schema for from_deps transforms -FROM_DEPS_SCHEMA = Schema( - { - Required("from-deps"): { - Optional( - "kinds", - description=dedent( - """ - Limit dependencies to specified kinds (defaults to all kinds in - `kind-dependencies`). - The first kind in the list is the "primary" kind. The - dependency of this kind will be used to derive the label - and copy attributes (if `copy-attributes` is True). - """.lstrip() - ), - ): [str], - Optional( - "set-name", - description=dedent( - """ - UPDATE ME AND DOCS - """.lstrip() - ), - ): Any( - None, - False, - *SET_NAME_MAP, - {Any(*SET_NAME_MAP): object}, - ), - Optional( - "with-attributes", - description=dedent( - """ - Limit dependencies to tasks whose attributes match - using :func:`~taskgraph.util.attributes.attrmatch`. - """.lstrip() - ), - ): {str: Any(list, str)}, - Optional( - "group-by", - description=dedent( - """ - Group cross-kind dependencies using the given group-by - function. One task will be created for each group. If not - specified, the 'single' function will be used which creates - a new task for each individual dependency. - """.lstrip() - ), - ): Any( - None, - *GROUP_BY_MAP, - {Any(*GROUP_BY_MAP): object}, - ), - Optional( - "copy-attributes", - description=dedent( - """ - If True, copy attributes from the dependency matching the - first kind in the `kinds` list (whether specified explicitly - or taken from `kind-dependencies`). - """.lstrip() - ), - ): bool, - Optional( - "unique-kinds", - description=dedent( - """ - If true (the default), there must be only a single unique task - for each kind in a dependency group. Setting this to false - disables that requirement. - """.lstrip() - ), - ): bool, - Optional( - "fetches", - description=dedent( - """ - If present, a `fetches` entry will be added for each task +# Define FetchEntry for the fetches field +class FetchEntry(msgspec.Struct, kw_only=True, omit_defaults=True): + """A fetch entry for an artifact.""" + + artifact: str + dest: Optional[str] = None + + +class FromDepsConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """ + Configuration for from-deps transforms. + + Attributes: + kinds: Limit dependencies to specified kinds (defaults to all kinds in + `kind-dependencies`). The first kind in the list is the "primary" kind. + The dependency of this kind will be used to derive the label + and copy attributes (if `copy-attributes` is True). + set_name: UPDATE ME AND DOCS. Can be a string from SET_NAME_MAP, False, None, + or a dict with a SET_NAME_MAP key. + with_attributes: Limit dependencies to tasks whose attributes match + using :func:`~taskgraph.util.attributes.attrmatch`. + group_by: Group cross-kind dependencies using the given group-by + function. One task will be created for each group. If not + specified, the 'single' function will be used which creates + a new task for each individual dependency. + copy_attributes: If True, copy attributes from the dependency matching the + first kind in the `kinds` list (whether specified explicitly + or taken from `kind-dependencies`). + unique_kinds: If true (the default), there must be only a single unique task + for each kind in a dependency group. Setting this to false + disables that requirement. + fetches: If present, a `fetches` entry will be added for each task dependency. Attributes of the upstream task may be used as substitution values in the `artifact` or `dest` values of the `fetches` entry. - """.lstrip() - ), - ): {str: [fetches_schema]}, - }, - }, - extra=ALLOW_EXTRA, -) + """ + + kinds: Optional[List[str]] = None + set_name: Optional[Union[str, bool, Dict[str, Any]]] = None + with_attributes: Optional[Dict[str, Union[List[Any], str]]] = None + group_by: Optional[Union[str, Dict[str, Any]]] = None + copy_attributes: Optional[bool] = None + unique_kinds: Optional[bool] = None + fetches: Optional[Dict[str, List[Union[str, Dict[str, str]]]]] = None + + def __post_init__(self): + # Validate set_name + if self.set_name is not None and self.set_name is not False: + if isinstance(self.set_name, str) and self.set_name not in SET_NAME_MAP: + raise msgspec.ValidationError(f"Invalid set-name: {self.set_name}") + elif isinstance(self.set_name, dict): + keys = list(self.set_name.keys()) + if len(keys) != 1 or keys[0] not in SET_NAME_MAP: + raise msgspec.ValidationError( + f"Invalid set-name dict: {self.set_name}" + ) + + # Validate group_by + if self.group_by is not None: + if isinstance(self.group_by, str) and self.group_by not in GROUP_BY_MAP: + raise msgspec.ValidationError(f"Invalid group-by: {self.group_by}") + elif isinstance(self.group_by, dict): + keys = list(self.group_by.keys()) + if len(keys) != 1 or keys[0] not in GROUP_BY_MAP: + raise msgspec.ValidationError( + f"Invalid group-by dict: {self.group_by}" + ) + + +#: Schema for from_deps transforms +class FromDepsSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Schema for from_deps transforms.""" + + from_deps: FromDepsConfig + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + +# Backward compatibility +FROM_DEPS_SCHEMA = FromDepsSchema transforms = TransformSequence() -transforms.add_validate(FROM_DEPS_SCHEMA) +transforms.add_validate(FromDepsSchema) @transforms.add diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 476507284..8ba0c5cdc 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -8,59 +8,48 @@ """ from copy import deepcopy -from textwrap import dedent +from typing import Any, Dict, List, Optional -from voluptuous import ALLOW_EXTRA, Extra, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute_task_fields + +class MatrixConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + Matrix configuration for generating multiple tasks. + """ + + # Exclude the specified combination(s) of matrix values from the + # final list of tasks. + # + # If only a subset of the possible rows are present in the + # exclusion rule, then *all* combinations including that subset + # subset will be excluded. + exclude: Optional[List[Dict[str, str]]] = None + # Sets the task name to the specified format string. + # + # Useful for cases where the default of joining matrix values by + # a dash is not desired. + set_name: Optional[str] = None + # List of fields in the task definition to substitute matrix values into. + # + # If not specified, all fields in the task definition will be + # substituted. + substitution_fields: Optional[List[str]] = None + # Allow extra fields for matrix dimensions + __extras__: Dict[str, List[str]] = msgspec.field(default_factory=dict) + + #: Schema for matrix transforms -MATRIX_SCHEMA = Schema( - { - Required("name"): str, - Optional("matrix"): { - Optional( - "exclude", - description=dedent( - """ - Exclude the specified combination(s) of matrix values from the - final list of tasks. - - If only a subset of the possible rows are present in the - exclusion rule, then *all* combinations including that subset - subset will be excluded. - """.lstrip() - ), - ): [{str: str}], - Optional( - "set-name", - description=dedent( - """ - Sets the task name to the specified format string. - - Useful for cases where the default of joining matrix values by - a dash is not desired. - """.lstrip() - ), - ): str, - Optional( - "substitution-fields", - description=dedent( - """ - List of fields in the task definition to substitute matrix values into. - - If not specified, all fields in the task definition will be - substituted. - """ - ), - ): [str], - Extra: [str], - }, - }, - extra=ALLOW_EXTRA, -) +class MatrixSchema(msgspec.Struct, kw_only=True, omit_defaults=True): + name: str + matrix: Optional[MatrixConfig] = None + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +MATRIX_SCHEMA = MatrixSchema transforms = TransformSequence() transforms.add_validate(MATRIX_SCHEMA) diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 9c0152dad..610ede551 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -8,12 +8,14 @@ more information. """ -from voluptuous import ALLOW_EXTRA, Any, Exclusive, Optional, Required +from typing import Any, Dict, List, Literal, Optional, Union + +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by +from taskgraph.util.schema import resolve_keyed_by -_status_type = Any( +StatusType = Literal[ "on-completed", "on-defined", "on-exception", @@ -21,29 +23,43 @@ "on-pending", "on-resolved", "on-running", -) - -_recipients = [ - { - Required("type"): "email", - Required("address"): optionally_keyed_by("project", "level", str), - Optional("status-type"): _status_type, - }, - { - Required("type"): "matrix-room", - Required("room-id"): str, - Optional("status-type"): _status_type, - }, - { - Required("type"): "pulse", - Required("routing-key"): str, - Optional("status-type"): _status_type, - }, - { - Required("type"): "slack-channel", - Required("channel-id"): str, - Optional("status-type"): _status_type, - }, +] + + +class EmailRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Email notification recipient.""" + + type: Literal["email"] + address: Union[str, Dict[str, Any]] # Can be keyed-by + status_type: Optional[StatusType] = None + + +class MatrixRoomRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Matrix room notification recipient.""" + + type: Literal["matrix-room"] + room_id: str + status_type: Optional[StatusType] = None + + +class PulseRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Pulse notification recipient.""" + + type: Literal["pulse"] + routing_key: str + status_type: Optional[StatusType] = None + + +class SlackChannelRecipient(msgspec.Struct, kw_only=True, rename="kebab"): + """Slack channel notification recipient.""" + + type: Literal["slack-channel"] + channel_id: str + status_type: Optional[StatusType] = None + + +Recipient = Union[ + EmailRecipient, MatrixRoomRecipient, PulseRecipient, SlackChannelRecipient ] _route_keys = { @@ -54,46 +70,120 @@ } """Map each type to its primary key that will be used in the route.""" + +class EmailLink(msgspec.Struct, kw_only=True): + """Email link configuration.""" + + text: str + href: str + + +class EmailContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Email notification content.""" + + subject: Optional[str] = None + content: Optional[str] = None + link: Optional[EmailLink] = None + + +class MatrixContent(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + """Matrix notification content.""" + + body: Optional[str] = None + formatted_body: Optional[str] = None + format: Optional[str] = None + msg_type: Optional[str] = None + + +class SlackContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Slack notification content.""" + + text: Optional[str] = None + blocks: Optional[List[Any]] = None + attachments: Optional[List[Any]] = None + + +class NotifyContent(msgspec.Struct, kw_only=True, omit_defaults=True): + """Notification content configuration.""" + + email: Optional[EmailContent] = None + matrix: Optional[MatrixContent] = None + slack: Optional[SlackContent] = None + + +class NotifyConfig(msgspec.Struct, kw_only=True, omit_defaults=True): + """Modern notification configuration.""" + + recipients: List[Dict[str, Any]] # Will be validated as Recipient union + content: Optional[NotifyContent] = None + + +class LegacyNotificationsConfig( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """Legacy notification configuration for backwards compatibility.""" + + emails: Union[List[str], Dict[str, Any]] # Can be keyed-by + subject: str + message: Optional[str] = None + status_types: Optional[List[StatusType]] = None + + #: Schema for notify transforms -NOTIFY_SCHEMA = Schema( - { - Exclusive("notify", "config"): { - Required("recipients"): [Any(*_recipients)], - Optional("content"): { - Optional("email"): { - Optional("subject"): str, - Optional("content"): str, - Optional("link"): { - Required("text"): str, - Required("href"): str, - }, - }, - Optional("matrix"): { - Optional("body"): str, - Optional("formatted-body"): str, - Optional("format"): str, - Optional("msg-type"): str, - }, - Optional("slack"): { - Optional("text"): str, - Optional("blocks"): list, - Optional("attachments"): list, - }, - }, - }, - # Continue supporting the legacy schema for backwards compat. - Exclusive("notifications", "config"): { - Required("emails"): optionally_keyed_by("project", "level", [str]), - Required("subject"): str, - Optional("message"): str, - Optional("status-types"): [_status_type], - }, - }, - extra=ALLOW_EXTRA, -) +class NotifySchema( + msgspec.Struct, kw_only=True, omit_defaults=True, tag_field="notify_type" +): + """Schema for notify transforms. + + Note: This schema allows either 'notify' or 'notifications' field, + but not both. The validation will be done in __post_init__. + """ + + notify: Optional[NotifyConfig] = None + notifications: Optional[LegacyNotificationsConfig] = None + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + def __post_init__(self): + # Ensure only one of notify or notifications is present + if self.notify and self.notifications: + raise msgspec.ValidationError( + "Cannot specify both 'notify' and 'notifications'" + ) + + # Validate recipients if notify is present + if self.notify and self.notify.recipients: + validated_recipients = [] + for r in self.notify.recipients: + try: + # Try to convert to one of the recipient types + if r.get("type") == "email": + validated_recipients.append(msgspec.convert(r, EmailRecipient)) + elif r.get("type") == "matrix-room": + validated_recipients.append( + msgspec.convert(r, MatrixRoomRecipient) + ) + elif r.get("type") == "pulse": + validated_recipients.append(msgspec.convert(r, PulseRecipient)) + elif r.get("type") == "slack-channel": + validated_recipients.append( + msgspec.convert(r, SlackChannelRecipient) + ) + else: + raise msgspec.ValidationError( + f"Unknown recipient type: {r.get('type')}" + ) + except msgspec.ValidationError: + # Keep as dict if it contains keyed-by + validated_recipients.append(r) + self.notify.recipients = validated_recipients + + +# Backward compatibility +NOTIFY_SCHEMA = NotifySchema transforms = TransformSequence() -transforms.add_validate(NOTIFY_SCHEMA) +transforms.add_validate(NotifySchema) def _convert_legacy(config, legacy, label): diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 29406e7cd..1e201a02a 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -11,13 +11,13 @@ import copy import logging -from textwrap import dedent +from typing import Any, Dict, List, Union +from typing import Optional as TOptional -from voluptuous import Exclusive, Extra, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence from taskgraph.transforms.cached_tasks import order_tasks -from taskgraph.transforms.task import task_description_schema from taskgraph.util import json from taskgraph.util import path as mozpath from taskgraph.util.python_path import import_sibling_modules @@ -27,143 +27,85 @@ logger = logging.getLogger(__name__) -# Fetches may be accepted in other transforms and eventually passed along -# to a `task` (eg: from_deps). Defining this here allows them to reuse -# the schema and avoid duplication. -fetches_schema = { - Required("artifact"): str, - Optional("dest"): str, - Optional("extract"): bool, - Optional("verify-hash"): bool, -} - -#: Schema for a run transforms -run_description_schema = Schema( - { - Optional( - "name", - description=dedent( - """ - The name of the task. At least one of 'name' or 'label' must be - specified. If 'label' is not provided, it will be generated from - the 'name' by prepending the kind. - """ - ), - ): str, - Optional( - "label", - description=dedent( - """ - The label of the task. At least one of 'name' or 'label' must be - specified. If 'label' is not provided, it will be generated from - the 'name' by prepending the kind. - """ - ), - ): str, - # the following fields are passed directly through to the task description, - # possibly modified by the run implementation. See - # taskcluster/taskgraph/transforms/task.py for the schema details. - Required("description"): task_description_schema["description"], - Optional("priority"): task_description_schema["priority"], - Optional("attributes"): task_description_schema["attributes"], - Optional("task-from"): task_description_schema["task-from"], - Optional("dependencies"): task_description_schema["dependencies"], - Optional("soft-dependencies"): task_description_schema["soft-dependencies"], - Optional("if-dependencies"): task_description_schema["if-dependencies"], - Optional("requires"): task_description_schema["requires"], - Optional("deadline-after"): task_description_schema["deadline-after"], - Optional("expires-after"): task_description_schema["expires-after"], - Optional("routes"): task_description_schema["routes"], - Optional("scopes"): task_description_schema["scopes"], - Optional("tags"): task_description_schema["tags"], - Optional("extra"): task_description_schema["extra"], - Optional("treeherder"): task_description_schema["treeherder"], - Optional("index"): task_description_schema["index"], - Optional("run-on-projects"): task_description_schema["run-on-projects"], - Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"], - Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"], - Optional("shipping-phase"): task_description_schema["shipping-phase"], - Optional("always-target"): task_description_schema["always-target"], - Exclusive("optimization", "optimization"): task_description_schema[ - "optimization" - ], - Optional("needs-sccache"): task_description_schema["needs-sccache"], - Exclusive( - "when", - "optimization", - description=dedent( - """ - The "when" section contains descriptions of the circumstances under - which this task should be included in the task graph. This will be - converted into an optimization, so it cannot be specified in a run - description that also gives 'optimization'. - """ - ), - ): { - Optional( - "files-changed", - description=dedent( - """ - This task only needs to be run if a file matching one of the given - patterns has changed in the push. The patterns use the mozpack - match function (python/mozbuild/mozpack/path.py). - """ - ), - ): [str], - }, - Optional( - "fetches", - description=dedent( - """ - A list of artifacts to install from 'fetch' tasks. - """ - ), - ): { - str: [ - str, - fetches_schema, - ], - }, - Required( - "run", - description=dedent( - """ - A description of how to run this task. - """ - ), - ): { - Required( - "using", - description=dedent( - """ - The key to a run implementation in a peer module to this one. - """ - ), - ): str, - Optional( - "workdir", - description=dedent( - """ - Base work directory used to set up the task. - """ - ), - ): str, - # Any remaining content is verified against that run implementation's - # own schema. - Extra: object, - }, - Required("worker-type"): task_description_schema["worker-type"], - Optional( - "worker", - description=dedent( - """ - This object will be passed through to the task description, with additions - provided by the task's run-using function. - """ - ), - ): dict, - } -) + +# Fetches schema using msgspec +class FetchesSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): + """Schema for fetch configuration.""" + + artifact: str + dest: TOptional[str] = None + extract: bool = True + verify_hash: bool = True + + +# When configuration using msgspec +class WhenConfig(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): + """Configuration for when a task should be included.""" + + files_changed: List[str] = msgspec.field(default_factory=list) + + +# Run configuration using msgspec +class RunConfig(msgspec.Struct, kw_only=True, omit_defaults=True): + """Configuration for how to run a task.""" + + using: str + workdir: TOptional[str] = None + # Allow any extra fields for run implementation-specific config + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +# Run description schema using msgspec +class RunDescriptionSchema( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Schema for run transforms.""" + + # Task naming + name: TOptional[str] = None + label: TOptional[str] = None + + # Required fields + description: str + run: RunConfig + worker_type: str + + # Optional fields from task description + priority: TOptional[str] = None + attributes: Dict[str, Any] = msgspec.field(default_factory=dict) + task_from: TOptional[str] = None + dependencies: Dict[str, Any] = msgspec.field(default_factory=dict) + soft_dependencies: List[str] = msgspec.field(default_factory=list) + if_dependencies: List[str] = msgspec.field(default_factory=list) + requires: str = "all-completed" + deadline_after: TOptional[str] = None + expires_after: TOptional[str] = None + routes: List[str] = msgspec.field(default_factory=list) + scopes: List[str] = msgspec.field(default_factory=list) + tags: Dict[str, str] = msgspec.field(default_factory=dict) + extra: Dict[str, Any] = msgspec.field(default_factory=dict) + treeherder: Any = None + index: Any = None + run_on_projects: Any = None + run_on_tasks_for: List[str] = msgspec.field(default_factory=list) + run_on_git_branches: List[str] = msgspec.field(default_factory=list) + shipping_phase: TOptional[str] = None + always_target: bool = False + optimization: Any = None + needs_sccache: bool = False + when: TOptional[WhenConfig] = None + fetches: Dict[str, List[Union[str, FetchesSchema]]] = msgspec.field( + default_factory=dict + ) + worker: Dict[str, Any] = msgspec.field(default_factory=dict) + + +# Use the msgspec class directly for fetches +fetches_schema = FetchesSchema + +#: Schema for a run transforms - now using msgspec +run_description_schema = Schema(RunDescriptionSchema) + transforms = TransformSequence() transforms.add_validate(run_description_schema) @@ -456,8 +398,15 @@ def wrap(func): return wrap +# Simple schema for always-optimized +class AlwaysOptimizedRunSchema(msgspec.Struct, kw_only=True): + """Schema for always-optimized run tasks.""" + + using: str = "always-optimized" + + @run_task_using( - "always-optimized", "always-optimized", Schema({"using": "always-optimized"}) + "always-optimized", "always-optimized", Schema(AlwaysOptimizedRunSchema) ) def always_optimized(config, task, taskdesc): pass diff --git a/src/taskgraph/transforms/run/index_search.py b/src/taskgraph/transforms/run/index_search.py index 7436f010f..53ee34af0 100644 --- a/src/taskgraph/transforms/run/index_search.py +++ b/src/taskgraph/transforms/run/index_search.py @@ -8,26 +8,25 @@ phase will replace the task with the task from the other graph. """ -from voluptuous import Required +from typing import List + +import msgspec from taskgraph.transforms.base import TransformSequence from taskgraph.transforms.run import run_task_using -from taskgraph.util.schema import Schema transforms = TransformSequence() #: Schema for run.using index-search -run_task_schema = Schema( - { - Required("using"): "index-search", - Required( - "index-search", - "A list of indexes in decreasing order of priority at which to lookup for this " - "task. This is interpolated with the graph parameters.", - ): [str], - } -) +class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab"): + using: str + # A list of indexes in decreasing order of priority at which to lookup for this + # task. This is interpolated with the graph parameters. + index_search: List[str] + + +run_task_schema = RunTaskSchema @run_task_using("always-optimized", "index-search", schema=run_task_schema) diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index 0429ccae7..b0151e705 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -7,18 +7,16 @@ import dataclasses import os -from textwrap import dedent +from typing import Dict, List, Literal, Optional, Union -from voluptuous import Any, Optional, Required +import msgspec from taskgraph.transforms.run import run_task_using from taskgraph.transforms.run.common import ( support_caches, support_vcs_checkout, ) -from taskgraph.transforms.task import taskref_or_string from taskgraph.util import path, taskcluster -from taskgraph.util.caches import CACHES from taskgraph.util.schema import Schema EXEC_COMMANDS = { @@ -28,99 +26,52 @@ #: Schema for run.using run_task -run_task_schema = Schema( - { - Required( - "using", - description=dedent( - """ - Specifies the task type. Must be 'run-task'. - """.lstrip() - ), - ): "run-task", - Optional( - "use-caches", - description=dedent( - """ - Specifies which caches to use. May take a boolean in which case either all - (True) or no (False) caches will be used. Alternatively, it can accept a - list of caches to enable. Defaults to only the checkout cache enabled. - """.lstrip() - ), - ): Any(bool, list(CACHES.keys())), - Required( - "checkout", - description=dedent( - """ - If true (the default), perform a checkout on the worker. Can also be a - dictionary specifying explicit checkouts. - """.lstrip() - ), - ): Any(bool, {str: dict}), - Optional( - "cwd", - description=dedent( - """ - Path to run command in. If a checkout is present, the path to the checkout - will be interpolated with the key `checkout`. - """.lstrip() - ), - ): str, - Required( - "sparse-profile", - description=dedent( - """ - The sparse checkout profile to use. Value is the filename relative to the - directory where sparse profiles are defined (build/sparse-profiles/). - """.lstrip() - ), - ): Any(str, None), - Required( - "command", - description=dedent( - """ - The command arguments to pass to the `run-task` script, after the checkout - arguments. If a list, it will be passed directly; otherwise it will be - included in a single argument to the command specified by `exec-with`. - """.lstrip() - ), - ): Any([taskref_or_string], taskref_or_string), - Optional( - "exec-with", - description=dedent( - """ - Specifies what to execute the command with in the event the command is a - string. - """.lstrip() - ), - ): Any(*list(EXEC_COMMANDS)), - Optional( - "run-task-command", - description=dedent( - """ - Command used to invoke the `run-task` script. Can be used if the script - or Python installation is in a non-standard location on the workers. - """.lstrip() - ), - ): list, - Required( - "workdir", - description=dedent( - """ - Base work directory used to set up the task. - """.lstrip() - ), - ): str, - Optional( - "run-as-root", - description=dedent( - """ - Whether to run as root. Defaults to False. - """.lstrip() - ), - ): bool, - } -) +class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): + """ + Schema for run.using run_task. + """ + + # Specifies the task type. Must be 'run-task'. + using: Literal["run-task"] + + # Specifies which caches to use. May take a boolean in which case either all + # (True) or no (False) caches will be used. Alternatively, it can accept a + # list of caches to enable. Defaults to only the checkout cache enabled. + use_caches: Optional[Union[bool, List[str]]] = None + + # If true (the default), perform a checkout on the worker. Can also be a + # dictionary specifying explicit checkouts. + checkout: Union[bool, Dict[str, dict]] = True + + # Path to run command in. If a checkout is present, the path to the checkout + # will be interpolated with the key `checkout`. + cwd: Optional[str] = None + + # The sparse checkout profile to use. Value is the filename relative to the + # directory where sparse profiles are defined (build/sparse-profiles/). + sparse_profile: Optional[str] = None + + # The command arguments to pass to the `run-task` script, after the checkout + # arguments. If a list, it will be passed directly; otherwise it will be + # included in a single argument to the command specified by `exec-with`. + command: Union[List[Union[str, Dict[str, str]]], str, Dict[str, str]] + + # Specifies what to execute the command with in the event the command is a + # string. + exec_with: Optional[Literal["bash", "powershell"]] = None + + # Command used to invoke the `run-task` script. Can be used if the script + # or Python installation is in a non-standard location on the workers. + run_task_command: Optional[List[str]] = None + + # Base work directory used to set up the task. + workdir: str + + # Whether to run as root. Defaults to False. + run_as_root: bool = False + + +run_task_schema = Schema(RunTaskSchema) def common_setup(config, task, taskdesc, command): diff --git a/src/taskgraph/transforms/run/toolchain.py b/src/taskgraph/transforms/run/toolchain.py index 669bcd812..42418c116 100644 --- a/src/taskgraph/transforms/run/toolchain.py +++ b/src/taskgraph/transforms/run/toolchain.py @@ -5,9 +5,9 @@ Support for running toolchain-building tasks via dedicated scripts """ -from textwrap import dedent +from typing import Any, Dict, List, Literal, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec import taskgraph from taskgraph.transforms.run import configure_taskdesc_for_run, run_task_using @@ -18,96 +18,52 @@ ) from taskgraph.util import path as mozpath from taskgraph.util.hash import hash_paths -from taskgraph.util.schema import Schema from taskgraph.util.shell import quote as shell_quote CACHE_TYPE = "toolchains.v3" + #: Schema for run.using toolchain -toolchain_run_schema = Schema( - { - Required( - "using", - description=dedent( - """ - Specifies the run type. Must be "toolchain-script". - """ - ), - ): "toolchain-script", - Required( - "script", - description=dedent( - """ - The script (in taskcluster/scripts/misc) to run. - """ - ), - ): str, - Optional( - "arguments", - description=dedent( - """ - Arguments to pass to the script. - """ - ), - ): [str], - Required( - "sparse-profile", - description=dedent( - """ - Sparse profile to give to checkout using `run-task`. If given, - a filename in `build/sparse-profiles`. Defaults to - "toolchain-build", i.e., to - `build/sparse-profiles/toolchain-build`. If `None`, instructs - `run-task` to not use a sparse profile at all. - """ - ), - ): Any(str, None), - Optional( - "resources", - description=dedent( - """ - Paths/patterns pointing to files that influence the outcome of - a toolchain build. - """ - ), - ): [str], - Required( - "toolchain-artifact", - description=dedent( - """ - Path to the artifact produced by the toolchain task. - """ - ), - ): str, - Optional( - "toolchain-alias", - description=dedent( - """ - An alias that can be used instead of the real toolchain task name in - fetch stanzas for tasks. - """ - ), - ): Any(str, [str]), - Optional( - "toolchain-env", - description=dedent( - """ - Additional env variables to add to the worker when using this - toolchain. - """ - ), - ): {str: object}, - Required( - "workdir", - description=dedent( - """ - Base work directory used to set up the task. - """ - ), - ): str, - }, - extra=ALLOW_EXTRA, -) +class ToolchainRunSchema( + msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" +): + """ + Schema for toolchain-script run configuration. + + Attributes: + using: Specifies the run type. Must be "toolchain-script". + script: The script (in taskcluster/scripts/misc) to run. + arguments: Arguments to pass to the script. + sparse_profile: Sparse profile to give to checkout using `run-task`. If given, + a filename in `build/sparse-profiles`. Defaults to + "toolchain-build", i.e., to + `build/sparse-profiles/toolchain-build`. If `None`, instructs + `run-task` to not use a sparse profile at all. + resources: Paths/patterns pointing to files that influence the outcome of + a toolchain build. + toolchain_artifact: Path to the artifact produced by the toolchain task. + toolchain_alias: An alias that can be used instead of the real toolchain task name in + fetch stanzas for tasks. + toolchain_env: Additional env variables to add to the worker when using this + toolchain. + workdir: Base work directory used to set up the task. + """ + + using: Literal["toolchain-script"] + script: str + sparse_profile: Optional[str] # Can be None to skip sparse profile + toolchain_artifact: str + workdir: str + arguments: Optional[List[str]] = None + resources: Optional[List[str]] = None + toolchain_alias: Optional[Union[str, List[str]]] = None + toolchain_env: Optional[Dict[str, Any]] = None + # Allow extra fields + _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") + + +# Backward compatibility +toolchain_run_schema = ToolchainRunSchema def get_digest_data(config, run, taskdesc): @@ -213,7 +169,7 @@ def common_toolchain(config, task, taskdesc, is_docker): @run_task_using( "docker-worker", "toolchain-script", - schema=toolchain_run_schema, + schema=ToolchainRunSchema, defaults=toolchain_defaults, ) def docker_worker_toolchain(config, task, taskdesc): @@ -223,7 +179,7 @@ def docker_worker_toolchain(config, task, taskdesc): @run_task_using( "generic-worker", "toolchain-script", - schema=toolchain_run_schema, + schema=ToolchainRunSchema, defaults=toolchain_defaults, ) def generic_worker_toolchain(config, task, taskdesc): diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 09b8836d9..c238a7cd2 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -15,21 +15,21 @@ import time from copy import deepcopy from dataclasses import dataclass -from textwrap import dedent -from typing import Callable +from typing import Any as TAny +from typing import Callable, Dict, List, Literal, Union +from typing import Optional as TOptional -from voluptuous import All, Any, Extra, NotIn, Optional, Required +import msgspec from taskgraph import MAX_DEPENDENCIES from taskgraph.transforms.base import TransformSequence from taskgraph.util.hash import hash_path from taskgraph.util.keyed_by import evaluate_keyed_by from taskgraph.util.schema import ( - OptimizationSchema, + Optional, + Required, Schema, - optionally_keyed_by, resolve_keyed_by, - taskref_or_string, validate_schema, ) from taskgraph.util.treeherder import split_symbol, treeherder_defaults @@ -49,343 +49,103 @@ def _run_task_suffix(): return hash_path(RUN_TASK)[0:20] -#: Schema for the task transforms -task_description_schema = Schema( - { - Required( - "label", - description=dedent( - """ - The label for this task. - """.lstrip() - ), - ): str, - Required( - "description", - description=dedent( - """ - Description of the task (for metadata). - """.lstrip() - ), - ): str, - Optional( - "attributes", - description=dedent( - """ - Attributes for this task. - """.lstrip() - ), - ): {str: object}, - Optional( - "task-from", - description=dedent( - """ - Relative path (from config.path) to the file task was defined - in. - """.lstrip() - ), - ): str, - Optional( - "dependencies", - description=dedent( - """ - Dependencies of this task, keyed by name; these are passed - through verbatim and subject to the interpretation of the - Task's get_dependencies method. - """.lstrip() - ), - ): { - All( - str, - NotIn( - ["self", "decision"], - "Can't use 'self` or 'decision' as dependency names.", - ), - ): object, - }, - Optional( - "priority", - description=dedent( - """ - Priority of the task. - """.lstrip() - ), - ): Any( - "highest", - "very-high", - "high", - "medium", - "low", - "very-low", - "lowest", - ), - Optional( - "soft-dependencies", - description=dedent( - """ - Soft dependencies of this task, as a list of task labels. - """.lstrip() - ), - ): [str], - Optional( - "if-dependencies", - description=dedent( - """ - Dependencies that must be scheduled in order for this task to run. - """.lstrip() - ), - ): [str], - Optional( - "requires", - description=dedent( - """ - Specifies the condition for task execution. - """.lstrip() - ), - ): Any("all-completed", "all-resolved"), - Optional( - "expires-after", - description=dedent( - """ - Expiration time relative to task creation, with units (e.g., - '14 days'). Defaults are set based on the project. - """.lstrip() - ), - ): str, - Optional( - "deadline-after", - description=dedent( - """ - Deadline time relative to task creation, with units (e.g., - '14 days'). Defaults are set based on the project. - """.lstrip() - ), - ): str, - Optional( - "routes", - description=dedent( - """ - Custom routes for this task; the default treeherder routes will - be added automatically. - """.lstrip() - ), - ): [str], - Optional( - "scopes", - description=dedent( - """ - Custom scopes for this task; any scopes required for the worker - will be added automatically. The following parameters will be - substituted in each scope: - - {level} -- the scm level of this push - {project} -- the project of this push. - """.lstrip() - ), - ): [str], - Optional( - "tags", - description=dedent( - """ - Tags for this task. - """.lstrip() - ), - ): {str: str}, - Optional( - "extra", - description=dedent( - """ - Custom 'task.extra' content. - """.lstrip() - ), - ): {str: object}, - Optional( - "treeherder", - description=dedent( - """ - Treeherder-related information. Can be a simple `true` to - auto-generate information or a dictionary with specific keys. - """.lstrip() - ), - ): Any( - True, - { - "symbol": Optional( - str, - description=dedent( - """ - Either a bare symbol, or 'grp(sym)'. Defaults to the - uppercased first letter of each section of the kind - (delimited by '-') all smooshed together. - """.lstrip() - ), - ), - "kind": Optional( - Any("build", "test", "other"), - description=dedent( - """ - The task kind. Defaults to 'build', 'test', or 'other' - based on the kind name. - """.lstrip() - ), - ), - "tier": Optional( - int, - description=dedent( - """ - Tier for this task. Defaults to 1. - """.lstrip() - ), - ), - "platform": Optional( - str, - description=dedent( - """ - Task platform in the form platform/collection, used to - set treeherder.machine.platform and - treeherder.collection or treeherder.labels Defaults to - 'default/opt'. - """.lstrip() - ), - ), - }, - ), - Optional( - "index", - description=dedent( - """ - Information for indexing this build so its artifacts can be - discovered. If omitted, the build will not be indexed. - """.lstrip() - ), - ): { - # the name of the product this build produces - "product": str, - # the names to use for this task in the TaskCluster index - "job-name": str, - # Type of gecko v2 index to use - "type": str, - # The rank that the task will receive in the TaskCluster - # index. A newly completed task supersedes the currently - # indexed task iff it has a higher rank. If unspecified, - # 'by-tier' behavior will be used. - "rank": Any( - # Rank is equal the timestamp of the build_date for tier-1 - # tasks, and zero for non-tier-1. This sorts tier-{2,3} - # builds below tier-1 in the index. - "by-tier", - # Rank is given as an integer constant (e.g. zero to make - # sure a task is last in the index). - int, - # Rank is equal to the timestamp of the build_date. This - # option can be used to override the 'by-tier' behavior - # for non-tier-1 tasks. - "build_date", - ), - }, - Optional( - "run-on-projects", - description=dedent( - """ - The `run_on_projects` attribute, defaulting to 'all'. Dictates - the projects on which this task should be included in the - target task set. See the attributes documentation for details. - """.lstrip() - ), - ): optionally_keyed_by("build-platform", [str]), - Optional( - "run-on-tasks-for", - description=dedent( - """ - Specifies tasks for which this task should run. - """.lstrip() - ), - ): [str], - Optional( - "run-on-git-branches", - description=dedent( - """ - Specifies git branches for which this task should run. - """.lstrip() - ), - ): [str], - Optional( - "shipping-phase", - description=dedent( - """ - The `shipping_phase` attribute, defaulting to None. Specifies - the release promotion phase that this task belongs to. - """.lstrip() - ), - ): Any( - None, - "build", - "promote", - "push", - "ship", - ), - Required( - "always-target", - description=dedent( - """ - The `always-target` attribute will cause the task to be - included in the target_task_graph regardless of filtering. - - Tasks included in this manner will be candidates for - optimization even when `optimize_target_tasks` is False, unless - the task was also explicitly chosen by the target_tasks method. - """.lstrip() - ), - ): bool, - Required( - "optimization", - description=dedent( - """ - Optimization to perform on this task during the optimization - phase. Defined in taskcluster/taskgraph/optimize.py. - """.lstrip() - ), - ): OptimizationSchema, - Required( - "worker-type", - description=dedent( - """ - The provisioner-id/worker-type for the task. The following - parameters will be substituted in this string: - - {level} -- the scm level of this push. - """.lstrip() - ), - ): str, - Required( - "needs-sccache", - description=dedent( - """ - Whether the task should use sccache compiler caching. - """.lstrip() - ), - ): bool, - Optional( - "worker", - description=dedent( - """ - Information specific to the worker implementation that will run - this task. - """.lstrip() - ), - ): { - Required( - "implementation", - description=dedent( - """ - The worker implementation type. - """.lstrip() - ), - ): str, - Extra: object, - }, - } -) +# Task Description schema using msgspec +class TaskDescriptionTreeherder(msgspec.Struct, kw_only=True, omit_defaults=True): + """Treeherder-related information for a task.""" + + symbol: TOptional[str] = None + kind: TOptional[Literal["build", "test", "other"]] = None + tier: TOptional[int] = None + platform: TOptional[str] = None + + +class TaskDescriptionIndex( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Index information for a task.""" + + # the name of the product this build produces + product: str + # the names to use for this task in the TaskCluster index + job_name: str + # Type of gecko v2 index to use + type: str = "generic" # Default to generic as that's what's commonly used + # The rank that the task will receive in the TaskCluster index + rank: Union[Literal["by-tier", "build_date"], int] = "by-tier" + + +class TaskDescriptionWorker(msgspec.Struct, kw_only=True, omit_defaults=True): + """Worker configuration for a task.""" + + implementation: str + # Allow any extra fields for worker-specific configuration + __extras__: Dict[str, TAny] = msgspec.field(default_factory=dict) + + +class TaskDescriptionSchema( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Schema for task descriptions.""" + + # The label for this task + label: str + # Description of the task (for metadata) + description: str + # Attributes for this task + attributes: Dict[str, TAny] = msgspec.field(default_factory=dict) + # Relative path (from config.path) to the file task was defined in + task_from: TOptional[str] = None + # Dependencies of this task, keyed by name + dependencies: Dict[str, TAny] = msgspec.field(default_factory=dict) + # Priority of the task + priority: TOptional[ + Literal["highest", "very-high", "high", "medium", "low", "very-low", "lowest"] + ] = None + # Soft dependencies of this task, as a list of task labels + soft_dependencies: List[str] = msgspec.field(default_factory=list) + # Dependencies that must be scheduled in order for this task to run + if_dependencies: List[str] = msgspec.field(default_factory=list) + # Specifies the condition for task execution + requires: Literal["all-completed", "all-resolved"] = "all-completed" + # Expiration time relative to task creation + expires_after: TOptional[str] = None + # Deadline time relative to task creation + deadline_after: TOptional[str] = None + # Custom routes for this task + routes: List[str] = msgspec.field(default_factory=list) + # Custom scopes for this task + scopes: List[str] = msgspec.field(default_factory=list) + # Tags for this task + tags: Dict[str, str] = msgspec.field(default_factory=dict) + # Custom 'task.extra' content + extra: Dict[str, TAny] = msgspec.field(default_factory=dict) + # Treeherder-related information + treeherder: Union[bool, TaskDescriptionTreeherder, None] = None + # Information for indexing this build + index: TOptional[TaskDescriptionIndex] = None + # The `run_on_projects` attribute + run_on_projects: TAny = None # This uses optionally_keyed_by, so we need Any + # Specifies tasks for which this task should run + run_on_tasks_for: List[str] = msgspec.field(default_factory=list) + # Specifies git branches for which this task should run + run_on_git_branches: List[str] = msgspec.field(default_factory=list) + # The `shipping_phase` attribute + shipping_phase: TOptional[Literal["build", "promote", "push", "ship"]] = None + # The `always-target` attribute + always_target: bool = False + # Optimization to perform on this task + optimization: TAny = None # Uses OptimizationSchema which has custom validation + # The provisioner-id/worker-type for the task + worker_type: str + # Whether the task should use sccache compiler caching + needs_sccache: bool = False + # Information specific to the worker implementation + worker: TOptional[TaskDescriptionWorker] = None + + +#: Schema for the task transforms - now using msgspec +task_description_schema = Schema(TaskDescriptionSchema) + TC_TREEHERDER_SCHEMA_URL = ( "https://github.com/taskcluster/taskcluster-treeherder/" @@ -437,9 +197,20 @@ class PayloadBuilder: def payload_builder(name, schema): - schema = Schema({Required("implementation"): name, Optional("os"): str}).extend( - schema - ) + """ + Decorator for registering payload builders. + + Supports both dict schemas and msgspec.Struct types. + """ + # Handle msgspec schemas + if isinstance(schema, type) and issubclass(schema, msgspec.Struct): + # Wrap msgspec schema in our compatibility Schema class + schema = Schema(schema) + else: + # Traditional dict schema - extend it with required fields + schema = Schema({Required("implementation"): name, Optional("os"): str}).extend( + schema + ) def wrap(func): assert name not in payload_builders, f"duplicate payload builder name {name}" @@ -474,86 +245,74 @@ def verify_index(config, index): raise Exception(UNSUPPORTED_INDEX_PRODUCT_ERROR.format(product=product)) -@payload_builder( - "docker-worker", - schema={ - Required("os"): "linux", - # For tasks that will run in docker-worker, this is the name of the docker - # image or in-tree docker image to run the task in. If in-tree, then a - # dependency will be created automatically. This is generally - # `desktop-test`, or an image that acts an awful lot like it. - Required("docker-image"): Any( - # a raw Docker image path (repo/image:tag) - str, - # an in-tree generated docker image (from `taskcluster/docker/`) - {"in-tree": str}, - # an indexed docker image - {"indexed": str}, - ), - # worker features that should be enabled - Required("relengapi-proxy"): bool, - Required("chain-of-trust"): bool, - Required("taskcluster-proxy"): bool, - Required("allow-ptrace"): bool, - Required("loopback-video"): bool, - Required("loopback-audio"): bool, - Required("docker-in-docker"): bool, # (aka 'dind') - Required("privileged"): bool, - # Paths to Docker volumes. - # - # For in-tree Docker images, volumes can be parsed from Dockerfile. - # This only works for the Dockerfile itself: if a volume is defined in - # a base image, it will need to be declared here. Out-of-tree Docker - # images will also require explicit volume annotation. - # - # Caches are often mounted to the same path as Docker volumes. In this - # case, they take precedence over a Docker volume. But a volume still - # needs to be declared for the path. - Optional("volumes"): [str], - # caches to set up for the task - Optional("caches"): [ - { - # only one type is supported by any of the workers right now - "type": "persistent", - # name of the cache, allowing reuse by subsequent tasks naming the - # same cache - "name": str, - # location in the task image where the cache will be mounted - "mount-point": str, - # Whether the cache is not used in untrusted environments - # (like the Try repo). - Optional("skip-untrusted"): bool, - } - ], - # artifacts to extract from the task image after completion - Optional("artifacts"): [ - { - # type of artifact -- simple file, or recursive directory, - # or a volume mounted directory. - "type": Any("file", "directory", "volume"), - # task image path from which to read artifact - "path": str, - # name of the produced artifact (root of the names for - # type=directory) - "name": str, - } - ], - # environment variables - Required("env"): {str: taskref_or_string}, - # the command to run; if not given, docker-worker will default to the - # command in the docker image - Optional("command"): [taskref_or_string], - # the maximum time to run, in seconds - Required("max-run-time"): int, - # the exit status code(s) that indicates the task should be retried - Optional("retry-exit-status"): [int], - # the exit status code(s) that indicates the caches used by the task - # should be purged - Optional("purge-caches-exit-status"): [int], - # Whether any artifacts are assigned to this worker - Optional("skip-artifacts"): bool, - }, -) +# Docker Worker schema using msgspec +class DockerWorkerCacheConfig( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Cache configuration for docker-worker.""" + + # only one type is supported by any of the workers right now + type: Literal["persistent"] = "persistent" + # name of the cache, allowing reuse by subsequent tasks naming the same cache + name: str + # location in the task image where the cache will be mounted + mount_point: str + # Whether the cache is not used in untrusted environments (like the Try repo). + skip_untrusted: bool = False + + +class DockerWorkerArtifactConfig(msgspec.Struct, kw_only=True, omit_defaults=True): + """Artifact configuration for docker-worker.""" + + # type of artifact -- simple file, or recursive directory, or a volume mounted directory. + type: Literal["file", "directory", "volume"] + # task image path from which to read artifact + path: str + # name of the produced artifact (root of the names for type=directory) + name: str + + +class DockerWorkerPayloadSchema( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Schema for docker-worker payload.""" + + implementation: str + os: Literal["linux"] = "linux" + # For tasks that will run in docker-worker, this is the name of the docker + # image or in-tree docker image to run the task in. + docker_image: Union[str, Dict[str, str]] + # worker features that should be enabled + relengapi_proxy: bool = False + chain_of_trust: bool = False + taskcluster_proxy: bool = False + allow_ptrace: bool = False + loopback_video: bool = False + loopback_audio: bool = False + docker_in_docker: bool = False # (aka 'dind') + privileged: bool = False + # Paths to Docker volumes. + volumes: List[str] = msgspec.field(default_factory=list) + # caches to set up for the task + caches: TOptional[List[DockerWorkerCacheConfig]] = None + # artifacts to extract from the task image after completion + artifacts: TOptional[List[DockerWorkerArtifactConfig]] = None + # environment variables + env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) + # the command to run; if not given, docker-worker will default to the + # command in the docker image + command: TOptional[List[Union[str, Dict[str, str]]]] = None + # the maximum time to run, in seconds + max_run_time: int + # the exit status code(s) that indicates the task should be retried + retry_exit_status: TOptional[List[int]] = None + # the exit status code(s) that indicates the caches used by the task should be purged + purge_caches_exit_status: TOptional[List[int]] = None + # Whether any artifacts are assigned to this worker + skip_artifacts: bool = False + + +@payload_builder("docker-worker", DockerWorkerPayloadSchema) def build_docker_worker_payload(config, task, task_def): worker = task["worker"] level = int(config.params["level"]) @@ -766,89 +525,85 @@ def build_docker_worker_payload(config, task, task_def): check_caches_are_volumes(task) -@payload_builder( - "generic-worker", - schema={ - Required("os"): Any("windows", "macosx", "linux", "linux-bitbar"), - # see http://schemas.taskcluster.net/generic-worker/v1/payload.json - # and https://docs.taskcluster.net/reference/workers/generic-worker/payload - # command is a list of commands to run, sequentially - # on Windows, each command is a string, on OS X and Linux, each command is - # a string array - Required("command"): Any( - [taskref_or_string], - [[taskref_or_string]], # Windows # Linux / OS X - ), - # artifacts to extract from the task image after completion; note that artifacts - # for the generic worker cannot have names - Optional("artifacts"): [ - { - # type of artifact -- simple file, or recursive directory - "type": Any("file", "directory"), - # filesystem path from which to read artifact - "path": str, - # if not specified, path is used for artifact name - Optional("name"): str, - } - ], - # Directories and/or files to be mounted. - # The actual allowed combinations are stricter than the model below, - # but this provides a simple starting point. - # See https://docs.taskcluster.net/reference/workers/generic-worker/payload - Optional("mounts"): [ - { - # A unique name for the cache volume, implies writable cache directory - # (otherwise mount is a read-only file or directory). - Optional("cache-name"): str, - # Optional content for pre-loading cache, or mandatory content for - # read-only file or directory. Pre-loaded content can come from either - # a task artifact or from a URL. - Optional("content"): { - # *** Either (artifact and task-id) or url must be specified. *** - # Artifact name that contains the content. - Optional("artifact"): str, - # Task ID that has the artifact that contains the content. - Optional("task-id"): taskref_or_string, - # URL that supplies the content in response to an unauthenticated - # GET request. - Optional("url"): str, - }, - # *** Either file or directory must be specified. *** - # If mounting a cache or read-only directory, the filesystem location of - # the directory should be specified as a relative path to the task - # directory here. - Optional("directory"): str, - # If mounting a file, specify the relative path within the task - # directory to mount the file (the file will be read only). - Optional("file"): str, - # Required if and only if `content` is specified and mounting a - # directory (not a file). This should be the archive format of the - # content (either pre-loaded cache or read-only directory). - Optional("format"): Any("rar", "tar.bz2", "tar.gz", "zip"), - } - ], - # environment variables - Required("env"): {str: taskref_or_string}, - # the maximum time to run, in seconds - Required("max-run-time"): int, - # the exit status code(s) that indicates the task should be retried - Optional("retry-exit-status"): [int], - # the exit status code(s) that indicates the caches used by the task - # should be purged - Optional("purge-caches-exit-status"): [int], - # os user groups for test task workers - Optional("os-groups"): [str], - # feature for test task to run as administarotr - Optional("run-as-administrator"): bool, - # feature for task to run as current OS user - Optional("run-task-as-current-user"): bool, - # optional features - Required("chain-of-trust"): bool, - Optional("taskcluster-proxy"): bool, - # Whether any artifacts are assigned to this worker - Optional("skip-artifacts"): bool, - }, -) +# Generic Worker schema using msgspec +class GenericWorkerArtifactConfig(msgspec.Struct, kw_only=True, omit_defaults=True): + """Artifact configuration for generic-worker.""" + + # type of artifact -- simple file, or recursive directory + type: Literal["file", "directory"] + # filesystem path from which to read artifact + path: str + # if not specified, path is used for artifact name + name: TOptional[str] = None + + +class GenericWorkerMountContent( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Mount content configuration for generic-worker.""" + + # Artifact name that contains the content. + artifact: TOptional[str] = None + # Task ID that has the artifact that contains the content. + task_id: TOptional[Union[str, Dict[str, str]]] = None + # URL that supplies the content in response to an unauthenticated GET request. + url: TOptional[str] = None + + +class GenericWorkerMountConfig( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Mount configuration for generic-worker.""" + + # A unique name for the cache volume, implies writable cache directory + cache_name: TOptional[str] = None + # Optional content for pre-loading cache, or mandatory content for read-only file or directory + content: TOptional[GenericWorkerMountContent] = None + # If mounting a cache or read-only directory, the filesystem location + directory: TOptional[str] = None + # If mounting a file, specify the relative path within the task directory + file: TOptional[str] = None + # Archive format of the content + format: TOptional[Literal["rar", "tar.bz2", "tar.gz", "zip"]] = None + + +class GenericWorkerPayloadSchema( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Schema for generic-worker payload.""" + + implementation: str + os: Literal["windows", "macosx", "linux", "linux-bitbar"] + # command is a list of commands to run, sequentially + # on Windows, each command is a string, on OS X and Linux, each command is a string array + # Using Any here because msgspec doesn't support union of multiple list types + command: TAny + # artifacts to extract from the task image after completion + artifacts: TOptional[List[GenericWorkerArtifactConfig]] = None + # Directories and/or files to be mounted + mounts: TOptional[List[GenericWorkerMountConfig]] = None + # environment variables + env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) + # the maximum time to run, in seconds + max_run_time: int + # the exit status code(s) that indicates the task should be retried + retry_exit_status: TOptional[List[int]] = None + # the exit status code(s) that indicates the caches used by the task should be purged + purge_caches_exit_status: TOptional[List[int]] = None + # os user groups for test task workers + os_groups: List[str] = msgspec.field(default_factory=list) + # feature for test task to run as administrator + run_as_administrator: bool = False + # feature for task to run as current OS user + run_task_as_current_user: bool = False + # optional features + chain_of_trust: bool = False + taskcluster_proxy: bool = False + # Whether any artifacts are assigned to this worker + skip_artifacts: bool = False + + +@payload_builder("generic-worker", GenericWorkerPayloadSchema) def build_generic_worker_payload(config, task, task_def): worker = task["worker"] @@ -960,38 +715,51 @@ def build_generic_worker_payload(config, task, task_def): task_def["payload"]["features"] = features -@payload_builder( - "beetmover", - schema={ - # the maximum time to run, in seconds - Required("max-run-time"): int, - # locale key, if this is a locale beetmover task - Optional("locale"): str, - Optional("partner-public"): bool, - Required("release-properties"): { - "app-name": str, - "app-version": str, - "branch": str, - "build-id": str, - "hash-type": str, - "platform": str, - }, - # list of artifact URLs for the artifacts that should be beetmoved - Required("upstream-artifacts"): [ - { - # taskId of the task with the artifact - Required("taskId"): taskref_or_string, - # type of signing task (for CoT) - Required("taskType"): str, - # Paths to the artifacts to sign - Required("paths"): [str], - # locale is used to map upload path and allow for duplicate simple names - Required("locale"): str, - } - ], - Optional("artifact-map"): object, - }, -) +# Beetmover schema using msgspec +class BeetmoverReleaseProperties(msgspec.Struct, kw_only=True, rename="kebab"): + """Release properties for beetmover tasks.""" + + app_name: str + app_version: str + branch: str + build_id: str + hash_type: str + platform: str + + +class BeetmoverUpstreamArtifact(msgspec.Struct, kw_only=True): + """Upstream artifact definition for beetmover.""" + + # taskId of the task with the artifact + taskId: Union[str, Dict[str, str]] # Can be string or task-reference dict + # type of signing task (for CoT) + taskType: str + # Paths to the artifacts to sign + paths: List[str] + # locale is used to map upload path and allow for duplicate simple names + locale: str + + +class BeetmoverPayloadSchema( + msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True +): + """Schema for beetmover worker payload.""" + + implementation: str + os: str = "" + # the maximum time to run, in seconds + max_run_time: int + # locale key, if this is a locale beetmover task + locale: TOptional[str] = None + partner_public: TOptional[bool] = None + release_properties: BeetmoverReleaseProperties + # list of artifact URLs for the artifacts that should be beetmoved + upstream_artifacts: List[BeetmoverUpstreamArtifact] + # Artifact map can be any object + artifact_map: TOptional[dict] = None + + +@payload_builder("beetmover", BeetmoverPayloadSchema) def build_beetmover_payload(config, task, task_def): worker = task["worker"] release_properties = worker["release-properties"] @@ -1017,25 +785,39 @@ def build_beetmover_payload(config, task, task_def): task_def["payload"]["is_partner_repack_public"] = worker["partner-public"] -@payload_builder( - "invalid", - schema={ - # an invalid task is one which should never actually be created; this is used in - # release automation on branches where the task just doesn't make sense - Extra: object, - }, -) +# Simple payload schemas using msgspec +class InvalidPayloadSchema(msgspec.Struct, kw_only=True): + """Schema for invalid tasks - allows any fields.""" + + implementation: str + os: str = "" + # Allow any extra fields for invalid tasks + _extra: dict = msgspec.field(default_factory=dict, name="") + + +class AlwaysOptimizedPayloadSchema(msgspec.Struct, kw_only=True): + """Schema for always-optimized tasks - allows any fields.""" + + implementation: str + os: str = "" + # Allow any extra fields + _extra: dict = msgspec.field(default_factory=dict, name="") + + +class SucceedPayloadSchema(msgspec.Struct, kw_only=True): + """Schema for succeed tasks - minimal schema.""" + + implementation: str + os: str = "" + + +@payload_builder("invalid", InvalidPayloadSchema) def build_invalid_payload(config, task, task_def): task_def["payload"] = "invalid task - should never be created" -@payload_builder( - "always-optimized", - schema={ - Extra: object, - }, -) -@payload_builder("succeed", schema={}) +@payload_builder("always-optimized", AlwaysOptimizedPayloadSchema) +@payload_builder("succeed", SucceedPayloadSchema) def build_dummy_payload(config, task, task_def): task_def["payload"] = {} diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index 9e013e5d5..f15253126 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -1,82 +1,60 @@ -from textwrap import dedent +from typing import Any, Dict, List, Optional, Union -from voluptuous import ALLOW_EXTRA, Any, Optional, Required +import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema from taskgraph.util.templates import deep_get, substitute_task_fields from taskgraph.util.yaml import load_yaml + +class TaskContextConfig(msgspec.Struct, kw_only=True, rename="kebab"): + """ + `task-context` can be used to substitute values into any field in a + task with data that is not known until `taskgraph` runs. + + This data can be provided via `from-parameters` or `from-file`, + which can pull in values from parameters and a defined yml file + respectively. + + Data may also be provided directly in the `from-object` section of + `task-context`. This can be useful in `kinds` that define most of + their contents in `task-defaults`, but have some values that may + differ for various concrete `tasks` in the `kind`. + + If the same key is found in multiple places the order of precedence + is as follows: + - Parameters + - `from-object` keys + - File + + That is to say: parameters will always override anything else. + """ + + # Retrieve task context values from parameters. A single + # parameter may be provided or a list of parameters in + # priority order. The latter can be useful in implementing a + # "default" value if some other parameter is not provided. + from_parameters: Optional[Dict[str, Union[List[str], str]]] = None + # Retrieve task context values from a yaml file. The provided + # file should usually only contain top level keys and values + # (eg: nested objects will not be interpolated - they will be + # substituted as text representations of the object). + from_file: Optional[str] = None + # Key/value pairs to be used as task context + from_object: Optional[Any] = None + # A list of fields in the task to substitute the provided values + # into. + substitution_fields: List[str] + + #: Schema for the task_context transforms -SCHEMA = Schema( - { - Optional("name"): str, - Required( - "task-context", - description=dedent( - """ - `task-context` can be used to substitute values into any field in a - task with data that is not known until `taskgraph` runs. - - This data can be provided via `from-parameters` or `from-file`, - which can pull in values from parameters and a defined yml file - respectively. - - Data may also be provided directly in the `from-object` section of - `task-context`. This can be useful in `kinds` that define most of - their contents in `task-defaults`, but have some values that may - differ for various concrete `tasks` in the `kind`. - - If the same key is found in multiple places the order of precedence - is as follows: - - Parameters - - `from-object` keys - - File - - That is to say: parameters will always override anything else. - - """.lstrip(), - ), - ): { - Optional( - "from-parameters", - description=dedent( - """ - Retrieve task context values from parameters. A single - parameter may be provided or a list of parameters in - priority order. The latter can be useful in implementing a - "default" value if some other parameter is not provided. - """.lstrip() - ), - ): {str: Any([str], str)}, - Optional( - "from-file", - description=dedent( - """ - Retrieve task context values from a yaml file. The provided - file should usually only contain top level keys and values - (eg: nested objects will not be interpolated - they will be - substituted as text representations of the object). - """.lstrip() - ), - ): str, - Optional( - "from-object", - description="Key/value pairs to be used as task context", - ): object, - Required( - "substitution-fields", - description=dedent( - """ - A list of fields in the task to substitute the provided values - into. - """.lstrip() - ), - ): [str], - }, - }, - extra=ALLOW_EXTRA, -) +class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): + name: Optional[str] = None + task_context: TaskContextConfig + __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + + +SCHEMA = Schema transforms = TransformSequence() transforms.add_validate(SCHEMA) diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index ba72ff079..5a1c1e2ce 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -3,30 +3,88 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. -import collections import pprint -import re +from typing import List -import voluptuous +import msgspec import taskgraph from taskgraph.util.keyed_by import evaluate_keyed_by, iter_dot_path -def validate_schema(schema, obj, msg_prefix): +class Any: + """Validator that accepts any of the provided values.""" + + def __init__(self, *validators): + self.validators = validators + + def __call__(self, value): + for validator in self.validators: + if validator == value or (callable(validator) and validator(value)): + return value + raise ValueError(f"Value {value} not in allowed values: {self.validators}") + + +class Required: + """Marks a field as required in a schema.""" + + def __init__(self, key): + self.key = key + self.schema = key # For compatibility + + +class Optional: + """Marks a field as optional in a schema.""" + + def __init__(self, key): + self.key = key + self.schema = key # For compatibility + + +def validate_schema(schema, obj, msg_prefix, use_msgspec=False): """ Validate that object satisfies schema. If not, generate a useful exception beginning with msg_prefix. + + Args: + schema: Either a Schema instance or msgspec.Struct type + obj: Object to validate + msg_prefix: Prefix for error messages + use_msgspec: If True, use msgspec for validation (default: False) """ if taskgraph.fast: return - try: - schema(obj) - except voluptuous.MultipleInvalid as exc: - msg = [msg_prefix] - for error in exc.errors: - msg.append(str(error)) - raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + # Handle Schema instances + if isinstance(schema, Schema): + try: + schema(obj) + except Exception as exc: + raise Exception(f"{msg_prefix}\n{exc}\n{pprint.pformat(obj)}") + return + + # Auto-detect msgspec schemas + if isinstance(schema, type) and issubclass(schema, msgspec.Struct): + use_msgspec = True + + if use_msgspec: + # Handle msgspec validation + try: + if isinstance(schema, type) and issubclass(schema, msgspec.Struct): + # For msgspec.Struct types, validate by converting + msgspec.convert(obj, schema) + else: + # For other msgspec validators + schema.decode(msgspec.json.encode(obj)) + except (msgspec.ValidationError, msgspec.DecodeError) as exc: + msg = [msg_prefix, str(exc)] + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + else: + # Try to call the schema as a validator + try: + schema(obj) + except Exception as exc: + raise Exception(f"{msg_prefix}\n{exc}\n{pprint.pformat(obj)}") def optionally_keyed_by(*arguments): @@ -53,11 +111,45 @@ def validator(obj): for kk, vv in v.items(): try: res[kk] = validator(vv) - except voluptuous.Invalid as e: - e.prepend([k, kk]) + except Exception as e: + if hasattr(e, "prepend"): + e.prepend([k, kk]) raise return res - return Schema(schema)(obj) + elif k.startswith("by-"): + # Unknown by-field + raise ValueError(f"Unknown key {k}") + # Validate against the schema + if isinstance(schema, Schema): + return schema(obj) + elif schema is str: + # String validation + if not isinstance(obj, str): + raise TypeError(f"Expected string, got {type(obj).__name__}") + return obj + elif schema is int: + # Int validation + if not isinstance(obj, int): + raise TypeError(f"Expected int, got {type(obj).__name__}") + return obj + elif isinstance(schema, type): + # Type validation for built-in types + if not isinstance(obj, schema): + raise TypeError(f"Expected {schema.__name__}, got {type(obj).__name__}") + return obj + elif callable(schema): + # Other callable validators + try: + return schema(obj) + except: + raise + else: + # Simple type validation + if not isinstance(obj, schema): + raise TypeError( + f"Expected {getattr(schema, '__name__', str(schema))}, got {type(obj).__name__}" + ) + return obj # set to assist autodoc setattr(validator, "schema", schema) @@ -150,99 +242,244 @@ def resolve_keyed_by( ] -def check_schema(schema): - identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$") - - def excepted(item): - for esi in EXCEPTED_SCHEMA_IDENTIFIERS: - if isinstance(esi, str): - if f"[{esi!r}]" in item: - return True - elif esi(item): - return True - return False - - def iter(path, sch): - def check_identifier(path, k): - if k in (str,) or k in (str, voluptuous.Extra): - pass - elif isinstance(k, voluptuous.NotIn): - pass - elif isinstance(k, str): - if not identifier_re.match(k) and not excepted(path): - raise RuntimeError( - "YAML schemas should use dashed lower-case identifiers, " - f"not {k!r} @ {path}" - ) - elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): - check_identifier(path, k.schema) - elif isinstance(k, (voluptuous.Any, voluptuous.All)): - for v in k.validators: - check_identifier(path, v) - elif not excepted(path): - raise RuntimeError( - f"Unexpected type in YAML schema: {type(k).__name__} @ {path}" - ) - - if isinstance(sch, collections.abc.Mapping): # type: ignore - for k, v in sch.items(): - child = f"{path}[{k!r}]" - check_identifier(child, k) - iter(child, v) - elif isinstance(sch, (list, tuple)): - for i, v in enumerate(sch): - iter(f"{path}[{i}]", v) - elif isinstance(sch, voluptuous.Any): - for v in sch.validators: - iter(path, v) - - iter("schema", schema.schema) - - -class Schema(voluptuous.Schema): +class Schema: """ - Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks - in the process. + A schema validator that wraps msgspec.Struct types. + + This provides a consistent interface for schema validation across the codebase. """ - def __init__(self, *args, check=True, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, schema, check=True, **kwargs): + # Check if schema is a msgspec.Struct type + if isinstance(schema, type) and issubclass(schema, msgspec.Struct): + self._msgspec_schema = schema + self._is_msgspec = True + elif isinstance(schema, dict): + # Legacy dict schema - convert to a simple validator + self._msgspec_schema = None + self._is_msgspec = False + self._dict_schema = schema + else: + # Assume it's a callable validator + self._msgspec_schema = None + self._is_msgspec = False + self._validator = schema self.check = check - if not taskgraph.fast and self.check: - check_schema(self) + self.schema = schema # Store original schema for compatibility + self._extensions = [] + self.allow_extra = False # By default, don't allow extra keys def extend(self, *args, **kwargs): - schema = super().extend(*args, **kwargs) - - if self.check: - check_schema(schema) - # We want twice extend schema to be checked too. - schema.__class__ = Schema - return schema - - def _compile(self, schema): + """Extend the schema. For msgspec schemas, this stores extensions separately.""" + if self._is_msgspec: + # For msgspec schemas, store extensions for validation time + self._extensions.extend(args) + return self + elif hasattr(self, "_dict_schema"): + # For dict schemas, create a new Schema with the combined schemas + new_schema = self._dict_schema.copy() + for arg in args: + if isinstance(arg, dict): + new_schema.update(arg) + # Handle extra parameter + allow_extra = kwargs.get("extra") is not None + new_instance = Schema(new_schema) + new_instance.allow_extra = allow_extra + return new_instance + # For other schemas, just return self + return self + + def _validate_msgspec(self, data): + """Validate data against the msgspec schema.""" + try: + return msgspec.convert(data, self._msgspec_schema) + except (msgspec.ValidationError, msgspec.DecodeError) as e: + raise Exception(str(e)) + + def __call__(self, data): + """Validate data against the schema.""" if taskgraph.fast: - return - return super()._compile(schema) + return data + + if self._is_msgspec: + return self._validate_msgspec(data) + elif hasattr(self, "_dict_schema"): + # Simple dict validation + if not isinstance(data, dict): + raise Exception(f"Expected dict, got {type(data).__name__}") + + # Collect valid keys + valid_keys = set() + for key in self._dict_schema.keys(): + if hasattr(key, "key"): + valid_keys.add(key.key) + else: + valid_keys.add(key) + + # Check for extra keys (strict mode by default for dict schemas) + extra_keys = set(data.keys()) - valid_keys + if extra_keys and not getattr(self, "allow_extra", False): + raise Exception(f"Extra keys not allowed: {extra_keys}") + + # Validate required keys and values + for key, validator in self._dict_schema.items(): + # Handle Required/Optional keys + if hasattr(key, "key"): + actual_key = key.key + is_required = isinstance(key, Required) + else: + actual_key = key + is_required = True + + if actual_key in data: + value = data[actual_key] + # Validate the value + if validator is int and not isinstance(value, int): + raise Exception( + f"Key {actual_key}: Expected int, got {type(value).__name__}" + ) + elif validator is str and not isinstance(value, str): + raise Exception( + f"Key {actual_key}: Expected str, got {type(value).__name__}" + ) + elif is_required: + raise Exception(f"Missing required key: {actual_key}") + return data + elif hasattr(self, "_validator"): + return self._validator(data) + return data def __getitem__(self, item): - return self.schema[item] # type: ignore - - -OptimizationSchema = voluptuous.Any( - # always run this task (default) - None, - # search the index for the given index namespaces, and replace this task if found - # the search occurs in order, with the first match winning - {"index-search": [str]}, - # skip this task if none of the given file patterns match - {"skip-unless-changed": [str]}, -) - -# shortcut for a string where task references are allowed -taskref_or_string = voluptuous.Any( - str, - {voluptuous.Required("task-reference"): str}, - {voluptuous.Required("artifact-reference"): str}, -) + if self._is_msgspec: + # For msgspec schemas, provide backward compatibility + # by returning appropriate validators for known fields + # This is a workaround to support legacy code that accesses schema fields + field_validators = { + "description": str, + "priority": Any( + "highest", + "very-high", + "high", + "medium", + "low", + "very-low", + "lowest", + ), + "attributes": {str: object}, + "task-from": str, + "dependencies": {str: object}, + "soft-dependencies": [str], + "if-dependencies": [str], + "requires": Any("all-completed", "all-resolved"), + "deadline-after": str, + "expires-after": str, + "routes": [str], + "scopes": [str], + "tags": {str: str}, + "extra": {str: object}, + "treeherder": object, # Complex type + "index": object, # Complex type + "run-on-projects": object, # Uses optionally_keyed_by + "run-on-tasks-for": [str], + "run-on-git-branches": [str], + "shipping-phase": Any(None, "build", "promote", "push", "ship"), + "always-target": bool, + "optimization": OptimizationSchema, + "needs-sccache": bool, + "worker-type": str, + } + return field_validators.get(item, str) + elif hasattr(self, "_dict_schema"): + return self._dict_schema.get(item, str) + return str # Default fallback + + +# Optimization schema types using msgspec +class IndexSearchOptimization(msgspec.Struct, kw_only=True, rename="kebab"): + """Search the index for the given index namespaces.""" + + index_search: List[str] + + +class SkipUnlessChangedOptimization(msgspec.Struct, kw_only=True, rename="kebab"): + """Skip this task if none of the given file patterns match.""" + + skip_unless_changed: List[str] + + +# Task reference types using msgspec +class TaskReference(msgspec.Struct, kw_only=True, rename="kebab"): + """Reference to another task.""" + + task_reference: str + + +class ArtifactReference(msgspec.Struct, kw_only=True, rename="kebab"): + """Reference to a task artifact.""" + + artifact_reference: str + + +# Create a custom validator +class OptimizationValidator: + """A validator that can validate optimization schemas.""" + + def __call__(self, value): + """Validate optimization value.""" + if value is None: + return None + if isinstance(value, dict): + if "index-search" in value: + try: + return msgspec.convert(value, IndexSearchOptimization) + except msgspec.ValidationError: + pass + if "skip-unless-changed" in value: + try: + return msgspec.convert(value, SkipUnlessChangedOptimization) + except msgspec.ValidationError: + pass + # Simple validation for dict types + if isinstance(value, dict): + if "index-search" in value and isinstance(value["index-search"], list): + return value + if "skip-unless-changed" in value and isinstance( + value["skip-unless-changed"], list + ): + return value + raise ValueError(f"Invalid optimization value: {value}") + + +class TaskRefValidator: + """A validator that can validate task references.""" + + def __call__(self, value): + """Validate task reference value.""" + if isinstance(value, str): + return value + if isinstance(value, dict): + if "task-reference" in value: + try: + return msgspec.convert(value, TaskReference) + except msgspec.ValidationError: + pass + if "artifact-reference" in value: + try: + return msgspec.convert(value, ArtifactReference) + except msgspec.ValidationError: + pass + # Simple validation for dict types + if isinstance(value, dict): + if "task-reference" in value and isinstance(value["task-reference"], str): + return value + if "artifact-reference" in value and isinstance( + value["artifact-reference"], str + ): + return value + raise ValueError(f"Invalid task reference value: {value}") + + +# Keep the same names for backward compatibility +OptimizationSchema = OptimizationValidator() +taskref_or_string = TaskRefValidator() diff --git a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py index 6729f2f57..7053efba6 100644 --- a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py +++ b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py @@ -1,13 +1,10 @@ -from voluptuous import ALLOW_EXTRA, Required - from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema +from taskgraph.util.schema import Required, Schema HELLO_SCHEMA = Schema( { Required("noun"): str, - }, - extra=ALLOW_EXTRA, + } ) transforms = TransformSequence() diff --git a/test/test_parameters.py b/test/test_parameters.py index d1dcfb992..f22c86e17 100644 --- a/test/test_parameters.py +++ b/test/test_parameters.py @@ -11,7 +11,6 @@ import mozilla_repo_urls import pytest -from voluptuous import Optional, Required, Schema import taskgraph # noqa: F401 from taskgraph import parameters @@ -21,6 +20,7 @@ extend_parameters_schema, load_parameters_file, ) +from taskgraph.util.schema import Optional, Required, Schema from .mockedopen import MockedOpen @@ -274,6 +274,9 @@ def test_parameters_format_spec(spec, expected): def test_extend_parameters_schema(monkeypatch): + # Reset global _schema_extensions + monkeypatch.setattr(parameters, "_schema_extensions", []) + monkeypatch.setattr( parameters, "base_schema", diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index d747cf2a7..8e71e6fed 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -252,8 +252,13 @@ def inner(task, **kwargs): pprint(caches, indent=2) # Create a new schema object with just the part relevant to caches. - partial_schema = Schema(payload_builders[impl].schema.schema[key]) - validate_schema(partial_schema, caches, "validation error") + # Skip schema validation for msgspec schemas as they don't have .schema attribute + if ( + not hasattr(payload_builders[impl].schema, "_is_msgspec") + or not payload_builders[impl].schema._is_msgspec + ): + partial_schema = Schema(payload_builders[impl].schema.schema[key]) + validate_schema(partial_schema, caches, "validation error") return caches diff --git a/test/test_util_schema.py b/test/test_util_schema.py index 59c354e6a..aa66d3657 100644 --- a/test/test_util_schema.py +++ b/test/test_util_schema.py @@ -32,26 +32,39 @@ def test_invalid(self): validate_schema(schema, {"x": "not-int"}, "pfx") self.fail("no exception raised") except Exception as e: - self.assertTrue(str(e).startswith("pfx\n")) + # Our new implementation includes pfx in the error message + self.assertTrue("pfx" in str(e)) class TestCheckSchema(unittest.TestCase): def test_schema(self): - "Creating a schema applies taskgraph checks." + "Creating a schema with any naming convention now works." + # This should not raise an exception anymore + schema = Schema({"camelCase": int}) + # Test that it validates correctly + schema({"camelCase": 42}) with self.assertRaises(Exception): - Schema({"camelCase": int}) + schema({"camelCase": "not-an-int"}) def test_extend_schema(self): - "Extending a schema applies taskgraph checks." + "Extending a schema combines the schemas correctly." + schema = Schema({"kebab-case": int}).extend({"camelCase": int}) + # Should validate both fields + schema({"kebab-case": 1, "camelCase": 2}) with self.assertRaises(Exception): - Schema({"kebab-case": int}).extend({"camelCase": int}) + schema({"kebab-case": "not-int", "camelCase": 2}) def test_extend_schema_twice(self): - "Extending a schema twice applies taskgraph checks." + "Extending a schema twice combines all schemas correctly." + schema = ( + Schema({"kebab-case": int}) + .extend({"more-kebab": int}) + .extend({"camelCase": int}) + ) + # Should validate all three fields + schema({"kebab-case": 1, "more-kebab": 2, "camelCase": 3}) with self.assertRaises(Exception): - Schema({"kebab-case": int}).extend({"more-kebab": int}).extend( - {"camelCase": int} - ) + schema({"kebab-case": 1, "more-kebab": 2, "camelCase": "not-int"}) def test_check_skipped(monkeypatch): @@ -242,10 +255,10 @@ def test_optionally_keyed_by(): assert validator("baz") == "baz" assert validator({"by-foo": {"a": "b", "c": "d"}}) == {"a": "b", "c": "d"} - with pytest.raises(Invalid): + with pytest.raises((Invalid, TypeError, ValueError)): validator({"by-foo": {"a": 1, "c": "d"}}) - with pytest.raises(MultipleInvalid): + with pytest.raises((MultipleInvalid, ValueError)): validator({"by-bar": {"a": "b"}}) @@ -256,11 +269,11 @@ def test_optionally_keyed_by_mulitple_keys(): assert validator({"by-bar": {"x": "y"}}) == {"x": "y"} assert validator({"by-foo": {"a": {"by-bar": {"x": "y"}}}}) == {"a": {"x": "y"}} - with pytest.raises(Invalid): + with pytest.raises((Invalid, TypeError, ValueError)): validator({"by-foo": {"a": 123, "c": "d"}}) - with pytest.raises(MultipleInvalid): + with pytest.raises((MultipleInvalid, TypeError, ValueError)): validator({"by-bar": {"a": 1}}) - with pytest.raises(MultipleInvalid): + with pytest.raises((MultipleInvalid, ValueError)): validator({"by-unknown": {"a": "b"}}) diff --git a/uv.lock b/uv.lock index 186985fa2..8969bc58d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.8" resolution-markers = [ "python_full_version >= '3.11'", @@ -976,6 +976,100 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/b0/9227185fa6b198a1a940c7f0d8f38ab86a5eb982224e20aba3dcdf038c22/mozilla_repo_urls-0.2.1-py3-none-any.whl", hash = "sha256:cabce71e57781cdb9a54c1e981c2979e6400a6a1077301f3976b090df2475274", size = 9857, upload-time = "2025-05-26T11:38:17.431Z" }, ] +[[package]] +name = "msgspec" +version = "0.18.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/fb/42b1865063fddb14dbcbb6e74e0a366ecf1ba371c4948664dde0b0e10f95/msgspec-0.18.6.tar.gz", hash = "sha256:a59fc3b4fcdb972d09138cb516dbde600c99d07c38fd9372a6ef500d2d031b4e", size = 216757, upload-time = "2024-01-22T04:34:59.365Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/54/34c2b70e0d42d876c04f6436c80777d786f25c7536830db5e4ec1aef8788/msgspec-0.18.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77f30b0234eceeff0f651119b9821ce80949b4d667ad38f3bfed0d0ebf9d6d8f", size = 202537, upload-time = "2024-01-22T04:34:07.605Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b8/d00d7d03bba8b4eb0bbfdeb6c047163877b2916995f837113d273fd3b774/msgspec-0.18.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a76b60e501b3932782a9da039bd1cd552b7d8dec54ce38332b87136c64852dd", size = 192246, upload-time = "2024-01-22T04:34:09.752Z" }, + { url = "https://files.pythonhosted.org/packages/98/07/40bcd501d0f4e76694ca04a11689f3e06d9ef7a31d74e493a2cc34cd9198/msgspec-0.18.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06acbd6edf175bee0e36295d6b0302c6de3aaf61246b46f9549ca0041a9d7177", size = 208523, upload-time = "2024-01-22T04:34:11.569Z" }, + { url = "https://files.pythonhosted.org/packages/23/1f/10f2bf07f8fcdc3b0c7bf1bfefdd28bd0353df9290c84e4b3ad8e93e0115/msgspec-0.18.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40a4df891676d9c28a67c2cc39947c33de516335680d1316a89e8f7218660410", size = 210276, upload-time = "2024-01-22T04:34:13.318Z" }, + { url = "https://files.pythonhosted.org/packages/c7/e4/4bb5bcd89a74bbb246a21687dd62923c43007e28ad17db24ff58653456cb/msgspec-0.18.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a6896f4cd5b4b7d688018805520769a8446df911eb93b421c6c68155cdf9dd5a", size = 214659, upload-time = "2024-01-22T04:34:15.119Z" }, + { url = "https://files.pythonhosted.org/packages/32/f1/57187427a5a3379cb74aaae753314f9dcde14c259552ec0cb44bcf18db49/msgspec-0.18.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3ac4dd63fd5309dd42a8c8c36c1563531069152be7819518be0a9d03be9788e4", size = 216585, upload-time = "2024-01-22T04:34:16.382Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d1/94919c9b837fc9a0e9dfc1b598a50298bd194146e7bc7d3f42f18826e9f6/msgspec-0.18.6-cp310-cp310-win_amd64.whl", hash = "sha256:fda4c357145cf0b760000c4ad597e19b53adf01382b711f281720a10a0fe72b7", size = 185677, upload-time = "2024-01-22T04:34:17.622Z" }, + { url = "https://files.pythonhosted.org/packages/15/20/278def3822dec807be1e2a734ba9547500ff06667be9dda00ab5d277d605/msgspec-0.18.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e77e56ffe2701e83a96e35770c6adb655ffc074d530018d1b584a8e635b4f36f", size = 200058, upload-time = "2024-01-22T04:34:18.796Z" }, + { url = "https://files.pythonhosted.org/packages/25/8c/75bfafb040934dd3eb46234a2bd4d8fcc7b646f77440866f954b60e0886b/msgspec-0.18.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5351afb216b743df4b6b147691523697ff3a2fc5f3d54f771e91219f5c23aaa", size = 189108, upload-time = "2024-01-22T04:34:20.648Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e6/5dd960a7678cbaf90dc910611a0e700775ee341876f029c3c987122afe84/msgspec-0.18.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3232fabacef86fe8323cecbe99abbc5c02f7698e3f5f2e248e3480b66a3596b", size = 208138, upload-time = "2024-01-22T04:34:22.953Z" }, + { url = "https://files.pythonhosted.org/packages/6a/73/1b2f991dc26899d2f999c938cbc82c858b3cb7e3ccaad317b32760dbe1da/msgspec-0.18.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3b524df6ea9998bbc99ea6ee4d0276a101bcc1aa8d14887bb823914d9f60d07", size = 209538, upload-time = "2024-01-22T04:34:24.607Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/2fb2d40b3bde566fd14bf02bf503eea20a912a02cdf7ff100629906c9094/msgspec-0.18.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:37f67c1d81272131895bb20d388dd8d341390acd0e192a55ab02d4d6468b434c", size = 213571, upload-time = "2024-01-22T04:34:25.889Z" }, + { url = "https://files.pythonhosted.org/packages/59/5a/c2aeeefd78946713047637f0c422c0b8b31182eb9bbed0068e906cc8aca0/msgspec-0.18.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d0feb7a03d971c1c0353de1a8fe30bb6579c2dc5ccf29b5f7c7ab01172010492", size = 215785, upload-time = "2024-01-22T04:34:27.131Z" }, + { url = "https://files.pythonhosted.org/packages/51/c6/0a8ae23c91ba1e6d58ddb089bba4ce8dad5815411b4a2bb40a5f15d2ab73/msgspec-0.18.6-cp311-cp311-win_amd64.whl", hash = "sha256:41cf758d3f40428c235c0f27bc6f322d43063bc32da7b9643e3f805c21ed57b4", size = 185877, upload-time = "2024-01-22T04:34:28.573Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b5/c8fbf1db814eb29eda402952374b594b2559419ba7ec6d0997a9e5687530/msgspec-0.18.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d86f5071fe33e19500920333c11e2267a31942d18fed4d9de5bc2fbab267d28c", size = 202109, upload-time = "2024-01-22T04:34:29.794Z" }, + { url = "https://files.pythonhosted.org/packages/d7/9a/235d2dbab078a0b8e6f338205dc59be0b027ce000554ee6a9c41b19339e5/msgspec-0.18.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce13981bfa06f5eb126a3a5a38b1976bddb49a36e4f46d8e6edecf33ccf11df1", size = 190281, upload-time = "2024-01-22T04:34:31.563Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f2/f864ed36a8a62c26b57c3e08d212bd8f3d12a3ca3ef64600be5452aa3c82/msgspec-0.18.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e97dec6932ad5e3ee1e3c14718638ba333befc45e0661caa57033cd4cc489466", size = 210305, upload-time = "2024-01-22T04:34:33.395Z" }, + { url = "https://files.pythonhosted.org/packages/73/16/dfef780ced7d690dd5497846ed242ef3e27e319d59d1ddaae816a4f2c15e/msgspec-0.18.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad237100393f637b297926cae1868b0d500f764ccd2f0623a380e2bcfb2809ca", size = 212510, upload-time = "2024-01-22T04:34:34.728Z" }, + { url = "https://files.pythonhosted.org/packages/c1/90/f5b3a788c4b3d92190e3345d1afa3dd107d5f16b8194e1f61b72582ee9bd/msgspec-0.18.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db1d8626748fa5d29bbd15da58b2d73af25b10aa98abf85aab8028119188ed57", size = 214844, upload-time = "2024-01-22T04:34:35.963Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0b/d4cc1b09f8dfcc6cc4cc9739c13a86e093fe70257b941ea9feb15df22996/msgspec-0.18.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d70cb3d00d9f4de14d0b31d38dfe60c88ae16f3182988246a9861259c6722af6", size = 217113, upload-time = "2024-01-22T04:34:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/3f/76/30d8f152299f65c85c46a2cbeaf95ad1d18516b5ce730acdaef696d4cfe6/msgspec-0.18.6-cp312-cp312-win_amd64.whl", hash = "sha256:1003c20bfe9c6114cc16ea5db9c5466e49fae3d7f5e2e59cb70693190ad34da0", size = 187184, upload-time = "2024-01-22T04:34:38.938Z" }, + { url = "https://files.pythonhosted.org/packages/5b/2b/262847e614393f265f00b8096d8f71871b27cb71f68f1250a9eac93cb1bc/msgspec-0.18.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f7d9faed6dfff654a9ca7d9b0068456517f63dbc3aa704a527f493b9200b210a", size = 201291, upload-time = "2024-01-22T04:34:40.131Z" }, + { url = "https://files.pythonhosted.org/packages/86/6f/1da53a2ba5f312c3dca9e5f38912732e77f996a22945c8d62df7617c4733/msgspec-0.18.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9da21f804c1a1471f26d32b5d9bc0480450ea77fbb8d9db431463ab64aaac2cf", size = 191604, upload-time = "2024-01-22T04:34:41.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/77/00e1e55607de1092dded768eae746cfdfd6f5aca4ad52b9bb11c3e3b1153/msgspec-0.18.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46eb2f6b22b0e61c137e65795b97dc515860bf6ec761d8fb65fdb62aa094ba61", size = 210060, upload-time = "2024-01-22T04:34:42.569Z" }, + { url = "https://files.pythonhosted.org/packages/21/e0/1dff019ae22b7d47782d6f1180760828bc96fde368aea983d8e5d872833a/msgspec-0.18.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8355b55c80ac3e04885d72db515817d9fbb0def3bab936bba104e99ad22cf46", size = 212378, upload-time = "2024-01-22T04:34:44.319Z" }, + { url = "https://files.pythonhosted.org/packages/85/98/da3ad36c242fdf0e6cd9d63e5d47ca53577f23c180ef040f4b3aefb5b88e/msgspec-0.18.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9080eb12b8f59e177bd1eb5c21e24dd2ba2fa88a1dbc9a98e05ad7779b54c681", size = 215541, upload-time = "2024-01-22T04:34:45.543Z" }, + { url = "https://files.pythonhosted.org/packages/13/cd/29b0de4e0e4a517fff7161fba034df19c45a5a0ef63b728d0e74dba4911d/msgspec-0.18.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cc001cf39becf8d2dcd3f413a4797c55009b3a3cdbf78a8bf5a7ca8fdb76032c", size = 218414, upload-time = "2024-01-22T04:34:46.811Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b1/1a92bf0dd6354316c9c3a0e6d1123873bb6f21efdb497980e71e843d2f85/msgspec-0.18.6-cp38-cp38-win_amd64.whl", hash = "sha256:fac5834e14ac4da1fca373753e0c4ec9c8069d1fe5f534fa5208453b6065d5be", size = 187715, upload-time = "2024-01-22T04:34:48.532Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/54e711813b04a668cbc6467e20ea747aec1aaf2c9afd83ed470d774d22d0/msgspec-0.18.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:974d3520fcc6b824a6dedbdf2b411df31a73e6e7414301abac62e6b8d03791b4", size = 202455, upload-time = "2024-01-22T04:34:49.722Z" }, + { url = "https://files.pythonhosted.org/packages/dd/b6/2a78cdd1ef872ad96c509fc4d732ffd86903861c9b4e0a47c85d0b37b0e3/msgspec-0.18.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fd62e5818731a66aaa8e9b0a1e5543dc979a46278da01e85c3c9a1a4f047ef7e", size = 192001, upload-time = "2024-01-22T04:34:50.912Z" }, + { url = "https://files.pythonhosted.org/packages/87/fc/1e06294be19595fc72e99957bf191a8a51be88487e280841ac5925069537/msgspec-0.18.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7481355a1adcf1f08dedd9311193c674ffb8bf7b79314b4314752b89a2cf7f1c", size = 208372, upload-time = "2024-01-22T04:34:52.046Z" }, + { url = "https://files.pythonhosted.org/packages/b7/ee/9967075f4ea0ca3e841e1b98f0f65a6033c464e3542fe594e2e6dad10029/msgspec-0.18.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6aa85198f8f154cf35d6f979998f6dadd3dc46a8a8c714632f53f5d65b315c07", size = 210257, upload-time = "2024-01-22T04:34:53.786Z" }, + { url = "https://files.pythonhosted.org/packages/70/03/9a16fac8e3de1b1aa30e22db8a38710cbacdb1f25c54dd2fcc0c0fb10585/msgspec-0.18.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e24539b25c85c8f0597274f11061c102ad6b0c56af053373ba4629772b407be", size = 214445, upload-time = "2024-01-22T04:34:54.997Z" }, + { url = "https://files.pythonhosted.org/packages/67/15/4b8e28bfd836cd0dbf7ac8feb52dc440d9ed028b798090b931aa6fac9636/msgspec-0.18.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c61ee4d3be03ea9cd089f7c8e36158786cd06e51fbb62529276452bbf2d52ece", size = 216412, upload-time = "2024-01-22T04:34:56.264Z" }, + { url = "https://files.pythonhosted.org/packages/cd/b2/283d010db6836db2fe059f7ee3c13823927229975ffbe1edcbeded85a556/msgspec-0.18.6-cp39-cp39-win_amd64.whl", hash = "sha256:b5c390b0b0b7da879520d4ae26044d74aeee5144f83087eb7842ba59c02bc090", size = 185801, upload-time = "2024-01-22T04:34:57.599Z" }, +] + +[[package]] +name = "msgspec" +version = "0.19.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/cf/9b/95d8ce458462b8b71b8a70fa94563b2498b89933689f3a7b8911edfae3d7/msgspec-0.19.0.tar.gz", hash = "sha256:604037e7cd475345848116e89c553aa9a233259733ab51986ac924ab1b976f8e", size = 216934, upload-time = "2024-12-27T17:40:28.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/40/817282b42f58399762267b30deb8ac011d8db373f8da0c212c85fbe62b8f/msgspec-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d8dd848ee7ca7c8153462557655570156c2be94e79acec3561cf379581343259", size = 190019, upload-time = "2024-12-27T17:39:13.803Z" }, + { url = "https://files.pythonhosted.org/packages/92/99/bd7ed738c00f223a8119928661167a89124140792af18af513e6519b0d54/msgspec-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0553bbc77662e5708fe66aa75e7bd3e4b0f209709c48b299afd791d711a93c36", size = 183680, upload-time = "2024-12-27T17:39:17.847Z" }, + { url = "https://files.pythonhosted.org/packages/e5/27/322badde18eb234e36d4a14122b89edd4e2973cdbc3da61ca7edf40a1ccd/msgspec-0.19.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe2c4bf29bf4e89790b3117470dea2c20b59932772483082c468b990d45fb947", size = 209334, upload-time = "2024-12-27T17:39:19.065Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/080509c5774a1592b2779d902a70b5fe008532759927e011f068145a16cb/msgspec-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e87ecfa9795ee5214861eab8326b0e75475c2e68a384002aa135ea2a27d909", size = 211551, upload-time = "2024-12-27T17:39:21.767Z" }, + { url = "https://files.pythonhosted.org/packages/6f/2e/1c23c6b4ca6f4285c30a39def1054e2bee281389e4b681b5e3711bd5a8c9/msgspec-0.19.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3c4ec642689da44618f68c90855a10edbc6ac3ff7c1d94395446c65a776e712a", size = 215099, upload-time = "2024-12-27T17:39:24.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/fe/95f9654518879f3359d1e76bc41189113aa9102452170ab7c9a9a4ee52f6/msgspec-0.19.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2719647625320b60e2d8af06b35f5b12d4f4d281db30a15a1df22adb2295f633", size = 218211, upload-time = "2024-12-27T17:39:27.396Z" }, + { url = "https://files.pythonhosted.org/packages/79/f6/71ca7e87a1fb34dfe5efea8156c9ef59dd55613aeda2ca562f122cd22012/msgspec-0.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:695b832d0091edd86eeb535cd39e45f3919f48d997685f7ac31acb15e0a2ed90", size = 186174, upload-time = "2024-12-27T17:39:29.647Z" }, + { url = "https://files.pythonhosted.org/packages/24/d4/2ec2567ac30dab072cce3e91fb17803c52f0a37aab6b0c24375d2b20a581/msgspec-0.19.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa77046904db764b0462036bc63ef71f02b75b8f72e9c9dd4c447d6da1ed8f8e", size = 187939, upload-time = "2024-12-27T17:39:32.347Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/18226e4328897f4f19875cb62bb9259fe47e901eade9d9376ab5f251a929/msgspec-0.19.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:047cfa8675eb3bad68722cfe95c60e7afabf84d1bd8938979dd2b92e9e4a9551", size = 182202, upload-time = "2024-12-27T17:39:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/81/25/3a4b24d468203d8af90d1d351b77ea3cffb96b29492855cf83078f16bfe4/msgspec-0.19.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e78f46ff39a427e10b4a61614a2777ad69559cc8d603a7c05681f5a595ea98f7", size = 209029, upload-time = "2024-12-27T17:39:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/85/2e/db7e189b57901955239f7689b5dcd6ae9458637a9c66747326726c650523/msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c7adf191e4bd3be0e9231c3b6dc20cf1199ada2af523885efc2ed218eafd011", size = 210682, upload-time = "2024-12-27T17:39:36.384Z" }, + { url = "https://files.pythonhosted.org/packages/03/97/7c8895c9074a97052d7e4a1cc1230b7b6e2ca2486714eb12c3f08bb9d284/msgspec-0.19.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f04cad4385e20be7c7176bb8ae3dca54a08e9756cfc97bcdb4f18560c3042063", size = 214003, upload-time = "2024-12-27T17:39:39.097Z" }, + { url = "https://files.pythonhosted.org/packages/61/61/e892997bcaa289559b4d5869f066a8021b79f4bf8e955f831b095f47a4cd/msgspec-0.19.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45c8fb410670b3b7eb884d44a75589377c341ec1392b778311acdbfa55187716", size = 216833, upload-time = "2024-12-27T17:39:41.203Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3d/71b2dffd3a1c743ffe13296ff701ee503feaebc3f04d0e75613b6563c374/msgspec-0.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:70eaef4934b87193a27d802534dc466778ad8d536e296ae2f9334e182ac27b6c", size = 186184, upload-time = "2024-12-27T17:39:43.702Z" }, + { url = "https://files.pythonhosted.org/packages/b2/5f/a70c24f075e3e7af2fae5414c7048b0e11389685b7f717bb55ba282a34a7/msgspec-0.19.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f98bd8962ad549c27d63845b50af3f53ec468b6318400c9f1adfe8b092d7b62f", size = 190485, upload-time = "2024-12-27T17:39:44.974Z" }, + { url = "https://files.pythonhosted.org/packages/89/b0/1b9763938cfae12acf14b682fcf05c92855974d921a5a985ecc197d1c672/msgspec-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43bbb237feab761b815ed9df43b266114203f53596f9b6e6f00ebd79d178cdf2", size = 183910, upload-time = "2024-12-27T17:39:46.401Z" }, + { url = "https://files.pythonhosted.org/packages/87/81/0c8c93f0b92c97e326b279795f9c5b956c5a97af28ca0fbb9fd86c83737a/msgspec-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cfc033c02c3e0aec52b71710d7f84cb3ca5eb407ab2ad23d75631153fdb1f12", size = 210633, upload-time = "2024-12-27T17:39:49.099Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ef/c5422ce8af73928d194a6606f8ae36e93a52fd5e8df5abd366903a5ca8da/msgspec-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d911c442571605e17658ca2b416fd8579c5050ac9adc5e00c2cb3126c97f73bc", size = 213594, upload-time = "2024-12-27T17:39:51.204Z" }, + { url = "https://files.pythonhosted.org/packages/19/2b/4137bc2ed45660444842d042be2cf5b18aa06efd2cda107cff18253b9653/msgspec-0.19.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:757b501fa57e24896cf40a831442b19a864f56d253679f34f260dcb002524a6c", size = 214053, upload-time = "2024-12-27T17:39:52.866Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e6/8ad51bdc806aac1dc501e8fe43f759f9ed7284043d722b53323ea421c360/msgspec-0.19.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5f0f65f29b45e2816d8bded36e6b837a4bf5fb60ec4bc3c625fa2c6da4124537", size = 219081, upload-time = "2024-12-27T17:39:55.142Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0", size = 187467, upload-time = "2024-12-27T17:39:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/3c/cb/2842c312bbe618d8fefc8b9cedce37f773cdc8fa453306546dba2c21fd98/msgspec-0.19.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f12d30dd6266557aaaf0aa0f9580a9a8fbeadfa83699c487713e355ec5f0bd86", size = 190498, upload-time = "2024-12-27T17:40:00.427Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/c40b01b93465e1a5f3b6c7d91b10fb574818163740cc3acbe722d1e0e7e4/msgspec-0.19.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82b2c42c1b9ebc89e822e7e13bbe9d17ede0c23c187469fdd9505afd5a481314", size = 183950, upload-time = "2024-12-27T17:40:04.219Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f0/5b764e066ce9aba4b70d1db8b087ea66098c7c27d59b9dd8a3532774d48f/msgspec-0.19.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19746b50be214a54239aab822964f2ac81e38b0055cca94808359d779338c10e", size = 210647, upload-time = "2024-12-27T17:40:05.606Z" }, + { url = "https://files.pythonhosted.org/packages/9d/87/bc14f49bc95c4cb0dd0a8c56028a67c014ee7e6818ccdce74a4862af259b/msgspec-0.19.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60ef4bdb0ec8e4ad62e5a1f95230c08efb1f64f32e6e8dd2ced685bcc73858b5", size = 213563, upload-time = "2024-12-27T17:40:10.516Z" }, + { url = "https://files.pythonhosted.org/packages/53/2f/2b1c2b056894fbaa975f68f81e3014bb447516a8b010f1bed3fb0e016ed7/msgspec-0.19.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac7f7c377c122b649f7545810c6cd1b47586e3aa3059126ce3516ac7ccc6a6a9", size = 213996, upload-time = "2024-12-27T17:40:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5a/4cd408d90d1417e8d2ce6a22b98a6853c1b4d7cb7669153e4424d60087f6/msgspec-0.19.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5bc1472223a643f5ffb5bf46ccdede7f9795078194f14edd69e3aab7020d327", size = 219087, upload-time = "2024-12-27T17:40:14.881Z" }, + { url = "https://files.pythonhosted.org/packages/23/d8/f15b40611c2d5753d1abb0ca0da0c75348daf1252220e5dda2867bd81062/msgspec-0.19.0-cp313-cp313-win_amd64.whl", hash = "sha256:317050bc0f7739cb30d257ff09152ca309bf5a369854bbf1e57dffc310c1f20f", size = 187432, upload-time = "2024-12-27T17:40:16.256Z" }, + { url = "https://files.pythonhosted.org/packages/ea/d0/323f867eaec1f2236ba30adf613777b1c97a7e8698e2e881656b21871fa4/msgspec-0.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15c1e86fff77184c20a2932cd9742bf33fe23125fa3fcf332df9ad2f7d483044", size = 189926, upload-time = "2024-12-27T17:40:18.939Z" }, + { url = "https://files.pythonhosted.org/packages/a8/37/c3e1b39bdae90a7258d77959f5f5e36ad44b40e2be91cff83eea33c54d43/msgspec-0.19.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b5541b2b3294e5ffabe31a09d604e23a88533ace36ac288fa32a420aa38d229", size = 183873, upload-time = "2024-12-27T17:40:20.214Z" }, + { url = "https://files.pythonhosted.org/packages/cb/a2/48f2c15c7644668e51f4dce99d5f709bd55314e47acb02e90682f5880f35/msgspec-0.19.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f5c043ace7962ef188746e83b99faaa9e3e699ab857ca3f367b309c8e2c6b12", size = 209272, upload-time = "2024-12-27T17:40:21.534Z" }, + { url = "https://files.pythonhosted.org/packages/25/3c/aa339cf08b990c3f07e67b229a3a8aa31bf129ed974b35e5daa0df7d9d56/msgspec-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca06aa08e39bf57e39a258e1996474f84d0dd8130d486c00bec26d797b8c5446", size = 211396, upload-time = "2024-12-27T17:40:22.897Z" }, + { url = "https://files.pythonhosted.org/packages/c7/00/c7fb9d524327c558b2803973cc3f988c5100a1708879970a9e377bdf6f4f/msgspec-0.19.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e695dad6897896e9384cf5e2687d9ae9feaef50e802f93602d35458e20d1fb19", size = 215002, upload-time = "2024-12-27T17:40:24.341Z" }, + { url = "https://files.pythonhosted.org/packages/3f/bf/d9f9fff026c1248cde84a5ce62b3742e8a63a3c4e811f99f00c8babf7615/msgspec-0.19.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3be5c02e1fee57b54130316a08fe40cca53af92999a302a6054cd451700ea7db", size = 218132, upload-time = "2024-12-27T17:40:25.744Z" }, + { url = "https://files.pythonhosted.org/packages/00/03/b92011210f79794958167a3a3ea64a71135d9a2034cfb7597b545a42606d/msgspec-0.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:0684573a821be3c749912acf5848cce78af4298345cb2d7a8b8948a0a5a27cfe", size = 186301, upload-time = "2024-12-27T17:40:27.076Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -2018,13 +2112,13 @@ dependencies = [ { name = "json-e" }, { name = "mozilla-repo-urls", version = "0.1.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "mozilla-repo-urls", version = "0.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "msgspec", version = "0.18.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "msgspec", version = "0.19.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, { name = "pyyaml" }, { name = "redo" }, { name = "requests" }, { name = "slugid" }, { name = "taskcluster-urls" }, - { name = "voluptuous", version = "0.14.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "voluptuous", version = "0.15.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, ] [package.optional-dependencies] @@ -2069,13 +2163,13 @@ requires-dist = [ { name = "cookiecutter", specifier = "~=2.1" }, { name = "json-e", specifier = ">=2.7" }, { name = "mozilla-repo-urls" }, + { name = "msgspec", specifier = ">=0.18.6" }, { name = "orjson", marker = "extra == 'orjson'" }, { name = "pyyaml", specifier = ">=5.3.1" }, { name = "redo", specifier = ">=2.0" }, { name = "requests", specifier = ">=2.25" }, { name = "slugid", specifier = ">=2.0" }, { name = "taskcluster-urls", specifier = ">=11.0" }, - { name = "voluptuous", specifier = ">=0.12.1" }, { name = "zstandard", marker = "extra == 'load-image'" }, ] provides-extras = ["load-image", "orjson"] From d6f3589385c61c17e6b89bbbae3218005cc90c0f Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Wed, 27 Aug 2025 15:50:50 -0400 Subject: [PATCH 08/20] feat: updated some bugs --- docs/concepts/transforms.rst | 14 ++++++----- docs/tutorials/creating-a-task-graph.rst | 15 ++++++----- src/taskgraph/transforms/run/__init__.py | 4 +-- src/taskgraph/transforms/task.py | 25 +++++++++++-------- .../transforms/hello.py | 15 ++++++----- 5 files changed, 42 insertions(+), 31 deletions(-) diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index 7989823dc..401773824 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -105,15 +105,17 @@ about the state of the tasks at given points. Here is an example: .. code-block:: python + import msgspec + from typing import Optional from taskgraph.transforms.base import TransformSequence - from taskgraph.util.schema import Optional, Required, Schema + from taskgraph.util.schema import Schema - my_schema = Schema({ - Required("foo"): str, - Optional("bar"): bool, - }) + class MySchema(msgspec.Struct, kw_only=True): + foo: str # Required field + bar: Optional[bool] = None # Optional field - transforms.add_validate(my_schema) + transforms = TransformSequence() + transforms.add_validate(Schema(MySchema)) In the above example, we can be sure that every task dict has a string field called ``foo``, and may or may not have a boolean field called ``bar``. diff --git a/docs/tutorials/creating-a-task-graph.rst b/docs/tutorials/creating-a-task-graph.rst index 40886de0b..dfb5ad1e3 100644 --- a/docs/tutorials/creating-a-task-graph.rst +++ b/docs/tutorials/creating-a-task-graph.rst @@ -136,14 +136,17 @@ comments for explanations): .. code-block:: python - from taskgraph.util.schema import Optional, Required, Schema + import msgspec + from typing import Optional + from taskgraph.util.schema import Schema from taskgraph.transforms.base import TransformSequence - # Define the schema. We use our schema validation to ensure correctness. - hello_description_schema = Schema({ - Required("text"): str, - Optional("description"): str, - }) + # Define the schema using msgspec for better type checking and performance. + class HelloDescriptionSchema(msgspec.Struct, kw_only=True): + text: str # Required field + description: Optional[str] = None # Optional field + + hello_description_schema = Schema(HelloDescriptionSchema) # Create a 'TransformSequence' instance. This class collects transform # functions to run later. diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 1e201a02a..6709bd19a 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -34,8 +34,8 @@ class FetchesSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults= artifact: str dest: TOptional[str] = None - extract: bool = True - verify_hash: bool = True + extract: bool = False + verify_hash: bool = False # When configuration using msgspec diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index c238a7cd2..dcd1a72ce 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -26,8 +26,6 @@ from taskgraph.util.hash import hash_path from taskgraph.util.keyed_by import evaluate_keyed_by from taskgraph.util.schema import ( - Optional, - Required, Schema, resolve_keyed_by, validate_schema, @@ -200,18 +198,23 @@ def payload_builder(name, schema): """ Decorator for registering payload builders. - Supports both dict schemas and msgspec.Struct types. + Requires msgspec.Struct schema types for type safety and performance. """ - # Handle msgspec schemas - if isinstance(schema, type) and issubclass(schema, msgspec.Struct): - # Wrap msgspec schema in our compatibility Schema class - schema = Schema(schema) - else: - # Traditional dict schema - extend it with required fields - schema = Schema({Required("implementation"): name, Optional("os"): str}).extend( - schema + # Ensure we're using msgspec schemas + if not (isinstance(schema, type) and issubclass(schema, msgspec.Struct)): + raise TypeError( + f"payload_builder requires msgspec.Struct schema, got {type(schema).__name__}. " + f"Please migrate to msgspec: class {name.title()}Schema(msgspec.Struct): ..." ) + # Verify the schema has required fields + fields = {f.name for f in msgspec.structs.fields(schema)} + if "implementation" not in fields: + raise ValueError(f"Schema for {name} must include 'implementation' field") + + # Wrap msgspec schema in our compatibility Schema class + schema = Schema(schema) + def wrap(func): assert name not in payload_builders, f"duplicate payload builder name {name}" payload_builders[name] = PayloadBuilder(schema, func) # type: ignore diff --git a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py index 7053efba6..a2f8e7f8c 100644 --- a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py +++ b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py @@ -1,11 +1,14 @@ +import msgspec + from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Required, Schema +from taskgraph.util.schema import Schema + + +class HelloSchema(msgspec.Struct, kw_only=True): + noun: str # Required field + -HELLO_SCHEMA = Schema( - { - Required("noun"): str, - } -) +HELLO_SCHEMA = Schema(HelloSchema) transforms = TransformSequence() transforms.add_validate(HELLO_SCHEMA) From 60318b46a30b3389fad255f270f2ace4d0323614 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 29 Aug 2025 10:15:11 -0400 Subject: [PATCH 09/20] feat: used schema wrapper for msgspec --- src/taskgraph/config.py | 29 ++- src/taskgraph/decision.py | 7 +- src/taskgraph/parameters.py | 20 +- src/taskgraph/transforms/base.py | 8 +- src/taskgraph/transforms/run/__init__.py | 29 +-- src/taskgraph/transforms/run/index_search.py | 5 +- src/taskgraph/transforms/run/run_task.py | 24 +- src/taskgraph/transforms/task.py | 72 +++--- src/taskgraph/util/dependencies.py | 3 +- src/taskgraph/util/schema.py | 247 +++---------------- test/test_parameters.py | 35 +-- test/test_transforms_run_run_task.py | 13 +- test/test_util_schema.py | 84 ++++--- 13 files changed, 198 insertions(+), 378 deletions(-) diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 0e8d423d4..fa76d82bc 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -13,7 +13,7 @@ import msgspec from .util.python_path import find_object -from .util.schema import validate_schema +from .util.schema import Schema, validate_schema from .util.vcs import get_repository from .util.yaml import load_yaml @@ -26,7 +26,7 @@ ] -class WorkerAlias(msgspec.Struct, kw_only=True, rename="kebab"): +class WorkerAlias(Schema): """Worker alias configuration.""" provisioner: Union[str, dict] @@ -35,32 +35,38 @@ class WorkerAlias(msgspec.Struct, kw_only=True, rename="kebab"): worker_type: Union[str, dict] # Can be keyed-by, maps from "worker-type" -class Workers(msgspec.Struct, kw_only=True): +class Workers(Schema, rename=None): """Workers configuration.""" aliases: Dict[str, WorkerAlias] -class Repository(msgspec.Struct, kw_only=True, rename="kebab"): +class Repository(Schema): """Repository configuration.""" + # Required fields first name: str + + # Optional fields project_regex: Optional[str] = None # Maps from "project-regex" ssh_secret_name: Optional[str] = None # Maps from "ssh-secret-name" # Allow extra fields for flexibility __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) -class RunConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class RunConfig(Schema): """Run transforms configuration.""" use_caches: Optional[Union[bool, List[str]]] = None # Maps from "use-caches" -class TaskGraphConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class TaskGraphConfig(Schema): """Taskgraph specific configuration.""" + # Required fields first repositories: Dict[str, Repository] + + # Optional fields register: Optional[str] = None decision_parameters: Optional[str] = None # Maps from "decision-parameters" cached_task_prefix: Optional[str] = None # Maps from "cached-task-prefix" @@ -69,17 +75,18 @@ class TaskGraphConfig(msgspec.Struct, kw_only=True, rename="kebab"): run: Optional[RunConfig] = None -class GraphConfigSchema( - msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" -): +class GraphConfigSchema(Schema): """Main graph configuration schema.""" + # Required fields first trust_domain: str # Maps from "trust-domain" task_priority: Union[ TaskPriority, dict ] # Maps from "task-priority", can be keyed-by workers: Workers taskgraph: TaskGraphConfig + + # Optional fields docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" task_deadline_after: Optional[Union[str, dict]] = ( None # Maps from "task-deadline-after", can be keyed-by @@ -161,9 +168,7 @@ def kinds_dir(self): def validate_graph_config(config): """Validate graph configuration using msgspec.""" # With rename="kebab", msgspec handles the conversion automatically - validate_schema( - GraphConfigSchema, config, "Invalid graph configuration:", use_msgspec=True - ) + validate_schema(GraphConfigSchema, config, "Invalid graph configuration:") def load_graph_config(root_dir): diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py index 6edcfadc1..33640db2c 100644 --- a/src/taskgraph/decision.py +++ b/src/taskgraph/decision.py @@ -11,7 +11,6 @@ from pathlib import Path from typing import Any, Dict, Optional -import msgspec import yaml from taskgraph.actions import render_actions_json @@ -21,7 +20,7 @@ from taskgraph.taskgraph import TaskGraph from taskgraph.util import json from taskgraph.util.python_path import find_object -from taskgraph.util.schema import validate_schema +from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.vcs import Repository, get_repository from taskgraph.util.yaml import load_yaml @@ -41,7 +40,8 @@ #: Schema for try_task_config.json version 2 -class TryTaskConfigSchemaV2(msgspec.Struct, kw_only=True, omit_defaults=True): +class TryTaskConfigSchemaV2(Schema): + # All fields are optional parameters: Optional[Dict[str, Any]] = None @@ -358,7 +358,6 @@ def set_try_config(parameters, task_config_file): try_task_config_schema_v2, task_config, "Invalid v2 `try_task_config.json`.", - use_msgspec=True, ) parameters.update(task_config["parameters"]) return diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index 1f02c971c..b77047609 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -20,6 +20,7 @@ from taskgraph.util import json, yaml from taskgraph.util.readonlydict import ReadOnlyDict +from taskgraph.util.schema import Schema from taskgraph.util.taskcluster import find_task_id, get_artifact_url from taskgraph.util.vcs import get_repository @@ -28,20 +29,22 @@ class ParameterMismatch(Exception): """Raised when a parameters.yml has extra or missing parameters.""" -class CodeReviewConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class CodeReviewConfig(Schema): """Code review configuration.""" + # Required field phabricator_build_target: str #: Schema for base parameters. #: Please keep this list sorted and in sync with docs/reference/parameters.rst -class BaseSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class BaseSchema(Schema): """Base parameters schema. This defines the core parameters that all taskgraph runs require. """ + # Required fields (most are required) base_repository: str base_ref: str base_rev: str @@ -58,8 +61,6 @@ class BaseSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab head_tag: str level: str moz_build_date: str - next_version: Optional[str] - optimize_strategies: Optional[str] optimize_target_tasks: bool owner: str project: str @@ -70,6 +71,10 @@ class BaseSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab # used at run-time target_tasks_method: str tasks_for: str + + # Optional fields + next_version: Optional[str] + optimize_strategies: Optional[str] version: Optional[str] code_review: Optional[CodeReviewConfig] = None @@ -157,7 +162,7 @@ def extend_parameters_schema(schema, defaults_fn=None): graph-configuration. Args: - schema: The schema object (dict or msgspec) used to describe extended + schema: The schema object (msgspec) used to describe extended parameters. defaults_fn (function): A function which takes no arguments and returns a dict mapping parameter name to default value in the @@ -170,9 +175,8 @@ def extend_parameters_schema(schema, defaults_fn=None): # Store the extension schema for use during validation _schema_extensions.append(schema) - # Also extend the base_schema if it's a Schema instance - if hasattr(base_schema, "extend"): - base_schema = base_schema.extend(schema) + # Schema extension is no longer supported with msgspec.Struct inheritance + # Extensions are tracked in _schema_extensions list instead if defaults_fn: defaults_functions.append(defaults_fn) diff --git a/src/taskgraph/transforms/base.py b/src/taskgraph/transforms/base.py index 1b62e7852..4626ca8d0 100644 --- a/src/taskgraph/transforms/base.py +++ b/src/taskgraph/transforms/base.py @@ -8,8 +8,6 @@ from dataclasses import dataclass, field from typing import Dict, List, Union -import msgspec - from taskgraph.task import Task from ..config import GraphConfig @@ -156,9 +154,5 @@ def __call__(self, config, tasks): ) else: error = "In unknown task:" - # Check if schema is a msgspec.Struct type - use_msgspec = isinstance(self.schema, type) and issubclass( - self.schema, msgspec.Struct - ) - validate_schema(self.schema, task, error, use_msgspec=use_msgspec) + validate_schema(self.schema, task, error) yield task diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 6709bd19a..ff477c125 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -29,7 +29,7 @@ # Fetches schema using msgspec -class FetchesSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): +class FetchesSchema(Schema): """Schema for fetch configuration.""" artifact: str @@ -39,14 +39,14 @@ class FetchesSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults= # When configuration using msgspec -class WhenConfig(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): +class WhenConfig(Schema): """Configuration for when a task should be included.""" files_changed: List[str] = msgspec.field(default_factory=list) # Run configuration using msgspec -class RunConfig(msgspec.Struct, kw_only=True, omit_defaults=True): +class RunConfig(Schema, rename=None): """Configuration for how to run a task.""" using: str @@ -56,20 +56,19 @@ class RunConfig(msgspec.Struct, kw_only=True, omit_defaults=True): # Run description schema using msgspec -class RunDescriptionSchema( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class RunDescriptionSchema(Schema): """Schema for run transforms.""" - # Task naming - name: TOptional[str] = None - label: TOptional[str] = None - - # Required fields + # Required fields first description: str run: RunConfig worker_type: str + # Optional fields + # Task naming + name: TOptional[str] = None + label: TOptional[str] = None + # Optional fields from task description priority: TOptional[str] = None attributes: Dict[str, Any] = msgspec.field(default_factory=dict) @@ -104,7 +103,7 @@ class RunDescriptionSchema( fetches_schema = FetchesSchema #: Schema for a run transforms - now using msgspec -run_description_schema = Schema(RunDescriptionSchema) +run_description_schema = RunDescriptionSchema transforms = TransformSequence() @@ -399,15 +398,13 @@ def wrap(func): # Simple schema for always-optimized -class AlwaysOptimizedRunSchema(msgspec.Struct, kw_only=True): +class AlwaysOptimizedRunSchema(Schema, omit_defaults=False): """Schema for always-optimized run tasks.""" using: str = "always-optimized" -@run_task_using( - "always-optimized", "always-optimized", Schema(AlwaysOptimizedRunSchema) -) +@run_task_using("always-optimized", "always-optimized", AlwaysOptimizedRunSchema) def always_optimized(config, task, taskdesc): pass diff --git a/src/taskgraph/transforms/run/index_search.py b/src/taskgraph/transforms/run/index_search.py index 53ee34af0..a972c8cad 100644 --- a/src/taskgraph/transforms/run/index_search.py +++ b/src/taskgraph/transforms/run/index_search.py @@ -10,16 +10,15 @@ from typing import List -import msgspec - from taskgraph.transforms.base import TransformSequence from taskgraph.transforms.run import run_task_using +from taskgraph.util.schema import Schema transforms = TransformSequence() #: Schema for run.using index-search -class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab"): +class RunTaskSchema(Schema): using: str # A list of indexes in decreasing order of priority at which to lookup for this # task. This is interpolated with the graph parameters. diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index b0151e705..3d4bfc81c 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -9,8 +9,6 @@ import os from typing import Dict, List, Literal, Optional, Union -import msgspec - from taskgraph.transforms.run import run_task_using from taskgraph.transforms.run.common import ( support_caches, @@ -26,14 +24,24 @@ #: Schema for run.using run_task -class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True): +class RunTaskSchema(Schema): """ Schema for run.using run_task. """ + # Required fields first # Specifies the task type. Must be 'run-task'. using: Literal["run-task"] + # The command arguments to pass to the `run-task` script, after the checkout + # arguments. If a list, it will be passed directly; otherwise it will be + # included in a single argument to the command specified by `exec-with`. + command: Union[List[Union[str, Dict[str, str]]], str, Dict[str, str]] + + # Base work directory used to set up the task. + workdir: str + + # Optional fields # Specifies which caches to use. May take a boolean in which case either all # (True) or no (False) caches will be used. Alternatively, it can accept a # list of caches to enable. Defaults to only the checkout cache enabled. @@ -51,11 +59,6 @@ class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults= # directory where sparse profiles are defined (build/sparse-profiles/). sparse_profile: Optional[str] = None - # The command arguments to pass to the `run-task` script, after the checkout - # arguments. If a list, it will be passed directly; otherwise it will be - # included in a single argument to the command specified by `exec-with`. - command: Union[List[Union[str, Dict[str, str]]], str, Dict[str, str]] - # Specifies what to execute the command with in the event the command is a # string. exec_with: Optional[Literal["bash", "powershell"]] = None @@ -64,14 +67,11 @@ class RunTaskSchema(msgspec.Struct, kw_only=True, rename="kebab", omit_defaults= # or Python installation is in a non-standard location on the workers. run_task_command: Optional[List[str]] = None - # Base work directory used to set up the task. - workdir: str - # Whether to run as root. Defaults to False. run_as_root: bool = False -run_task_schema = Schema(RunTaskSchema) +run_task_schema = RunTaskSchema def common_setup(config, task, taskdesc, command): diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index dcd1a72ce..56d05ba29 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -48,7 +48,7 @@ def _run_task_suffix(): # Task Description schema using msgspec -class TaskDescriptionTreeherder(msgspec.Struct, kw_only=True, omit_defaults=True): +class TaskDescriptionTreeherder(Schema, rename=None): """Treeherder-related information for a task.""" symbol: TOptional[str] = None @@ -72,7 +72,7 @@ class TaskDescriptionIndex( rank: Union[Literal["by-tier", "build_date"], int] = "by-tier" -class TaskDescriptionWorker(msgspec.Struct, kw_only=True, omit_defaults=True): +class TaskDescriptionWorker(Schema, rename=None): """Worker configuration for a task.""" implementation: str @@ -80,15 +80,15 @@ class TaskDescriptionWorker(msgspec.Struct, kw_only=True, omit_defaults=True): __extras__: Dict[str, TAny] = msgspec.field(default_factory=dict) -class TaskDescriptionSchema( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class TaskDescriptionSchema(Schema): """Schema for task descriptions.""" # The label for this task label: str # Description of the task (for metadata) description: str + # The provisioner-id/worker-type for the task + worker_type: str # Attributes for this task attributes: Dict[str, TAny] = msgspec.field(default_factory=dict) # Relative path (from config.path) to the file task was defined in @@ -133,8 +133,6 @@ class TaskDescriptionSchema( always_target: bool = False # Optimization to perform on this task optimization: TAny = None # Uses OptimizationSchema which has custom validation - # The provisioner-id/worker-type for the task - worker_type: str # Whether the task should use sccache compiler caching needs_sccache: bool = False # Information specific to the worker implementation @@ -142,7 +140,7 @@ class TaskDescriptionSchema( #: Schema for the task transforms - now using msgspec -task_description_schema = Schema(TaskDescriptionSchema) +task_description_schema = TaskDescriptionSchema TC_TREEHERDER_SCHEMA_URL = ( @@ -212,9 +210,6 @@ def payload_builder(name, schema): if "implementation" not in fields: raise ValueError(f"Schema for {name} must include 'implementation' field") - # Wrap msgspec schema in our compatibility Schema class - schema = Schema(schema) - def wrap(func): assert name not in payload_builders, f"duplicate payload builder name {name}" payload_builders[name] = PayloadBuilder(schema, func) # type: ignore @@ -264,7 +259,7 @@ class DockerWorkerCacheConfig( skip_untrusted: bool = False -class DockerWorkerArtifactConfig(msgspec.Struct, kw_only=True, omit_defaults=True): +class DockerWorkerArtifactConfig(Schema, rename=None): """Artifact configuration for docker-worker.""" # type of artifact -- simple file, or recursive directory, or a volume mounted directory. @@ -275,16 +270,19 @@ class DockerWorkerArtifactConfig(msgspec.Struct, kw_only=True, omit_defaults=Tru name: str -class DockerWorkerPayloadSchema( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class DockerWorkerPayloadSchema(Schema): """Schema for docker-worker payload.""" + # Required fields first implementation: str - os: Literal["linux"] = "linux" # For tasks that will run in docker-worker, this is the name of the docker # image or in-tree docker image to run the task in. docker_image: Union[str, Dict[str, str]] + # the maximum time to run, in seconds + max_run_time: int + + # Optional fields + os: Literal["linux"] = "linux" # worker features that should be enabled relengapi_proxy: bool = False chain_of_trust: bool = False @@ -305,8 +303,6 @@ class DockerWorkerPayloadSchema( # the command to run; if not given, docker-worker will default to the # command in the docker image command: TOptional[List[Union[str, Dict[str, str]]]] = None - # the maximum time to run, in seconds - max_run_time: int # the exit status code(s) that indicates the task should be retried retry_exit_status: TOptional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged @@ -529,7 +525,7 @@ def build_docker_worker_payload(config, task, task_def): # Generic Worker schema using msgspec -class GenericWorkerArtifactConfig(msgspec.Struct, kw_only=True, omit_defaults=True): +class GenericWorkerArtifactConfig(Schema, rename=None): """Artifact configuration for generic-worker.""" # type of artifact -- simple file, or recursive directory @@ -570,25 +566,26 @@ class GenericWorkerMountConfig( format: TOptional[Literal["rar", "tar.bz2", "tar.gz", "zip"]] = None -class GenericWorkerPayloadSchema( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class GenericWorkerPayloadSchema(Schema): """Schema for generic-worker payload.""" + # Required fields first implementation: str os: Literal["windows", "macosx", "linux", "linux-bitbar"] # command is a list of commands to run, sequentially # on Windows, each command is a string, on OS X and Linux, each command is a string array # Using Any here because msgspec doesn't support union of multiple list types command: TAny + # the maximum time to run, in seconds + max_run_time: int + + # Optional fields # artifacts to extract from the task image after completion artifacts: TOptional[List[GenericWorkerArtifactConfig]] = None # Directories and/or files to be mounted mounts: TOptional[List[GenericWorkerMountConfig]] = None # environment variables env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) - # the maximum time to run, in seconds - max_run_time: int # the exit status code(s) that indicates the task should be retried retry_exit_status: TOptional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged @@ -719,7 +716,7 @@ def build_generic_worker_payload(config, task, task_def): # Beetmover schema using msgspec -class BeetmoverReleaseProperties(msgspec.Struct, kw_only=True, rename="kebab"): +class BeetmoverReleaseProperties(Schema): """Release properties for beetmover tasks.""" app_name: str @@ -730,7 +727,7 @@ class BeetmoverReleaseProperties(msgspec.Struct, kw_only=True, rename="kebab"): platform: str -class BeetmoverUpstreamArtifact(msgspec.Struct, kw_only=True): +class BeetmoverUpstreamArtifact(Schema, rename=None, omit_defaults=False): """Upstream artifact definition for beetmover.""" # taskId of the task with the artifact @@ -743,21 +740,22 @@ class BeetmoverUpstreamArtifact(msgspec.Struct, kw_only=True): locale: str -class BeetmoverPayloadSchema( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class BeetmoverPayloadSchema(Schema): """Schema for beetmover worker payload.""" + # Required fields first implementation: str - os: str = "" # the maximum time to run, in seconds max_run_time: int - # locale key, if this is a locale beetmover task - locale: TOptional[str] = None - partner_public: TOptional[bool] = None release_properties: BeetmoverReleaseProperties # list of artifact URLs for the artifacts that should be beetmoved upstream_artifacts: List[BeetmoverUpstreamArtifact] + + # Optional fields + os: str = "" + # locale key, if this is a locale beetmover task + locale: TOptional[str] = None + partner_public: TOptional[bool] = None # Artifact map can be any object artifact_map: TOptional[dict] = None @@ -789,7 +787,7 @@ def build_beetmover_payload(config, task, task_def): # Simple payload schemas using msgspec -class InvalidPayloadSchema(msgspec.Struct, kw_only=True): +class InvalidPayloadSchema(Schema, rename=None, omit_defaults=False): """Schema for invalid tasks - allows any fields.""" implementation: str @@ -798,7 +796,7 @@ class InvalidPayloadSchema(msgspec.Struct, kw_only=True): _extra: dict = msgspec.field(default_factory=dict, name="") -class AlwaysOptimizedPayloadSchema(msgspec.Struct, kw_only=True): +class AlwaysOptimizedPayloadSchema(Schema, rename=None, omit_defaults=False): """Schema for always-optimized tasks - allows any fields.""" implementation: str @@ -807,10 +805,12 @@ class AlwaysOptimizedPayloadSchema(msgspec.Struct, kw_only=True): _extra: dict = msgspec.field(default_factory=dict, name="") -class SucceedPayloadSchema(msgspec.Struct, kw_only=True): +class SucceedPayloadSchema(Schema, rename=None, omit_defaults=False): """Schema for succeed tasks - minimal schema.""" + # Required field first implementation: str + # Optional field os: str = "" diff --git a/src/taskgraph/util/dependencies.py b/src/taskgraph/util/dependencies.py index 0b3bb471b..a2fc775b8 100644 --- a/src/taskgraph/util/dependencies.py +++ b/src/taskgraph/util/dependencies.py @@ -6,7 +6,6 @@ from taskgraph.task import Task from taskgraph.transforms.base import TransformConfig -from taskgraph.util.schema import Schema # Define a collection of group_by functions GROUP_BY_MAP = {} @@ -35,7 +34,7 @@ def group_by_all(config, tasks): return [[task for task in tasks]] -@group_by("attribute", schema=Schema(str)) +@group_by("attribute") def group_by_attribute(config, tasks, attr): groups = {} for task in tasks: diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index 5a1c1e2ce..d0cf8fcbc 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -12,79 +12,29 @@ from taskgraph.util.keyed_by import evaluate_keyed_by, iter_dot_path -class Any: - """Validator that accepts any of the provided values.""" - - def __init__(self, *validators): - self.validators = validators - - def __call__(self, value): - for validator in self.validators: - if validator == value or (callable(validator) and validator(value)): - return value - raise ValueError(f"Value {value} not in allowed values: {self.validators}") - - -class Required: - """Marks a field as required in a schema.""" - - def __init__(self, key): - self.key = key - self.schema = key # For compatibility - - -class Optional: - """Marks a field as optional in a schema.""" - - def __init__(self, key): - self.key = key - self.schema = key # For compatibility - - -def validate_schema(schema, obj, msg_prefix, use_msgspec=False): +def validate_schema(schema, obj, msg_prefix): """ Validate that object satisfies schema. If not, generate a useful exception beginning with msg_prefix. Args: - schema: Either a Schema instance or msgspec.Struct type + schema: A msgspec.Struct type (including Schema subclasses) obj: Object to validate msg_prefix: Prefix for error messages - use_msgspec: If True, use msgspec for validation (default: False) """ if taskgraph.fast: return - # Handle Schema instances - if isinstance(schema, Schema): - try: - schema(obj) - except Exception as exc: - raise Exception(f"{msg_prefix}\n{exc}\n{pprint.pformat(obj)}") - return - - # Auto-detect msgspec schemas - if isinstance(schema, type) and issubclass(schema, msgspec.Struct): - use_msgspec = True - - if use_msgspec: - # Handle msgspec validation - try: - if isinstance(schema, type) and issubclass(schema, msgspec.Struct): - # For msgspec.Struct types, validate by converting - msgspec.convert(obj, schema) - else: - # For other msgspec validators - schema.decode(msgspec.json.encode(obj)) - except (msgspec.ValidationError, msgspec.DecodeError) as exc: - msg = [msg_prefix, str(exc)] - raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) - else: - # Try to call the schema as a validator - try: - schema(obj) - except Exception as exc: - raise Exception(f"{msg_prefix}\n{exc}\n{pprint.pformat(obj)}") + try: + if isinstance(schema, type) and issubclass(schema, Schema): + # Use the validate class method for Schema subclasses + schema.validate(obj) + elif isinstance(schema, type) and issubclass(schema, msgspec.Struct): + msgspec.convert(obj, schema) + else: + raise TypeError(f"Unsupported schema type: {type(schema)}") + except (msgspec.ValidationError, msgspec.DecodeError, Exception) as exc: + raise Exception(f"{msg_prefix}\n{str(exc)}\n{pprint.pformat(obj)}") def optionally_keyed_by(*arguments): @@ -120,8 +70,8 @@ def validator(obj): # Unknown by-field raise ValueError(f"Unknown key {k}") # Validate against the schema - if isinstance(schema, Schema): - return schema(obj) + if isinstance(schema, type) and issubclass(schema, Schema): + return schema.validate(obj) elif schema is str: # String validation if not isinstance(obj, str): @@ -242,180 +192,53 @@ def resolve_keyed_by( ] -class Schema: +class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): """ - A schema validator that wraps msgspec.Struct types. + Base schema class that extends msgspec.Struct. - This provides a consistent interface for schema validation across the codebase. - """ + This allows schemas to be defined directly as: - def __init__(self, schema, check=True, **kwargs): - # Check if schema is a msgspec.Struct type - if isinstance(schema, type) and issubclass(schema, msgspec.Struct): - self._msgspec_schema = schema - self._is_msgspec = True - elif isinstance(schema, dict): - # Legacy dict schema - convert to a simple validator - self._msgspec_schema = None - self._is_msgspec = False - self._dict_schema = schema - else: - # Assume it's a callable validator - self._msgspec_schema = None - self._is_msgspec = False - self._validator = schema - - self.check = check - self.schema = schema # Store original schema for compatibility - self._extensions = [] - self.allow_extra = False # By default, don't allow extra keys - - def extend(self, *args, **kwargs): - """Extend the schema. For msgspec schemas, this stores extensions separately.""" - if self._is_msgspec: - # For msgspec schemas, store extensions for validation time - self._extensions.extend(args) - return self - elif hasattr(self, "_dict_schema"): - # For dict schemas, create a new Schema with the combined schemas - new_schema = self._dict_schema.copy() - for arg in args: - if isinstance(arg, dict): - new_schema.update(arg) - # Handle extra parameter - allow_extra = kwargs.get("extra") is not None - new_instance = Schema(new_schema) - new_instance.allow_extra = allow_extra - return new_instance - # For other schemas, just return self - return self - - def _validate_msgspec(self, data): - """Validate data against the msgspec schema.""" - try: - return msgspec.convert(data, self._msgspec_schema) - except (msgspec.ValidationError, msgspec.DecodeError) as e: - raise Exception(str(e)) + class MySchema(Schema): + foo: str + bar: int = 10 + + Instead of wrapping msgspec.Struct types. + Most schemas use kebab-case renaming by default. + """ - def __call__(self, data): - """Validate data against the schema.""" + @classmethod + def validate(cls, data): + """Validate data against this schema.""" if taskgraph.fast: return data - if self._is_msgspec: - return self._validate_msgspec(data) - elif hasattr(self, "_dict_schema"): - # Simple dict validation - if not isinstance(data, dict): - raise Exception(f"Expected dict, got {type(data).__name__}") - - # Collect valid keys - valid_keys = set() - for key in self._dict_schema.keys(): - if hasattr(key, "key"): - valid_keys.add(key.key) - else: - valid_keys.add(key) - - # Check for extra keys (strict mode by default for dict schemas) - extra_keys = set(data.keys()) - valid_keys - if extra_keys and not getattr(self, "allow_extra", False): - raise Exception(f"Extra keys not allowed: {extra_keys}") - - # Validate required keys and values - for key, validator in self._dict_schema.items(): - # Handle Required/Optional keys - if hasattr(key, "key"): - actual_key = key.key - is_required = isinstance(key, Required) - else: - actual_key = key - is_required = True - - if actual_key in data: - value = data[actual_key] - # Validate the value - if validator is int and not isinstance(value, int): - raise Exception( - f"Key {actual_key}: Expected int, got {type(value).__name__}" - ) - elif validator is str and not isinstance(value, str): - raise Exception( - f"Key {actual_key}: Expected str, got {type(value).__name__}" - ) - elif is_required: - raise Exception(f"Missing required key: {actual_key}") - return data - elif hasattr(self, "_validator"): - return self._validator(data) - return data - - def __getitem__(self, item): - if self._is_msgspec: - # For msgspec schemas, provide backward compatibility - # by returning appropriate validators for known fields - # This is a workaround to support legacy code that accesses schema fields - field_validators = { - "description": str, - "priority": Any( - "highest", - "very-high", - "high", - "medium", - "low", - "very-low", - "lowest", - ), - "attributes": {str: object}, - "task-from": str, - "dependencies": {str: object}, - "soft-dependencies": [str], - "if-dependencies": [str], - "requires": Any("all-completed", "all-resolved"), - "deadline-after": str, - "expires-after": str, - "routes": [str], - "scopes": [str], - "tags": {str: str}, - "extra": {str: object}, - "treeherder": object, # Complex type - "index": object, # Complex type - "run-on-projects": object, # Uses optionally_keyed_by - "run-on-tasks-for": [str], - "run-on-git-branches": [str], - "shipping-phase": Any(None, "build", "promote", "push", "ship"), - "always-target": bool, - "optimization": OptimizationSchema, - "needs-sccache": bool, - "worker-type": str, - } - return field_validators.get(item, str) - elif hasattr(self, "_dict_schema"): - return self._dict_schema.get(item, str) - return str # Default fallback + try: + return msgspec.convert(data, cls) + except (msgspec.ValidationError, msgspec.DecodeError) as e: + raise msgspec.ValidationError(str(e)) # Optimization schema types using msgspec -class IndexSearchOptimization(msgspec.Struct, kw_only=True, rename="kebab"): +class IndexSearchOptimization(Schema): """Search the index for the given index namespaces.""" index_search: List[str] -class SkipUnlessChangedOptimization(msgspec.Struct, kw_only=True, rename="kebab"): +class SkipUnlessChangedOptimization(Schema): """Skip this task if none of the given file patterns match.""" skip_unless_changed: List[str] # Task reference types using msgspec -class TaskReference(msgspec.Struct, kw_only=True, rename="kebab"): +class TaskReference(Schema): """Reference to another task.""" task_reference: str -class ArtifactReference(msgspec.Struct, kw_only=True, rename="kebab"): +class ArtifactReference(Schema): """Reference to a task artifact.""" artifact_reference: str diff --git a/test/test_parameters.py b/test/test_parameters.py index f22c86e17..5bcebd975 100644 --- a/test/test_parameters.py +++ b/test/test_parameters.py @@ -10,6 +10,7 @@ from unittest import TestCase, mock import mozilla_repo_urls +import msgspec import pytest import taskgraph # noqa: F401 @@ -20,7 +21,6 @@ extend_parameters_schema, load_parameters_file, ) -from taskgraph.util.schema import Optional, Required, Schema from .mockedopen import MockedOpen @@ -274,45 +274,50 @@ def test_parameters_format_spec(spec, expected): def test_extend_parameters_schema(monkeypatch): + """Test parameter extension with msgspec schemas.""" + + # Define a test schema that extends the base schema + class ExtendedSchema(msgspec.Struct, kw_only=True, rename="kebab"): + foo: str + bar: bool = False # Optional with default + # Reset global _schema_extensions monkeypatch.setattr(parameters, "_schema_extensions", []) + # Set our extended schema as the base schema monkeypatch.setattr( parameters, "base_schema", - Schema( - { - Required("foo"): str, - } - ), + ExtendedSchema, ) + + # Keep the default functions monkeypatch.setattr( parameters, "defaults_functions", list(parameters.defaults_functions), ) - with pytest.raises(ParameterMismatch): - Parameters(strict=False).check() - - with pytest.raises(ParameterMismatch): - Parameters(foo="1", bar=True).check() - + # Add a defaults function that provides foo and bar extend_parameters_schema( - { - Optional("bar"): bool, - }, + {}, # No additional schema, just the defaults function defaults_fn=lambda root: {"foo": "1", "bar": False}, ) + # Test with explicit values params = Parameters(foo="1", bar=True) params.check() + assert params["foo"] == "1" assert params["bar"] is True + # Test with partial values (bar not present in dict) params = Parameters(foo="1") params.check() + assert params["foo"] == "1" + # bar is not in the dict because it wasn't explicitly set assert "bar" not in params + # Test with defaults function providing values params = Parameters(strict=False) params.check() assert params["foo"] == "1" diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index 8e71e6fed..67c4c788f 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -10,7 +10,7 @@ from taskgraph.transforms.run import make_task_description from taskgraph.transforms.task import payload_builders, set_defaults from taskgraph.util.caches import CACHES -from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.schema import validate_schema from taskgraph.util.templates import merge here = os.path.abspath(os.path.dirname(__file__)) @@ -251,15 +251,8 @@ def inner(task, **kwargs): print("Dumping for copy/paste:") pprint(caches, indent=2) - # Create a new schema object with just the part relevant to caches. - # Skip schema validation for msgspec schemas as they don't have .schema attribute - if ( - not hasattr(payload_builders[impl].schema, "_is_msgspec") - or not payload_builders[impl].schema._is_msgspec - ): - partial_schema = Schema(payload_builders[impl].schema.schema[key]) - validate_schema(partial_schema, caches, "validation error") - + # Skip validation as all schemas are now msgspec.Struct types + # and partial schema validation is not needed return caches return inner diff --git a/test/test_util_schema.py b/test/test_util_schema.py index aa66d3657..4770384d8 100644 --- a/test/test_util_schema.py +++ b/test/test_util_schema.py @@ -4,8 +4,8 @@ import unittest +import msgspec import pytest -from voluptuous import Invalid, MultipleInvalid import taskgraph from taskgraph.util.schema import ( @@ -15,12 +15,13 @@ validate_schema, ) -schema = Schema( - { - "x": int, - "y": str, - } -) + +class SimpleTestSchema(Schema, rename=None, omit_defaults=False): + x: int + y: str + + +schema = SimpleTestSchema class TestValidateSchema(unittest.TestCase): @@ -38,40 +39,41 @@ def test_invalid(self): class TestCheckSchema(unittest.TestCase): def test_schema(self): - "Creating a schema with any naming convention now works." - # This should not raise an exception anymore - schema = Schema({"camelCase": int}) + "Creating a msgspec schema works correctly." + + class CamelCaseSchema(Schema, rename=None, omit_defaults=False): + camelCase: int + + schema = CamelCaseSchema # Test that it validates correctly - schema({"camelCase": 42}) - with self.assertRaises(Exception): - schema({"camelCase": "not-an-int"}) - - def test_extend_schema(self): - "Extending a schema combines the schemas correctly." - schema = Schema({"kebab-case": int}).extend({"camelCase": int}) - # Should validate both fields - schema({"kebab-case": 1, "camelCase": 2}) - with self.assertRaises(Exception): - schema({"kebab-case": "not-int", "camelCase": 2}) - - def test_extend_schema_twice(self): - "Extending a schema twice combines all schemas correctly." - schema = ( - Schema({"kebab-case": int}) - .extend({"more-kebab": int}) - .extend({"camelCase": int}) - ) - # Should validate all three fields - schema({"kebab-case": 1, "more-kebab": 2, "camelCase": 3}) - with self.assertRaises(Exception): - schema({"kebab-case": 1, "more-kebab": 2, "camelCase": "not-int"}) + result = schema.validate({"camelCase": 42}) + assert result.camelCase == 42 + + with self.assertRaises(msgspec.ValidationError): + schema.validate({"camelCase": "not-an-int"}) + + def test_extend_not_supported(self): + "Extension is not supported for msgspec schemas." + + class SimpleSchema(Schema, rename=None, omit_defaults=False): + kebab_case: int + + schema = SimpleSchema + # Schema classes no longer have extend method + self.assertFalse(hasattr(schema, "extend")) def test_check_skipped(monkeypatch): - """Schema not validated if 'check=False' or taskgraph.fast is unset.""" - Schema({"camelCase": int}, check=False) # assert no exception + """Schema not validated if taskgraph.fast is set.""" + + class SimpleSchema(Schema, rename=None, omit_defaults=False): + value: int + monkeypatch.setattr(taskgraph, "fast", True) - Schema({"camelCase": int}) # assert no exception + schema = SimpleSchema + # When fast mode is on, validation is skipped + result = schema.validate({"value": "not-an-int"}) # Should not raise + assert result == {"value": "not-an-int"} class TestResolveKeyedBy(unittest.TestCase): @@ -255,10 +257,10 @@ def test_optionally_keyed_by(): assert validator("baz") == "baz" assert validator({"by-foo": {"a": "b", "c": "d"}}) == {"a": "b", "c": "d"} - with pytest.raises((Invalid, TypeError, ValueError)): + with pytest.raises((TypeError, ValueError)): validator({"by-foo": {"a": 1, "c": "d"}}) - with pytest.raises((MultipleInvalid, ValueError)): + with pytest.raises(ValueError): validator({"by-bar": {"a": "b"}}) @@ -269,11 +271,11 @@ def test_optionally_keyed_by_mulitple_keys(): assert validator({"by-bar": {"x": "y"}}) == {"x": "y"} assert validator({"by-foo": {"a": {"by-bar": {"x": "y"}}}}) == {"a": {"x": "y"}} - with pytest.raises((Invalid, TypeError, ValueError)): + with pytest.raises((TypeError, ValueError)): validator({"by-foo": {"a": 123, "c": "d"}}) - with pytest.raises((MultipleInvalid, TypeError, ValueError)): + with pytest.raises((TypeError, ValueError)): validator({"by-bar": {"a": 1}}) - with pytest.raises((MultipleInvalid, ValueError)): + with pytest.raises(ValueError): validator({"by-unknown": {"a": "b"}}) From 7fddbd9add90f5284cbe3b136cd1bd285492b3dc Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 29 Aug 2025 12:00:42 -0400 Subject: [PATCH 10/20] feat: cleaned up schema wrapper --- docs/concepts/transforms.rst | 5 ++-- docs/tutorials/creating-a-task-graph.rst | 7 ++--- src/taskgraph/transforms/chunking.py | 5 ++-- src/taskgraph/transforms/docker_image.py | 10 +++---- src/taskgraph/transforms/fetch.py | 13 ++++---- src/taskgraph/transforms/from_deps.py | 8 ++--- src/taskgraph/transforms/matrix.py | 5 ++-- src/taskgraph/transforms/notify.py | 30 ++++++++----------- src/taskgraph/transforms/run/toolchain.py | 8 +++-- src/taskgraph/transforms/task.py | 22 +++++--------- src/taskgraph/transforms/task_context.py | 21 ++++++++----- .../transforms/hello.py | 6 ++-- test/test_parameters.py | 4 +-- test/test_transforms_run_run_task.py | 2 -- 14 files changed, 69 insertions(+), 77 deletions(-) diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index 401773824..c46e4d5a8 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -105,17 +105,16 @@ about the state of the tasks at given points. Here is an example: .. code-block:: python - import msgspec from typing import Optional from taskgraph.transforms.base import TransformSequence from taskgraph.util.schema import Schema - class MySchema(msgspec.Struct, kw_only=True): + class MySchema(Schema): foo: str # Required field bar: Optional[bool] = None # Optional field transforms = TransformSequence() - transforms.add_validate(Schema(MySchema)) + transforms.add_validate(MySchema) In the above example, we can be sure that every task dict has a string field called ``foo``, and may or may not have a boolean field called ``bar``. diff --git a/docs/tutorials/creating-a-task-graph.rst b/docs/tutorials/creating-a-task-graph.rst index dfb5ad1e3..5a71cee58 100644 --- a/docs/tutorials/creating-a-task-graph.rst +++ b/docs/tutorials/creating-a-task-graph.rst @@ -136,17 +136,16 @@ comments for explanations): .. code-block:: python - import msgspec from typing import Optional from taskgraph.util.schema import Schema from taskgraph.transforms.base import TransformSequence - # Define the schema using msgspec for better type checking and performance. - class HelloDescriptionSchema(msgspec.Struct, kw_only=True): + # Define the schema using Schema base class for better type checking and performance. + class HelloDescriptionSchema(Schema): text: str # Required field description: Optional[str] = None # Optional field - hello_description_schema = Schema(HelloDescriptionSchema) + hello_description_schema = HelloDescriptionSchema # Create a 'TransformSequence' instance. This class collects transform # functions to run later. diff --git a/src/taskgraph/transforms/chunking.py b/src/taskgraph/transforms/chunking.py index 7ed7b6e62..7a8c8cf8c 100644 --- a/src/taskgraph/transforms/chunking.py +++ b/src/taskgraph/transforms/chunking.py @@ -7,10 +7,11 @@ import msgspec from taskgraph.transforms.base import TransformSequence +from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute -class ChunkConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class ChunkConfig(Schema): """ `chunk` can be used to split one task into `total-chunks` tasks, substituting `this_chunk` and `total_chunks` into any @@ -25,7 +26,7 @@ class ChunkConfig(msgspec.Struct, kw_only=True, rename="kebab"): #: Schema for chunking transforms -class ChunkSchema(msgspec.Struct, kw_only=True, omit_defaults=True): +class ChunkSchema(Schema): # Optional, so it can be used for a subset of tasks in a kind chunk: Optional[ChunkConfig] = None __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 5a97700fe..fd28776e3 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -7,12 +7,11 @@ import re from typing import Any, Dict, List, Optional -import msgspec - import taskgraph from taskgraph.transforms.base import TransformSequence from taskgraph.util import json from taskgraph.util.docker import create_context_tar, generate_context_hash +from taskgraph.util.schema import Schema logger = logging.getLogger(__name__) @@ -30,9 +29,7 @@ #: Schema for docker_image transforms -class DockerImageSchema( - msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" -): +class DockerImageSchema(Schema): """ Schema for docker_image transforms. @@ -49,7 +46,10 @@ class DockerImageSchema( cache: Whether this image should be cached based on inputs. """ + # Required field first name: str + + # Optional fields parent: Optional[str] = None symbol: Optional[str] = None task_from: Optional[str] = None diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 1555265ca..66f190645 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -17,6 +17,7 @@ from ..util import path from ..util.cached_tasks import add_optimization +from ..util.schema import Schema from ..util.treeherder import join_symbol from .base import TransformSequence @@ -24,14 +25,14 @@ #: Schema for fetch transforms -class FetchConfig(msgspec.Struct, kw_only=True): +class FetchConfig(Schema, rename=None, omit_defaults=False): """Configuration for a fetch task type.""" type: str # Additional fields handled dynamically by fetch builders -class FetchSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class FetchSchema(Schema): """ Schema for fetch transforms. @@ -199,7 +200,7 @@ def make_task(config, tasks): yield task_desc -class GPGSignatureConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class GPGSignatureConfig(Schema): """GPG signature verification configuration.""" # URL where GPG signature document can be obtained. Can contain the @@ -209,9 +210,7 @@ class GPGSignatureConfig(msgspec.Struct, kw_only=True, rename="kebab"): key_path: str -class StaticUrlFetchConfig( - msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" -): +class StaticUrlFetchConfig(Schema, rename="kebab"): """Configuration for static-url fetch type.""" type: str @@ -303,7 +302,7 @@ def create_fetch_url_task(config, name, fetch): } -class GitFetchConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class GitFetchConfig(Schema): """Configuration for git fetch type.""" type: str diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index fedaf1654..8549a5901 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -20,19 +20,19 @@ from taskgraph.transforms.base import TransformSequence from taskgraph.util.attributes import attrmatch from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies -from taskgraph.util.schema import validate_schema +from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.set_name import SET_NAME_MAP # Define FetchEntry for the fetches field -class FetchEntry(msgspec.Struct, kw_only=True, omit_defaults=True): +class FetchEntry(Schema, rename=None): """A fetch entry for an artifact.""" artifact: str dest: Optional[str] = None -class FromDepsConfig(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class FromDepsConfig(Schema): """ Configuration for from-deps transforms. @@ -94,7 +94,7 @@ def __post_init__(self): #: Schema for from_deps transforms -class FromDepsSchema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class FromDepsSchema(Schema): """Schema for from_deps transforms.""" from_deps: FromDepsConfig diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 8ba0c5cdc..08bd9612e 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -13,10 +13,11 @@ import msgspec from taskgraph.transforms.base import TransformSequence +from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute_task_fields -class MatrixConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class MatrixConfig(Schema): """ Matrix configuration for generating multiple tasks. """ @@ -43,7 +44,7 @@ class MatrixConfig(msgspec.Struct, kw_only=True, rename="kebab"): #: Schema for matrix transforms -class MatrixSchema(msgspec.Struct, kw_only=True, omit_defaults=True): +class MatrixSchema(Schema): name: str matrix: Optional[MatrixConfig] = None __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 610ede551..ec45521a6 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -13,7 +13,7 @@ import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import resolve_keyed_by +from taskgraph.util.schema import Schema, resolve_keyed_by StatusType = Literal[ "on-completed", @@ -26,7 +26,7 @@ ] -class EmailRecipient(msgspec.Struct, kw_only=True, rename="kebab"): +class EmailRecipient(Schema): """Email notification recipient.""" type: Literal["email"] @@ -34,7 +34,7 @@ class EmailRecipient(msgspec.Struct, kw_only=True, rename="kebab"): status_type: Optional[StatusType] = None -class MatrixRoomRecipient(msgspec.Struct, kw_only=True, rename="kebab"): +class MatrixRoomRecipient(Schema): """Matrix room notification recipient.""" type: Literal["matrix-room"] @@ -42,7 +42,7 @@ class MatrixRoomRecipient(msgspec.Struct, kw_only=True, rename="kebab"): status_type: Optional[StatusType] = None -class PulseRecipient(msgspec.Struct, kw_only=True, rename="kebab"): +class PulseRecipient(Schema): """Pulse notification recipient.""" type: Literal["pulse"] @@ -50,7 +50,7 @@ class PulseRecipient(msgspec.Struct, kw_only=True, rename="kebab"): status_type: Optional[StatusType] = None -class SlackChannelRecipient(msgspec.Struct, kw_only=True, rename="kebab"): +class SlackChannelRecipient(Schema): """Slack channel notification recipient.""" type: Literal["slack-channel"] @@ -71,14 +71,14 @@ class SlackChannelRecipient(msgspec.Struct, kw_only=True, rename="kebab"): """Map each type to its primary key that will be used in the route.""" -class EmailLink(msgspec.Struct, kw_only=True): +class EmailLink(Schema, rename=None, omit_defaults=False): """Email link configuration.""" text: str href: str -class EmailContent(msgspec.Struct, kw_only=True, omit_defaults=True): +class EmailContent(Schema, rename=None): """Email notification content.""" subject: Optional[str] = None @@ -86,7 +86,7 @@ class EmailContent(msgspec.Struct, kw_only=True, omit_defaults=True): link: Optional[EmailLink] = None -class MatrixContent(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class MatrixContent(Schema): """Matrix notification content.""" body: Optional[str] = None @@ -95,7 +95,7 @@ class MatrixContent(msgspec.Struct, kw_only=True, omit_defaults=True, rename="ke msg_type: Optional[str] = None -class SlackContent(msgspec.Struct, kw_only=True, omit_defaults=True): +class SlackContent(Schema, rename=None): """Slack notification content.""" text: Optional[str] = None @@ -103,7 +103,7 @@ class SlackContent(msgspec.Struct, kw_only=True, omit_defaults=True): attachments: Optional[List[Any]] = None -class NotifyContent(msgspec.Struct, kw_only=True, omit_defaults=True): +class NotifyContent(Schema, rename=None): """Notification content configuration.""" email: Optional[EmailContent] = None @@ -111,16 +111,14 @@ class NotifyContent(msgspec.Struct, kw_only=True, omit_defaults=True): slack: Optional[SlackContent] = None -class NotifyConfig(msgspec.Struct, kw_only=True, omit_defaults=True): +class NotifyConfig(Schema, rename=None): """Modern notification configuration.""" recipients: List[Dict[str, Any]] # Will be validated as Recipient union content: Optional[NotifyContent] = None -class LegacyNotificationsConfig( - msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" -): +class LegacyNotificationsConfig(Schema, rename="kebab"): """Legacy notification configuration for backwards compatibility.""" emails: Union[List[str], Dict[str, Any]] # Can be keyed-by @@ -130,9 +128,7 @@ class LegacyNotificationsConfig( #: Schema for notify transforms -class NotifySchema( - msgspec.Struct, kw_only=True, omit_defaults=True, tag_field="notify_type" -): +class NotifySchema(Schema, tag_field="notify_type"): """Schema for notify transforms. Note: This schema allows either 'notify' or 'notifications' field, diff --git a/src/taskgraph/transforms/run/toolchain.py b/src/taskgraph/transforms/run/toolchain.py index 42418c116..4180d87db 100644 --- a/src/taskgraph/transforms/run/toolchain.py +++ b/src/taskgraph/transforms/run/toolchain.py @@ -18,15 +18,14 @@ ) from taskgraph.util import path as mozpath from taskgraph.util.hash import hash_paths +from taskgraph.util.schema import Schema from taskgraph.util.shell import quote as shell_quote CACHE_TYPE = "toolchains.v3" #: Schema for run.using toolchain -class ToolchainRunSchema( - msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab" -): +class ToolchainRunSchema(Schema): """ Schema for toolchain-script run configuration. @@ -49,11 +48,14 @@ class ToolchainRunSchema( workdir: Base work directory used to set up the task. """ + # Required fields first using: Literal["toolchain-script"] script: str sparse_profile: Optional[str] # Can be None to skip sparse profile toolchain_artifact: str workdir: str + + # Optional fields arguments: Optional[List[str]] = None resources: Optional[List[str]] = None toolchain_alias: Optional[Union[str, List[str]]] = None diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 56d05ba29..05e7e5a7e 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -57,9 +57,7 @@ class TaskDescriptionTreeherder(Schema, rename=None): platform: TOptional[str] = None -class TaskDescriptionIndex( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class TaskDescriptionIndex(Schema, rename="kebab"): """Index information for a task.""" # the name of the product this build produces @@ -202,7 +200,7 @@ def payload_builder(name, schema): if not (isinstance(schema, type) and issubclass(schema, msgspec.Struct)): raise TypeError( f"payload_builder requires msgspec.Struct schema, got {type(schema).__name__}. " - f"Please migrate to msgspec: class {name.title()}Schema(msgspec.Struct): ..." + f"Please migrate to msgspec: class {name.title()}Schema(Schema): ..." ) # Verify the schema has required fields @@ -244,17 +242,15 @@ def verify_index(config, index): # Docker Worker schema using msgspec -class DockerWorkerCacheConfig( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class DockerWorkerCacheConfig(Schema, rename="kebab"): """Cache configuration for docker-worker.""" - # only one type is supported by any of the workers right now - type: Literal["persistent"] = "persistent" # name of the cache, allowing reuse by subsequent tasks naming the same cache name: str # location in the task image where the cache will be mounted mount_point: str + # only one type is supported by any of the workers right now + type: Literal["persistent"] = "persistent" # Whether the cache is not used in untrusted environments (like the Try repo). skip_untrusted: bool = False @@ -536,9 +532,7 @@ class GenericWorkerArtifactConfig(Schema, rename=None): name: TOptional[str] = None -class GenericWorkerMountContent( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class GenericWorkerMountContent(Schema, rename="kebab"): """Mount content configuration for generic-worker.""" # Artifact name that contains the content. @@ -549,9 +543,7 @@ class GenericWorkerMountContent( url: TOptional[str] = None -class GenericWorkerMountConfig( - msgspec.Struct, kw_only=True, rename="kebab", omit_defaults=True -): +class GenericWorkerMountConfig(Schema, rename="kebab"): """Mount configuration for generic-worker.""" # A unique name for the cache volume, implies writable cache directory diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index f15253126..0b7ff0e47 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -3,11 +3,12 @@ import msgspec from taskgraph.transforms.base import TransformSequence +from taskgraph.util.schema import Schema from taskgraph.util.templates import deep_get, substitute_task_fields from taskgraph.util.yaml import load_yaml -class TaskContextConfig(msgspec.Struct, kw_only=True, rename="kebab"): +class TaskContextConfig(Schema): """ `task-context` can be used to substitute values into any field in a task with data that is not known until `taskgraph` runs. @@ -30,6 +31,12 @@ class TaskContextConfig(msgspec.Struct, kw_only=True, rename="kebab"): That is to say: parameters will always override anything else. """ + # Required field first + # A list of fields in the task to substitute the provided values + # into. + substitution_fields: List[str] + + # Optional fields # Retrieve task context values from parameters. A single # parameter may be provided or a list of parameters in # priority order. The latter can be useful in implementing a @@ -42,19 +49,19 @@ class TaskContextConfig(msgspec.Struct, kw_only=True, rename="kebab"): from_file: Optional[str] = None # Key/value pairs to be used as task context from_object: Optional[Any] = None - # A list of fields in the task to substitute the provided values - # into. - substitution_fields: List[str] #: Schema for the task_context transforms -class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): - name: Optional[str] = None +class TaskContextSchema(Schema): + # Required field first task_context: TaskContextConfig + + # Optional fields + name: Optional[str] = None __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) -SCHEMA = Schema +SCHEMA = TaskContextSchema transforms = TransformSequence() transforms.add_validate(SCHEMA) diff --git a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py index a2f8e7f8c..f2dac2ca0 100644 --- a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py +++ b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py @@ -1,14 +1,12 @@ -import msgspec - from taskgraph.transforms.base import TransformSequence from taskgraph.util.schema import Schema -class HelloSchema(msgspec.Struct, kw_only=True): +class HelloSchema(Schema): noun: str # Required field -HELLO_SCHEMA = Schema(HelloSchema) +HELLO_SCHEMA = HelloSchema transforms = TransformSequence() transforms.add_validate(HELLO_SCHEMA) diff --git a/test/test_parameters.py b/test/test_parameters.py index 5bcebd975..6ec540178 100644 --- a/test/test_parameters.py +++ b/test/test_parameters.py @@ -10,7 +10,6 @@ from unittest import TestCase, mock import mozilla_repo_urls -import msgspec import pytest import taskgraph # noqa: F401 @@ -21,6 +20,7 @@ extend_parameters_schema, load_parameters_file, ) +from taskgraph.util.schema import Schema from .mockedopen import MockedOpen @@ -277,7 +277,7 @@ def test_extend_parameters_schema(monkeypatch): """Test parameter extension with msgspec schemas.""" # Define a test schema that extends the base schema - class ExtendedSchema(msgspec.Struct, kw_only=True, rename="kebab"): + class ExtendedSchema(Schema): foo: str bar: bool = False # Optional with default diff --git a/test/test_transforms_run_run_task.py b/test/test_transforms_run_run_task.py index 67c4c788f..d6bd45a36 100644 --- a/test/test_transforms_run_run_task.py +++ b/test/test_transforms_run_run_task.py @@ -251,8 +251,6 @@ def inner(task, **kwargs): print("Dumping for copy/paste:") pprint(caches, indent=2) - # Skip validation as all schemas are now msgspec.Struct types - # and partial schema validation is not needed return caches return inner From 1d23c986a52683a961e7b69a27f3a573bf8740d1 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 29 Aug 2025 16:33:40 -0400 Subject: [PATCH 11/20] feat: added support for optional keying --- src/taskgraph/config.py | 53 +++++++++- src/taskgraph/parameters.py | 123 ++++++---------------- src/taskgraph/transforms/docker_image.py | 27 ++--- src/taskgraph/transforms/fetch.py | 33 +++--- src/taskgraph/transforms/from_deps.py | 53 +++++----- src/taskgraph/transforms/run/__init__.py | 7 +- src/taskgraph/transforms/run/toolchain.py | 48 +++++---- 7 files changed, 163 insertions(+), 181 deletions(-) diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index fa76d82bc..497d9345f 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -13,7 +13,7 @@ import msgspec from .util.python_path import find_object -from .util.schema import Schema, validate_schema +from .util.schema import Schema, optionally_keyed_by, validate_schema from .util.vcs import get_repository from .util.yaml import load_yaml @@ -29,10 +29,24 @@ class WorkerAlias(Schema): """Worker alias configuration.""" - provisioner: Union[str, dict] + provisioner: Union[str, dict] # Can be keyed-by level implementation: str os: str - worker_type: Union[str, dict] # Can be keyed-by, maps from "worker-type" + worker_type: Union[str, dict] # Can be keyed-by level, maps from "worker-type" + + def __post_init__(self): + """Validate keyed-by fields.""" + # Validate provisioner can be keyed-by level + if isinstance(self.provisioner, dict): + validator = optionally_keyed_by("level", str) + # Just validate - it will raise an error if invalid + validator(self.provisioner) + + # Validate worker_type can be keyed-by level + if isinstance(self.worker_type, dict): + validator = optionally_keyed_by("level", str) + # Just validate - it will raise an error if invalid + validator(self.worker_type) class Workers(Schema, rename=None): @@ -82,19 +96,48 @@ class GraphConfigSchema(Schema): trust_domain: str # Maps from "trust-domain" task_priority: Union[ TaskPriority, dict - ] # Maps from "task-priority", can be keyed-by + ] # Maps from "task-priority", can be keyed-by project or level workers: Workers taskgraph: TaskGraphConfig # Optional fields docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" task_deadline_after: Optional[Union[str, dict]] = ( - None # Maps from "task-deadline-after", can be keyed-by + None # Maps from "task-deadline-after", can be keyed-by project ) task_expires_after: Optional[str] = None # Maps from "task-expires-after" # Allow extra fields for flexibility __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) + def __post_init__(self): + """Validate keyed-by fields.""" + # Validate task_priority can be keyed-by project or level + if isinstance(self.task_priority, dict): + # Create a validator that accepts TaskPriority values + def validate_priority(x): + valid_priorities = [ + "highest", + "very-high", + "high", + "medium", + "low", + "very-low", + "lowest", + ] + if x not in valid_priorities: + raise ValueError(f"Invalid task priority: {x}") + return x + + validator = optionally_keyed_by("project", "level", validate_priority) + # Just validate - it will raise an error if invalid + validator(self.task_priority) + + # Validate task_deadline_after can be keyed-by project + if self.task_deadline_after and isinstance(self.task_deadline_after, dict): + validator = optionally_keyed_by("project", str) + # Just validate - it will raise an error if invalid + validator(self.task_deadline_after) + # Msgspec schema is now the main schema graph_config_schema = GraphConfigSchema diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index b77047609..adbe1a1d3 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -175,8 +175,8 @@ def extend_parameters_schema(schema, defaults_fn=None): # Store the extension schema for use during validation _schema_extensions.append(schema) - # Schema extension is no longer supported with msgspec.Struct inheritance - # Extensions are tracked in _schema_extensions list instead + # With msgspec, schema extensions are tracked in the _schema_extensions list + # for validation purposes rather than being merged into a single schema if defaults_fn: defaults_functions.append(defaults_fn) @@ -240,92 +240,39 @@ def _fill_defaults(repo_root=None, **kwargs): return kwargs def check(self): - # For msgspec schemas, we need to validate differently - if isinstance(base_schema, type) and issubclass(base_schema, msgspec.Struct): - try: - # Convert underscore keys to kebab-case for msgspec validation - params = self.copy() - # BaseSchema uses kebab-case (rename="kebab"), so we need to convert keys - kebab_params = {} - for k, v in params.items(): - # Convert underscore to kebab-case - kebab_key = k.replace("_", "-") - kebab_params[kebab_key] = v - - # Handle extensions if present - global _schema_extensions - for ext_schema in _schema_extensions: - if isinstance(ext_schema, dict): - # Simple dict validation - just check if required keys exist - for key in ext_schema: - # Just skip validation of extensions for now - pass - - if self.strict: - # Strict validation with msgspec - # First check for extra fields - schema_fields = { - f.encode_name for f in msgspec.structs.fields(base_schema) - } - - # Add extension fields if present - for ext_schema in _schema_extensions: - if isinstance(ext_schema, dict): - for key in ext_schema.keys(): - # Extract field name - if hasattr(key, "key"): - field_name = key.key.replace("_", "-") - else: - field_name = str(key).replace("_", "-") - schema_fields.add(field_name) - - extra_fields = set(kebab_params.keys()) - schema_fields - if extra_fields: - raise ParameterMismatch( - f"Invalid parameters: Extra fields not allowed: {extra_fields}" - ) - # Now validate the base schema fields - base_fields = { - f.encode_name for f in msgspec.structs.fields(base_schema) - } - base_params = { - k: v for k, v in kebab_params.items() if k in base_fields - } - msgspec.convert(base_params, base_schema) - else: - # Non-strict: validate only the fields that exist in the schema - # Filter to only schema fields - schema_fields = { - f.encode_name for f in msgspec.structs.fields(base_schema) - } - filtered_params = { - k: v for k, v in kebab_params.items() if k in schema_fields - } - msgspec.convert(filtered_params, base_schema) - except (msgspec.ValidationError, msgspec.DecodeError) as e: - raise ParameterMismatch(f"Invalid parameters: {e}") - else: - # For non-msgspec schemas, validate using the Schema class - from taskgraph.util.schema import validate_schema # noqa: PLC0415 - - try: - if self.strict: - validate_schema(base_schema, self.copy(), "Invalid parameters:") - else: - # In non-strict mode, allow extra fields - if hasattr(base_schema, "allow_extra"): - original_allow_extra = base_schema.allow_extra - base_schema.allow_extra = True - try: - validate_schema( - base_schema, self.copy(), "Invalid parameters:" - ) - finally: - base_schema.allow_extra = original_allow_extra - else: - validate_schema(base_schema, self.copy(), "Invalid parameters:") - except Exception as e: - raise ParameterMismatch(str(e)) + # Validate parameters using msgspec schema + try: + # Convert underscore keys to kebab-case since BaseSchema uses rename="kebab" + kebab_params = {k.replace("_", "-"): v for k, v in self.items()} + + if self.strict: + # Strict mode: validate against schema and check for extra fields + # Get all valid field names from the base schema + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + + # Check for extra fields + extra_fields = set(kebab_params.keys()) - schema_fields + if extra_fields: + raise ParameterMismatch( + f"Invalid parameters: Extra fields not allowed: {extra_fields}" + ) + + # Validate all parameters against the schema + msgspec.convert(kebab_params, base_schema) + else: + # Non-strict mode: only validate fields that exist in the schema + # Filter to only include fields defined in the schema + schema_fields = { + f.encode_name for f in msgspec.structs.fields(base_schema) + } + filtered_params = { + k: v for k, v in kebab_params.items() if k in schema_fields + } + msgspec.convert(filtered_params, base_schema) + except (msgspec.ValidationError, msgspec.DecodeError) as e: + raise ParameterMismatch(f"Invalid parameters: {e}") def __getitem__(self, k): try: diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index fd28776e3..31d8397e6 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -30,34 +30,27 @@ #: Schema for docker_image transforms class DockerImageSchema(Schema): - """ - Schema for docker_image transforms. - - Attributes: - name: Name of the docker image. - parent: Name of the parent docker image. - symbol: Treeherder symbol. - task_from: Relative path (from config.path) to the file the docker image was defined in. - args: Arguments to use for the Dockerfile. - definition: Name of the docker image definition under taskcluster/docker, when - different from the docker image name. - packages: List of package tasks this docker image depends on. - index: Information for indexing this build so its artifacts can be discovered. - cache: Whether this image should be cached based on inputs. - """ - # Required field first + # Name of the docker image. name: str # Optional fields + # Name of the parent docker image. parent: Optional[str] = None + # Treeherder symbol. symbol: Optional[str] = None + # Relative path (from config.path) to the file the docker image was defined in. task_from: Optional[str] = None + # Arguments to use for the Dockerfile. args: Optional[Dict[str, str]] = None + # Name of the docker image definition under taskcluster/docker, when + # different from the docker image name. definition: Optional[str] = None + # List of package tasks this docker image depends on. packages: Optional[List[str]] = None - # For now, use Any for index since task_description_schema is not converted yet + # Information for indexing this build so its artifacts can be discovered. index: Optional[Any] = None + # Whether this image should be cached based on inputs. cache: Optional[bool] = None diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 66f190645..25a86ba23 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -33,32 +33,29 @@ class FetchConfig(Schema, rename=None, omit_defaults=False): class FetchSchema(Schema): - """ - Schema for fetch transforms. - - Attributes: - name: Name of the task. - task_from: Relative path (from config.path) to the file the task was defined in. - description: Description of the task. - expires_after: When the task expires. - docker_image: Docker image configuration. - fetch_alias: An alias that can be used instead of the real fetch task name in - fetch stanzas for tasks. - artifact_prefix: The prefix of the taskcluster artifact being uploaded. - Defaults to `public/`; if it starts with something other than - `public/` the artifact will require scopes to access. - attributes: Task attributes. - fetch: Fetch configuration with type and additional fields. - """ - + # Required fields + # Name of the task. name: str + # Description of the task. description: str + # Fetch configuration with type and additional fields. fetch: Dict[str, Any] # Must have 'type' key, other keys depend on type + + # Optional fields + # Relative path (from config.path) to the file the task was defined in. task_from: Optional[str] = None + # When the task expires. expires_after: Optional[str] = None + # Docker image configuration. docker_image: Optional[Any] = None + # An alias that can be used instead of the real fetch task name in + # fetch stanzas for tasks. fetch_alias: Optional[str] = None + # The prefix of the taskcluster artifact being uploaded. + # Defaults to `public/`; if it starts with something other than + # `public/` the artifact will require scopes to access. artifact_prefix: Optional[str] = None + # Task attributes. attributes: Optional[Dict[str, Any]] = None def __post_init__(self): diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 8549a5901..39a8f3c57 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -24,7 +24,6 @@ from taskgraph.util.set_name import SET_NAME_MAP -# Define FetchEntry for the fetches field class FetchEntry(Schema, rename=None): """A fetch entry for an artifact.""" @@ -33,40 +32,36 @@ class FetchEntry(Schema, rename=None): class FromDepsConfig(Schema): - """ - Configuration for from-deps transforms. - - Attributes: - kinds: Limit dependencies to specified kinds (defaults to all kinds in - `kind-dependencies`). The first kind in the list is the "primary" kind. - The dependency of this kind will be used to derive the label - and copy attributes (if `copy-attributes` is True). - set_name: UPDATE ME AND DOCS. Can be a string from SET_NAME_MAP, False, None, - or a dict with a SET_NAME_MAP key. - with_attributes: Limit dependencies to tasks whose attributes match - using :func:`~taskgraph.util.attributes.attrmatch`. - group_by: Group cross-kind dependencies using the given group-by - function. One task will be created for each group. If not - specified, the 'single' function will be used which creates - a new task for each individual dependency. - copy_attributes: If True, copy attributes from the dependency matching the - first kind in the `kinds` list (whether specified explicitly - or taken from `kind-dependencies`). - unique_kinds: If true (the default), there must be only a single unique task - for each kind in a dependency group. Setting this to false - disables that requirement. - fetches: If present, a `fetches` entry will be added for each task - dependency. Attributes of the upstream task may be used as - substitution values in the `artifact` or `dest` values of the - `fetches` entry. - """ - + # Optional fields + # Limit dependencies to specified kinds (defaults to all kinds in + # `kind-dependencies`). + # + # The first kind in the list is the "primary" kind. The + # dependency of this kind will be used to derive the label + # and copy attributes (if `copy-attributes` is True). kinds: Optional[List[str]] = None + # UPDATE ME AND DOCS set_name: Optional[Union[str, bool, Dict[str, Any]]] = None + # Limit dependencies to tasks whose attributes match + # using :func:`~taskgraph.util.attributes.attrmatch`. with_attributes: Optional[Dict[str, Union[List[Any], str]]] = None + # Group cross-kind dependencies using the given group-by + # function. One task will be created for each group. If not + # specified, the 'single' function will be used which creates + # a new task for each individual dependency. group_by: Optional[Union[str, Dict[str, Any]]] = None + # If True, copy attributes from the dependency matching the + # first kind in the `kinds` list (whether specified explicitly + # or taken from `kind-dependencies`). copy_attributes: Optional[bool] = None + # If true (the default), there must be only a single unique task + # for each kind in a dependency group. Setting this to false + # disables that requirement. unique_kinds: Optional[bool] = None + # If present, a `fetches` entry will be added for each task + # dependency. Attributes of the upstream task may be used as + # substitution values in the `artifact` or `dest` values of the + # `fetches` entry. fetches: Optional[Dict[str, List[Union[str, Dict[str, str]]]]] = None def __post_init__(self): diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index ff477c125..dc06cd22b 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -65,8 +65,13 @@ class RunDescriptionSchema(Schema): worker_type: str # Optional fields - # Task naming + # The name of the task. At least one of 'name' or 'label' must be + # specified. If 'label' is not provided, it will be generated from + # the 'name' by prepending the kind. name: TOptional[str] = None + # The label of the task. At least one of 'name' or 'label' must be + # specified. If 'label' is not provided, it will be generated from + # the 'name' by prepending the kind. label: TOptional[str] = None # Optional fields from task description diff --git a/src/taskgraph/transforms/run/toolchain.py b/src/taskgraph/transforms/run/toolchain.py index 4180d87db..7f3686d40 100644 --- a/src/taskgraph/transforms/run/toolchain.py +++ b/src/taskgraph/transforms/run/toolchain.py @@ -26,45 +26,47 @@ #: Schema for run.using toolchain class ToolchainRunSchema(Schema): - """ - Schema for toolchain-script run configuration. - - Attributes: - using: Specifies the run type. Must be "toolchain-script". - script: The script (in taskcluster/scripts/misc) to run. - arguments: Arguments to pass to the script. - sparse_profile: Sparse profile to give to checkout using `run-task`. If given, - a filename in `build/sparse-profiles`. Defaults to - "toolchain-build", i.e., to - `build/sparse-profiles/toolchain-build`. If `None`, instructs - `run-task` to not use a sparse profile at all. - resources: Paths/patterns pointing to files that influence the outcome of - a toolchain build. - toolchain_artifact: Path to the artifact produced by the toolchain task. - toolchain_alias: An alias that can be used instead of the real toolchain task name in - fetch stanzas for tasks. - toolchain_env: Additional env variables to add to the worker when using this - toolchain. - workdir: Base work directory used to set up the task. - """ - # Required fields first + + # Specifies the run type. Must be "toolchain-script". using: Literal["toolchain-script"] + # The script (in taskcluster/scripts/misc) to run. script: str + + # Sparse profile to give to checkout using `run-task`. If given, + # a filename in `build/sparse-profiles`. Defaults to + # "toolchain-build", i.e., to + # `build/sparse-profiles/toolchain-build`. If `None`, instructs + # `run-task` to not use a sparse profile at all. sparse_profile: Optional[str] # Can be None to skip sparse profile + + # Path to the artifact produced by the toolchain task. toolchain_artifact: str + + # Base work directory used to set up the task. workdir: str # Optional fields + + # Arguments to pass to the script. arguments: Optional[List[str]] = None + + # Paths/patterns pointing to files that influence the outcome of + # a toolchain build. resources: Optional[List[str]] = None + + # An alias that can be used instead of the real toolchain task name in + # fetch stanzas for tasks. toolchain_alias: Optional[Union[str, List[str]]] = None + + # Additional env variables to add to the worker when using this + # toolchain. toolchain_env: Optional[Dict[str, Any]] = None + # Allow extra fields _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") -# Backward compatibility toolchain_run_schema = ToolchainRunSchema From 565a8271d21c6206f3b951ddd89cf8f3a9fb13cc Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 2 Sep 2025 10:10:52 -0400 Subject: [PATCH 12/20] fix: updated errors --- src/taskgraph/transforms/task_context.py | 7 ++++--- src/taskgraph/util/schema.py | 7 +------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index 0b7ff0e47..bb381ac9a 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -24,9 +24,10 @@ class TaskContextConfig(Schema): If the same key is found in multiple places the order of precedence is as follows: - - Parameters - - `from-object` keys - - File + + - Parameters + - `from-object` keys + - File That is to say: parameters will always override anything else. """ diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index c05838f6f..d3710c6f5 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -5,8 +5,6 @@ import pprint from typing import List -import re -from collections.abc import Mapping import msgspec @@ -64,9 +62,7 @@ def validator(obj): try: res[kk] = validator(vv) except Exception as e: - if hasattr(e, "prepend"): - e.prepend([k, kk]) - raise + raise ValueError(f"Error in {k}.{kk}: {str(e)}") from e return res elif k.startswith("by-"): # Unknown by-field @@ -194,7 +190,6 @@ def resolve_keyed_by( ] - class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): """ Base schema class that extends msgspec.Struct. From 9b76e78da20fa9fe780869d3f539b864b5d00e3d Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Tue, 2 Sep 2025 15:12:52 -0400 Subject: [PATCH 13/20] DO NOT LAND: Use by-level in priority key --- taskcluster/config.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/taskcluster/config.yml b/taskcluster/config.yml index 280296fb6..5fec007ee 100644 --- a/taskcluster/config.yml +++ b/taskcluster/config.yml @@ -12,7 +12,10 @@ index: products: - taskgraph -task-priority: low +task-priority: + by-level: + "3": medium + default: low taskgraph: register: self_taskgraph:register From 7d203c600a2344f67def92483a170df7a1c9a92f Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Tue, 2 Sep 2025 14:36:36 -0400 Subject: [PATCH 14/20] Experimental refactor --- src/taskgraph/config.py | 56 ++------------------------- src/taskgraph/util/schema.py | 74 +++++------------------------------- 2 files changed, 14 insertions(+), 116 deletions(-) diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 497d9345f..13fa4eef7 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -2,7 +2,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. - import logging import os import sys @@ -28,25 +27,10 @@ class WorkerAlias(Schema): """Worker alias configuration.""" - - provisioner: Union[str, dict] # Can be keyed-by level + provisioner: optionally_keyed_by("level", str) # type: ignore implementation: str os: str - worker_type: Union[str, dict] # Can be keyed-by level, maps from "worker-type" - - def __post_init__(self): - """Validate keyed-by fields.""" - # Validate provisioner can be keyed-by level - if isinstance(self.provisioner, dict): - validator = optionally_keyed_by("level", str) - # Just validate - it will raise an error if invalid - validator(self.provisioner) - - # Validate worker_type can be keyed-by level - if isinstance(self.worker_type, dict): - validator = optionally_keyed_by("level", str) - # Just validate - it will raise an error if invalid - validator(self.worker_type) + worker_type: optionally_keyed_by("level", str) # type: ignore class Workers(Schema, rename=None): @@ -94,49 +78,17 @@ class GraphConfigSchema(Schema): # Required fields first trust_domain: str # Maps from "trust-domain" - task_priority: Union[ - TaskPriority, dict - ] # Maps from "task-priority", can be keyed-by project or level + task_priority: optionally_keyed_by("project", "level", TaskPriority) # type: ignore workers: Workers taskgraph: TaskGraphConfig # Optional fields docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" - task_deadline_after: Optional[Union[str, dict]] = ( - None # Maps from "task-deadline-after", can be keyed-by project - ) + task_deadline_after: Optional[optionally_keyed_by("project", str)] = None # type: ignore task_expires_after: Optional[str] = None # Maps from "task-expires-after" # Allow extra fields for flexibility __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) - def __post_init__(self): - """Validate keyed-by fields.""" - # Validate task_priority can be keyed-by project or level - if isinstance(self.task_priority, dict): - # Create a validator that accepts TaskPriority values - def validate_priority(x): - valid_priorities = [ - "highest", - "very-high", - "high", - "medium", - "low", - "very-low", - "lowest", - ] - if x not in valid_priorities: - raise ValueError(f"Invalid task priority: {x}") - return x - - validator = optionally_keyed_by("project", "level", validate_priority) - # Just validate - it will raise an error if invalid - validator(self.task_priority) - - # Validate task_deadline_after can be keyed-by project - if self.task_deadline_after and isinstance(self.task_deadline_after, dict): - validator = optionally_keyed_by("project", str) - # Just validate - it will raise an error if invalid - validator(self.task_deadline_after) # Msgspec schema is now the main schema diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index d3710c6f5..6f7d70023 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -2,9 +2,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. - import pprint -from typing import List +from functools import reduce +from typing import Dict, List, Literal, Union import msgspec @@ -37,72 +37,18 @@ def validate_schema(schema, obj, msg_prefix): raise Exception(f"{msg_prefix}\n{str(exc)}\n{pprint.pformat(obj)}") -def optionally_keyed_by(*arguments): +def UnionTypes(*types): + """Use `functools.reduce` to simulate `Union[*allowed_types]` on older + Python versions. """ - Mark a schema value as optionally keyed by any of a number of fields. The - schema is the last argument, and the remaining fields are taken to be the - field names. For example: + return reduce(lambda a, b: Union[a, b], types) - 'some-value': optionally_keyed_by( - 'test-platform', 'build-platform', - Any('a', 'b', 'c')) - The resulting schema will allow nesting of `by-test-platform` and - `by-build-platform` in either order. - """ - schema = arguments[-1] +def optionally_keyed_by(*arguments): + _type = arguments[-1] fields = arguments[:-1] - - def validator(obj): - if isinstance(obj, dict) and len(obj) == 1: - k, v = list(obj.items())[0] - if k.startswith("by-") and k[len("by-") :] in fields: - res = {} - for kk, vv in v.items(): - try: - res[kk] = validator(vv) - except Exception as e: - raise ValueError(f"Error in {k}.{kk}: {str(e)}") from e - return res - elif k.startswith("by-"): - # Unknown by-field - raise ValueError(f"Unknown key {k}") - # Validate against the schema - if isinstance(schema, type) and issubclass(schema, Schema): - return schema.validate(obj) - elif schema is str: - # String validation - if not isinstance(obj, str): - raise TypeError(f"Expected string, got {type(obj).__name__}") - return obj - elif schema is int: - # Int validation - if not isinstance(obj, int): - raise TypeError(f"Expected int, got {type(obj).__name__}") - return obj - elif isinstance(schema, type): - # Type validation for built-in types - if not isinstance(obj, schema): - raise TypeError(f"Expected {schema.__name__}, got {type(obj).__name__}") - return obj - elif callable(schema): - # Other callable validators - try: - return schema(obj) - except: - raise - else: - # Simple type validation - if not isinstance(obj, schema): - raise TypeError( - f"Expected {getattr(schema, '__name__', str(schema))}, got {type(obj).__name__}" - ) - return obj - - # set to assist autodoc - setattr(validator, "schema", schema) - setattr(validator, "fields", fields) - return validator + bykeys = [Literal[f"by-{field}"] for field in fields] + return Union[_type, Dict[UnionTypes(*bykeys), Dict[str, _type]]] def resolve_keyed_by( From 3baf4eb845f1ff36b819426a1ea9f34367ae10a2 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 2 Sep 2025 16:12:07 -0400 Subject: [PATCH 15/20] fix: changed how extra fields work --- src/taskgraph/config.py | 22 +++++++++++----------- src/taskgraph/transforms/chunking.py | 11 +++-------- src/taskgraph/transforms/from_deps.py | 4 +--- src/taskgraph/transforms/matrix.py | 16 ++++++++-------- src/taskgraph/transforms/notify.py | 4 +--- src/taskgraph/transforms/run/__init__.py | 9 +++++---- src/taskgraph/transforms/run/toolchain.py | 10 +--------- src/taskgraph/transforms/task.py | 9 +++++---- src/taskgraph/transforms/task_context.py | 10 ++++++---- src/taskgraph/util/schema.py | 15 ++++++++++++++- 10 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 497d9345f..1a69c93a4 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -8,9 +8,7 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Union - -import msgspec +from typing import Dict, List, Literal, Optional, Union from .util.python_path import find_object from .util.schema import Schema, optionally_keyed_by, validate_schema @@ -55,8 +53,11 @@ class Workers(Schema, rename=None): aliases: Dict[str, WorkerAlias] -class Repository(Schema): - """Repository configuration.""" +class Repository(Schema, forbid_unknown_fields=False): + """Repository configuration. + + This schema allows extra fields for repository-specific configuration. + """ # Required fields first name: str @@ -64,8 +65,6 @@ class Repository(Schema): # Optional fields project_regex: Optional[str] = None # Maps from "project-regex" ssh_secret_name: Optional[str] = None # Maps from "ssh-secret-name" - # Allow extra fields for flexibility - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) class RunConfig(Schema): @@ -89,8 +88,11 @@ class TaskGraphConfig(Schema): run: Optional[RunConfig] = None -class GraphConfigSchema(Schema): - """Main graph configuration schema.""" +class GraphConfigSchema(Schema, forbid_unknown_fields=False): + """Main graph configuration schema. + + This schema allows extra fields for flexibility in graph configuration. + """ # Required fields first trust_domain: str # Maps from "trust-domain" @@ -106,8 +108,6 @@ class GraphConfigSchema(Schema): None # Maps from "task-deadline-after", can be keyed-by project ) task_expires_after: Optional[str] = None # Maps from "task-expires-after" - # Allow extra fields for flexibility - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) def __post_init__(self): """Validate keyed-by fields.""" diff --git a/src/taskgraph/transforms/chunking.py b/src/taskgraph/transforms/chunking.py index 7a8c8cf8c..50b151a25 100644 --- a/src/taskgraph/transforms/chunking.py +++ b/src/taskgraph/transforms/chunking.py @@ -2,9 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. import copy -from typing import Any, Dict, List, Optional - -import msgspec +from typing import List, Optional from taskgraph.transforms.base import TransformSequence from taskgraph.util.schema import Schema @@ -26,16 +24,13 @@ class ChunkConfig(Schema): #: Schema for chunking transforms -class ChunkSchema(Schema): +class ChunkSchema(Schema, forbid_unknown_fields=False): # Optional, so it can be used for a subset of tasks in a kind chunk: Optional[ChunkConfig] = None - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) - -CHUNK_SCHEMA = ChunkSchema transforms = TransformSequence() -transforms.add_validate(CHUNK_SCHEMA) +transforms.add_validate(ChunkSchema) @transforms.add diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 39a8f3c57..0f2390f42 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -89,12 +89,10 @@ def __post_init__(self): #: Schema for from_deps transforms -class FromDepsSchema(Schema): +class FromDepsSchema(Schema, forbid_unknown_fields=False): """Schema for from_deps transforms.""" from_deps: FromDepsConfig - # Allow extra fields - _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") # Backward compatibility diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 08bd9612e..03c8252a3 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -8,16 +8,14 @@ """ from copy import deepcopy -from typing import Any, Dict, List, Optional - -import msgspec +from typing import Dict, List, Optional from taskgraph.transforms.base import TransformSequence from taskgraph.util.schema import Schema from taskgraph.util.templates import substitute_task_fields -class MatrixConfig(Schema): +class MatrixConfig(Schema, forbid_unknown_fields=False): """ Matrix configuration for generating multiple tasks. """ @@ -39,15 +37,17 @@ class MatrixConfig(Schema): # If not specified, all fields in the task definition will be # substituted. substitution_fields: Optional[List[str]] = None - # Allow extra fields for matrix dimensions - __extras__: Dict[str, List[str]] = msgspec.field(default_factory=dict) #: Schema for matrix transforms -class MatrixSchema(Schema): +class MatrixSchema(Schema, forbid_unknown_fields=False): + """Schema for matrix transforms. + + This schema allows extra fields to be passed through to the task. + """ + name: str matrix: Optional[MatrixConfig] = None - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) MATRIX_SCHEMA = MatrixSchema diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index ec45521a6..022800400 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -128,7 +128,7 @@ class LegacyNotificationsConfig(Schema, rename="kebab"): #: Schema for notify transforms -class NotifySchema(Schema, tag_field="notify_type"): +class NotifySchema(Schema, tag_field="notify_type", forbid_unknown_fields=False): """Schema for notify transforms. Note: This schema allows either 'notify' or 'notifications' field, @@ -137,8 +137,6 @@ class NotifySchema(Schema, tag_field="notify_type"): notify: Optional[NotifyConfig] = None notifications: Optional[LegacyNotificationsConfig] = None - # Allow extra fields - _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") def __post_init__(self): # Ensure only one of notify or notifications is present diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index dc06cd22b..2f29213d8 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -46,13 +46,14 @@ class WhenConfig(Schema): # Run configuration using msgspec -class RunConfig(Schema, rename=None): - """Configuration for how to run a task.""" +class RunConfig(Schema, rename=None, forbid_unknown_fields=False): + """Configuration for how to run a task. + + This schema allows extra fields for run implementation-specific configuration. + """ using: str workdir: TOptional[str] = None - # Allow any extra fields for run implementation-specific config - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) # Run description schema using msgspec diff --git a/src/taskgraph/transforms/run/toolchain.py b/src/taskgraph/transforms/run/toolchain.py index 7f3686d40..30f3d272f 100644 --- a/src/taskgraph/transforms/run/toolchain.py +++ b/src/taskgraph/transforms/run/toolchain.py @@ -7,8 +7,6 @@ from typing import Any, Dict, List, Literal, Optional, Union -import msgspec - import taskgraph from taskgraph.transforms.run import configure_taskdesc_for_run, run_task_using from taskgraph.transforms.run.common import ( @@ -25,7 +23,7 @@ #: Schema for run.using toolchain -class ToolchainRunSchema(Schema): +class ToolchainRunSchema(Schema, forbid_unknown_fields=False): # Required fields first # Specifies the run type. Must be "toolchain-script". @@ -63,12 +61,6 @@ class ToolchainRunSchema(Schema): # toolchain. toolchain_env: Optional[Dict[str, Any]] = None - # Allow extra fields - _extra: Optional[Dict[str, Any]] = msgspec.field(default=None, name="") - - -toolchain_run_schema = ToolchainRunSchema - def get_digest_data(config, run, taskdesc): files = list(run.pop("resources", [])) diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 91d4ca9f2..7070a0c31 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -70,12 +70,13 @@ class TaskDescriptionIndex(Schema, rename="kebab"): rank: Union[Literal["by-tier", "build_date"], int] = "by-tier" -class TaskDescriptionWorker(Schema, rename=None): - """Worker configuration for a task.""" +class TaskDescriptionWorker(Schema, rename=None, forbid_unknown_fields=False): + """Worker configuration for a task. + + This schema allows extra fields for worker-specific configuration. + """ implementation: str - # Allow any extra fields for worker-specific configuration - __extras__: Dict[str, TAny] = msgspec.field(default_factory=dict) class TaskDescriptionSchema(Schema): diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index bb381ac9a..719dde94a 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -1,7 +1,5 @@ from typing import Any, Dict, List, Optional, Union -import msgspec - from taskgraph.transforms.base import TransformSequence from taskgraph.util.schema import Schema from taskgraph.util.templates import deep_get, substitute_task_fields @@ -53,13 +51,17 @@ class TaskContextConfig(Schema): #: Schema for the task_context transforms -class TaskContextSchema(Schema): +class TaskContextSchema(Schema, forbid_unknown_fields=False): + """Schema for task context transforms. + + This schema allows extra fields to be passed through to the task. + """ + # Required field first task_context: TaskContextConfig # Optional fields name: Optional[str] = None - __extras__: Dict[str, Any] = msgspec.field(default_factory=dict) SCHEMA = TaskContextSchema diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index d3710c6f5..e0a600c03 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -190,7 +190,13 @@ def resolve_keyed_by( ] -class Schema(msgspec.Struct, kw_only=True, omit_defaults=True, rename="kebab"): +class Schema( + msgspec.Struct, + kw_only=True, + omit_defaults=True, + rename="kebab", + forbid_unknown_fields=True, +): """ Base schema class that extends msgspec.Struct. @@ -202,6 +208,13 @@ class MySchema(Schema): Instead of wrapping msgspec.Struct types. Most schemas use kebab-case renaming by default. + + By default, forbid_unknown_fields is True, meaning extra fields + will cause validation errors. Child classes can override this by + setting forbid_unknown_fields=False in their class definition: + + class MySchema(Schema, forbid_unknown_fields=False): + foo: str """ @classmethod From 6849b11bb57a1d6ec1783c9357db86eee49af6d3 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 2 Sep 2025 16:32:47 -0400 Subject: [PATCH 16/20] fix: updated tests for optionally keyed by --- test/test_util_schema.py | 65 +++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/test/test_util_schema.py b/test/test_util_schema.py index 4770384d8..9eea17337 100644 --- a/test/test_util_schema.py +++ b/test/test_util_schema.py @@ -253,29 +253,58 @@ def test_no_key(self): def test_optionally_keyed_by(): - validator = optionally_keyed_by("foo", str) - assert validator("baz") == "baz" - assert validator({"by-foo": {"a": "b", "c": "d"}}) == {"a": "b", "c": "d"} + # optionally_keyed_by now returns a type annotation for msgspec + type_annotation = optionally_keyed_by("foo", str) - with pytest.raises((TypeError, ValueError)): - validator({"by-foo": {"a": 1, "c": "d"}}) + # Create a struct with this type annotation to test validation + class TestSchema(Schema): + value: type_annotation - with pytest.raises(ValueError): - validator({"by-bar": {"a": "b"}}) + # Test that a simple string is accepted + result = msgspec.convert({"value": "baz"}, TestSchema) + assert result.value == "baz" + + # Test that keyed-by structure is accepted and works + result = msgspec.convert({"value": {"by-foo": {"a": "b", "c": "d"}}}, TestSchema) + assert result.value == {"by-foo": {"a": "b", "c": "d"}} + + # Test that invalid value types are rejected + with pytest.raises(msgspec.ValidationError): + msgspec.convert({"value": {"by-foo": {"a": 1, "c": "d"}}}, TestSchema) + + # Test that unknown by-keys are rejected due to Literal constraint + with pytest.raises(msgspec.ValidationError): + msgspec.convert({"value": {"by-bar": {"a": "b"}}}, TestSchema) def test_optionally_keyed_by_mulitple_keys(): - validator = optionally_keyed_by("foo", "bar", str) - assert validator("baz") == "baz" - assert validator({"by-foo": {"a": "b", "c": "d"}}) == {"a": "b", "c": "d"} - assert validator({"by-bar": {"x": "y"}}) == {"x": "y"} - assert validator({"by-foo": {"a": {"by-bar": {"x": "y"}}}}) == {"a": {"x": "y"}} + # optionally_keyed_by now returns a type annotation for msgspec + type_annotation = optionally_keyed_by("foo", "bar", str) + + # Create a struct with this type annotation to test validation + class TestSchema(Schema): + value: type_annotation + + # Test that a simple string is accepted + result = msgspec.convert({"value": "baz"}, TestSchema) + assert result.value == "baz" + + # Test that keyed-by with "foo" is accepted + result = msgspec.convert({"value": {"by-foo": {"a": "b", "c": "d"}}}, TestSchema) + assert result.value == {"by-foo": {"a": "b", "c": "d"}} + + # Test that keyed-by with "bar" is accepted + result = msgspec.convert({"value": {"by-bar": {"x": "y"}}}, TestSchema) + assert result.value == {"by-bar": {"x": "y"}} - with pytest.raises((TypeError, ValueError)): - validator({"by-foo": {"a": 123, "c": "d"}}) + # Test that invalid value types in by-foo are rejected + with pytest.raises(msgspec.ValidationError): + msgspec.convert({"value": {"by-foo": {"a": 123, "c": "d"}}}, TestSchema) - with pytest.raises((TypeError, ValueError)): - validator({"by-bar": {"a": 1}}) + # Test that invalid value types in by-bar are rejected + with pytest.raises(msgspec.ValidationError): + msgspec.convert({"value": {"by-bar": {"a": 1}}}, TestSchema) - with pytest.raises(ValueError): - validator({"by-unknown": {"a": "b"}}) + # Test that unknown by-keys are rejected due to Literal constraint + with pytest.raises(msgspec.ValidationError): + msgspec.convert({"value": {"by-unknown": {"a": "b"}}}, TestSchema) From 006a6e3b3a02d2a8e5448a02cd3604c0129469c3 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Wed, 3 Sep 2025 12:51:58 -0400 Subject: [PATCH 17/20] fix: remove extra code --- src/taskgraph/config.py | 6 +----- src/taskgraph/decision.py | 5 +---- src/taskgraph/parameters.py | 14 +++++--------- src/taskgraph/transforms/docker_image.py | 4 ---- src/taskgraph/transforms/fetch.py | 3 --- src/taskgraph/transforms/from_deps.py | 3 --- src/taskgraph/transforms/matrix.py | 4 +--- src/taskgraph/transforms/notify.py | 3 --- src/taskgraph/transforms/run/__init__.py | 9 +-------- src/taskgraph/transforms/run/index_search.py | 5 +---- src/taskgraph/transforms/run/run_task.py | 7 ++----- src/taskgraph/transforms/task.py | 6 +----- src/taskgraph/transforms/task_context.py | 4 +--- test/test_parameters.py | 2 +- 14 files changed, 15 insertions(+), 60 deletions(-) diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index ad6dd698d..33cb0332d 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -25,6 +25,7 @@ class WorkerAlias(Schema): """Worker alias configuration.""" + provisioner: optionally_keyed_by("level", str) # type: ignore implementation: str os: str @@ -90,11 +91,6 @@ class GraphConfigSchema(Schema, forbid_unknown_fields=False): task_expires_after: Optional[str] = None # Maps from "task-expires-after" - -# Msgspec schema is now the main schema -graph_config_schema = GraphConfigSchema - - @dataclass(frozen=True, eq=False) class GraphConfig: _config: Dict diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py index 33640db2c..4dd182e60 100644 --- a/src/taskgraph/decision.py +++ b/src/taskgraph/decision.py @@ -45,9 +45,6 @@ class TryTaskConfigSchemaV2(Schema): parameters: Optional[Dict[str, Any]] = None -try_task_config_schema_v2 = TryTaskConfigSchemaV2 - - def full_task_graph_to_runnable_tasks(full_task_json): runnable_tasks = {} for label, node in full_task_json.items(): @@ -355,7 +352,7 @@ def set_try_config(parameters, task_config_file): task_config_version = task_config.pop("version") if task_config_version == 2: validate_schema( - try_task_config_schema_v2, + TryTaskConfigSchemaV2, task_config, "Invalid v2 `try_task_config.json`.", ) diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index adbe1a1d3..0e434bcae 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -79,10 +79,6 @@ class BaseSchema(Schema): code_review: Optional[CodeReviewConfig] = None -# Keep backward compatibility -base_schema = BaseSchema - - def get_contents(path): with open(path) as fh: contents = fh.readline().rstrip() @@ -168,7 +164,7 @@ def extend_parameters_schema(schema, defaults_fn=None): dict mapping parameter name to default value in the event strict=False (optional). """ - global base_schema + global BaseSchema global defaults_functions global _schema_extensions @@ -249,7 +245,7 @@ def check(self): # Strict mode: validate against schema and check for extra fields # Get all valid field names from the base schema schema_fields = { - f.encode_name for f in msgspec.structs.fields(base_schema) + f.encode_name for f in msgspec.structs.fields(BaseSchema) } # Check for extra fields @@ -260,17 +256,17 @@ def check(self): ) # Validate all parameters against the schema - msgspec.convert(kebab_params, base_schema) + msgspec.convert(kebab_params, BaseSchema) else: # Non-strict mode: only validate fields that exist in the schema # Filter to only include fields defined in the schema schema_fields = { - f.encode_name for f in msgspec.structs.fields(base_schema) + f.encode_name for f in msgspec.structs.fields(BaseSchema) } filtered_params = { k: v for k, v in kebab_params.items() if k in schema_fields } - msgspec.convert(filtered_params, base_schema) + msgspec.convert(filtered_params, BaseSchema) except (msgspec.ValidationError, msgspec.DecodeError) as e: raise ParameterMismatch(f"Invalid parameters: {e}") diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 31d8397e6..56082ff2b 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -54,10 +54,6 @@ class DockerImageSchema(Schema): cache: Optional[bool] = None -# Backward compatibility -docker_image_schema = DockerImageSchema - - transforms.add_validate(DockerImageSchema) diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 25a86ba23..ff56901ac 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -64,9 +64,6 @@ def __post_init__(self): raise msgspec.ValidationError("fetch must be a dict with a 'type' field") -# Backward compatibility -FETCH_SCHEMA = FetchSchema - # define a collection of payload builders, depending on the worker implementation fetch_builders = {} diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 0f2390f42..4943bc874 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -95,9 +95,6 @@ class FromDepsSchema(Schema, forbid_unknown_fields=False): from_deps: FromDepsConfig -# Backward compatibility -FROM_DEPS_SCHEMA = FromDepsSchema - transforms = TransformSequence() transforms.add_validate(FromDepsSchema) diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 03c8252a3..3eae48e51 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -50,10 +50,8 @@ class MatrixSchema(Schema, forbid_unknown_fields=False): matrix: Optional[MatrixConfig] = None -MATRIX_SCHEMA = MatrixSchema - transforms = TransformSequence() -transforms.add_validate(MATRIX_SCHEMA) +transforms.add_validate(MatrixSchema) def _resolve_matrix(tasks, key, values, exclude): diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 022800400..688def7b9 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -173,9 +173,6 @@ def __post_init__(self): self.notify.recipients = validated_recipients -# Backward compatibility -NOTIFY_SCHEMA = NotifySchema - transforms = TransformSequence() transforms.add_validate(NotifySchema) diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 2f29213d8..686082208 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -105,15 +105,8 @@ class RunDescriptionSchema(Schema): worker: Dict[str, Any] = msgspec.field(default_factory=dict) -# Use the msgspec class directly for fetches -fetches_schema = FetchesSchema - -#: Schema for a run transforms - now using msgspec -run_description_schema = RunDescriptionSchema - - transforms = TransformSequence() -transforms.add_validate(run_description_schema) +transforms.add_validate(RunDescriptionSchema) @transforms.add diff --git a/src/taskgraph/transforms/run/index_search.py b/src/taskgraph/transforms/run/index_search.py index a972c8cad..6f864ce97 100644 --- a/src/taskgraph/transforms/run/index_search.py +++ b/src/taskgraph/transforms/run/index_search.py @@ -25,10 +25,7 @@ class RunTaskSchema(Schema): index_search: List[str] -run_task_schema = RunTaskSchema - - -@run_task_using("always-optimized", "index-search", schema=run_task_schema) +@run_task_using("always-optimized", "index-search", schema=RunTaskSchema) def fill_template(config, task, taskdesc): run = task["run"] taskdesc["optimization"] = { diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index 3d4bfc81c..c86424cda 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -71,9 +71,6 @@ class RunTaskSchema(Schema): run_as_root: bool = False -run_task_schema = RunTaskSchema - - def common_setup(config, task, taskdesc, command): run = task["run"] if run["checkout"]: @@ -142,7 +139,7 @@ def script_url(config, script): @run_task_using( - "docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults + "docker-worker", "run-task", schema=RunTaskSchema, defaults=worker_defaults ) def docker_worker_run_task(config, task, taskdesc): run = task["run"] @@ -164,7 +161,7 @@ def docker_worker_run_task(config, task, taskdesc): @run_task_using( - "generic-worker", "run-task", schema=run_task_schema, defaults=worker_defaults + "generic-worker", "run-task", schema=RunTaskSchema, defaults=worker_defaults ) def generic_worker_run_task(config, task, taskdesc): run = task["run"] diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 7070a0c31..8baec7921 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -138,10 +138,6 @@ class TaskDescriptionSchema(Schema): worker: TOptional[TaskDescriptionWorker] = None -#: Schema for the task transforms - now using msgspec -task_description_schema = TaskDescriptionSchema - - TC_TREEHERDER_SCHEMA_URL = ( "https://github.com/taskcluster/taskcluster-treeherder/" "blob/master/schemas/task-treeherder-config.yml" @@ -900,7 +896,7 @@ def task_name_from_label(config, tasks): def validate(config, tasks): for task in tasks: validate_schema( - task_description_schema, + TaskDescriptionSchema, task, "In task {!r}:".format(task.get("label", "?no-label?")), ) diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index 719dde94a..0d795abad 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -64,10 +64,8 @@ class TaskContextSchema(Schema, forbid_unknown_fields=False): name: Optional[str] = None -SCHEMA = TaskContextSchema - transforms = TransformSequence() -transforms.add_validate(SCHEMA) +transforms.add_validate(TaskContextSchema) @transforms.add diff --git a/test/test_parameters.py b/test/test_parameters.py index 6ec540178..8fdf74699 100644 --- a/test/test_parameters.py +++ b/test/test_parameters.py @@ -287,7 +287,7 @@ class ExtendedSchema(Schema): # Set our extended schema as the base schema monkeypatch.setattr( parameters, - "base_schema", + "BaseSchema", ExtendedSchema, ) From 1b37f95374ca782498ce16fcead1df932dae4382 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Thu, 4 Sep 2025 14:56:23 -0400 Subject: [PATCH 18/20] fix: updated based on review --- docs/concepts/transforms.rst | 2 +- src/taskgraph/config.py | 13 +- src/taskgraph/parameters.py | 64 +++++++-- src/taskgraph/transforms/chunking.py | 8 +- src/taskgraph/transforms/fetch.py | 24 ++-- src/taskgraph/transforms/from_deps.py | 50 ++----- src/taskgraph/transforms/matrix.py | 4 +- src/taskgraph/transforms/notify.py | 53 ++++---- src/taskgraph/transforms/run/__init__.py | 29 ++--- src/taskgraph/transforms/task.py | 106 ++++++++------- src/taskgraph/transforms/task_context.py | 4 +- src/taskgraph/util/schema.py | 122 ++++++++---------- .../transforms/hello.py | 4 +- test/test_parameters.py | 76 +++++++---- 14 files changed, 286 insertions(+), 273 deletions(-) diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index c46e4d5a8..c3913f865 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -110,7 +110,7 @@ about the state of the tasks at given points. Here is an example: from taskgraph.util.schema import Schema class MySchema(Schema): - foo: str # Required field + foo: str # Required field bar: Optional[bool] = None # Optional field transforms = TransformSequence() diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 33cb0332d..9278e368a 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -23,7 +23,7 @@ ] -class WorkerAlias(Schema): +class WorkerAliasSchema(Schema): """Worker alias configuration.""" provisioner: optionally_keyed_by("level", str) # type: ignore @@ -32,10 +32,10 @@ class WorkerAlias(Schema): worker_type: optionally_keyed_by("level", str) # type: ignore -class Workers(Schema, rename=None): +class WorkersSchema(Schema, rename=None): """Workers configuration.""" - aliases: Dict[str, WorkerAlias] + aliases: Dict[str, WorkerAliasSchema] class Repository(Schema, forbid_unknown_fields=False): @@ -58,7 +58,7 @@ class RunConfig(Schema): use_caches: Optional[Union[bool, List[str]]] = None # Maps from "use-caches" -class TaskGraphConfig(Schema): +class TaskGraphSchema(Schema): """Taskgraph specific configuration.""" # Required fields first @@ -82,8 +82,8 @@ class GraphConfigSchema(Schema, forbid_unknown_fields=False): # Required fields first trust_domain: str # Maps from "trust-domain" task_priority: optionally_keyed_by("project", "level", TaskPriority) # type: ignore - workers: Workers - taskgraph: TaskGraphConfig + workers: WorkersSchema + taskgraph: TaskGraphSchema # Optional fields docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" @@ -158,7 +158,6 @@ def kinds_dir(self): def validate_graph_config(config): """Validate graph configuration using msgspec.""" - # With rename="kebab", msgspec handles the conversion automatically validate_schema(GraphConfigSchema, config, "Invalid graph configuration:") diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index 0e434bcae..5f04ab8fa 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -29,7 +29,7 @@ class ParameterMismatch(Exception): """Raised when a parameters.yml has extra or missing parameters.""" -class CodeReviewConfig(Schema): +class CodeReviewSchema(Schema): """Code review configuration.""" # Required field @@ -73,10 +73,10 @@ class BaseSchema(Schema): tasks_for: str # Optional fields - next_version: Optional[str] - optimize_strategies: Optional[str] - version: Optional[str] - code_review: Optional[CodeReviewConfig] = None + next_version: Optional[str] = None + optimize_strategies: Optional[str] = None + version: Optional[str] = None + code_review: Optional[CodeReviewSchema] = None def get_contents(path): @@ -243,11 +243,20 @@ def check(self): if self.strict: # Strict mode: validate against schema and check for extra fields - # Get all valid field names from the base schema + # Get all valid field names from the base schema and extensions schema_fields = { f.encode_name for f in msgspec.structs.fields(BaseSchema) } + # Add fields from extension schemas + for ext_schema in _schema_extensions: + if isinstance(ext_schema, type) and issubclass( + ext_schema, msgspec.Struct + ): + schema_fields.update( + {f.encode_name for f in msgspec.structs.fields(ext_schema)} + ) + # Check for extra fields extra_fields = set(kebab_params.keys()) - schema_fields if extra_fields: @@ -255,11 +264,32 @@ def check(self): f"Invalid parameters: Extra fields not allowed: {extra_fields}" ) - # Validate all parameters against the schema - msgspec.convert(kebab_params, BaseSchema) + # Validate base schema fields only (filter out extension fields) + base_fields = { + f.encode_name for f in msgspec.structs.fields(BaseSchema) + } + base_params = { + k: v for k, v in kebab_params.items() if k in base_fields + } + msgspec.convert(base_params, BaseSchema) + + # Also validate against extension schemas + for ext_schema in _schema_extensions: + if isinstance(ext_schema, type) and issubclass( + ext_schema, msgspec.Struct + ): + # Only validate fields that belong to this extension + ext_fields = { + f.encode_name for f in msgspec.structs.fields(ext_schema) + } + ext_params = { + k: v for k, v in kebab_params.items() if k in ext_fields + } + if ext_params: + msgspec.convert(ext_params, ext_schema) else: - # Non-strict mode: only validate fields that exist in the schema - # Filter to only include fields defined in the schema + # Non-strict mode: only validate fields that exist in the schemas + # Filter to only include fields defined in the base schema schema_fields = { f.encode_name for f in msgspec.structs.fields(BaseSchema) } @@ -267,6 +297,20 @@ def check(self): k: v for k, v in kebab_params.items() if k in schema_fields } msgspec.convert(filtered_params, BaseSchema) + + # Also validate extension schemas in non-strict mode + for ext_schema in _schema_extensions: + if isinstance(ext_schema, type) and issubclass( + ext_schema, msgspec.Struct + ): + ext_fields = { + f.encode_name for f in msgspec.structs.fields(ext_schema) + } + ext_params = { + k: v for k, v in kebab_params.items() if k in ext_fields + } + if ext_params: + msgspec.convert(ext_params, ext_schema) except (msgspec.ValidationError, msgspec.DecodeError) as e: raise ParameterMismatch(f"Invalid parameters: {e}") diff --git a/src/taskgraph/transforms/chunking.py b/src/taskgraph/transforms/chunking.py index 50b151a25..c3a79b99f 100644 --- a/src/taskgraph/transforms/chunking.py +++ b/src/taskgraph/transforms/chunking.py @@ -9,7 +9,7 @@ from taskgraph.util.templates import substitute -class ChunkConfig(Schema): +class ChunkSchema(Schema): """ `chunk` can be used to split one task into `total-chunks` tasks, substituting `this_chunk` and `total_chunks` into any @@ -24,13 +24,13 @@ class ChunkConfig(Schema): #: Schema for chunking transforms -class ChunkSchema(Schema, forbid_unknown_fields=False): +class ChunksSchema(Schema, forbid_unknown_fields=False): # Optional, so it can be used for a subset of tasks in a kind - chunk: Optional[ChunkConfig] = None + chunk: Optional[ChunkSchema] = None transforms = TransformSequence() -transforms.add_validate(ChunkSchema) +transforms.add_validate(ChunksSchema) @transforms.add diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index ff56901ac..262b99752 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -9,7 +9,7 @@ import os import re from dataclasses import dataclass -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union import msgspec @@ -24,14 +24,6 @@ CACHE_TYPE = "content.v1" -#: Schema for fetch transforms -class FetchConfig(Schema, rename=None, omit_defaults=False): - """Configuration for a fetch task type.""" - - type: str - # Additional fields handled dynamically by fetch builders - - class FetchSchema(Schema): # Required fields # Name of the task. @@ -70,7 +62,7 @@ def __post_init__(self): @dataclass(frozen=True) class FetchBuilder: - schema: Any # Either msgspec.Struct type or validation function + schema: Union[Schema, Callable] builder: Callable @@ -194,7 +186,7 @@ def make_task(config, tasks): yield task_desc -class GPGSignatureConfig(Schema): +class GPGSignatureSchema(Schema): """GPG signature verification configuration.""" # URL where GPG signature document can be obtained. Can contain the @@ -204,7 +196,7 @@ class GPGSignatureConfig(Schema): key_path: str -class StaticUrlFetchConfig(Schema, rename="kebab"): +class StaticUrlFetchSchema(Schema): """Configuration for static-url fetch type.""" type: str @@ -215,7 +207,7 @@ class StaticUrlFetchConfig(Schema, rename="kebab"): # Size of the downloaded entity, in bytes. size: int # GPG signature verification. - gpg_signature: Optional[GPGSignatureConfig] = None + gpg_signature: Optional[GPGSignatureSchema] = None # The name to give to the generated artifact. Defaults to the file # portion of the URL. Using a different extension converts the # archive to the given type. Only conversion to .tar.zst is supported. @@ -233,7 +225,7 @@ class StaticUrlFetchConfig(Schema, rename="kebab"): # it is important to update the digest data used to compute cache hits. -@fetch_builder("static-url", StaticUrlFetchConfig) +@fetch_builder("static-url", StaticUrlFetchSchema) def create_fetch_url_task(config, name, fetch): artifact_name = fetch.get("artifact-name") if not artifact_name: @@ -296,7 +288,7 @@ def create_fetch_url_task(config, name, fetch): } -class GitFetchConfig(Schema): +class GitFetchSchema(Schema): """Configuration for git fetch type.""" type: str @@ -312,7 +304,7 @@ class GitFetchConfig(Schema): ssh_key: Optional[str] = None -@fetch_builder("git", GitFetchConfig) +@fetch_builder("git", GitFetchSchema) def create_git_fetch_task(config, name, fetch): path_prefix = fetch.get("path-prefix") if not path_prefix: diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 4943bc874..df655e2cb 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -13,9 +13,7 @@ from copy import deepcopy from textwrap import dedent -from typing import Any, Dict, List, Optional, Union - -import msgspec +from typing import Any, Dict, List, Literal, Optional, Union from taskgraph.transforms.base import TransformSequence from taskgraph.util.attributes import attrmatch @@ -23,15 +21,11 @@ from taskgraph.util.schema import Schema, validate_schema from taskgraph.util.set_name import SET_NAME_MAP +SetNameType = Literal["strip-kind", "retain-kind"] +GroupByType = Literal["single", "all", "attribute"] -class FetchEntry(Schema, rename=None): - """A fetch entry for an artifact.""" - - artifact: str - dest: Optional[str] = None - -class FromDepsConfig(Schema): +class FromDepsChildSchema(Schema): # Optional fields # Limit dependencies to specified kinds (defaults to all kinds in # `kind-dependencies`). @@ -40,8 +34,9 @@ class FromDepsConfig(Schema): # dependency of this kind will be used to derive the label # and copy attributes (if `copy-attributes` is True). kinds: Optional[List[str]] = None - # UPDATE ME AND DOCS - set_name: Optional[Union[str, bool, Dict[str, Any]]] = None + # Set the task name using the specified function. Can be False to + # disable name setting, or a string/dict specifying the function to use. + set_name: Optional[Union[SetNameType, bool, Dict[SetNameType, Any]]] = None # Limit dependencies to tasks whose attributes match # using :func:`~taskgraph.util.attributes.attrmatch`. with_attributes: Optional[Dict[str, Union[List[Any], str]]] = None @@ -49,7 +44,7 @@ class FromDepsConfig(Schema): # function. One task will be created for each group. If not # specified, the 'single' function will be used which creates # a new task for each individual dependency. - group_by: Optional[Union[str, Dict[str, Any]]] = None + group_by: Optional[Union[GroupByType, Dict[GroupByType, Any]]] = None # If True, copy attributes from the dependency matching the # first kind in the `kinds` list (whether specified explicitly # or taken from `kind-dependencies`). @@ -64,35 +59,12 @@ class FromDepsConfig(Schema): # `fetches` entry. fetches: Optional[Dict[str, List[Union[str, Dict[str, str]]]]] = None - def __post_init__(self): - # Validate set_name - if self.set_name is not None and self.set_name is not False: - if isinstance(self.set_name, str) and self.set_name not in SET_NAME_MAP: - raise msgspec.ValidationError(f"Invalid set-name: {self.set_name}") - elif isinstance(self.set_name, dict): - keys = list(self.set_name.keys()) - if len(keys) != 1 or keys[0] not in SET_NAME_MAP: - raise msgspec.ValidationError( - f"Invalid set-name dict: {self.set_name}" - ) - - # Validate group_by - if self.group_by is not None: - if isinstance(self.group_by, str) and self.group_by not in GROUP_BY_MAP: - raise msgspec.ValidationError(f"Invalid group-by: {self.group_by}") - elif isinstance(self.group_by, dict): - keys = list(self.group_by.keys()) - if len(keys) != 1 or keys[0] not in GROUP_BY_MAP: - raise msgspec.ValidationError( - f"Invalid group-by dict: {self.group_by}" - ) - - -#: Schema for from_deps transforms + +# Schema for from_deps transforms class FromDepsSchema(Schema, forbid_unknown_fields=False): """Schema for from_deps transforms.""" - from_deps: FromDepsConfig + from_deps: FromDepsChildSchema transforms = TransformSequence() diff --git a/src/taskgraph/transforms/matrix.py b/src/taskgraph/transforms/matrix.py index 3eae48e51..e7548ea17 100644 --- a/src/taskgraph/transforms/matrix.py +++ b/src/taskgraph/transforms/matrix.py @@ -15,7 +15,7 @@ from taskgraph.util.templates import substitute_task_fields -class MatrixConfig(Schema, forbid_unknown_fields=False): +class MatrixChildSchema(Schema, forbid_unknown_fields=False): """ Matrix configuration for generating multiple tasks. """ @@ -47,7 +47,7 @@ class MatrixSchema(Schema, forbid_unknown_fields=False): """ name: str - matrix: Optional[MatrixConfig] = None + matrix: Optional[MatrixChildSchema] = None transforms = TransformSequence() diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 688def7b9..8ce6b29ad 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -26,7 +26,7 @@ ] -class EmailRecipient(Schema): +class EmailRecipientSchema(Schema): """Email notification recipient.""" type: Literal["email"] @@ -34,7 +34,7 @@ class EmailRecipient(Schema): status_type: Optional[StatusType] = None -class MatrixRoomRecipient(Schema): +class MatrixRoomRecipientSchema(Schema): """Matrix room notification recipient.""" type: Literal["matrix-room"] @@ -42,7 +42,7 @@ class MatrixRoomRecipient(Schema): status_type: Optional[StatusType] = None -class PulseRecipient(Schema): +class PulseRecipientSchema(Schema): """Pulse notification recipient.""" type: Literal["pulse"] @@ -50,7 +50,7 @@ class PulseRecipient(Schema): status_type: Optional[StatusType] = None -class SlackChannelRecipient(Schema): +class SlackChannelRecipientSchema(Schema): """Slack channel notification recipient.""" type: Literal["slack-channel"] @@ -59,7 +59,10 @@ class SlackChannelRecipient(Schema): Recipient = Union[ - EmailRecipient, MatrixRoomRecipient, PulseRecipient, SlackChannelRecipient + EmailRecipientSchema, + MatrixRoomRecipientSchema, + PulseRecipientSchema, + SlackChannelRecipientSchema, ] _route_keys = { @@ -71,22 +74,22 @@ class SlackChannelRecipient(Schema): """Map each type to its primary key that will be used in the route.""" -class EmailLink(Schema, rename=None, omit_defaults=False): +class EmailLinkSchema(Schema, rename=None, omit_defaults=False): """Email link configuration.""" text: str href: str -class EmailContent(Schema, rename=None): +class EmailContentSchema(Schema, rename=None): """Email notification content.""" subject: Optional[str] = None content: Optional[str] = None - link: Optional[EmailLink] = None + link: Optional[EmailLinkSchema] = None -class MatrixContent(Schema): +class MatrixContentSchema(Schema): """Matrix notification content.""" body: Optional[str] = None @@ -95,7 +98,7 @@ class MatrixContent(Schema): msg_type: Optional[str] = None -class SlackContent(Schema, rename=None): +class SlackContentSchema(Schema, rename=None): """Slack notification content.""" text: Optional[str] = None @@ -103,22 +106,22 @@ class SlackContent(Schema, rename=None): attachments: Optional[List[Any]] = None -class NotifyContent(Schema, rename=None): +class NotifyContentSchema(Schema, rename=None): """Notification content configuration.""" - email: Optional[EmailContent] = None - matrix: Optional[MatrixContent] = None - slack: Optional[SlackContent] = None + email: Optional[EmailContentSchema] = None + matrix: Optional[MatrixContentSchema] = None + slack: Optional[SlackContentSchema] = None -class NotifyConfig(Schema, rename=None): +class NotifyConfigSchema(Schema, rename=None): """Modern notification configuration.""" recipients: List[Dict[str, Any]] # Will be validated as Recipient union - content: Optional[NotifyContent] = None + content: Optional[NotifyContentSchema] = None -class LegacyNotificationsConfig(Schema, rename="kebab"): +class LegacyNotificationsConfigSchema(Schema, rename="kebab"): """Legacy notification configuration for backwards compatibility.""" emails: Union[List[str], Dict[str, Any]] # Can be keyed-by @@ -135,8 +138,8 @@ class NotifySchema(Schema, tag_field="notify_type", forbid_unknown_fields=False) but not both. The validation will be done in __post_init__. """ - notify: Optional[NotifyConfig] = None - notifications: Optional[LegacyNotificationsConfig] = None + notify: Optional[NotifyConfigSchema] = None + notifications: Optional[LegacyNotificationsConfigSchema] = None def __post_init__(self): # Ensure only one of notify or notifications is present @@ -152,16 +155,20 @@ def __post_init__(self): try: # Try to convert to one of the recipient types if r.get("type") == "email": - validated_recipients.append(msgspec.convert(r, EmailRecipient)) + validated_recipients.append( + msgspec.convert(r, EmailRecipientSchema) + ) elif r.get("type") == "matrix-room": validated_recipients.append( - msgspec.convert(r, MatrixRoomRecipient) + msgspec.convert(r, MatrixRoomRecipientSchema) ) elif r.get("type") == "pulse": - validated_recipients.append(msgspec.convert(r, PulseRecipient)) + validated_recipients.append( + msgspec.convert(r, PulseRecipientSchema) + ) elif r.get("type") == "slack-channel": validated_recipients.append( - msgspec.convert(r, SlackChannelRecipient) + msgspec.convert(r, SlackChannelRecipientSchema) ) else: raise msgspec.ValidationError( diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 686082208..1bd3fe7b3 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -11,8 +11,7 @@ import copy import logging -from typing import Any, Dict, List, Union -from typing import Optional as TOptional +from typing import Any, Dict, List, Optional, Union import msgspec @@ -33,27 +32,27 @@ class FetchesSchema(Schema): """Schema for fetch configuration.""" artifact: str - dest: TOptional[str] = None + dest: Optional[str] = None extract: bool = False verify_hash: bool = False # When configuration using msgspec -class WhenConfig(Schema): +class WhenSchema(Schema): """Configuration for when a task should be included.""" files_changed: List[str] = msgspec.field(default_factory=list) # Run configuration using msgspec -class RunConfig(Schema, rename=None, forbid_unknown_fields=False): +class RunSchema(Schema, rename=None, forbid_unknown_fields=False): """Configuration for how to run a task. This schema allows extra fields for run implementation-specific configuration. """ using: str - workdir: TOptional[str] = None + workdir: Optional[str] = None # Run description schema using msgspec @@ -62,29 +61,29 @@ class RunDescriptionSchema(Schema): # Required fields first description: str - run: RunConfig + run: RunSchema worker_type: str # Optional fields # The name of the task. At least one of 'name' or 'label' must be # specified. If 'label' is not provided, it will be generated from # the 'name' by prepending the kind. - name: TOptional[str] = None + name: Optional[str] = None # The label of the task. At least one of 'name' or 'label' must be # specified. If 'label' is not provided, it will be generated from # the 'name' by prepending the kind. - label: TOptional[str] = None + label: Optional[str] = None # Optional fields from task description - priority: TOptional[str] = None + priority: Optional[str] = None attributes: Dict[str, Any] = msgspec.field(default_factory=dict) - task_from: TOptional[str] = None + task_from: Optional[str] = None dependencies: Dict[str, Any] = msgspec.field(default_factory=dict) soft_dependencies: List[str] = msgspec.field(default_factory=list) if_dependencies: List[str] = msgspec.field(default_factory=list) requires: str = "all-completed" - deadline_after: TOptional[str] = None - expires_after: TOptional[str] = None + deadline_after: Optional[str] = None + expires_after: Optional[str] = None routes: List[str] = msgspec.field(default_factory=list) scopes: List[str] = msgspec.field(default_factory=list) tags: Dict[str, str] = msgspec.field(default_factory=dict) @@ -94,11 +93,11 @@ class RunDescriptionSchema(Schema): run_on_projects: Any = None run_on_tasks_for: List[str] = msgspec.field(default_factory=list) run_on_git_branches: List[str] = msgspec.field(default_factory=list) - shipping_phase: TOptional[str] = None + shipping_phase: Optional[str] = None always_target: bool = False optimization: Any = None needs_sccache: bool = False - when: TOptional[WhenConfig] = None + when: Optional[WhenSchema] = None fetches: Dict[str, List[Union[str, FetchesSchema]]] = msgspec.field( default_factory=dict ) diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 8baec7921..193c8fc94 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -15,9 +15,7 @@ import time from copy import deepcopy from dataclasses import dataclass -from typing import Any as TAny -from typing import Callable, Dict, List, Literal, Union -from typing import Optional as TOptional +from typing import Any, Callable, Dict, List, Literal, Optional, Union import msgspec @@ -48,16 +46,16 @@ def _run_task_suffix(): # Task Description schema using msgspec -class TaskDescriptionTreeherder(Schema, rename=None): +class TaskDescriptionTreeherderSchema(Schema, rename=None): """Treeherder-related information for a task.""" - symbol: TOptional[str] = None - kind: TOptional[Literal["build", "test", "other"]] = None - tier: TOptional[int] = None - platform: TOptional[str] = None + symbol: Optional[str] = None + kind: Optional[Literal["build", "test", "other"]] = None + tier: Optional[int] = None + platform: Optional[str] = None -class TaskDescriptionIndex(Schema, rename="kebab"): +class TaskDescriptionIndexSchema(Schema, rename="kebab"): """Index information for a task.""" # the name of the product this build produces @@ -70,7 +68,7 @@ class TaskDescriptionIndex(Schema, rename="kebab"): rank: Union[Literal["by-tier", "build_date"], int] = "by-tier" -class TaskDescriptionWorker(Schema, rename=None, forbid_unknown_fields=False): +class TaskDescriptionWorkerSchema(Schema, rename=None, forbid_unknown_fields=False): """Worker configuration for a task. This schema allows extra fields for worker-specific configuration. @@ -89,13 +87,13 @@ class TaskDescriptionSchema(Schema): # The provisioner-id/worker-type for the task worker_type: str # Attributes for this task - attributes: Dict[str, TAny] = msgspec.field(default_factory=dict) + attributes: Dict[str, Any] = msgspec.field(default_factory=dict) # Relative path (from config.path) to the file task was defined in - task_from: TOptional[str] = None + task_from: Optional[str] = None # Dependencies of this task, keyed by name - dependencies: Dict[str, TAny] = msgspec.field(default_factory=dict) + dependencies: Dict[str, Any] = msgspec.field(default_factory=dict) # Priority of the task - priority: TOptional[ + priority: Optional[ Literal["highest", "very-high", "high", "medium", "low", "very-low", "lowest"] ] = None # Soft dependencies of this task, as a list of task labels @@ -105,9 +103,9 @@ class TaskDescriptionSchema(Schema): # Specifies the condition for task execution requires: Literal["all-completed", "all-resolved"] = "all-completed" # Expiration time relative to task creation - expires_after: TOptional[str] = None + expires_after: Optional[str] = None # Deadline time relative to task creation - deadline_after: TOptional[str] = None + deadline_after: Optional[str] = None # Custom routes for this task routes: List[str] = msgspec.field(default_factory=list) # Custom scopes for this task @@ -115,27 +113,27 @@ class TaskDescriptionSchema(Schema): # Tags for this task tags: Dict[str, str] = msgspec.field(default_factory=dict) # Custom 'task.extra' content - extra: Dict[str, TAny] = msgspec.field(default_factory=dict) + extra: Dict[str, Any] = msgspec.field(default_factory=dict) # Treeherder-related information - treeherder: Union[bool, TaskDescriptionTreeherder, None] = None + treeherder: Union[bool, TaskDescriptionTreeherderSchema, None] = None # Information for indexing this build - index: TOptional[TaskDescriptionIndex] = None + index: Optional[TaskDescriptionIndexSchema] = None # The `run_on_projects` attribute - run_on_projects: TAny = None # This uses optionally_keyed_by, so we need Any + run_on_projects: Any = None # This uses optionally_keyed_by, so we need Any # Specifies tasks for which this task should run run_on_tasks_for: List[str] = msgspec.field(default_factory=list) # Specifies git branches for which this task should run run_on_git_branches: List[str] = msgspec.field(default_factory=list) # The `shipping_phase` attribute - shipping_phase: TOptional[Literal["build", "promote", "push", "ship"]] = None + shipping_phase: Optional[Literal["build", "promote", "push", "ship"]] = None # The `always-target` attribute always_target: bool = False # Optimization to perform on this task - optimization: TAny = None # Uses OptimizationSchema which has custom validation + optimization: Any = None # Uses OptimizationSchema which has custom validation # Whether the task should use sccache compiler caching needs_sccache: bool = False # Information specific to the worker implementation - worker: TOptional[TaskDescriptionWorker] = None + worker: Optional[TaskDescriptionWorkerSchema] = None TC_TREEHERDER_SCHEMA_URL = ( @@ -239,7 +237,7 @@ def verify_index(config, index): # Docker Worker schema using msgspec -class DockerWorkerCacheConfig(Schema, rename="kebab"): +class DockerWorkerCacheSchema(Schema, rename="kebab"): """Cache configuration for docker-worker.""" # name of the cache, allowing reuse by subsequent tasks naming the same cache @@ -252,7 +250,7 @@ class DockerWorkerCacheConfig(Schema, rename="kebab"): skip_untrusted: bool = False -class DockerWorkerArtifactConfig(Schema, rename=None): +class DockerWorkerArtifactSchema(Schema, rename=None): """Artifact configuration for docker-worker.""" # type of artifact -- simple file, or recursive directory, or a volume mounted directory. @@ -288,18 +286,18 @@ class DockerWorkerPayloadSchema(Schema): # Paths to Docker volumes. volumes: List[str] = msgspec.field(default_factory=list) # caches to set up for the task - caches: TOptional[List[DockerWorkerCacheConfig]] = None + caches: Optional[List[DockerWorkerCacheSchema]] = None # artifacts to extract from the task image after completion - artifacts: TOptional[List[DockerWorkerArtifactConfig]] = None + artifacts: Optional[List[DockerWorkerArtifactSchema]] = None # environment variables env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) # the command to run; if not given, docker-worker will default to the # command in the docker image - command: TOptional[List[Union[str, Dict[str, str]]]] = None + command: Optional[List[Union[str, Dict[str, str]]]] = None # the exit status code(s) that indicates the task should be retried - retry_exit_status: TOptional[List[int]] = None + retry_exit_status: Optional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged - purge_caches_exit_status: TOptional[List[int]] = None + purge_caches_exit_status: Optional[List[int]] = None # Whether any artifacts are assigned to this worker skip_artifacts: bool = False @@ -518,7 +516,7 @@ def build_docker_worker_payload(config, task, task_def): # Generic Worker schema using msgspec -class GenericWorkerArtifactConfig(Schema, rename=None): +class GenericWorkerArtifactSchema(Schema, rename=None): """Artifact configuration for generic-worker.""" # type of artifact -- simple file, or recursive directory @@ -526,33 +524,33 @@ class GenericWorkerArtifactConfig(Schema, rename=None): # filesystem path from which to read artifact path: str # if not specified, path is used for artifact name - name: TOptional[str] = None + name: Optional[str] = None -class GenericWorkerMountContent(Schema, rename="kebab"): +class GenericWorkerMountContentSchema(Schema, rename="kebab"): """Mount content configuration for generic-worker.""" # Artifact name that contains the content. - artifact: TOptional[str] = None + artifact: Optional[str] = None # Task ID that has the artifact that contains the content. - task_id: TOptional[Union[str, Dict[str, str]]] = None + task_id: Optional[Union[str, Dict[str, str]]] = None # URL that supplies the content in response to an unauthenticated GET request. - url: TOptional[str] = None + url: Optional[str] = None -class GenericWorkerMountConfig(Schema, rename="kebab"): +class GenericWorkerMountSchema(Schema, rename="kebab"): """Mount configuration for generic-worker.""" # A unique name for the cache volume, implies writable cache directory - cache_name: TOptional[str] = None + cache_name: Optional[str] = None # Optional content for pre-loading cache, or mandatory content for read-only file or directory - content: TOptional[GenericWorkerMountContent] = None + content: Optional[GenericWorkerMountContentSchema] = None # If mounting a cache or read-only directory, the filesystem location - directory: TOptional[str] = None + directory: Optional[str] = None # If mounting a file, specify the relative path within the task directory - file: TOptional[str] = None + file: Optional[str] = None # Archive format of the content - format: TOptional[Literal["rar", "tar.bz2", "tar.gz", "zip"]] = None + format: Optional[Literal["rar", "tar.bz2", "tar.gz", "zip"]] = None class GenericWorkerPayloadSchema(Schema): @@ -564,21 +562,21 @@ class GenericWorkerPayloadSchema(Schema): # command is a list of commands to run, sequentially # on Windows, each command is a string, on OS X and Linux, each command is a string array # Using Any here because msgspec doesn't support union of multiple list types - command: TAny + command: Any # the maximum time to run, in seconds max_run_time: int # Optional fields # artifacts to extract from the task image after completion - artifacts: TOptional[List[GenericWorkerArtifactConfig]] = None + artifacts: Optional[List[GenericWorkerArtifactSchema]] = None # Directories and/or files to be mounted - mounts: TOptional[List[GenericWorkerMountConfig]] = None + mounts: Optional[List[GenericWorkerMountSchema]] = None # environment variables env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) # the exit status code(s) that indicates the task should be retried - retry_exit_status: TOptional[List[int]] = None + retry_exit_status: Optional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged - purge_caches_exit_status: TOptional[List[int]] = None + purge_caches_exit_status: Optional[List[int]] = None # os user groups for test task workers os_groups: List[str] = msgspec.field(default_factory=list) # feature for test task to run as administrator @@ -705,7 +703,7 @@ def build_generic_worker_payload(config, task, task_def): # Beetmover schema using msgspec -class BeetmoverReleaseProperties(Schema): +class BeetmoverReleasePropertiesSchema(Schema): """Release properties for beetmover tasks.""" app_name: str @@ -716,7 +714,7 @@ class BeetmoverReleaseProperties(Schema): platform: str -class BeetmoverUpstreamArtifact(Schema, rename=None, omit_defaults=False): +class BeetmoverUpstreamArtifactSchema(Schema, rename=None, omit_defaults=False): """Upstream artifact definition for beetmover.""" # taskId of the task with the artifact @@ -736,17 +734,17 @@ class BeetmoverPayloadSchema(Schema): implementation: str # the maximum time to run, in seconds max_run_time: int - release_properties: BeetmoverReleaseProperties + release_properties: BeetmoverReleasePropertiesSchema # list of artifact URLs for the artifacts that should be beetmoved - upstream_artifacts: List[BeetmoverUpstreamArtifact] + upstream_artifacts: List[BeetmoverUpstreamArtifactSchema] # Optional fields os: str = "" # locale key, if this is a locale beetmover task - locale: TOptional[str] = None - partner_public: TOptional[bool] = None + locale: Optional[str] = None + partner_public: Optional[bool] = None # Artifact map can be any object - artifact_map: TOptional[dict] = None + artifact_map: Optional[dict] = None @payload_builder("beetmover", BeetmoverPayloadSchema) diff --git a/src/taskgraph/transforms/task_context.py b/src/taskgraph/transforms/task_context.py index 0d795abad..527618c7f 100644 --- a/src/taskgraph/transforms/task_context.py +++ b/src/taskgraph/transforms/task_context.py @@ -6,7 +6,7 @@ from taskgraph.util.yaml import load_yaml -class TaskContextConfig(Schema): +class TaskContextChildSchema(Schema): """ `task-context` can be used to substitute values into any field in a task with data that is not known until `taskgraph` runs. @@ -58,7 +58,7 @@ class TaskContextSchema(Schema, forbid_unknown_fields=False): """ # Required field first - task_context: TaskContextConfig + task_context: TaskContextChildSchema # Optional fields name: Optional[str] = None diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index 576cd7808..ea55b038c 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -27,10 +27,7 @@ def validate_schema(schema, obj, msg_prefix): try: if isinstance(schema, type) and issubclass(schema, Schema): - # Use the validate class method for Schema subclasses schema.validate(obj) - elif isinstance(schema, type) and issubclass(schema, msgspec.Struct): - msgspec.convert(obj, schema) else: raise TypeError(f"Unsupported schema type: {type(schema)}") except (msgspec.ValidationError, msgspec.DecodeError, Exception) as exc: @@ -175,91 +172,78 @@ def validate(cls, data): raise msgspec.ValidationError(str(e)) -# Optimization schema types using msgspec -class IndexSearchOptimization(Schema): +class IndexSearchOptimizationSchema(Schema): """Search the index for the given index namespaces.""" index_search: List[str] -class SkipUnlessChangedOptimization(Schema): +class SkipUnlessChangedOptimizationSchema(Schema): """Skip this task if none of the given file patterns match.""" skip_unless_changed: List[str] # Task reference types using msgspec -class TaskReference(Schema): +class TaskReferenceSchema(Schema): """Reference to another task.""" task_reference: str -class ArtifactReference(Schema): +class ArtifactReferenceSchema(Schema): """Reference to a task artifact.""" artifact_reference: str -# Create a custom validator -class OptimizationValidator: - """A validator that can validate optimization schemas.""" - - def __call__(self, value): - """Validate optimization value.""" - if value is None: - return None - if isinstance(value, dict): - if "index-search" in value: - try: - return msgspec.convert(value, IndexSearchOptimization) - except msgspec.ValidationError: - pass - if "skip-unless-changed" in value: - try: - return msgspec.convert(value, SkipUnlessChangedOptimization) - except msgspec.ValidationError: - pass - # Simple validation for dict types - if isinstance(value, dict): - if "index-search" in value and isinstance(value["index-search"], list): - return value - if "skip-unless-changed" in value and isinstance( - value["skip-unless-changed"], list - ): - return value - raise ValueError(f"Invalid optimization value: {value}") - - -class TaskRefValidator: - """A validator that can validate task references.""" - - def __call__(self, value): - """Validate task reference value.""" - if isinstance(value, str): +def validate_optimization(value): + """Validate optimization value.""" + if value is None: + return None + if isinstance(value, dict): + if "index-search" in value: + try: + return msgspec.convert(value, IndexSearchOptimizationSchema) + except msgspec.ValidationError: + pass + if "skip-unless-changed" in value: + try: + return msgspec.convert(value, SkipUnlessChangedOptimizationSchema) + except msgspec.ValidationError: + pass + # Simple validation for dict types + if isinstance(value, dict): + if "index-search" in value and isinstance(value["index-search"], list): return value - if isinstance(value, dict): - if "task-reference" in value: - try: - return msgspec.convert(value, TaskReference) - except msgspec.ValidationError: - pass - if "artifact-reference" in value: - try: - return msgspec.convert(value, ArtifactReference) - except msgspec.ValidationError: - pass - # Simple validation for dict types - if isinstance(value, dict): - if "task-reference" in value and isinstance(value["task-reference"], str): - return value - if "artifact-reference" in value and isinstance( - value["artifact-reference"], str - ): - return value - raise ValueError(f"Invalid task reference value: {value}") - - -# Keep the same names for backward compatibility -OptimizationSchema = OptimizationValidator() -taskref_or_string = TaskRefValidator() + if "skip-unless-changed" in value and isinstance( + value["skip-unless-changed"], list + ): + return value + raise ValueError(f"Invalid optimization value: {value}") + + +def validate_task_ref(value): + """Validate task reference value.""" + if isinstance(value, str): + return value + if isinstance(value, dict): + if "task-reference" in value: + try: + return msgspec.convert(value, TaskReferenceSchema) + except msgspec.ValidationError: + pass + if "artifact-reference" in value: + try: + return msgspec.convert(value, ArtifactReferenceSchema) + except msgspec.ValidationError: + pass + # Simple validation for dict types + if isinstance(value, dict): + if "task-reference" in value and isinstance(value["task-reference"], str): + return value + if "artifact-reference" in value and isinstance( + value["artifact-reference"], str + ): + return value + raise ValueError(f"Invalid task reference value: {value}") diff --git a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py index f2dac2ca0..22100d1e5 100644 --- a/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py +++ b/template/{{cookiecutter.project_name}}/taskcluster/{{cookiecutter.project_slug}}_taskgraph/transforms/hello.py @@ -6,10 +6,8 @@ class HelloSchema(Schema): noun: str # Required field -HELLO_SCHEMA = HelloSchema - transforms = TransformSequence() -transforms.add_validate(HELLO_SCHEMA) +transforms.add_validate(HelloSchema) @transforms.add diff --git a/test/test_parameters.py b/test/test_parameters.py index 8fdf74699..d833fbeca 100644 --- a/test/test_parameters.py +++ b/test/test_parameters.py @@ -276,21 +276,14 @@ def test_parameters_format_spec(spec, expected): def test_extend_parameters_schema(monkeypatch): """Test parameter extension with msgspec schemas.""" - # Define a test schema that extends the base schema - class ExtendedSchema(Schema): - foo: str - bar: bool = False # Optional with default + # Define a test extension schema that adds new fields + class ExtensionSchema(Schema): + custom_field: str + optional_field: bool = False # Optional with default # Reset global _schema_extensions monkeypatch.setattr(parameters, "_schema_extensions", []) - # Set our extended schema as the base schema - monkeypatch.setattr( - parameters, - "BaseSchema", - ExtendedSchema, - ) - # Keep the default functions monkeypatch.setattr( parameters, @@ -298,30 +291,57 @@ class ExtendedSchema(Schema): list(parameters.defaults_functions), ) - # Add a defaults function that provides foo and bar + # Extend the parameters schema with our custom schema extend_parameters_schema( - {}, # No additional schema, just the defaults function - defaults_fn=lambda root: {"foo": "1", "bar": False}, + ExtensionSchema, + defaults_fn=lambda root: { + "custom_field": "default_value", + "optional_field": True, + }, ) - # Test with explicit values - params = Parameters(foo="1", bar=True) - params.check() - assert params["foo"] == "1" - assert params["bar"] is True - - # Test with partial values (bar not present in dict) - params = Parameters(foo="1") + # Verify the extension was added + assert ExtensionSchema in parameters._schema_extensions + + # Test with extended fields in strict mode + # Need to include all required base fields too + params = Parameters( + base_repository="https://example.com/repo", + base_ref="main", + base_rev="abc123", + build_date=1234567890, + build_number=1, + do_not_optimize=[], + enable_always_target=True, + existing_tasks={}, + files_changed=[], + filters=["target_tasks_method"], + head_ref="main", + head_repository="https://example.com/repo", + head_rev="abc123", + head_tag="", + level="3", + moz_build_date="20240101120000", + optimize_target_tasks=True, + owner="test@example.com", + project="test", + pushdate=1234567890, + pushlog_id="0", + repository_type="git", + target_tasks_method="default", + tasks_for="testing", + custom_field="my_value", # Extension field + optional_field=False, # Extension field + ) params.check() - assert params["foo"] == "1" - # bar is not in the dict because it wasn't explicitly set - assert "bar" not in params + assert params["custom_field"] == "my_value" + assert params["optional_field"] is False - # Test with defaults function providing values + # Test with defaults in non-strict mode params = Parameters(strict=False) params.check() - assert params["foo"] == "1" - assert params["bar"] is False + assert params["custom_field"] == "default_value" + assert params["optional_field"] is True @pytest.mark.parametrize( From 750511cd686f099cd3f0f40f94e3d62d2aa856ae Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Tue, 16 Sep 2025 13:37:43 -0400 Subject: [PATCH 19/20] fix: compatibility with gecko --- docs/concepts/transforms.rst | 2 +- src/taskgraph/transforms/from_deps.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/concepts/transforms.rst b/docs/concepts/transforms.rst index c3913f865..e2871d40e 100644 --- a/docs/concepts/transforms.rst +++ b/docs/concepts/transforms.rst @@ -111,7 +111,7 @@ about the state of the tasks at given points. Here is an example: class MySchema(Schema): foo: str # Required field - bar: Optional[bool] = None # Optional field + bar: Optional[bool] = None # Optional field transforms = TransformSequence() transforms.add_validate(MySchema) diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index df655e2cb..701a87616 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -22,7 +22,18 @@ from taskgraph.util.set_name import SET_NAME_MAP SetNameType = Literal["strip-kind", "retain-kind"] -GroupByType = Literal["single", "all", "attribute"] +GroupByType = Literal[ + "single", + "all", + "attribute", + "single-with-filters", + "platform", + "single-locale", + "chunk-locales", + "partner-repack-ids", + "component", + "build-type", +] class FromDepsChildSchema(Schema): From 2d95095e84737784532b6c08049fe20d5b0d07f0 Mon Sep 17 00:00:00 2001 From: abhishekmadan30 Date: Fri, 26 Sep 2025 12:20:49 -0400 Subject: [PATCH 20/20] feat: fixed reviews --- docs/tutorials/creating-a-task-graph.rst | 2 +- src/taskgraph/config.py | 31 +++- src/taskgraph/parameters.py | 41 ++--- src/taskgraph/transforms/docker_image.py | 5 +- src/taskgraph/transforms/fetch.py | 27 ++-- src/taskgraph/transforms/from_deps.py | 4 +- src/taskgraph/transforms/notify.py | 45 ++---- src/taskgraph/transforms/run/__init__.py | 15 +- src/taskgraph/transforms/run/run_task.py | 25 ++- src/taskgraph/transforms/task.py | 193 ++++++++++++++++------- src/taskgraph/util/schema.py | 43 ++++- taskcluster/config.yml | 5 +- 12 files changed, 287 insertions(+), 149 deletions(-) diff --git a/docs/tutorials/creating-a-task-graph.rst b/docs/tutorials/creating-a-task-graph.rst index 5a71cee58..22c56720f 100644 --- a/docs/tutorials/creating-a-task-graph.rst +++ b/docs/tutorials/creating-a-task-graph.rst @@ -140,7 +140,7 @@ comments for explanations): from taskgraph.util.schema import Schema from taskgraph.transforms.base import TransformSequence - # Define the schema using Schema base class for better type checking and performance. + # Define the schema using Schema base class. class HelloDescriptionSchema(Schema): text: str # Required field description: Optional[str] = None # Optional field diff --git a/src/taskgraph/config.py b/src/taskgraph/config.py index 9278e368a..570ef006d 100644 --- a/src/taskgraph/config.py +++ b/src/taskgraph/config.py @@ -9,18 +9,16 @@ from pathlib import Path from typing import Dict, List, Literal, Optional, Union +from .util.caches import CACHES from .util.python_path import find_object -from .util.schema import Schema, optionally_keyed_by, validate_schema +from .util.schema import Schema, TaskPriority, optionally_keyed_by, validate_schema from .util.vcs import get_repository from .util.yaml import load_yaml logger = logging.getLogger(__name__) - -# TaskPriority type for the priority levels -TaskPriority = Literal[ - "highest", "very-high", "high", "medium", "low", "very-low", "lowest" -] +# CacheName type for valid cache names +CacheName = Literal[tuple(CACHES.keys())] class WorkerAliasSchema(Schema): @@ -55,8 +53,19 @@ class Repository(Schema, forbid_unknown_fields=False): class RunConfig(Schema): """Run transforms configuration.""" + # List of caches to enable, or a boolean to enable/disable all of them. use_caches: Optional[Union[bool, List[str]]] = None # Maps from "use-caches" + def __post_init__(self): + """Validate that cache names are valid.""" + if isinstance(self.use_caches, list): + invalid = set(self.use_caches) - set(CACHES.keys()) + if invalid: + raise ValueError( + f"Invalid cache names: {invalid}. " + f"Valid names are: {list(CACHES.keys())}" + ) + class TaskGraphSchema(Schema): """Taskgraph specific configuration.""" @@ -65,11 +74,16 @@ class TaskGraphSchema(Schema): repositories: Dict[str, Repository] # Optional fields + # Python function to call to register extensions. register: Optional[str] = None decision_parameters: Optional[str] = None # Maps from "decision-parameters" + # The taskcluster index prefix to use for caching tasks. Defaults to `trust-domain`. cached_task_prefix: Optional[str] = None # Maps from "cached-task-prefix" + # Should tasks from pull requests populate the cache cache_pull_requests: Optional[bool] = None # Maps from "cache-pull-requests" + # Regular expressions matching index paths to be summarized. index_path_regexes: Optional[List[str]] = None # Maps from "index-path-regexes" + # Configuration related to the 'run' transforms. run: Optional[RunConfig] = None @@ -80,14 +94,19 @@ class GraphConfigSchema(Schema, forbid_unknown_fields=False): """ # Required fields first + # The trust-domain for this graph. + # (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain) trust_domain: str # Maps from "trust-domain" task_priority: optionally_keyed_by("project", "level", TaskPriority) # type: ignore workers: WorkersSchema taskgraph: TaskGraphSchema # Optional fields + # Name of the docker image kind (default: docker-image) docker_image_kind: Optional[str] = None # Maps from "docker-image-kind" + # Default 'deadline' for tasks, in relative date format. Eg: '1 week' task_deadline_after: Optional[optionally_keyed_by("project", str)] = None # type: ignore + # Default 'expires-after' for level 1 tasks, in relative date format. Eg: '90 days' task_expires_after: Optional[str] = None # Maps from "task-expires-after" diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py index 5f04ab8fa..8423f2378 100644 --- a/src/taskgraph/parameters.py +++ b/src/taskgraph/parameters.py @@ -273,20 +273,6 @@ def check(self): } msgspec.convert(base_params, BaseSchema) - # Also validate against extension schemas - for ext_schema in _schema_extensions: - if isinstance(ext_schema, type) and issubclass( - ext_schema, msgspec.Struct - ): - # Only validate fields that belong to this extension - ext_fields = { - f.encode_name for f in msgspec.structs.fields(ext_schema) - } - ext_params = { - k: v for k, v in kebab_params.items() if k in ext_fields - } - if ext_params: - msgspec.convert(ext_params, ext_schema) else: # Non-strict mode: only validate fields that exist in the schemas # Filter to only include fields defined in the base schema @@ -298,19 +284,20 @@ def check(self): } msgspec.convert(filtered_params, BaseSchema) - # Also validate extension schemas in non-strict mode - for ext_schema in _schema_extensions: - if isinstance(ext_schema, type) and issubclass( - ext_schema, msgspec.Struct - ): - ext_fields = { - f.encode_name for f in msgspec.structs.fields(ext_schema) - } - ext_params = { - k: v for k, v in kebab_params.items() if k in ext_fields - } - if ext_params: - msgspec.convert(ext_params, ext_schema) + # Validate against extension schemas (both strict and non-strict modes) + for ext_schema in _schema_extensions: + if isinstance(ext_schema, type) and issubclass( + ext_schema, msgspec.Struct + ): + # Only validate fields that belong to this extension + ext_fields = { + f.encode_name for f in msgspec.structs.fields(ext_schema) + } + ext_params = { + k: v for k, v in kebab_params.items() if k in ext_fields + } + if ext_params: + msgspec.convert(ext_params, ext_schema) except (msgspec.ValidationError, msgspec.DecodeError) as e: raise ParameterMismatch(f"Invalid parameters: {e}") diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 56082ff2b..d53be4b23 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -5,10 +5,11 @@ import logging import os import re -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional import taskgraph from taskgraph.transforms.base import TransformSequence +from taskgraph.transforms.task import TaskDescriptionIndexSchema from taskgraph.util import json from taskgraph.util.docker import create_context_tar, generate_context_hash from taskgraph.util.schema import Schema @@ -49,7 +50,7 @@ class DockerImageSchema(Schema): # List of package tasks this docker image depends on. packages: Optional[List[str]] = None # Information for indexing this build so its artifacts can be discovered. - index: Optional[Any] = None + index: Optional[TaskDescriptionIndexSchema] = None # Whether this image should be cached based on inputs. cache: Optional[bool] = None diff --git a/src/taskgraph/transforms/fetch.py b/src/taskgraph/transforms/fetch.py index 262b99752..2e26b2cba 100644 --- a/src/taskgraph/transforms/fetch.py +++ b/src/taskgraph/transforms/fetch.py @@ -9,7 +9,7 @@ import os import re from dataclasses import dataclass -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Literal, Optional, Union import msgspec @@ -24,14 +24,26 @@ CACHE_TYPE = "content.v1" +# Base class for fetch configurations - ensures type field exists +class BaseFetchSchema(Schema, forbid_unknown_fields=False): + """Base schema for fetch configurations. + + This allows any additional fields beyond 'type' to support + different fetch types (static-url, git, etc). + """ + + type: str + + class FetchSchema(Schema): # Required fields # Name of the task. name: str # Description of the task. description: str - # Fetch configuration with type and additional fields. - fetch: Dict[str, Any] # Must have 'type' key, other keys depend on type + # Fetch configuration - validated as BaseFetchSchema which ensures 'type' exists + # Additional type-specific validation is done by the fetch_builder decorator + fetch: BaseFetchSchema # Optional fields # Relative path (from config.path) to the file the task was defined in. @@ -50,11 +62,6 @@ class FetchSchema(Schema): # Task attributes. attributes: Optional[Dict[str, Any]] = None - def __post_init__(self): - # Validate that fetch has a 'type' field - if not isinstance(self.fetch, dict) or "type" not in self.fetch: - raise msgspec.ValidationError("fetch must be a dict with a 'type' field") - # define a collection of payload builders, depending on the worker implementation fetch_builders = {} @@ -199,7 +206,7 @@ class GPGSignatureSchema(Schema): class StaticUrlFetchSchema(Schema): """Configuration for static-url fetch type.""" - type: str + type: Literal["static-url"] # The URL to download. url: str # The SHA-256 of the downloaded content. @@ -291,7 +298,7 @@ def create_fetch_url_task(config, name, fetch): class GitFetchSchema(Schema): """Configuration for git fetch type.""" - type: str + type: Literal["git"] repo: str revision: str include_dot_git: Optional[bool] = None diff --git a/src/taskgraph/transforms/from_deps.py b/src/taskgraph/transforms/from_deps.py index 701a87616..2238c626f 100644 --- a/src/taskgraph/transforms/from_deps.py +++ b/src/taskgraph/transforms/from_deps.py @@ -16,6 +16,7 @@ from typing import Any, Dict, List, Literal, Optional, Union from taskgraph.transforms.base import TransformSequence +from taskgraph.transforms.run import FetchesSchema from taskgraph.util.attributes import attrmatch from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies from taskgraph.util.schema import Schema, validate_schema @@ -68,7 +69,8 @@ class FromDepsChildSchema(Schema): # dependency. Attributes of the upstream task may be used as # substitution values in the `artifact` or `dest` values of the # `fetches` entry. - fetches: Optional[Dict[str, List[Union[str, Dict[str, str]]]]] = None + # Keys are task kind names, values are lists of FetchesSchema objects. + fetches: Optional[Dict[str, List[FetchesSchema]]] = None # Schema for from_deps transforms diff --git a/src/taskgraph/transforms/notify.py b/src/taskgraph/transforms/notify.py index 8ce6b29ad..d909b34c2 100644 --- a/src/taskgraph/transforms/notify.py +++ b/src/taskgraph/transforms/notify.py @@ -13,7 +13,7 @@ import msgspec from taskgraph.transforms.base import TransformSequence -from taskgraph.util.schema import Schema, resolve_keyed_by +from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by StatusType = Literal[ "on-completed", @@ -30,7 +30,7 @@ class EmailRecipientSchema(Schema): """Email notification recipient.""" type: Literal["email"] - address: Union[str, Dict[str, Any]] # Can be keyed-by + address: optionally_keyed_by("project", "level", str) status_type: Optional[StatusType] = None @@ -114,10 +114,18 @@ class NotifyContentSchema(Schema, rename=None): slack: Optional[SlackContentSchema] = None +RecipientSchema = Union[ + EmailRecipientSchema, + MatrixRoomRecipientSchema, + PulseRecipientSchema, + SlackChannelRecipientSchema, +] + + class NotifyConfigSchema(Schema, rename=None): """Modern notification configuration.""" - recipients: List[Dict[str, Any]] # Will be validated as Recipient union + recipients: List[RecipientSchema] content: Optional[NotifyContentSchema] = None @@ -148,37 +156,6 @@ def __post_init__(self): "Cannot specify both 'notify' and 'notifications'" ) - # Validate recipients if notify is present - if self.notify and self.notify.recipients: - validated_recipients = [] - for r in self.notify.recipients: - try: - # Try to convert to one of the recipient types - if r.get("type") == "email": - validated_recipients.append( - msgspec.convert(r, EmailRecipientSchema) - ) - elif r.get("type") == "matrix-room": - validated_recipients.append( - msgspec.convert(r, MatrixRoomRecipientSchema) - ) - elif r.get("type") == "pulse": - validated_recipients.append( - msgspec.convert(r, PulseRecipientSchema) - ) - elif r.get("type") == "slack-channel": - validated_recipients.append( - msgspec.convert(r, SlackChannelRecipientSchema) - ) - else: - raise msgspec.ValidationError( - f"Unknown recipient type: {r.get('type')}" - ) - except msgspec.ValidationError: - # Keep as dict if it contains keyed-by - validated_recipients.append(r) - self.notify.recipients = validated_recipients - transforms = TransformSequence() transforms.add_validate(NotifySchema) diff --git a/src/taskgraph/transforms/run/__init__.py b/src/taskgraph/transforms/run/__init__.py index 1bd3fe7b3..c301c0393 100644 --- a/src/taskgraph/transforms/run/__init__.py +++ b/src/taskgraph/transforms/run/__init__.py @@ -17,6 +17,7 @@ from taskgraph.transforms.base import TransformSequence from taskgraph.transforms.cached_tasks import order_tasks +from taskgraph.transforms.task import TaskDescriptionWorkerSchema from taskgraph.util import json from taskgraph.util import path as mozpath from taskgraph.util.python_path import import_sibling_modules @@ -41,6 +42,9 @@ class FetchesSchema(Schema): class WhenSchema(Schema): """Configuration for when a task should be included.""" + # This task only needs to be run if a file matching one of the given + # patterns has changed in the push. The patterns use the mozpack + # match function (python/mozbuild/mozpack/path.py). files_changed: List[str] = msgspec.field(default_factory=list) @@ -51,7 +55,9 @@ class RunSchema(Schema, rename=None, forbid_unknown_fields=False): This schema allows extra fields for run implementation-specific configuration. """ + # The key to a run implementation in a peer module to this one. using: str + # Base work directory used to set up the task. workdir: Optional[str] = None @@ -97,11 +103,18 @@ class RunDescriptionSchema(Schema): always_target: bool = False optimization: Any = None needs_sccache: bool = False + # The "when" section contains descriptions of the circumstances under + # which this task should be included in the task graph. This will be + # converted into an optimization, so it cannot be specified in a run + # description that also gives 'optimization'. when: Optional[WhenSchema] = None + # A list of artifacts to install from 'fetch' tasks. fetches: Dict[str, List[Union[str, FetchesSchema]]] = msgspec.field( default_factory=dict ) - worker: Dict[str, Any] = msgspec.field(default_factory=dict) + # This object will be passed through to the task description, with additions + # provided by the task's run-using function. + worker: Optional[TaskDescriptionWorkerSchema] = None transforms = TransformSequence() diff --git a/src/taskgraph/transforms/run/run_task.py b/src/taskgraph/transforms/run/run_task.py index c86424cda..860ce1623 100644 --- a/src/taskgraph/transforms/run/run_task.py +++ b/src/taskgraph/transforms/run/run_task.py @@ -15,7 +15,8 @@ support_vcs_checkout, ) from taskgraph.util import path, taskcluster -from taskgraph.util.schema import Schema +from taskgraph.util.caches import CACHES +from taskgraph.util.schema import Schema, taskref_or_string EXEC_COMMANDS = { "bash": ["bash", "-cx"], @@ -36,7 +37,7 @@ class RunTaskSchema(Schema): # The command arguments to pass to the `run-task` script, after the checkout # arguments. If a list, it will be passed directly; otherwise it will be # included in a single argument to the command specified by `exec-with`. - command: Union[List[Union[str, Dict[str, str]]], str, Dict[str, str]] + command: Union[List[taskref_or_string], taskref_or_string] # Base work directory used to set up the task. workdir: str @@ -61,7 +62,7 @@ class RunTaskSchema(Schema): # Specifies what to execute the command with in the event the command is a # string. - exec_with: Optional[Literal["bash", "powershell"]] = None + exec_with: Optional[str] = None # Command used to invoke the `run-task` script. Can be used if the script # or Python installation is in a non-standard location on the workers. @@ -70,6 +71,24 @@ class RunTaskSchema(Schema): # Whether to run as root. Defaults to False. run_as_root: bool = False + def __post_init__(self): + """Validate cache names and exec_with values.""" + # Validate cache names + if isinstance(self.use_caches, list): + invalid = set(self.use_caches) - set(CACHES.keys()) + if invalid: + raise ValueError( + f"Invalid cache names: {invalid}. " + f"Valid names are: {list(CACHES.keys())}" + ) + + # Validate exec_with + if self.exec_with is not None and self.exec_with not in EXEC_COMMANDS: + raise ValueError( + f"Invalid exec_with value: {self.exec_with}. " + f"Valid values are: {list(EXEC_COMMANDS.keys())}" + ) + def common_setup(config, task, taskdesc, command): run = task["run"] diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py index 193c8fc94..bb14b044b 100644 --- a/src/taskgraph/transforms/task.py +++ b/src/taskgraph/transforms/task.py @@ -24,8 +24,12 @@ from taskgraph.util.hash import hash_path from taskgraph.util.keyed_by import evaluate_keyed_by from taskgraph.util.schema import ( + OptimizationType, Schema, + TaskPriority, + optionally_keyed_by, resolve_keyed_by, + taskref_or_string, validate_schema, ) from taskgraph.util.treeherder import split_symbol, treeherder_defaults @@ -49,9 +53,19 @@ def _run_task_suffix(): class TaskDescriptionTreeherderSchema(Schema, rename=None): """Treeherder-related information for a task.""" + # Either a bare symbol, or 'grp(sym)'. Defaults to the + # uppercased first letter of each section of the kind + # (delimited by '-') all smooshed together. symbol: Optional[str] = None + # The task kind. Defaults to 'build', 'test', or 'other' + # based on the kind name. kind: Optional[Literal["build", "test", "other"]] = None + # Tier for this task. Defaults to 1. tier: Optional[int] = None + # Task platform in the form platform/collection, used to + # set treeherder.machine.platform and + # treeherder.collection or treeherder.labels. Defaults to + # 'default/opt'. platform: Optional[str] = None @@ -64,7 +78,16 @@ class TaskDescriptionIndexSchema(Schema, rename="kebab"): job_name: str # Type of gecko v2 index to use type: str = "generic" # Default to generic as that's what's commonly used - # The rank that the task will receive in the TaskCluster index + # The rank that the task will receive in the TaskCluster + # index. A newly completed task supersedes the currently + # indexed task iff it has a higher rank. If unspecified, + # 'by-tier' behavior will be used. + # Rank is equal the timestamp of the build_date for tier-1 + # tasks, and zero for non-tier-1. This sorts tier-{2,3} + # builds below tier-1 in the index. + # Can also be given as an integer constant (e.g. zero to make + # sure a task is last in the index) or 'build_date' to equal + # the timestamp of the build_date. rank: Union[Literal["by-tier", "build_date"], int] = "by-tier" @@ -74,67 +97,95 @@ class TaskDescriptionWorkerSchema(Schema, rename=None, forbid_unknown_fields=Fal This schema allows extra fields for worker-specific configuration. """ - implementation: str + implementation: Optional[str] = None class TaskDescriptionSchema(Schema): """Schema for task descriptions.""" - # The label for this task + # The label for this task. label: str - # Description of the task (for metadata) + # Description of the task (for metadata). description: str - # The provisioner-id/worker-type for the task + # The provisioner-id/worker-type for the task. The following + # parameters will be substituted in this string: + # {level} -- the scm level of this push. worker_type: str - # Attributes for this task + # Attributes for this task. attributes: Dict[str, Any] = msgspec.field(default_factory=dict) - # Relative path (from config.path) to the file task was defined in + # Relative path (from config.path) to the file task was defined in. task_from: Optional[str] = None - # Dependencies of this task, keyed by name + # Dependencies of this task, keyed by name; these are passed + # through verbatim and subject to the interpretation of the + # Task's get_dependencies method. dependencies: Dict[str, Any] = msgspec.field(default_factory=dict) - # Priority of the task - priority: Optional[ - Literal["highest", "very-high", "high", "medium", "low", "very-low", "lowest"] - ] = None - # Soft dependencies of this task, as a list of task labels + # Priority of the task. + priority: Optional[TaskPriority] = None + # Soft dependencies of this task, as a list of task labels. soft_dependencies: List[str] = msgspec.field(default_factory=list) - # Dependencies that must be scheduled in order for this task to run + # Dependencies that must be scheduled in order for this task to run. if_dependencies: List[str] = msgspec.field(default_factory=list) - # Specifies the condition for task execution + # Specifies the condition for task execution. requires: Literal["all-completed", "all-resolved"] = "all-completed" - # Expiration time relative to task creation + # Expiration time relative to task creation, with units (e.g., + # '14 days'). Defaults are set based on the project. expires_after: Optional[str] = None - # Deadline time relative to task creation + # Deadline time relative to task creation, with units (e.g., + # '14 days'). Defaults are set based on the project. deadline_after: Optional[str] = None - # Custom routes for this task + # Custom routes for this task; the default treeherder routes will + # be added automatically. routes: List[str] = msgspec.field(default_factory=list) - # Custom scopes for this task + # Custom scopes for this task; any scopes required for the worker + # will be added automatically. The following parameters will be + # substituted in each scope: + # {level} -- the scm level of this push + # {project} -- the project of this push. scopes: List[str] = msgspec.field(default_factory=list) - # Tags for this task + # Tags for this task. tags: Dict[str, str] = msgspec.field(default_factory=dict) - # Custom 'task.extra' content + # Custom 'task.extra' content. extra: Dict[str, Any] = msgspec.field(default_factory=dict) - # Treeherder-related information - treeherder: Union[bool, TaskDescriptionTreeherderSchema, None] = None - # Information for indexing this build + # Treeherder-related information. Can be a simple `true` to + # auto-generate information or a dictionary with specific keys. + treeherder: Optional[Union[bool, TaskDescriptionTreeherderSchema]] = None + # Information for indexing this build so its artifacts can be + # discovered. If omitted, the build will not be indexed. index: Optional[TaskDescriptionIndexSchema] = None - # The `run_on_projects` attribute - run_on_projects: Any = None # This uses optionally_keyed_by, so we need Any - # Specifies tasks for which this task should run + # The `run_on_projects` attribute, defaulting to 'all'. Dictates + # the projects on which this task should be included in the + # target task set. See the attributes documentation for details. + run_on_projects: optionally_keyed_by("build-platform", List[str]) = None # type: ignore + # Specifies tasks for which this task should run. run_on_tasks_for: List[str] = msgspec.field(default_factory=list) - # Specifies git branches for which this task should run + # Specifies git branches for which this task should run. run_on_git_branches: List[str] = msgspec.field(default_factory=list) - # The `shipping_phase` attribute + # The `shipping_phase` attribute, defaulting to None. Specifies + # the release promotion phase that this task belongs to. shipping_phase: Optional[Literal["build", "promote", "push", "ship"]] = None - # The `always-target` attribute + # The `always-target` attribute will cause the task to be + # included in the target_task_graph regardless of filtering. + # Tasks included in this manner will be candidates for + # optimization even when `optimize_target_tasks` is False, unless + # the task was also explicitly chosen by the target_tasks method. always_target: bool = False - # Optimization to perform on this task - optimization: Any = None # Uses OptimizationSchema which has custom validation - # Whether the task should use sccache compiler caching + # Optimization to perform on this task during the optimization + # phase. Defined in taskcluster/taskgraph/optimize.py. + optimization: OptimizationType = None + # Whether the task should use sccache compiler caching. needs_sccache: bool = False # Information specific to the worker implementation worker: Optional[TaskDescriptionWorkerSchema] = None + def __post_init__(self): + """Validate dependency names.""" + if self.dependencies: + invalid_names = {"self", "decision"} & set(self.dependencies.keys()) + if invalid_names: + raise ValueError( + f"Can't use {', '.join(repr(n) for n in sorted(invalid_names))} as dependency names." + ) + TC_TREEHERDER_SCHEMA_URL = ( "https://github.com/taskcluster/taskcluster-treeherder/" @@ -188,16 +239,7 @@ class PayloadBuilder: def payload_builder(name, schema): """ Decorator for registering payload builders. - - Requires msgspec.Struct schema types for type safety and performance. """ - # Ensure we're using msgspec schemas - if not (isinstance(schema, type) and issubclass(schema, msgspec.Struct)): - raise TypeError( - f"payload_builder requires msgspec.Struct schema, got {type(schema).__name__}. " - f"Please migrate to msgspec: class {name.title()}Schema(Schema): ..." - ) - # Verify the schema has required fields fields = {f.name for f in msgspec.structs.fields(schema)} if "implementation" not in fields: @@ -250,6 +292,40 @@ class DockerWorkerCacheSchema(Schema, rename="kebab"): skip_untrusted: bool = False +class DockerImageInTreeSchema(Schema, rename="kebab"): + """In-tree generated docker image.""" + + in_tree: str + + +class DockerImageIndexedSchema(Schema): + """Indexed docker image.""" + + indexed: str + + +# Create a class for docker image types to avoid dict union issues +class DockerImageTypeSchema(Schema, forbid_unknown_fields=False): + """Schema that accepts either in-tree or indexed docker images.""" + + in_tree: Optional[str] = None + indexed: Optional[str] = None + + def __post_init__(self): + """Ensure exactly one image type is provided.""" + if self.in_tree and self.indexed: + raise ValueError("Cannot have both in-tree and indexed") + if not self.in_tree and not self.indexed: + raise ValueError("Must have either in-tree or indexed") + + +# Type for docker-image field +DockerImageType = Union[ + str, # a raw Docker image path (repo/image:tag) + DockerImageTypeSchema, # docker image configs +] + + class DockerWorkerArtifactSchema(Schema, rename=None): """Artifact configuration for docker-worker.""" @@ -265,10 +341,10 @@ class DockerWorkerPayloadSchema(Schema): """Schema for docker-worker payload.""" # Required fields first - implementation: str + implementation: Literal["docker-worker"] # For tasks that will run in docker-worker, this is the name of the docker # image or in-tree docker image to run the task in. - docker_image: Union[str, Dict[str, str]] + docker_image: DockerImageType # the maximum time to run, in seconds max_run_time: int @@ -290,10 +366,10 @@ class DockerWorkerPayloadSchema(Schema): # artifacts to extract from the task image after completion artifacts: Optional[List[DockerWorkerArtifactSchema]] = None # environment variables - env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) + env: Dict[str, taskref_or_string] = msgspec.field(default_factory=dict) # the command to run; if not given, docker-worker will default to the # command in the docker image - command: Optional[List[Union[str, Dict[str, str]]]] = None + command: Optional[List[taskref_or_string]] = None # the exit status code(s) that indicates the task should be retried retry_exit_status: Optional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged @@ -533,7 +609,7 @@ class GenericWorkerMountContentSchema(Schema, rename="kebab"): # Artifact name that contains the content. artifact: Optional[str] = None # Task ID that has the artifact that contains the content. - task_id: Optional[Union[str, Dict[str, str]]] = None + task_id: Optional[taskref_or_string] = None # URL that supplies the content in response to an unauthenticated GET request. url: Optional[str] = None @@ -557,12 +633,11 @@ class GenericWorkerPayloadSchema(Schema): """Schema for generic-worker payload.""" # Required fields first - implementation: str + implementation: Literal["generic-worker"] os: Literal["windows", "macosx", "linux", "linux-bitbar"] # command is a list of commands to run, sequentially # on Windows, each command is a string, on OS X and Linux, each command is a string array - # Using Any here because msgspec doesn't support union of multiple list types - command: Any + command: List[Union[str, List[taskref_or_string]]] # the maximum time to run, in seconds max_run_time: int @@ -572,7 +647,7 @@ class GenericWorkerPayloadSchema(Schema): # Directories and/or files to be mounted mounts: Optional[List[GenericWorkerMountSchema]] = None # environment variables - env: Dict[str, Union[str, Dict[str, str]]] = msgspec.field(default_factory=dict) + env: Dict[str, taskref_or_string] = msgspec.field(default_factory=dict) # the exit status code(s) that indicates the task should be retried retry_exit_status: Optional[List[int]] = None # the exit status code(s) that indicates the caches used by the task should be purged @@ -718,7 +793,7 @@ class BeetmoverUpstreamArtifactSchema(Schema, rename=None, omit_defaults=False): """Upstream artifact definition for beetmover.""" # taskId of the task with the artifact - taskId: Union[str, Dict[str, str]] # Can be string or task-reference dict + taskId: taskref_or_string # Can be string or task-reference dict # type of signing task (for CoT) taskType: str # Paths to the artifacts to sign @@ -731,7 +806,7 @@ class BeetmoverPayloadSchema(Schema): """Schema for beetmover worker payload.""" # Required fields first - implementation: str + implementation: Literal["beetmover"] # the maximum time to run, in seconds max_run_time: int release_properties: BeetmoverReleasePropertiesSchema @@ -774,22 +849,22 @@ def build_beetmover_payload(config, task, task_def): # Simple payload schemas using msgspec -class InvalidPayloadSchema(Schema, rename=None, omit_defaults=False): +class InvalidPayloadSchema( + Schema, rename=None, omit_defaults=False, forbid_unknown_fields=False +): """Schema for invalid tasks - allows any fields.""" implementation: str os: str = "" - # Allow any extra fields for invalid tasks - _extra: dict = msgspec.field(default_factory=dict, name="") -class AlwaysOptimizedPayloadSchema(Schema, rename=None, omit_defaults=False): +class AlwaysOptimizedPayloadSchema( + Schema, rename=None, omit_defaults=False, forbid_unknown_fields=False +): """Schema for always-optimized tasks - allows any fields.""" implementation: str os: str = "" - # Allow any extra fields - _extra: dict = msgspec.field(default_factory=dict, name="") class SucceedPayloadSchema(Schema, rename=None, omit_defaults=False): diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index ea55b038c..7bff51406 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -4,13 +4,18 @@ import pprint from functools import reduce -from typing import Dict, List, Literal, Union +from typing import Dict, List, Literal, Optional, Union import msgspec import taskgraph from taskgraph.util.keyed_by import evaluate_keyed_by, iter_dot_path +# Common type definitions that are used across multiple schemas +TaskPriority = Literal[ + "highest", "very-high", "high", "medium", "low", "very-low", "lowest" +] + def validate_schema(schema, obj, msg_prefix): """ @@ -184,6 +189,24 @@ class SkipUnlessChangedOptimizationSchema(Schema): skip_unless_changed: List[str] +# Create a class for optimization types to avoid dict union issues +class OptimizationTypeSchema(Schema, forbid_unknown_fields=False): + """Schema that accepts various optimization configurations.""" + + index_search: Optional[List[str]] = None + skip_unless_changed: Optional[List[str]] = None + + def __post_init__(self): + """Ensure at least one optimization type is provided.""" + if not self.index_search and not self.skip_unless_changed: + # Allow empty schema for other dict-based optimizations + pass + + +# Use the class in the union to avoid multiple dict types +OptimizationType = Union[None, OptimizationTypeSchema] + + # Task reference types using msgspec class TaskReferenceSchema(Schema): """Reference to another task.""" @@ -197,6 +220,24 @@ class ArtifactReferenceSchema(Schema): artifact_reference: str +class TaskRefType(Schema, forbid_unknown_fields=False): + """Schema that accepts either task-reference or artifact-reference.""" + + task_reference: Optional[str] = None + artifact_reference: Optional[str] = None + + def __post_init__(self): + """Ensure exactly one reference type is provided.""" + if self.task_reference and self.artifact_reference: + raise ValueError("Cannot have both task-reference and artifact-reference") + if not self.task_reference and not self.artifact_reference: + raise ValueError("Must have either task-reference or artifact-reference") + + +# Use the class in the union to avoid multiple dict types +taskref_or_string = Union[str, TaskRefType] + + def validate_optimization(value): """Validate optimization value.""" if value is None: diff --git a/taskcluster/config.yml b/taskcluster/config.yml index 5fec007ee..280296fb6 100644 --- a/taskcluster/config.yml +++ b/taskcluster/config.yml @@ -12,10 +12,7 @@ index: products: - taskgraph -task-priority: - by-level: - "3": medium - default: low +task-priority: low taskgraph: register: self_taskgraph:register