From 5591fd8bfa77bd8c8e92889e2fde1cd6385b021d Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 19 Sep 2025 11:08:15 -0400 Subject: [PATCH 01/10] feat: add a replay mechanism Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 168e781eb..94a73b23c 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -152,7 +152,7 @@ write_trace, ) -empty_scope: ScopeType = PdlDict({"pdl_context": DependentContext([])}) +empty_scope: ScopeType = PdlDict({"pdl_context": DependentContext([]), "__pdl_replay": {}}) RefT = TypeVar("RefT") @@ -623,6 +623,34 @@ def result_with_type_checking( return result +def process_block_body_with_replay( + state: InterpreterState, + scope: ScopeType, + block: AdvancedBlockType, + loc: PdlLocationType, +) -> tuple[PdlLazy[Any], LazyMessages, ScopeType, AdvancedBlockType]: + if isinstance(block, LeafBlock): + block_id = block.pdl__id + replay_scope = scope["__pdl_replay"] + assert(isinstance(block_id, str)) + assert(isinstance(replay_scope, dict)) + try: + result = replay_scope[block_id] + background = SingletonContext( + PdlDict({"role": state.role, "content": result}) + ) + if state.yield_result: + yield_result(result.result(), block.kind) + if state.yield_background: + yield_background(background) + trace = block + except KeyError: + result, background, scope, trace = process_block_body(state, scope, block, loc) + scope = scope | { "__pdl_replay": (replay_scope | {block_id: result}) } + else: + result, background, scope, trace = process_block_body(state, scope, block, loc) + return result, background, scope, trace + def process_block_body( state: InterpreterState, scope: ScopeType, From 96f2287c6521c0162bef5c9430d173921a0f1a1b Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 19 Sep 2025 11:25:43 -0400 Subject: [PATCH 02/10] Formatting Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 94a73b23c..fe588ceab 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -152,7 +152,9 @@ write_trace, ) -empty_scope: ScopeType = PdlDict({"pdl_context": DependentContext([]), "__pdl_replay": {}}) +empty_scope: ScopeType = PdlDict( + {"pdl_context": DependentContext([]), "__pdl_replay": {}} +) RefT = TypeVar("RefT") @@ -632,8 +634,8 @@ def process_block_body_with_replay( if isinstance(block, LeafBlock): block_id = block.pdl__id replay_scope = scope["__pdl_replay"] - assert(isinstance(block_id, str)) - assert(isinstance(replay_scope, dict)) + assert isinstance(block_id, str) + assert isinstance(replay_scope, dict) try: result = replay_scope[block_id] background = SingletonContext( @@ -645,12 +647,15 @@ def process_block_body_with_replay( yield_background(background) trace = block except KeyError: - result, background, scope, trace = process_block_body(state, scope, block, loc) - scope = scope | { "__pdl_replay": (replay_scope | {block_id: result}) } + result, background, scope, trace = process_block_body( + state, scope, block, loc + ) + scope = scope | {"__pdl_replay": (replay_scope | {block_id: result})} else: result, background, scope, trace = process_block_body(state, scope, block, loc) return result, background, scope, trace + def process_block_body( state: InterpreterState, scope: ScopeType, From 0398cf4597058521ee39f63272ece137daaa2911 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 19 Sep 2025 11:27:06 -0400 Subject: [PATCH 03/10] Typing Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index fe588ceab..61bad3cda 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -638,7 +638,7 @@ def process_block_body_with_replay( assert isinstance(replay_scope, dict) try: result = replay_scope[block_id] - background = SingletonContext( + background: LazyMessages = SingletonContext( PdlDict({"role": state.role, "content": result}) ) if state.yield_result: From a8c9f9754a0f3c763ba2cab38147288b3dce3b49 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 19 Sep 2025 15:15:36 -0400 Subject: [PATCH 04/10] feat: replay an execution Signed-off-by: Louis Mandel --- src/pdl/pdl.py | 15 ++++++++++++--- src/pdl/pdl_interpreter.py | 13 +++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/pdl/pdl.py b/src/pdl/pdl.py index f9559df76..3aa0f466e 100644 --- a/src/pdl/pdl.py +++ b/src/pdl/pdl.py @@ -47,6 +47,9 @@ class InterpreterConfig(TypedDict, total=False): """ cwd: Path """Path considered as the current working directory for file reading.""" + replay: dict[str, Any] + """Execute the program reusing some already computed values. + """ def exec_program( @@ -66,9 +69,10 @@ def exec_program( output: Configure the output of the returned value of this function. Defaults to `"result"` Returns: - Return the final result if `output` is set to `"result"`. If set of `all`, it returns a dictionary containing, `result`, `scope`, and `trace`. + Return the final result if `output` is set to `"result"`. If set of `all`, it returns a dictionary containing, `result`, `scope`, `trace`, and `replay`. """ - config = config or {} + config = config or InterpreterConfig() + config["replay"] = dict(config.get("replay", {})) state = InterpreterState(**config) if not isinstance(scope, PdlDict): scope = PdlDict(scope or {}) @@ -83,7 +87,12 @@ def exec_program( return result case "all": scope = future_scope.result() - return {"result": result, "scope": scope, "trace": trace} + return { + "result": result, + "scope": scope, + "trace": trace, + "replay": state.replay, + } case _: assert False, 'The `output` variable should be "result" or "all"' diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 61bad3cda..5a34f0769 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -152,9 +152,7 @@ write_trace, ) -empty_scope: ScopeType = PdlDict( - {"pdl_context": DependentContext([]), "__pdl_replay": {}} -) +empty_scope: ScopeType = PdlDict({"pdl_context": DependentContext([])}) RefT = TypeVar("RefT") @@ -190,6 +188,7 @@ class InterpreterState(BaseModel): """Event loop to schedule LLM calls.""" current_pdl_context: Ref[LazyMessages] = Ref(DependentContext([])) """Current value of the context set at the beginning of the execution of the block.""" + replay: dict[str, Any] = {} def with_yield_result(self: "InterpreterState", b: bool) -> "InterpreterState": return self.model_copy(update={"yield_result": b}) @@ -498,7 +497,7 @@ def process_advance_block_retry( trial_total = max_retry + 1 for trial_idx in range(trial_total): try: - result, background, new_scope, trace = process_block_body( + result, background, new_scope, trace = process_block_body_with_replay( state, scope, block, loc ) if block.requirements != []: @@ -633,11 +632,9 @@ def process_block_body_with_replay( ) -> tuple[PdlLazy[Any], LazyMessages, ScopeType, AdvancedBlockType]: if isinstance(block, LeafBlock): block_id = block.pdl__id - replay_scope = scope["__pdl_replay"] assert isinstance(block_id, str) - assert isinstance(replay_scope, dict) try: - result = replay_scope[block_id] + result = state.replay[block_id] background: LazyMessages = SingletonContext( PdlDict({"role": state.role, "content": result}) ) @@ -650,7 +647,7 @@ def process_block_body_with_replay( result, background, scope, trace = process_block_body( state, scope, block, loc ) - scope = scope | {"__pdl_replay": (replay_scope | {block_id: result})} + state.replay[block_id] = result else: result, background, scope, trace = process_block_body(state, scope, block, loc) return result, background, scope, trace From 6c47c9d2b8aa3a0fdc4ced61f91cc982145d8738 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Sat, 20 Sep 2025 18:48:11 -0400 Subject: [PATCH 05/10] Add replay to test_examples_run Signed-off-by: Louis Mandel --- tests/test_examples_run.py | 52 ++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py index cd4c1a9ba..154af3e64 100644 --- a/tests/test_examples_run.py +++ b/tests/test_examples_run.py @@ -4,7 +4,7 @@ import random from dataclasses import dataclass, field from enum import Enum -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import yaml from pytest import CaptureFixture, MonkeyPatch @@ -104,6 +104,7 @@ class FailedResults: wrong_results: Dict[str, str] = field(default_factory=lambda: {}) unexpected_parse_error: Dict[str, str] = field(default_factory=lambda: {}) unexpected_runtime_error: Dict[str, str] = field(default_factory=lambda: {}) + wrong_replay_results: Dict[str, str] = field(default_factory=lambda: {}) # pylint: disable=too-many-instance-attributes @@ -161,7 +162,9 @@ def __init__(self, monkeypatch: MonkeyPatch) -> None: self.__collect_expected_results() # Inits execution results for each PDL file - self.execution_results: Dict[str, ExecutionResult] = {} + self.execution_results: Dict[ + str, Tuple[ExecutionResult, ExecutionResult | None] + ] = {} # Init failed results self.failed_results = FailedResults() @@ -199,13 +202,11 @@ def __collect_expected_results(self) -> None: self.expected_results[file] = expected_result - def __execute_file(self, pdl_file_name: str) -> None: + def __execute_and_replay_file(self, pdl_file_name: str) -> None: """ Tests the result of a single file and returns the result output and the error code """ - exec_result = ExecutionResult() - pdl_file_path = pathlib.Path(pdl_file_name) scope: ScopeType = PdlDict({}) @@ -217,13 +218,27 @@ def __execute_file(self, pdl_file_name: str) -> None: if inputs.scope is not None: scope = inputs.scope + exec_result, output = self.__execute_file(pdl_file_path, scope, replay={}) + + if output is not None: + replay_result, _ = self.__execute_file( + pdl_file_path, scope, replay=output["replay"] + ) + else: + replay_result = None + + self.execution_results[pdl_file_name] = exec_result, replay_result + + def __execute_file(self, pdl_file_path, scope, replay): + exec_result = ExecutionResult() + output = None try: # Execute file output = pdl.exec_file( pdl_file_path, scope=scope, output="all", - config=pdl.InterpreterConfig(batch=1), + config=pdl.InterpreterConfig(batch=1, replay=replay), ) exec_result.result = str(output["result"]) @@ -235,8 +250,7 @@ def __execute_file(self, pdl_file_name: str) -> None: except Exception as exc: exec_result.result = str(exc) exec_result.error_code = ExecutionErrorCode.RUNTIME_ERROR - - self.execution_results[pdl_file_name] = exec_result + return exec_result, output def populate_exec_result_for_checks(self) -> None: """ @@ -245,7 +259,7 @@ def populate_exec_result_for_checks(self) -> None: for file in self.check: if file not in self.skip: - self.__execute_file(file) + self.__execute_and_replay_file(file) def validate_expected_and_actual(self) -> None: """ @@ -256,11 +270,12 @@ def validate_expected_and_actual(self) -> None: wrong_result: Dict[str, str] = {} unexpected_parse_error: Dict[str, str] = {} unexpected_runtime_error: Dict[str, str] = {} + wrong_replay_result: Dict[str, str] = {} for file in self.check: if file not in self.skip: expected_result = self.expected_results[file] - actual_result = self.execution_results[file] + actual_result, replay_result = self.execution_results[file] match = expected_result.compare_to_execution(actual_result) if not match: @@ -274,7 +289,14 @@ def validate_expected_and_actual(self) -> None: if actual_result.result is not None: wrong_result[file] = actual_result.result + if replay_result is not None: + match_replay = expected_result.compare_to_execution(replay_result) + if not match_replay: + if replay_result.result is not None: + wrong_replay_result[file] = replay_result.result + self.failed_results.wrong_results = wrong_result + self.failed_results.wrong_replay_results = wrong_replay_result self.failed_results.unexpected_parse_error = unexpected_parse_error self.failed_results.unexpected_runtime_error = unexpected_runtime_error @@ -347,6 +369,16 @@ def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> f"Actual result (copy everything below this line):\n✂️ ------------------------------------------------------------\n{actual}\n-------------------------------------------------------------" ) + # Print the actual results for wrong replay results + for file, actual in background.failed_results.wrong_replay_results.items(): + print( + "\n============================================================================" + ) + print(f"File that produced wrong REPLAY result: {file}") + print( + f"Actual result:\n ------------------------------------------------------------\n{actual}\n-------------------------------------------------------------" + ) + assert ( len(background.failed_results.unexpected_parse_error) == 0 ), f"Unexpected parse error: {background.failed_results.unexpected_parse_error}" From dab91c0956bedee68bb5e56f865fdf155327dce0 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Mon, 22 Sep 2025 16:03:18 -0400 Subject: [PATCH 06/10] Check that there is no replay errors in tests Signed-off-by: Louis Mandel --- tests/test_examples_run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py index 154af3e64..4a3cecc9a 100644 --- a/tests/test_examples_run.py +++ b/tests/test_examples_run.py @@ -388,3 +388,6 @@ def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> assert ( len(background.failed_results.wrong_results) == 0 ), f"Wrong results: {background.failed_results.wrong_results}" + assert ( + len(background.failed_results.wrong_replay_results) == 0 + ), f"Wrong replay results: {background.failed_results.wrong_results}" From fc0b50b827f9ff4e66028a1b60d290e84fd36fc9 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Tue, 23 Sep 2025 10:14:47 -0400 Subject: [PATCH 07/10] Test is unexpected replay appends Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 2 ++ tests/test_examples_run.py | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 6e1caa3b3..01f632549 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -652,6 +652,8 @@ def process_block_body_with_replay( assert isinstance(block_id, str) try: result = state.replay[block_id] + # XXX TO REMOVE + assert False, "XXXXXXXXXXXXX" background: LazyMessages = SingletonContext( PdlDict({"role": state.role, "content": result}) ) diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py index 4a3cecc9a..5e09f6dd0 100644 --- a/tests/test_examples_run.py +++ b/tests/test_examples_run.py @@ -220,7 +220,7 @@ def __execute_and_replay_file(self, pdl_file_name: str) -> None: exec_result, output = self.__execute_file(pdl_file_path, scope, replay={}) - if output is not None: + if False and output is not None: # XXXXX REMOVE `False and` replay_result, _ = self.__execute_file( pdl_file_path, scope, replay=output["replay"] ) @@ -388,6 +388,7 @@ def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> assert ( len(background.failed_results.wrong_results) == 0 ), f"Wrong results: {background.failed_results.wrong_results}" - assert ( - len(background.failed_results.wrong_replay_results) == 0 - ), f"Wrong replay results: {background.failed_results.wrong_results}" + # XXXXXXX REMOVE comments + # assert ( + # len(background.failed_results.wrong_replay_results) == 0 + # ), f"Wrong replay results: {background.failed_results.wrong_results}" From 4c5e4925e77ca1ad7ada7a6fc71ecfc66caabeba Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Tue, 23 Sep 2025 11:29:52 -0400 Subject: [PATCH 08/10] Remove unexpected replay test Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 2 -- tests/test_examples_run.py | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 01f632549..6e1caa3b3 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -652,8 +652,6 @@ def process_block_body_with_replay( assert isinstance(block_id, str) try: result = state.replay[block_id] - # XXX TO REMOVE - assert False, "XXXXXXXXXXXXX" background: LazyMessages = SingletonContext( PdlDict({"role": state.role, "content": result}) ) diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py index 5e09f6dd0..4a3cecc9a 100644 --- a/tests/test_examples_run.py +++ b/tests/test_examples_run.py @@ -220,7 +220,7 @@ def __execute_and_replay_file(self, pdl_file_name: str) -> None: exec_result, output = self.__execute_file(pdl_file_path, scope, replay={}) - if False and output is not None: # XXXXX REMOVE `False and` + if output is not None: replay_result, _ = self.__execute_file( pdl_file_path, scope, replay=output["replay"] ) @@ -388,7 +388,6 @@ def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> assert ( len(background.failed_results.wrong_results) == 0 ), f"Wrong results: {background.failed_results.wrong_results}" - # XXXXXXX REMOVE comments - # assert ( - # len(background.failed_results.wrong_replay_results) == 0 - # ), f"Wrong replay results: {background.failed_results.wrong_results}" + assert ( + len(background.failed_results.wrong_replay_results) == 0 + ), f"Wrong replay results: {background.failed_results.wrong_results}" From 75765f7ac2d3687143e64d8c5df70b06db84b354 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Tue, 23 Sep 2025 11:53:08 -0400 Subject: [PATCH 09/10] fix conflict with stdlib Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 2 +- tests/data/function.pdl | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 6e1caa3b3..3dd6fdd65 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -306,7 +306,7 @@ def process_prog( stdlib_file = Path(__file__).parent / "pdl_stdlib.pdl" stdlib, _ = parse_file(stdlib_file) _, _, stdlib_dict, _ = process_block( - state.with_yield_background(False).with_yield_result(False), + state.with_yield_background(False).with_yield_result(False).with_id("stdlib"), empty_scope, stdlib.root, loc, diff --git a/tests/data/function.pdl b/tests/data/function.pdl index 2fd3a0c44..c54b68c34 100644 --- a/tests/data/function.pdl +++ b/tests/data/function.pdl @@ -13,6 +13,3 @@ defs: ${ notes } ### Answer: - - - From 6233021e10cf8ae4df26cd32d1535785a689e734 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Tue, 23 Sep 2025 13:54:35 -0400 Subject: [PATCH 10/10] fix modelResponse replay Signed-off-by: Louis Mandel --- src/pdl/pdl_interpreter.py | 9 +++++++++ tests/test_examples_run.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py index 3dd6fdd65..4009d5960 100644 --- a/src/pdl/pdl_interpreter.py +++ b/src/pdl/pdl_interpreter.py @@ -660,6 +660,13 @@ def process_block_body_with_replay( if state.yield_background: yield_background(background) trace = block + # Special case + match block: + case ModelBlock(): + if block.modelResponse is not None: + assert block.pdl__id is not None + raw_result = state.replay[block.pdl__id + ".modelResponse"] + scope = scope | {block.modelResponse: raw_result} except KeyError: result, background, scope, trace = process_block_body( state, scope, block, loc @@ -1845,6 +1852,8 @@ def get_transformed_inputs(kwargs): ) if block.modelResponse is not None: scope = scope | {block.modelResponse: raw_result} + assert block.pdl__id is not None + state.replay[block.pdl__id + ".modelResponse"] = raw_result trace: BlockTypeTVarProcessCallModel = concrete_block.model_copy( update={"pdl__result": result} ) # pyright: ignore diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py index 4a3cecc9a..ca3f04dad 100644 --- a/tests/test_examples_run.py +++ b/tests/test_examples_run.py @@ -376,7 +376,7 @@ def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> ) print(f"File that produced wrong REPLAY result: {file}") print( - f"Actual result:\n ------------------------------------------------------------\n{actual}\n-------------------------------------------------------------" + f"Replay result:\n ------------------------------------------------------------\n{actual}\n-------------------------------------------------------------" ) assert (