diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
new file mode 100644
index 0000000..c30903e
--- /dev/null
+++ b/.github/workflows/examples.yml
@@ -0,0 +1,77 @@
+name: Examples
+on:
+  push:
+    branches:
+      - main
+  schedule:
+    # Every night at midnight
+    - cron: "0 0 * * *"
+  workflow_dispatch:
+
+jobs:
+  examples:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Install Poetry
+        run: pipx install poetry==1.8.3
+
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      - name: Run all example scripts
+        run: |
+          # Find all Python files in examples directory
+          find examples -name "*.py" -type f | while read -r script; do
+            echo "Running example: $script"
+            poetry run python "$script"
+            if [ $? -ne 0 ]; then
+              echo "Error: Failed to run $script"
+              exit 1
+            fi
+          done
+
+      - name: Send Slack Notification
+        if: failure()
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+          GITHUB_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          curl -X POST -H 'Content-type: application/json' --data '{
+            "blocks": [
+              {
+                "type": "section",
+                "text": {
+                  "type": "mrkdwn",
+                  "text": ":warning: *Python Examples Failed!* \n "
+                }
+              },
+              {
+                "type": "section",
+                "fields": [
+                  {
+                    "type": "mrkdwn",
+                    "text": "*Job:* ${{ github.job }}"
+                  },
+                  {
+                    "type": "mrkdwn",
+                    "text": "*Run Number:* ${{ github.run_number }}"
+                  }
+                ]
+              },
+              {
+                "type": "actions",
+                "elements": [
+                  {
+                    "type": "button",
+                    "text": {
+                      "type": "plain_text",
+                      "text": "View Action Run"
+                    },
+                    "url": "${{ env.GITHUB_RUN_URL }}"
+                  }
+                ]
+              }
+            ]
+          }' $SLACK_WEBHOOK_URL
diff --git a/README.md b/README.md
index b5c15fa..227a474 100644
--- a/README.md
+++ b/README.md
@@ -253,6 +253,29 @@ async def analyze_call_feedback(input: CallFeedbackInput) -> AsyncIterator[Run[C
 ...
 ```
 
+### Caching
+
+By default, the cache setting is `auto`, meaning that agent runs are cached when the temperature is 0
+(the default temperature value). In other words, running the same agent twice with the **exact**
+same input returns the exact same output without calling the underlying model a second time.
+
+The cache usage string literal is defined in the [cache_usage.py](./workflowai/core/domain/cache_usage.py) file. There are 3 possible values:
+
+- `auto`: (default) Use cached results only when temperature is 0
+- `always`: Always use cached results if available, regardless of model temperature
+- `never`: Never use cached results, always execute a new run
+
+The cache usage can be passed to the agent function as a keyword argument:
+
+```python
+@workflowai.agent(id="analyze-call-feedback")
+async def analyze_call_feedback(_: CallFeedbackInput) -> AsyncIterator[CallFeedbackOutput]: ...
+
+run = await analyze_call_feedback(CallFeedbackInput(...), use_cache="always")
+```
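+
+For example, caching can be bypassed for a single run by passing `use_cache="never"`, which is a
+quick way to check that a prompt or instruction change actually reaches the model:
+
+```python
+run = await analyze_call_feedback(CallFeedbackInput(...), use_cache="never")
+```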
+
+
+
 
 ### Replying to a run
 
 Some use cases require the ability to have a back and forth between the client and the LLM. For example:
@@ -275,7 +298,7 @@ async def say_hello(input: Input) -> Run[Output]: ...
 
 run = await say_hello(Input(name="John"))
-run = await run.reply(user_response="Now say hello to his brother James")
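+# reply() continues the same run; either a user_message or tool_results must be provided.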
+run = await run.reply(user_message="Now say hello to his brother James")
 ```
 
 The output of a reply to a run has the same type as the original run, which makes it easy to iterate towards the
diff --git a/examples/__init__.py b/examples/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/examples/reply/name_extractor.py b/examples/reply/name_extractor.py
index 506690c..1ba8d87 100644
--- a/examples/reply/name_extractor.py
+++ b/examples/reply/name_extractor.py
@@ -1,28 +1,32 @@
+import asyncio
+
+from pydantic import BaseModel, Field  # pyright: ignore [reportUnknownVariableType]
+
 import workflowai
 from workflowai import Model, Run
-from pydantic import BaseModel, Field
-import asyncio
 
 
 class NameExtractionInput(BaseModel):
     """Input containing a sentence with a person's name."""
+
     sentence: str = Field(description="A sentence containing a person's name.")
 
 
 class NameExtractionOutput(BaseModel):
     """Output containing the extracted first and last name."""
+
     first_name: str = Field(
         default="",
-        description="The person's first name extracted from the sentence."
+        description="The person's first name extracted from the sentence.",
     )
     last_name: str = Field(
         default="",
-        description="The person's last name extracted from the sentence."
+        description="The person's last name extracted from the sentence.",
     )
 
 
-@workflowai.agent(id='name-extractor', model=Model.GPT_4O_MINI_LATEST)
-async def extract_name(input: NameExtractionInput) -> Run[NameExtractionOutput]:
+@workflowai.agent(id="name-extractor", model=Model.GPT_4O_MINI_LATEST)
+async def extract_name(_: NameExtractionInput) -> Run[NameExtractionOutput]:
     """
     Extract a person's first and last name from a sentence.
     Be precise and consider cultural variations in name formats.
@@ -38,21 +42,21 @@ async def main():
         "Dr. Maria Garcia-Rodriguez presented her research.",
         "The report was written by James van der Beek last week.",
     ]
-
+
     for sentence in sentences:
         print(f"\nProcessing: {sentence}")
-
+
         # Initial extraction
         run = await extract_name(NameExtractionInput(sentence=sentence))
-
+
         print(f"Extracted: {run.output.first_name} {run.output.last_name}")
-
+
         # Double check with a simple confirmation
-        run = await run.reply(user_response="Are you sure?")
-
+        run = await run.reply(user_message="Are you sure?")
+
         print("\nAfter double-checking:")
         print(f"Final extraction: {run.output.first_name} {run.output.last_name}")
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
\ No newline at end of file
+    asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
index 12a03a1..046e0f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,7 @@ unfixable = []
 "bin/*" = ["T201"]
 "*_test.py" = ["S101"]
 "conftest.py" = ["S101"]
+"examples/*" = ["INP001", "T201"]
 
 [tool.pyright]
pythonVersion = "3.9"
diff --git a/tests/e2e/reply_test.py b/tests/e2e/reply_test.py
new file mode 100644
index 0000000..e3b96f9
--- /dev/null
+++ b/tests/e2e/reply_test.py
@@ -0,0 +1,45 @@
+from pydantic import BaseModel, Field  # pyright: ignore [reportUnknownVariableType]
+
+import workflowai
+from workflowai import Model, Run
+
+
+class NameExtractionInput(BaseModel):
+    """Input containing a sentence with a person's name."""
+
+    sentence: str = Field(description="A sentence containing a person's name.")
+
+
+class NameExtractionOutput(BaseModel):
+    """Output containing the extracted first and last name."""
+
+    first_name: str = Field(
+        default="",
+        description="The person's first name extracted from the sentence.",
+    )
+    last_name: str = Field(
+        default="",
+        description="The person's last name extracted from the sentence.",
+    )
+
+
+@workflowai.agent(id="name-extractor", model=Model.GPT_4O_MINI_LATEST)
+async def extract_name(_: NameExtractionInput) -> Run[NameExtractionOutput]:
+    """
+    Extract a person's first and last name from a sentence.
+    Be precise and consider cultural variations in name formats.
+    If multiple names are present, focus on the most prominent one.
+    """
+    ...
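+
+
+# The reply below returns a Run with the same output type as the original run,
+# so the same assertions can be repeated unchanged after the follow-up message.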
+async def test_reply():
+    run = await extract_name(NameExtractionInput(sentence="My friend John Smith went to the store."))
+
+    assert run.output.first_name == "John"
+    assert run.output.last_name == "Smith"
+
+    run = await run.reply(user_message="Are you sure?")
+
+    assert run.output.first_name == "John"
+    assert run.output.last_name == "Smith"
diff --git a/workflowai/core/client/_models.py b/workflowai/core/client/_models.py
index 593eb3f..06fce4a 100644
--- a/workflowai/core/client/_models.py
+++ b/workflowai/core/client/_models.py
@@ -35,7 +35,7 @@ class RunRequest(BaseModel):
 
 
 class ReplyRequest(BaseModel):
-    user_response: Optional[str] = None
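+    # Renamed from user_response so the request field matches the public reply(user_message=...) argument.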
+    user_message: Optional[str] = None
     version: Union[str, int, dict[str, Any]]
     metadata: Optional[dict[str, Any]] = None
diff --git a/workflowai/core/client/agent.py b/workflowai/core/client/agent.py
index f8643f1..ddc4fc6 100644
--- a/workflowai/core/client/agent.py
+++ b/workflowai/core/client/agent.py
@@ -117,7 +117,7 @@ async def _prepare_run(self, task_input: AgentInput, stream: bool, **kwargs: Unp
     async def _prepare_reply(
         self,
         run_id: str,
-        user_response: Optional[str],
+        user_message: Optional[str],
         tool_results: Optional[Iterable[ToolCallResult]],
         stream: bool,
         **kwargs: Unpack[RunParams[AgentOutput]],
@@ -127,7 +127,7 @@ async def _prepare_reply(
         version = self._sanitize_version(kwargs.get("version"))
 
         request = ReplyRequest(
-            user_response=user_response,
+            user_message=user_message,
             version=version,
             stream=stream,
             metadata=kwargs.get("metadata"),
@@ -345,12 +345,12 @@ async def stream(
     async def reply(
         self,
         run_id: str,
-        user_response: Optional[str] = None,
+        user_message: Optional[str] = None,
         tool_results: Optional[Iterable[ToolCallResult]] = None,
         current_iteration: int = 0,
         **kwargs: Unpack[RunParams[AgentOutput]],
     ):
-        prepared_run = await self._prepare_reply(run_id, user_response, tool_results, stream=False, **kwargs)
+        prepared_run = await self._prepare_reply(run_id, user_message, tool_results, stream=False, **kwargs)
         validator, new_kwargs = self._sanitize_validator(kwargs, intolerant_validator(self.output_cls))
 
         res = await self.api.post(prepared_run.route, prepared_run.request, returns=RunResponse, run=True)
diff --git a/workflowai/core/domain/cache_usage.py b/workflowai/core/domain/cache_usage.py
index 01f4a25..69a743d 100644
--- a/workflowai/core/domain/cache_usage.py
+++ b/workflowai/core/domain/cache_usage.py
@@ -1,3 +1,7 @@
 from typing import Literal
 
-CacheUsage = Literal["always", "never", "auto"]
+# Cache usage configuration for agent runs
+# - "auto": Use cached results only when temperature is 0
+# - "always": Always use cached results if available, regardless of model temperature
+# - "never": Never use cached results, always execute a new run
+CacheUsage = Literal["auto", "always", "never"]
diff --git a/workflowai/core/domain/run.py b/workflowai/core/domain/run.py
index 62c6269..c2e895d 100644
--- a/workflowai/core/domain/run.py
+++ b/workflowai/core/domain/run.py
@@ -67,7 +67,7 @@ def __eq__(self, other: object) -> bool:
 
     async def reply(
         self,
-        user_response: Optional[str] = None,
+        user_message: Optional[str] = None,
         tool_results: Optional[Iterable[ToolCallResult]] = None,
         **kwargs: Unpack["_common_types.RunParams[AgentOutput]"],
     ):
@@ -75,7 +75,7 @@ async def reply(
             raise ValueError("Agent is not set")
         return await self._agent.reply(
             run_id=self.id,
-            user_response=user_response,
+            user_message=user_message,
             tool_results=tool_results,
             **kwargs,
         )
@@ -91,9 +91,9 @@ class _AgentBase(Protocol, Generic[AgentOutput]):
     async def reply(
         self,
         run_id: str,
-        user_response: Optional[str] = None,
+        user_message: Optional[str] = None,
         tool_results: Optional[Iterable[ToolCallResult]] = None,
         **kwargs: Unpack["_types.RunParams[AgentOutput]"],
     ) -> "Run[AgentOutput]":
-        """Reply to a run. Either a user_response or tool_results must be provided."""
+        """Reply to a run. Either a user_message or tool_results must be provided."""
         ...