WorkflowAI · guillaq · Feb 4, 2025 · Jan 30, 2025 · Feb 3, 2025 · Feb 4, 2025
diff --git a/.env.sample b/.env.sample
@@ -1,4 +1,8 @@
-WORKFLOWAI_API_URL=
+# Only change this URL if you are self-hosting WorkflowAI
+# WORKFLOWAI_API_URL=https://run.workflowai.com
+
+# Your WorkflowAI API key
+# Get your API key at https://workflowai.com/organization/settings/api-keys
 WORKFLOWAI_API_KEY=
 
 # Used when running e2e tests

diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
@@ -8,6 +8,10 @@ on:
     - cron: "0 0 * * *"
   workflow_dispatch:
 
+env:
+  # TODO: change to .com when reply is released
+  WORKFLOWAI_API_URL: https://run.workflowai.dev
+
 jobs:
   examples:
     runs-on: ubuntu-latest
@@ -31,6 +35,8 @@ jobs:
               exit 1
             fi
           done
+        env:
+          WORKFLOWAI_API_KEY: ${{ secrets.WORKFLOWAI_TEST_API_KEY }}
 
       - name: Send Slack Notification
         if: failure()

diff --git a/README.md b/README.md
@@ -253,6 +253,69 @@ async def analyze_call_feedback(input: CallFeedbackInput) -> AsyncIterator[Run[C
     ...
 ```
 
+### Images
+
+Add images as input to an agent by using the `Image` class. An image can either have:
+
+- a `data` field holding the base64 encoded content, together with its `content_type`
+- a `url`
+
+```python
+from workflowai.fields import Image
+
+class ImageInput(BaseModel):
+    image: Image = Field(description="The image to analyze")
+
+# use base64 to include the image inline
+image = Image(content_type='image/jpeg', data='<base 64 encoded data>')
+
+# You can also use the `url` property to pass an image URL.
+image = Image(url="https://example.com/image.jpg")
+```
+
+An example of using an image as input is available in [city_identifier.py](./examples/images/city_identifier.py).
+
+### Files (PDF, .txt, ...)
+
+Use the `File` class to pass files as input to an agent. Different LLMs support different file types.
+
+```python
+from workflowai.fields import File
+...
+
+class PDFQuestionInput(BaseModel):
+    pdf: File = Field(description="The PDF document to analyze")
+    question: str = Field(description="The question to answer about the PDF content")
+
+class PDFAnswerOutput(BaseModel):
+    answer: str = Field(description="The answer to the question based on the PDF content")
+    quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer")
+
+@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST)
+async def answer_pdf_question(input: PDFQuestionInput) -> PDFAnswerOutput:
+    """
+    Analyze the provided PDF document and answer the given question.
+    Provide a clear and concise answer based on the content found in the PDF.
+    """
+    ...
+
+pdf = File(content_type='application/pdf', data='<base 64 encoded data>')
+question = "What are the key findings in this report?"
+
+output = await answer_pdf_question(PDFQuestionInput(pdf=pdf, question=question))
+# Print the answer and supporting quotes
+print("Answer:", output.answer)
+print("Supporting quotes:")
+for quote in output.quotes:
+    print(f"- {quote}")
+```
+
+An example of using a PDF as input is available in [pdf_answer.py](./examples/pdfs/pdf_answer.py).
+
+### Audio
+
+[todo]
+
 ### Caching
 
 By default, the cache settings is `auto`, meaning that agent runs are cached when the temperature is 0

diff --git a/examples/README.md b/examples/README.md
@@ -0,0 +1,32 @@
+# WorkflowAI Examples
+
+This directory contains example agents demonstrating different capabilities of the WorkflowAI SDK.
+
+## Image Analysis Examples
+
+### City Identifier
+[city_identifier.py](./images/city_identifier.py)
+
+An agent that identifies cities from images. Given a photo of a city, it:
+- Identifies the city and country
+- Explains the reasoning behind the identification
+- Lists key landmarks or architectural features visible in the image
+- Provides confidence level in the identification
+
+Uses the `Image` field type to handle image inputs and Gemini 1.5 Flash (`Model.GEMINI_1_5_FLASH_LATEST`) to analyze them.
+
+## Document Analysis Examples
+
+### PDF Question Answering
+[pdf_answer.py](./pdfs/pdf_answer.py)
+
+An agent that answers questions about PDF documents. Given a PDF and a question, it:
+- Analyzes the PDF content
+- Provides a clear and concise answer to the question
+- Includes relevant quotes from the document to support its answer
+
+Uses the `File` field type to handle PDF inputs and Claude 3.5 Sonnet for its strong document comprehension abilities.
+
+## Workflow Pattern Examples
+
+For examples of different workflow patterns (chains, routing, parallel processing, etc.), see the [workflows](./workflows) directory. 
diff --git a/examples/city_to_capital_task.py b/examples/city_to_capital_task.py
diff --git a/examples/images/assets/new-york-city.jpg b/examples/images/assets/new-york-city.jpg
diff --git a/examples/images/assets/paris.jpg b/examples/images/assets/paris.jpg
diff --git a/examples/images/city_identifier.py b/examples/images/city_identifier.py
@@ -0,0 +1,86 @@
+import asyncio
+import os
+from typing import Optional
+
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field  # pyright: ignore [reportUnknownVariableType]
+
+import workflowai
+from workflowai import Run, WorkflowAIError
+from workflowai.core.domain.model import Model
+from workflowai.fields import Image
+
+
+class ImageInput(BaseModel):  # input schema of the `city-identifier` agent: a single image
+    image: Image = Field(description="The image to analyze")
+
+
+class ImageOutput(BaseModel):  # output schema of the `city-identifier` agent; every field has a default
+    city: str = Field(default="", description="Name of the city shown in the image")
+    country: str = Field(default="", description="Name of the country where the city is located")
+    confidence: Optional[float] = Field(  # stays None when no confidence is provided
+        default=None,
+        description="Confidence level in the identification (0-1)",
+    )
+
+
+@workflowai.agent(id="city-identifier", model=Model.GEMINI_1_5_FLASH_LATEST)
+async def identify_city_from_image(_: ImageInput) -> Run[ImageOutput]:  # parameter is unused in the stub, hence `_`
+    """
+    Analyze the provided image and identify the city and country shown in it.
+    If the image shows a recognizable landmark or cityscape, identify the city and country.
+    If uncertain, indicate lower confidence or leave fields empty.
+
+    Focus on:
+    - Famous landmarks
+    - Distinctive architecture
+    - Recognizable skylines
+    - Cultural elements that identify the location
+
+    Return empty strings if the city/country cannot be determined with reasonable confidence.
+    """
+    ...  # stub body; NOTE(review): presumably the decorator supplies the implementation (callers pass `use_cache`) - confirm
+
+
+async def run_city_identifier():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    image_path = os.path.join(current_dir, "assets", "new-york-city.jpg")
+
+    # With a properly async function you should use an async open
+    # see https://github.com/Tinche/aiofiles for example
+    with open(image_path, "rb") as image_file:  # noqa: ASYNC230
+        import base64
+
+        content = base64.b64encode(image_file.read()).decode("utf-8")
+
+    image = Image(content_type="image/jpeg", data=content)
+    try:
+        agent_run = await identify_city_from_image(
+            ImageInput(image=image),
+            use_cache="auto",
+        )
+    except WorkflowAIError as e:
+        print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}")
+        return
+
+    print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
+    print(f"Cost: ${agent_run.cost_usd:.10f}")
+    print(f"Latency: {agent_run.duration_seconds:.2f}s")
+
+    # using URL for Image
+    # TODO: replace with a Github URL
+    image_url = "https://t4.ftcdn.net/jpg/02/96/15/35/360_F_296153501_B34baBHDkFXbl5RmzxpiOumF4LHGCvAE.jpg"
+    image = Image(url=image_url)
+    agent_run = await identify_city_from_image(
+        ImageInput(image=image),
+        use_cache="auto",
+    )
+
+    print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
+    print(f"Cost: ${agent_run.cost_usd:.10f}")
+    print(f"Latency: {agent_run.duration_seconds:.2f}s")
+
+
+if __name__ == "__main__":
+    load_dotenv(override=True)
+    asyncio.run(run_city_identifier())
diff --git a/examples/pdfs/assets/sec-form-4.pdf b/examples/pdfs/assets/sec-form-4.pdf
diff --git a/examples/pdfs/pdf_answer.py b/examples/pdfs/pdf_answer.py
@@ -0,0 +1,75 @@
+import asyncio
+import os
+from typing import List
+
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field  # pyright: ignore [reportUnknownVariableType]
+
+import workflowai
+from workflowai import Run, WorkflowAIError
+from workflowai.core.domain.model import Model
+from workflowai.fields import File
+
+
+class PDFQuestionInput(BaseModel):  # input schema of the `pdf-answer` agent: a document plus a question about it
+    pdf: File = Field(description="The PDF document to analyze")
+    question: str = Field(description="The question to answer about the PDF content")
+
+
+class PDFAnswerOutput(BaseModel):  # output schema of the `pdf-answer` agent; both fields are required (no defaults)
+    answer: str = Field(description="The answer to the question based on the PDF content")
+    quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer")
+
+
+@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST)
+async def answer_pdf_question(_: PDFQuestionInput) -> Run[PDFAnswerOutput]:  # parameter is unused in the stub, hence `_`
+    """
+    Analyze the provided PDF document and answer the given question.
+    Provide a clear and concise answer based on the content found in the PDF.
+
+    Focus on:
+    - Accurate information extraction from the PDF
+    - Direct and relevant answers to the question
+    - Context-aware responses that consider the full document
+    - Citing specific sections or pages when relevant
+
+    If the question cannot be answered based on the PDF content,
+    provide a clear explanation of why the information is not available.
+    """
+    ...  # stub body; NOTE(review): presumably the decorator supplies the implementation (callers pass `use_cache`) - confirm
+
+
+async def run_pdf_answer():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    pdf_path = os.path.join(current_dir, "assets", "sec-form-4.pdf")
+
+    # With a properly async function you should use an async open
+    # see https://github.com/Tinche/aiofiles for example
+    with open(pdf_path, "rb") as pdf_file:  # noqa: ASYNC230
+        import base64
+
+        content = base64.b64encode(pdf_file.read()).decode("utf-8")
+
+    pdf = File(content_type="application/pdf", data=content)
+    # Could also pass the content via url
+    # pdf = File(url="https://example.com/sample.pdf")
+    question = "How many stocks were sold? What is the total amount in USD?"
+
+    try:
+        agent_run = await answer_pdf_question(
+            PDFQuestionInput(pdf=pdf, question=question),
+            use_cache="auto",
+        )
+    except WorkflowAIError as e:
+        print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}")
+        return
+
+    print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
+    print(f"Cost: ${agent_run.cost_usd:.10f}")
+    print(f"Latency: {agent_run.duration_seconds:.2f}s")
+
+
+if __name__ == "__main__":
+    # Load environment variables from .env file
+    load_dotenv(override=True)
+    asyncio.run(run_pdf_answer())
diff --git a/examples/reply/name_extractor.py b/examples/reply/name_extractor.py
@@ -1,5 +1,6 @@
 import asyncio
 
+from dotenv import load_dotenv
 from pydantic import BaseModel, Field  # pyright: ignore [reportUnknownVariableType]
 
 import workflowai
@@ -59,4 +60,5 @@ async def main():
 
 
 if __name__ == "__main__":
+    load_dotenv(override=True)
     asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
@@ -65,7 +65,7 @@ unfixable = []
 "bin/*" = ["T201"]
 "*_test.py" = ["S101"]
 "conftest.py" = ["S101"]
-"examples/*" = ["INP001", "T201"]
+"examples/*" = ["INP001", "T201", "ERA001"]
 
 [tool.pyright]
 pythonVersion = "3.9"