diff --git a/.env.sample b/.env.sample index 767bccf..d2c6864 100644 --- a/.env.sample +++ b/.env.sample @@ -1,4 +1,8 @@ -WORKFLOWAI_API_URL= +# Only change this URL if you are self-hosting WorkflowAI +# WORKFLOWAI_API_URL=https://run.workflowai.com + +# Your WorkflowAI API key +# [Get your API key here](https://workflowai.com/organization/settings/api-keys) WORKFLOWAI_API_KEY= # Used when running e2e tests diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index c30903e..4053d60 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -8,6 +8,10 @@ on: - cron: "0 0 * * *" workflow_dispatch: +env: + # TODO: change to .com when reply is released + WORKFLOWAI_API_URL: https://run.workflowai.dev + jobs: examples: runs-on: ubuntu-latest @@ -31,6 +35,8 @@ jobs: exit 1 fi done + env: + WORKFLOWAI_API_KEY: ${{ secrets.WORKFLOWAI_TEST_API_KEY }} - name: Send Slack Notification if: failure() diff --git a/README.md b/README.md index 227a474..0cfba9f 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,69 @@ async def analyze_call_feedback(input: CallFeedbackInput) -> AsyncIterator[Run[C ... ``` +### Images + +Add images as input to an agent by using the `Image` class. An image can have either: + +- `data`: base64-encoded content, passed together with a `content_type` +- a `url` + +```python +from workflowai.fields import Image + +class ImageInput(BaseModel): + image: Image = Field(description="The image to analyze") + +# use base64 to include the image inline +image = Image(content_type='image/jpeg', data='') + +# You can also use the `url` property to pass an image URL. +image = Image(url="https://example.com/image.jpg") +``` + +An example of using an image as input is available in [city_identifier.py](./examples/images/city_identifier.py). + +### Files (PDF, .txt, ...) + +Use the `File` class to pass files as input to an agent. Different LLMs support different file types. + +```python +from workflowai.fields import File +... 
+ +class PDFQuestionInput(BaseModel): + pdf: File = Field(description="The PDF document to analyze") + question: str = Field(description="The question to answer about the PDF content") + +class PDFAnswerOutput(BaseModel): + answer: str = Field(description="The answer to the question based on the PDF content") + quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer") + +@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST) +async def answer_pdf_question(input: PDFQuestionInput) -> PDFAnswerOutput: + """ + Analyze the provided PDF document and answer the given question. + Provide a clear and concise answer based on the content found in the PDF. + """ + ... + +pdf = File(content_type='application/pdf', data='') +question = "What are the key findings in this report?" + +output = await answer_pdf_question(PDFQuestionInput(pdf=pdf, question=question)) +# Print the answer and supporting quotes +print("Answer:", output.answer) +print("Supporting quotes:") +for quote in output.quotes: + print(f"- {quote}") +``` + +An example of using a PDF as input is available in [pdf_answer.py](./examples/pdfs/pdf_answer.py). + +### Audio + +[todo] + ### Caching By default, the cache settings is `auto`, meaning that agent runs are cached when the temperature is 0 diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..cf71b4e --- /dev/null +++ b/examples/README.md @@ -0,0 +1,32 @@ +# WorkflowAI Examples + +This directory contains example agents demonstrating different capabilities of the WorkflowAI SDK. + +## Image Analysis Examples + +### City Identifier +[city_identifier.py](./images/city_identifier.py) + +An agent that identifies cities from images. 
Given a photo of a city, it: +- Identifies the city and country +- Explains the reasoning behind the identification +- Lists key landmarks or architectural features visible in the image +- Provides a confidence level in the identification + +Uses the `Image` field type to handle image inputs and Gemini 1.5 Flash for its visual analysis capabilities. + +## Document Analysis Examples + +### PDF Question Answering +[pdf_answer.py](./pdfs/pdf_answer.py) + +An agent that answers questions about PDF documents. Given a PDF and a question, it: +- Analyzes the PDF content +- Provides a clear and concise answer to the question +- Includes relevant quotes from the document to support its answer + +Uses the `File` field type to handle PDF inputs and Claude 3.5 Sonnet for its strong document comprehension abilities. + +## Workflow Pattern Examples + +For examples of different workflow patterns (chains, routing, parallel processing, etc.), see the [workflows](./workflows) directory. \ No newline at end of file diff --git a/examples/city_to_capital_task.py b/examples/city_to_capital_task.py deleted file mode 100644 index 1dccb05..0000000 --- a/examples/city_to_capital_task.py +++ /dev/null @@ -1,39 +0,0 @@ -from asyncio import run as aiorun - -import typer -from pydantic import BaseModel, Field # pyright: ignore [reportUnknownVariableType] -from rich import print as rprint - -import workflowai - - -class CityToCapitalTaskInput(BaseModel): - city: str = Field( - description="The name of the city for which the capital is to be found", - examples=["Tokyo"], - ) - - -class CityToCapitalTaskOutput(BaseModel): - capital: str = Field( - description="The capital of the specified city", - examples=["Tokyo"], - ) - - -@workflowai.agent(schema_id=1) -async def city_to_capital(task_input: CityToCapitalTaskInput) -> CityToCapitalTaskOutput: ... 
- - -def main(city: str) -> None: - async def _inner() -> None: - task_input = CityToCapitalTaskInput(city=city) - output = await city_to_capital(task_input) - - rprint(output) - - aiorun(_inner()) - - -if __name__ == "__main__": - typer.run(main) diff --git a/examples/images/assets/new-york-city.jpg b/examples/images/assets/new-york-city.jpg new file mode 100644 index 0000000..1a4f588 Binary files /dev/null and b/examples/images/assets/new-york-city.jpg differ diff --git a/examples/images/assets/paris.jpg b/examples/images/assets/paris.jpg new file mode 100644 index 0000000..cdbd3e7 Binary files /dev/null and b/examples/images/assets/paris.jpg differ diff --git a/examples/images/city_identifier.py b/examples/images/city_identifier.py new file mode 100644 index 0000000..4f279db --- /dev/null +++ b/examples/images/city_identifier.py @@ -0,0 +1,86 @@ +import asyncio +import os +from typing import Optional + +from dotenv import load_dotenv +from pydantic import BaseModel, Field # pyright: ignore [reportUnknownVariableType] + +import workflowai +from workflowai import Run, WorkflowAIError +from workflowai.core.domain.model import Model +from workflowai.fields import Image + + +class ImageInput(BaseModel): + image: Image = Field(description="The image to analyze") + + +class ImageOutput(BaseModel): + city: str = Field(default="", description="Name of the city shown in the image") + country: str = Field(default="", description="Name of the country where the city is located") + confidence: Optional[float] = Field( + default=None, + description="Confidence level in the identification (0-1)", + ) + + +@workflowai.agent(id="city-identifier", model=Model.GEMINI_1_5_FLASH_LATEST) +async def identify_city_from_image(_: ImageInput) -> Run[ImageOutput]: + """ + Analyze the provided image and identify the city and country shown in it. + If the image shows a recognizable landmark or cityscape, identify the city and country. 
+ If uncertain, indicate lower confidence or leave fields empty. + + Focus on: + - Famous landmarks + - Distinctive architecture + - Recognizable skylines + - Cultural elements that identify the location + + Return empty strings if the city/country cannot be determined with reasonable confidence. + """ + ... + + +async def run_city_identifier(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + image_path = os.path.join(current_dir, "assets", "new-york-city.jpg") + + # With a properly async function you should use an async open + # see https://github.com/Tinche/aiofiles for example + with open(image_path, "rb") as image_file: # noqa: ASYNC230 + import base64 + + content = base64.b64encode(image_file.read()).decode("utf-8") + + image = Image(content_type="image/jpeg", data=content) + try: + agent_run = await identify_city_from_image( + ImageInput(image=image), + use_cache="auto", + ) + except WorkflowAIError as e: + print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}") + return + + print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n") + print(f"Cost: ${agent_run.cost_usd:.10f}") + print(f"Latency: {agent_run.duration_seconds:.2f}s") + + # using URL for Image + # TODO: replace with a Github URL + image_url = "https://t4.ftcdn.net/jpg/02/96/15/35/360_F_296153501_B34baBHDkFXbl5RmzxpiOumF4LHGCvAE.jpg" + image = Image(url=image_url) + agent_run = await identify_city_from_image( + ImageInput(image=image), + use_cache="auto", + ) + + print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n") + print(f"Cost: ${agent_run.cost_usd:.10f}") + print(f"Latency: {agent_run.duration_seconds:.2f}s") + + +if __name__ == "__main__": + load_dotenv(override=True) + asyncio.run(run_city_identifier()) diff --git a/examples/pdfs/assets/sec-form-4.pdf b/examples/pdfs/assets/sec-form-4.pdf new file mode 100644 index 0000000..6646e87 Binary files /dev/null and b/examples/pdfs/assets/sec-form-4.pdf differ diff --git 
a/examples/pdfs/pdf_answer.py b/examples/pdfs/pdf_answer.py new file mode 100644 index 0000000..b8b672b --- /dev/null +++ b/examples/pdfs/pdf_answer.py @@ -0,0 +1,75 @@ +import asyncio +import os +from typing import List + +from dotenv import load_dotenv +from pydantic import BaseModel, Field # pyright: ignore [reportUnknownVariableType] + +import workflowai +from workflowai import Run, WorkflowAIError +from workflowai.core.domain.model import Model +from workflowai.fields import File + + +class PDFQuestionInput(BaseModel): + pdf: File = Field(description="The PDF document to analyze") + question: str = Field(description="The question to answer about the PDF content") + + +class PDFAnswerOutput(BaseModel): + answer: str = Field(description="The answer to the question based on the PDF content") + quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer") + + +@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST) +async def answer_pdf_question(_: PDFQuestionInput) -> Run[PDFAnswerOutput]: + """ + Analyze the provided PDF document and answer the given question. + Provide a clear and concise answer based on the content found in the PDF. + + Focus on: + - Accurate information extraction from the PDF + - Direct and relevant answers to the question + - Context-aware responses that consider the full document + - Citing specific sections or pages when relevant + + If the question cannot be answered based on the PDF content, + provide a clear explanation of why the information is not available. + """ + ... 
+ + +async def run_pdf_answer(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + pdf_path = os.path.join(current_dir, "assets", "sec-form-4.pdf") + + # With a properly async function you should use an async open + # see https://github.com/Tinche/aiofiles for example + with open(pdf_path, "rb") as pdf_file: # noqa: ASYNC230 + import base64 + + content = base64.b64encode(pdf_file.read()).decode("utf-8") + + pdf = File(content_type="application/pdf", data=content) + # Could also pass the content via url + # pdf = File(url="https://example.com/sample.pdf") + question = "How many stocks were sold? What is the total amount in USD?" + + try: + agent_run = await answer_pdf_question( + PDFQuestionInput(pdf=pdf, question=question), + use_cache="auto", + ) + except WorkflowAIError as e: + print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}") + return + + print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n") + print(f"Cost: ${agent_run.cost_usd:.10f}") + print(f"Latency: {agent_run.duration_seconds:.2f}s") + + +if __name__ == "__main__": + # Load environment variables from .env file + load_dotenv(override=True) + asyncio.run(run_pdf_answer()) diff --git a/examples/reply/name_extractor.py b/examples/reply/name_extractor.py index 1ba8d87..f61e111 100644 --- a/examples/reply/name_extractor.py +++ b/examples/reply/name_extractor.py @@ -1,5 +1,6 @@ import asyncio +from dotenv import load_dotenv from pydantic import BaseModel, Field # pyright: ignore [reportUnknownVariableType] import workflowai @@ -59,4 +60,5 @@ async def main(): if __name__ == "__main__": + load_dotenv(override=True) asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 046e0f7..825be28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ unfixable = [] "bin/*" = ["T201"] "*_test.py" = ["S101"] "conftest.py" = ["S101"] -"examples/*" = ["INP001", "T201"] +"examples/*" = ["INP001", "T201", "ERA001"] [tool.pyright] pythonVersion = 
"3.9"