|
14 | 14 | "test agents in realistic, conversational scenarios involving tool use and\n", |
15 | 15 | "adherence to policies.\n", |
16 | 16 | "\n", |
17 | | - "**Our Goal:** To take a simple, underperforming prompt and automatically\n", |
| 17 | + "**Goal:** To take a simple, underperforming prompt and automatically\n", |
18 | 18 | "improve it using GEPA, increasing the agent's reliability on a customer\n", |
19 | 19 | "support task.\n", |
20 | 20 | "\n", |
| 21 | + "**Note:** You can find more options to run GEPA with an ADK agent in the [README file](https://github.com/google/adk-python/blob/main/contributing/samples/gepa/README.md).\n", |
| 22 | + "\n", |
21 | 23 | "## Prerequisites\n", |
22 | 24 | "\n", |
23 | 25 | "* **Google Cloud Project:** You'll need access to a Google Cloud Project with\n", |
|
36 | 38 | }, |
37 | 39 | "outputs": [], |
38 | 40 | "source": [ |
39 | | - "#@title Install Tau-bench and GEPA\n", |
| 41 | + "# @title Install Tau-bench and GEPA\n", |
40 | 42 | "!git clone https://github.com/google/adk-python.git\n", |
41 | 43 | "!git clone https://github.com/sierra-research/tau-bench.git\n", |
42 | 44 | "%cd tau-bench/\n", |
|
45 | 47 | "%cd ..\n", |
46 | 48 | "!pip install gepa --quiet\n", |
47 | 49 | "\n", |
48 | | - "!pip install retry --quiet\n" |
| 50 | + "!pip install retry --quiet" |
49 | 51 | ] |
50 | 52 | }, |
51 | 53 | { |
52 | 54 | "cell_type": "code", |
53 | 55 | "source": [ |
54 | | - "#@title Configure python dependencies\n", |
| 56 | + "# @title Configure python dependencies\n", |
55 | 57 | "import sys\n", |
56 | 58 | "\n", |
57 | 59 | "sys.path.append('/content/tau-bench')\n", |
|
67 | 69 | { |
68 | 70 | "cell_type": "code", |
69 | 71 | "source": [ |
70 | | - "#@title Authentication\n", |
| 72 | + "# @title Authentication\n", |
71 | 73 | "from google.colab import auth\n", |
| 74 | + "\n", |
72 | 75 | "auth.authenticate_user()" |
73 | 76 | ], |
74 | 77 | "metadata": { |
|
87 | 90 | }, |
88 | 91 | "outputs": [], |
89 | 92 | "source": [ |
90 | | - "#@title Setup\n", |
| 93 | + "# @title Setup\n", |
91 | 94 | "from datetime import datetime\n", |
92 | 95 | "import json\n", |
93 | 96 | "import logging\n", |
94 | 97 | "import os\n", |
95 | 98 | "\n", |
96 | | - "from google.genai import types\n", |
97 | 99 | "import experiment as experiment_lib\n", |
| 100 | + "from google.genai import types\n", |
98 | 101 | "\n", |
99 | 102 | "\n", |
100 | 103 | "# @markdown ### ☁️ Configure Vertex AI Access\n", |
101 | 104 | "# @markdown Enter your Google Cloud Project ID and Location.\n", |
102 | 105 | "\n", |
103 | | - "#@markdown Configure Vertex AI Access\n", |
| 106 | + "# @markdown Configure Vertex AI Access\n", |
104 | 107 | "\n", |
105 | | - "GCP_PROJECT = '' #@param {type: 'string'}\n", |
106 | | - "GCP_LOCATION = 'us-central1' #@param {type: 'string'}\n", |
| 108 | + "GCP_PROJECT = '' # @param {type: 'string'}\n", |
| 109 | + "GCP_LOCATION = 'us-central1' # @param {type: 'string'}\n", |
107 | 110 | "\n", |
108 | 111 | "# @markdown ---\n", |
109 | 112 | "# @markdown ### 🧠 Configure LLM Models\n", |
|
116 | 119 | "\n", |
117 | 120 | "# @markdown ---\n", |
118 | 121 | "# @markdown ### ⚙️ Configure Experiment Parameters\n", |
119 | | - "# @markdown These control the dataset size, evaluation runs, and GEPA budget.\n", |
120 | | - "# @markdown For a quick demo, keep these values small. For a real run, you might\n", |
121 | | - "# @markdown increase `MAX_DATASET_SIZE` to 50-100 and `MAX_METRIC_CALLS` to 100+.\n", |
| 122 | + "# @markdown Number of trajectories sampled from rollouts to be used by the reflection model in each GEPA step:\n", |
| 123 | + "MINI_BATCH_SIZE = 8 # @param {type: 'integer'}\n", |
| 124 | + "# @markdown Size of the Pareto and feedback datasets (small setting for demo purposes):\n", |
122 | 125 | "MAX_DATASET_SIZE = 10 # @param {type: 'integer'}\n", |
| 126 | + "# @markdown Number of times each task is run during evaluation:\n", |
123 | 127 | "NUM_EVAL_TRIALS = 4 # @param {type: 'integer'}\n", |
| 128 | + "# @markdown Total budget for GEPA prompt evaluations:\n", |
124 | 129 | "MAX_METRIC_CALLS = 100 # @param {type: 'integer'}\n", |
| 130 | + "# @markdown Maximum number of parallel agent-environment interactions:\n", |
125 | 131 | "MAX_CONCURRENCY = 4 # @param {type: 'integer'}\n", |
126 | 132 | "\n", |
| 133 | + "# @markdown **Note:** You can find more information on how to configure GEPA in the [README file](https://github.com/google/adk-python/blob/main/contributing/samples/gepa/README.md).\n", |
| 134 | + "\n", |
127 | 135 | "# The ADK uses these environment variables to connect to Vertex AI via the\n", |
128 | 136 | "# Google GenAI SDK.\n", |
129 | 137 | "os.environ['GOOGLE_GENAI_USE_VERTEXAI'] = 'true'\n", |
|
165 | 173 | { |
166 | 174 | "cell_type": "code", |
167 | 175 | "source": [ |
168 | | - "#@title Define an initial instruction\n", |
| 176 | + "# @title Define an initial instruction\n", |
169 | 177 | "\n", |
170 | 178 | "# @markdown This is our starting \"seed\" prompt. It's very generic and doesn't give the agent much guidance on how to behave or use tools.\n", |
171 | 179 | "BASE_SYSTEM_INSTRUCTION = 'you are a customer support agent helping customers resolve their issues by using the right tools' # @param {type: 'string'}\n", |
|
226 | 234 | } |
227 | 235 | ], |
228 | 236 | "source": [ |
229 | | - "#@title Initial Inference: A First Look at Our Agent\n", |
| 237 | + "# @title Initial Inference: A First Look at Our Agent\n", |
230 | 238 | "\n", |
231 | 239 | "from tau_bench.types import EnvRunResult, RunConfig\n", |
232 | 240 | "\n", |
|
373 | 381 | } |
374 | 382 | ], |
375 | 383 | "source": [ |
376 | | - "#@title Let's visualize one of the sampled trajectory\n", |
| 384 | + "# @title Let's visualize one of the sampled trajectories\n", |
| 385 | + "\n", |
377 | 386 | "\n", |
378 | 387 | "def display_trajectory(trajectory):\n", |
379 | 388 | " \"\"\"Formats and prints a trajectory for display in Colab.\"\"\"\n", |
|
400 | 409 | " f'**{role.upper()}**: ↪️ Tool Response from'\n", |
401 | 410 | " f' `{fr[\"name\"]}`: `{fr[\"response\"][\"result\"]}`'\n", |
402 | 411 | " )\n", |
403 | | - " print() # new line after each turn\n", |
| 412 | + " print() # new line after each turn\n", |
404 | 413 | "\n", |
405 | 414 | "\n", |
406 | 415 | "# Let's inspect the \"trajectory\" of the first run. A trajectory is the full\n", |
|
485 | 494 | " rnd_seed=42,\n", |
486 | 495 | " max_metric_calls=MAX_METRIC_CALLS, # GEPA budget: max prompt evaluations\n", |
487 | 496 | " reflection_model=REFLECTION_MODEL_NAME, # Model for GEPA's reflection step\n", |
488 | | - " reflection_minibatch_size=8,\n", |
| 497 | + " # Number of trajectories sampled from failed rollouts to be used by the\n", |
| 498 | + " # reflection model in each GEPA step to generate prompt improvements.\n", |
| 499 | + " reflection_minibatch_size=MINI_BATCH_SIZE,\n", |
489 | 500 | " use_rater=False, # Optional: LLM rater for nuanced feedback\n", |
490 | 501 | " # For this demo, we use the same small dataset for all splits.\n", |
491 | 502 | " # In a real optimization run, you would use separate datasets:\n", |
|
1330 | 1341 | } |
1331 | 1342 | ], |
1332 | 1343 | "source": [ |
1333 | | - "#@title Run GEPA (this might take ~10 minutes)\n", |
| 1344 | + "# @title Run GEPA (this might take ~10 minutes)\n", |
1334 | 1345 | "# This process can take around 10 minutes for the demo settings, as it\n", |
1335 | 1346 | "# involves multiple rounds of running the agent and calling the reflection model.\n", |
1336 | 1347 | "# A real run with more metric calls will take longer.\n", |
|
1424 | 1435 | } |
1425 | 1436 | ], |
1426 | 1437 | "source": [ |
1427 | | - "#@title Visualize the optimized prompt\n", |
| 1438 | + "# @title Visualize the optimized prompt\n", |
1428 | 1439 | "# Now, let's look at the final, optimized prompt that GEPA produced.\n", |
1429 | 1440 | "# It should be much more detailed than our initial one-line prompt!\n", |
1430 | 1441 | "print('\\n--- Optimized Prompt from GEPA ---')\n", |
|
1489 | 1500 | } |
1490 | 1501 | ], |
1491 | 1502 | "source": [ |
1492 | | - "#@title Run evaluation\n", |
| 1503 | + "# @title Run evaluation\n", |
1493 | 1504 | "\n", |
1494 | 1505 | "# Let's create a new directory for this final evaluation run.\n", |
1495 | 1506 | "final_eval_dir = os.path.join(\n", |
|
0 commit comments