feat: introduce a new AgentEngineSandboxCodeExecutor class that supports executing agent-generated code

google-genai-bot · copybara-github · commit ee39a8911063 · 2025-10-20T10:14:34.000-07:00
The AgentEngineSandboxCodeExecutor uses the Vertex AI Code Execution Sandbox API to execute code

PiperOrigin-RevId: 821699641
diff --git a/contributing/samples/agent_engine_code_execution/README b/contributing/samples/agent_engine_code_execution/README
@@ -0,0 +1,18 @@
+# Agent Engine Code Execution Sample
+
+## Introduction
+
+This sample data science agent uses the Agent Engine Code Execution Sandbox to execute LLM-generated code.
+
+
+## How to use
+
+1. Follow https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/code-execution/overview to create a code execution sandbox environment.
+
+2. Replace `SANDBOX_RESOURCE_NAME` in `agent.py` with the resource name of the sandbox you just created. If you don't want to create a sandbox environment yourself, remove the `sandbox_resource_name` argument and replace `AGENT_ENGINE_RESOURCE_NAME` instead; the Agent Engine Code Execution Sandbox executor will then create a sandbox for you under that Agent Engine. Please make sure to clean up sandboxes after use; otherwise, they will keep consuming quota.
+
+
+## Sample prompt
+
+* Can you write a function that calculates the sum from 1 to 100.
+* The dataset is given as below. Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment Store 1,2023-06-01,1000,0,70,3.0,200,5 Store 2,2023-06-02,1200,1,80,3.5,210,6 Store 3,2023-06-03,1400,0,90,4.0,220,7 Store 4,2023-06-04,1600,1,70,4.5,230,8 Store 5,2023-06-05,1800,0,80,5.0,240,9 Store 6,2023-06-06,2000,1,90,5.5,250,10 Store 7,2023-06-07,2200,0,90,6.0,260,11 Plot a scatter plot showcasing the relationship between Weekly Sales and Temperature for each store, distinguishing stores with a Holiday Flag.
diff --git a/contributing/samples/agent_engine_code_execution/__init__.py b/contributing/samples/agent_engine_code_execution/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import agent
diff --git a/contributing/samples/agent_engine_code_execution/agent.py b/contributing/samples/agent_engine_code_execution/agent.py
@@ -0,0 +1,95 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Data science agent."""
+
+from google.adk.agents.llm_agent import Agent
+from google.adk.code_executors.agent_engine_sandbox_code_executor import AgentEngineSandboxCodeExecutor
+
+
+def base_system_instruction():
+  """Returns the base system instruction for the data science agent."""
+  # Not an f-string: braces below are literal and must not be doubled.
+  return """
+  # Guidelines
+
+  **Objective:** Assist the user in achieving their data analysis goals within the context of a Python Colab notebook, **with emphasis on avoiding assumptions and ensuring accuracy.** Reaching that goal can involve multiple steps. When you need to generate code, you **don't** need to solve the goal in one go. Only generate the next step at a time.
+
+  **Code Execution:** All code snippets provided will be executed within the Colab environment.
+
+  **Statefulness:** All code snippets are executed and the variables stays in the environment. You NEVER need to re-initialize variables. You NEVER need to reload files. You NEVER need to re-import libraries.
+
+  **Output Visibility:** Always print the output of code execution to visualize results, especially for data exploration and analysis. For example:
+    - To look a the shape of a pandas.DataFrame do:
+      ```tool_code
+      print(df.shape)
+      ```
+      The output will be presented to you as:
+      ```tool_outputs
+      (49, 7)
+
+      ```
+    - To display the result of a numerical computation:
+      ```tool_code
+      x = 10 ** 9 - 12 ** 5
+      print(f'{x=}')
+      ```
+      The output will be presented to you as:
+      ```tool_outputs
+      x=999751168
+
+      ```
+    - You **never** generate ```tool_outputs yourself.
+    - You can then use this output to decide on next steps.
+    - Print just variables (e.g., `print(f'{variable=}')`.
+
+  **No Assumptions:** **Crucially, avoid making assumptions about the nature of the data or column names.** Base findings solely on the data itself. Always use the information obtained from `explore_df` to guide your analysis.
+
+  **Available files:** Only use the files that are available as specified in the list of available files.
+
+  **Data in prompt:** Some queries contain the input data directly in the prompt. You have to parse that data into a pandas DataFrame. ALWAYS parse all the data. NEVER edit the data that are given to you.
+
+  **Answerability:** Some queries may not be answerable with the available data. In those cases, inform the user why you cannot process their query and suggest what type of data would be needed to fulfill their request.
+
+  """
+
+
+# Root agent of the sample; generated code runs in an Agent Engine sandbox.
+root_agent = Agent(
+    model="gemini-2.0-flash-001",
+    name="agent_engine_code_execution_agent",
+    instruction=base_system_instruction() + """
+
+
+You need to assist the user with their queries by looking at the data and the context in the conversation.
+You final answer should summarize the code and code execution relevant to the user query.
+
+You should include all pieces of data to answer the user query, such as the table from code execution results.
+If you cannot answer the question directly, you should follow the guidelines above to generate the next step.
+If the question can be answered directly without writing any code, you should do that.
+If you doesn't have enough data to answer the question, you should ask for clarification from the user.
+
+You should NEVER install any package on your own like `pip install ...`.
+When plotting trends, you should make sure to sort and order the data by the x-axis.
+
+
+""",
+    code_executor=AgentEngineSandboxCodeExecutor(
+        # Replace with your sandbox resource name if you already have one, e.g.
+        # "projects/123/locations/us-central1/reasoningEngines/456/sandboxEnvironments/789".
+        sandbox_resource_name="SANDBOX_RESOURCE_NAME",
+        # Ignored when sandbox_resource_name is set; else used to create one.
+        agent_engine_resource_name="AGENT_ENGINE_RESOURCE_NAME",
+    ),
+)
diff --git a/pyproject.toml b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
   "click>=8.1.8, <9.0.0",                                   # For CLI tools
   "fastapi>=0.115.0, <1.0.0",                               # FastAPI framework
   "google-api-python-client>=2.157.0, <3.0.0",              # Google API client discovery
-  "google-cloud-aiplatform[agent_engines]>=1.112.0, <2.0.0",# For VertexAI integrations, e.g. example store.
+  "google-cloud-aiplatform[agent_engines]>=1.121.0, <2.0.0",# For VertexAI integrations, e.g. example store.
   "google-cloud-bigtable>=2.32.0",                          # For Bigtable database
   "google-cloud-discoveryengine>=0.13.12, <0.14.0",         # For Discovery Engine Search Tool
   "google-cloud-secret-manager>=2.22.0, <3.0.0",            # Fetching secrets in RestAPI Tool
diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import logging
 
 from .base_code_executor import BaseCodeExecutor
@@ -29,6 +31,7 @@
     'VertexAiCodeExecutor',
     'ContainerCodeExecutor',
     'GkeCodeExecutor',
+    'AgentEngineSandboxCodeExecutor',
 ]
 
 
@@ -63,4 +66,14 @@ def __getattr__(name: str):
           'GkeCodeExecutor requires additional dependencies. '
           'Please install with: pip install "google-adk[extensions]"'
       ) from e
+  elif name == 'AgentEngineSandboxCodeExecutor':
+    try:
+      from .agent_engine_sandbox_code_executor import AgentEngineSandboxCodeExecutor
+
+      return AgentEngineSandboxCodeExecutor
+    except ImportError as e:
+      raise ImportError(
+          'AgentEngineSandboxCodeExecutor requires additional dependencies. '
+          'Please install with: pip install "google-adk[extensions]"'
+      ) from e
   raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
diff --git a/src/google/adk/code_executors/agent_engine_sandbox_code_executor.py b/src/google/adk/code_executors/agent_engine_sandbox_code_executor.py
@@ -0,0 +1,216 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+import logging
+import mimetypes
+import re
+from typing import Optional
+
+from typing_extensions import override
+import vertexai
+from vertexai import types
+
+from ..agents.invocation_context import InvocationContext
+from ..utils.feature_decorator import experimental
+from .base_code_executor import BaseCodeExecutor
+from .code_execution_utils import CodeExecutionInput
+from .code_execution_utils import CodeExecutionResult
+from .code_execution_utils import File
+
+logger = logging.getLogger('google_adk.' + __name__)
+
+
+@experimental
+class AgentEngineSandboxCodeExecutor(BaseCodeExecutor):
+  """A code executor that uses Agent Engine Code Execution Sandbox to execute code.
+
+  Attributes:
+    sandbox_resource_name: The resource name of the code execution sandbox
+      that code is sent to: either the one passed to the constructor or the
+      one created during initialization. Format:
+      projects/123/locations/us-central1/reasoningEngines/456/sandboxEnvironments/789
+  """
+
+  sandbox_resource_name: Optional[str] = None
+
+  def __init__(
+      self,
+      sandbox_resource_name: Optional[str] = None,
+      agent_engine_resource_name: Optional[str] = None,
+      **data,
+  ):
+    """Initializes the AgentEngineSandboxCodeExecutor.
+
+    Args:
+      sandbox_resource_name: The resource name of an existing code execution
+        sandbox to use. If not set, a new sandbox is created from
+        agent_engine_resource_name. Format:
+        projects/123/locations/us-central1/reasoningEngines/456/
+        sandboxEnvironments/789
+      agent_engine_resource_name: The resource name of the agent engine to use
+        to create the code execution sandbox. Ignored when
+        sandbox_resource_name is set. Format:
+        projects/123/locations/us-central1/reasoningEngines/456
+      **data: Additional keyword arguments to be passed to the base class.
+
+    Raises:
+      ValueError: If neither resource name is set, or if the resource name
+        that is used does not match the expected format.
+    """
+    super().__init__(**data)
+    sandbox_resource_name_pattern = r'^projects/([a-zA-Z0-9-_]+)/locations/([a-zA-Z0-9-_]+)/reasoningEngines/(\d+)/sandboxEnvironments/(\d+)$'
+    agent_engine_resource_name_pattern = r'^projects/([a-zA-Z0-9-_]+)/locations/([a-zA-Z0-9-_]+)/reasoningEngines/(\d+)$'
+
+    if sandbox_resource_name is not None:
+      self.sandbox_resource_name = sandbox_resource_name
+      self._project_id, self._location = (
+          self._get_project_id_and_location_from_resource_name(
+              sandbox_resource_name, sandbox_resource_name_pattern
+          )
+      )
+    elif agent_engine_resource_name is not None:
+      self._project_id, self._location = (
+          self._get_project_id_and_location_from_resource_name(
+              agent_engine_resource_name, agent_engine_resource_name_pattern
+          )
+      )
+      # TODO: Add TTL for sandbox creation after it is available in SDK.
+      operation = self._get_api_client().agent_engines.sandboxes.create(
+          spec={'code_execution_environment': {}},
+          name=agent_engine_resource_name,
+          config=types.CreateAgentEngineSandboxConfig(
+              display_name='default_sandbox'
+          ),
+      )
+      self.sandbox_resource_name = operation.response.name
+      # Log the generated name so the sandbox can be found and cleaned up.
+      logger.info('Created sandbox %s', self.sandbox_resource_name)
+    else:
+      raise ValueError(
+          'Either sandbox_resource_name or agent_engine_resource_name must be'
+          ' set.'
+      )
+
+  @override
+  def execute_code(
+      self,
+      invocation_context: InvocationContext,
+      code_execution_input: CodeExecutionInput,
+  ) -> CodeExecutionResult:
+    """Executes the code in the sandbox and collects its outputs.
+
+    Args:
+      invocation_context: The invocation context; not read by this method.
+      code_execution_input: The code to run and any input files to send
+        along with it.
+
+    Returns:
+      The stdout and stderr reported by the sandbox, plus every other output
+      as an output file.
+    """
+    input_data = {
+        'code': code_execution_input.code,
+    }
+    if code_execution_input.input_files:
+      input_data['files'] = [
+          {
+              'name': f.name,
+              'contents': f.content,
+              'mimeType': f.mime_type,
+          }
+          for f in code_execution_input.input_files
+      ]
+
+    code_execution_response = (
+        self._get_api_client().agent_engines.sandboxes.execute_code(
+            name=self.sandbox_resource_name,
+            input_data=input_data,
+        )
+    )
+    saved_files = []
+    stdout = ''
+    stderr = ''
+    for output in code_execution_response.outputs:
+      attributes = (
+          output.metadata.attributes if output.metadata is not None else None
+      )
+      has_file_name = attributes is not None and 'file_name' in attributes
+      if output.mime_type == 'application/json' and not has_file_name:
+        # A JSON output without a file name carries stdout and stderr.
+        # NOTE(review): a later such output overwrites an earlier one;
+        # confirm the API returns at most one.
+        json_output_data = json.loads(output.data.decode('utf-8'))
+        stdout = json_output_data.get('stdout', '')
+        stderr = json_output_data.get('stderr', '')
+      else:
+        # Every other output is returned to the caller as a file.
+        file_name = ''
+        if attributes is not None:
+          file_name = attributes.get('file_name', b'').decode('utf-8')
+        mime_type = output.mime_type
+        if not mime_type:
+          guessed_mime_type, _ = mimetypes.guess_type(file_name)
+          # guess_type() yields None for unknown names; fall back to a
+          # generic type so that File.mime_type is always a str.
+          mime_type = guessed_mime_type or 'application/octet-stream'
+        saved_files.append(
+            File(
+                name=file_name,
+                content=output.data,
+                mime_type=mime_type,
+            )
+        )
+
+    return CodeExecutionResult(
+        stdout=stdout,
+        stderr=stderr,
+        output_files=saved_files,
+    )
+
+  def _get_api_client(self):
+    """Instantiates an API client for the given project and location.
+
+    It needs to be instantiated inside each request so that the event loop
+    management can be properly propagated.
+
+    Returns:
+      An API client for the given project and location.
+    """
+    return vertexai.Client(project=self._project_id, location=self._location)
+
+  def _get_project_id_and_location_from_resource_name(
+      self, resource_name: str, pattern: str
+  ) -> tuple[str, str]:
+    """Extracts the project ID and location from the resource name.
+
+    Args:
+      resource_name: The resource name to parse.
+      pattern: A regular expression that must match the whole resource name;
+        its first two groups capture the project ID and the location.
+
+    Returns:
+      A (project_id, location) tuple.
+
+    Raises:
+      ValueError: If the resource name does not match the pattern.
+    """
+    match = re.fullmatch(pattern, resource_name)
+
+    if not match:
+      raise ValueError(f'resource name {resource_name} is not valid.')
+
+    return match.group(1), match.group(2)
diff --git a/src/google/adk/code_executors/code_execution_utils.py b/src/google/adk/code_executors/code_execution_utils.py
@@ -14,6 +14,8 @@
 
 """Utility functions for code execution."""
 
+from __future__ import annotations
+
 import base64
 import binascii
 import copy
@@ -34,9 +36,9 @@ class File:
   The name of the file with file extension (e.g., "file.csv").
   """
 
-  content: str
+  content: str | bytes
   """
-  The base64-encoded bytes of the file content.
+  The base64-encoded bytes of the file content or the original bytes of the file content.
   """
 
   mime_type: str = 'text/plain'
diff --git a/tests/unittests/code_executors/test_agent_engine_sandbox_code_executor.py b/tests/unittests/code_executors/test_agent_engine_sandbox_code_executor.py