feat(langchain): add support for image generation tool

christian-bromann · christian-bromann · commit 62eff6b42d97 · 2025-12-08T15:40:47.000-08:00
diff --git a/.changeset/tidy-ligers-rhyme.md b/.changeset/tidy-ligers-rhyme.md
@@ -0,0 +1,5 @@
+---
+"@langchain/openai": minor
+---
+
+feat(langchain): add support for image generation tool
diff --git a/libs/providers/langchain-openai/README.md b/libs/providers/langchain-openai/README.md
@@ -292,6 +292,122 @@ Filter operators: `eq` (equals), `ne` (not equal), `gt` (greater than), `gte` (g
 
 For more information, see [OpenAI's File Search Documentation](https://platform.openai.com/docs/guides/tools-file-search).
 
+### Image Generation Tool
+
+The Image Generation tool allows models to generate or edit images using text prompts and optional image inputs. It leverages the GPT Image model and automatically optimizes text inputs for improved performance.
+
+Use Image Generation for:
+
+- **Creating images from text**: Generate images from detailed text descriptions
+- **Editing existing images**: Modify images based on text instructions
+- **Multi-turn image editing**: Iteratively refine images across conversation turns
+- **Various output formats**: Support for PNG, JPEG, and WebP formats
+
+```typescript
+import { ChatOpenAI, tools } from "@langchain/openai";
+
+const model = new ChatOpenAI({ model: "gpt-4o" });
+
+// Basic usage - generate an image
+const response = await model.invoke(
+  "Generate an image of a gray tabby cat hugging an otter with an orange scarf",
+  { tools: [tools.imageGeneration()] }
+);
+
+// Access the generated image (base64-encoded)
+const imageOutput = response.additional_kwargs.tool_outputs?.find(
+  (output) => output.type === "image_generation_call"
+);
+if (imageOutput?.result) {
+  const fs = await import("fs");
+  fs.writeFileSync("output.png", Buffer.from(imageOutput.result, "base64"));
+}
+```
+
+**Custom size and quality** - Configure output dimensions and quality:
+
+```typescript
+const response = await model.invoke("Draw a beautiful sunset over mountains", {
+  tools: [
+    tools.imageGeneration({
+      size: "1536x1024", // Landscape format (also: "1024x1024", "1024x1536", "auto")
+      quality: "high", // Quality level (also: "low", "medium", "auto")
+    }),
+  ],
+});
+```
+
+**Output format and compression** - Choose format and compression level:
+
+```typescript
+const response = await model.invoke("Create a product photo", {
+  tools: [
+    tools.imageGeneration({
+      outputFormat: "jpeg", // Format (also: "png", "webp")
+      outputCompression: 90, // Compression 0-100 (for JPEG/WebP)
+    }),
+  ],
+});
+```
+
+**Transparent background** - Generate images with transparency:
+
+```typescript
+const response = await model.invoke(
+  "Create a logo with transparent background",
+  {
+    tools: [
+      tools.imageGeneration({
+        background: "transparent", // Background type (also: "opaque", "auto")
+        outputFormat: "png",
+      }),
+    ],
+  }
+);
+```
+
+**Streaming with partial images** - Get visual feedback during generation:
+
+```typescript
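+// Partial images are a streaming feature, so call `stream` rather than `invoke`
+const stream = await model.stream("Draw a detailed fantasy castle", {
+  tools: [
+    tools.imageGeneration({
+      partialImages: 2, // Number of partial images (0-3)
+    }),
+  ],
+});
+
+for await (const chunk of stream) {
+  // Handle each chunk as it arrives while the image is being generated
+  console.log(chunk);
+}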
+```
+
+**Force image generation** - Ensure the model uses the image generation tool:
+
+```typescript
+const response = await model.invoke("A serene lake at dawn", {
+  tools: [tools.imageGeneration()],
+  tool_choice: { type: "image_generation" },
+});
+```
+
+**Multi-turn editing** - Refine images across conversation turns:
+
+```typescript
+import { HumanMessage } from "@langchain/core/messages";
+
+// First turn: generate initial image
+const response1 = await model.invoke("Draw a red car", {
+  tools: [tools.imageGeneration()],
+});
+
+// Second turn: edit the image
+const response2 = await model.invoke(
+  [response1, new HumanMessage("Now change the car color to blue")],
+  { tools: [tools.imageGeneration()] }
+);
+```
+
+> **Prompting tips**: Use terms like "draw" or "edit" for best results. For combining images, say "edit the first image by adding this element" instead of "combine" or "merge".
+
+Supported models: `gpt-4o`, `gpt-4o-mini`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `o3`
+
+For more information, see [OpenAI's Image Generation Documentation](https://platform.openai.com/docs/guides/tools-image-generation).
+
 ## Embeddings
 
 This package also adds support for OpenAI's embeddings model.
diff --git a/libs/providers/langchain-openai/src/tools/imageGeneration.ts b/libs/providers/langchain-openai/src/tools/imageGeneration.ts
@@ -0,0 +1,234 @@
+import { OpenAI as OpenAIClient } from "openai";
+
+/**
+ * Optional mask for inpainting. Allows you to specify areas of the image
+ * that should be regenerated.
+ *
+ * Both fields are optional. When a mask is supplied to `imageGeneration`,
+ * `imageUrl` and `fileId` are forwarded as the `image_url` and `file_id`
+ * members of the tool's `input_image_mask` field.
+ */
+export interface ImageGenerationInputMask {
+  /**
+   * Base64-encoded mask image URL.
+   * Forwarded to the API as `image_url`.
+   */
+  imageUrl?: string;
+  /**
+   * File ID for the mask image (uploaded via OpenAI File API).
+   * Forwarded to the API as `file_id`.
+   */
+  fileId?: string;
+}
+
+/**
+ * Options for the Image Generation tool.
+ *
+ * Every option is optional. Property names are camelCase here; the
+ * `imageGeneration` factory copies them onto the snake_case fields of the
+ * tool definition (for example `outputFormat` becomes `output_format`).
+ * Options that are not provided are passed through as `undefined`.
+ */
+export interface ImageGenerationOptions {
+  /**
+   * Background type for the generated image.
+   * - `transparent`: Generate image with transparent background
+   * - `opaque`: Generate image with opaque background
+   * - `auto`: Let the model decide based on the prompt
+   * @default "auto"
+   */
+  background?: "transparent" | "opaque" | "auto";
+
+  /**
+   * Control how much effort the model will exert to match the style and features,
+   * especially facial features, of input images. This parameter is only supported
+   * for `gpt-image-1`. Unsupported for `gpt-image-1-mini`.
+   * - `high`: Higher fidelity to input images
+   * - `low`: Lower fidelity to input images
+   *
+   * Forwarded to the API as `input_fidelity`.
+   * @default "low"
+   */
+  inputFidelity?: "high" | "low";
+
+  /**
+   * Optional mask for inpainting. Use this to specify areas of an image
+   * that should be regenerated.
+   *
+   * Forwarded to the API as `input_image_mask`.
+   */
+  inputImageMask?: ImageGenerationInputMask;
+
+  /**
+   * The image generation model to use.
+   * @default "gpt-image-1"
+   */
+  model?: "gpt-image-1" | "gpt-image-1-mini";
+
+  /**
+   * Moderation level for the generated image.
+   * - `auto`: Standard moderation
+   * - `low`: Less restrictive moderation
+   * @default "auto"
+   */
+  moderation?: "auto" | "low";
+
+  /**
+   * Compression level for the output image (0-100).
+   * Only applies to JPEG and WebP formats.
+   *
+   * Forwarded to the API as `output_compression`.
+   * @default 100
+   */
+  outputCompression?: number;
+
+  /**
+   * The output format of the generated image.
+   *
+   * Forwarded to the API as `output_format`.
+   * @default "png"
+   */
+  outputFormat?: "png" | "webp" | "jpeg";
+
+  /**
+   * Number of partial images to generate in streaming mode (0-3).
+   * When set, the model will return partial images as they are generated,
+   * providing faster visual feedback.
+   *
+   * Forwarded to the API as `partial_images`.
+   * @default 0
+   */
+  partialImages?: number;
+
+  /**
+   * The quality of the generated image.
+   * - `low`: Faster generation, lower quality
+   * - `medium`: Balanced generation time and quality
+   * - `high`: Slower generation, higher quality
+   * - `auto`: Let the model decide based on the prompt
+   * @default "auto"
+   */
+  quality?: "low" | "medium" | "high" | "auto";
+
+  /**
+   * The size of the generated image.
+   * - `1024x1024`: Square format
+   * - `1024x1536`: Portrait format
+   * - `1536x1024`: Landscape format
+   * - `auto`: Let the model decide based on the prompt
+   * @default "auto"
+   */
+  size?: "1024x1024" | "1024x1536" | "1536x1024" | "auto";
+}
+
+/**
+ * OpenAI Image Generation tool type for the Responses API.
+ *
+ * This is an alias of the OpenAI SDK's `Responses.Tool.ImageGeneration` type
+ * and is the shape returned by `imageGeneration`.
+ */
+export type ImageGenerationTool = OpenAIClient.Responses.Tool.ImageGeneration;
+
+/**
+ * Translates a camelCase input mask into the snake_case `input_image_mask`
+ * shape of the tool definition. Yields `undefined` when no mask was given.
+ */
+function convertInputImageMask(
+  mask: ImageGenerationInputMask | undefined
+): ImageGenerationTool["input_image_mask"] {
+  return mask
+    ? { image_url: mask.imageUrl, file_id: mask.fileId }
+    : undefined;
+}
+
+/**
+ * Creates an Image Generation tool that allows models to generate or edit images
+ * using text prompts and optional image inputs.
+ *
+ * The image generation tool leverages the GPT Image model and automatically
+ * optimizes text inputs for improved performance. When included in a request,
+ * the model can decide when and how to generate images as part of the conversation.
+ *
+ * **Key Features**:
+ * - Generate images from text descriptions
+ * - Edit existing images with text instructions
+ * - Multi-turn image editing by referencing previous responses
+ * - Configurable output options (size, quality, format)
+ * - Streaming support for partial image generation
+ *
+ * **Prompting Tips**:
+ * - Use terms like "draw" or "edit" in your prompt for best results
+ * - For combining images, say "edit the first image by adding this element" instead of "combine"
+ *
+ * @see {@link https://platform.openai.com/docs/guides/tools-image-generation | OpenAI Image Generation Documentation}
+ *
+ * @param options - Configuration options for the Image Generation tool
+ * @returns An Image Generation tool definition to be passed to the OpenAI Responses API
+ *
+ * @example
+ * ```typescript
+ * import { ChatOpenAI, tools } from "@langchain/openai";
+ *
+ * const model = new ChatOpenAI({ model: "gpt-4o" });
+ *
+ * // Basic usage - generate an image
+ * const response = await model.invoke(
+ *   "Generate an image of a gray tabby cat hugging an otter with an orange scarf",
+ *   { tools: [tools.imageGeneration()] }
+ * );
+ *
+ * // Access the generated image
+ * const imageData = response.additional_kwargs.tool_outputs?.find(
+ *   (output) => output.type === "image_generation_call"
+ * );
+ * if (imageData?.result) {
+ *   // imageData.result contains the base64-encoded image
+ *   const fs = await import("fs");
+ *   fs.writeFileSync("output.png", Buffer.from(imageData.result, "base64"));
+ * }
+ *
+ * // With custom options
+ * const response = await model.invoke(
+ *   "Draw a beautiful sunset over mountains",
+ *   {
+ *     tools: [tools.imageGeneration({
+ *       size: "1536x1024",      // Landscape format
+ *       quality: "high",        // Higher quality output
+ *       outputFormat: "jpeg",   // JPEG format
+ *       outputCompression: 90,  // 90% compression
+ *     })]
+ *   }
+ * );
+ *
+ * // With transparent background
+ * const response = await model.invoke(
+ *   "Create a logo with a transparent background",
+ *   {
+ *     tools: [tools.imageGeneration({
+ *       background: "transparent",
+ *       outputFormat: "png",
+ *     })]
+ *   }
+ * );
+ *
+ * // Force the model to use image generation
+ * const response = await model.invoke(
+ *   "A serene lake at dawn",
+ *   {
+ *     tools: [tools.imageGeneration()],
+ *     tool_choice: { type: "image_generation" },
+ *   }
+ * );
+ *
+ * // Enable streaming with partial images
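+ * // (partial images are a streaming feature, so use `stream` rather than `invoke`)
+ * const stream = await model.stream(
+ *   "Draw a detailed fantasy castle",
+ *   {
+ *     tools: [tools.imageGeneration({
+ *       partialImages: 2,  // Get 2 partial images during generation
+ *     })]
+ *   }
+ * );
+ * for await (const chunk of stream) {
+ *   // Handle each chunk as it arrives while the image is being generated
+ * }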
+ * ```
+ *
+ * @remarks
+ * - Supported models: gpt-4o, gpt-4o-mini, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3
+ * - Images are generated with `gpt-image-1` by default; set the `model` option to use `gpt-image-1-mini` instead
+ * - The model will automatically revise prompts for improved performance
+ * - Access the revised prompt via `revised_prompt` field in the output
+ * - Multi-turn editing is supported by passing previous response messages
+ */
+export function imageGeneration(
+  options?: ImageGenerationOptions
+): ImageGenerationTool {
+  // A missing options argument behaves like an empty one: every field below
+  // then resolves to `undefined`.
+  const resolved: ImageGenerationOptions = options ?? {};
+  const {
+    background,
+    inputFidelity,
+    inputImageMask,
+    model,
+    moderation,
+    outputCompression,
+    outputFormat,
+    partialImages,
+    quality,
+    size,
+  } = resolved;
+
+  // Copy the camelCase options onto the snake_case fields of the tool definition.
+  const tool: ImageGenerationTool = {
+    type: "image_generation",
+    background,
+    input_fidelity: inputFidelity,
+    input_image_mask: convertInputImageMask(inputImageMask),
+    model,
+    moderation,
+    output_compression: outputCompression,
+    output_format: outputFormat,
+    partial_images: partialImages,
+    quality,
+    size,
+  };
+  return tool;
+}
diff --git a/libs/providers/langchain-openai/src/tools/index.ts b/libs/providers/langchain-openai/src/tools/index.ts
@@ -37,9 +37,17 @@ export type {
   FileSearchHybridSearchWeights,
 } from "./fileSearch.js";
 
+import { imageGeneration } from "./imageGeneration.js";
+export type {
+  ImageGenerationTool,
+  ImageGenerationOptions,
+  ImageGenerationInputMask,
+} from "./imageGeneration.js";
+
 export const tools = {
   webSearch,
   mcp,
   codeInterpreter,
   fileSearch,
+  imageGeneration, // factory for the OpenAI `image_generation` tool definition
 };
diff --git a/libs/providers/langchain-openai/src/tools/tests/imageGeneration.int.test.ts b/libs/providers/langchain-openai/src/tools/tests/imageGeneration.int.test.ts
diff --git a/libs/providers/langchain-openai/src/tools/tests/imageGeneration.test.ts b/libs/providers/langchain-openai/src/tools/tests/imageGeneration.test.ts

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"@langchain/openai": minor
 +---
++
 +feat(langchain): add support for image generation tool