Skip to content

feat: add support for image generation using gpt-image-1 #971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
62 changes: 61 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This library provides unofficial Go clients for [OpenAI API](https://platform.op

* ChatGPT 4o, o1
* GPT-3, GPT-4
* DALL·E 2, DALL·E 3
* DALL·E 2, DALL·E 3, GPT Image 1
* Whisper

## Installation
Expand Down Expand Up @@ -357,6 +357,66 @@ func main() {
```
</details>

<details>
<summary>GPT Image 1 image generation</summary>

```go
package main

import (
"context"
"encoding/base64"
"fmt"
"os"

openai "github.com/sashabaranov/go-openai"
)

// main generates one image with the gpt-image-1 model, prints the returned
// base64 payload, decodes it, and writes the bytes to generated_image.jpg.
// Every failure is reported on stdout and ends the program early.
func main() {
	c := openai.NewClient("your token")
	ctx := context.Background()

	req := openai.ImageRequest{
		Prompt:            "Parrot on a skateboard performing a trick. Large bold text \"SKATE MASTER\" banner at the bottom of the image. Cartoon style, natural light, high detail, 1:1 aspect ratio.",
		Background:        openai.CreateImageBackgroundOpaque,
		Model:             openai.CreateImageModelGptImage1,
		Size:              openai.CreateImageSize1024x1024,
		N:                 1,
		Quality:           openai.CreateImageQualityLow,
		OutputCompression: 100,
		OutputFormat:      openai.CreateImageOutputFormatJPEG,
		// Moderation: 	     openai.CreateImageModerationLow,
		// User: 		     "",
	}

	resp, err := c.CreateImage(ctx, req)
	if err != nil {
		fmt.Printf("Image creation error: %v\n", err)
		return
	}

	// Guard the Data[0] accesses below: an empty result would otherwise panic.
	if len(resp.Data) == 0 {
		fmt.Println("Image creation returned no data")
		return
	}

	fmt.Println("Image Base64:", resp.Data[0].B64JSON)

	// Decode the base64 data
	imgBytes, err := base64.StdEncoding.DecodeString(resp.Data[0].B64JSON)
	if err != nil {
		fmt.Printf("Base64 decode error: %v\n", err)
		return
	}

	// Write image to file; the .jpg extension matches the JPEG output format
	// requested above.
	outputPath := "generated_image.jpg"
	err = os.WriteFile(outputPath, imgBytes, 0644)
	if err != nil {
		fmt.Printf("Failed to write image file: %v\n", err)
		return
	}

	fmt.Printf("The image was saved as %s\n", outputPath)
}
```
</details>

<details>
<summary>Configuring proxy</summary>

Expand Down
2 changes: 1 addition & 1 deletion examples/images/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ func main() {
return
}
fmt.Println(respUrl.Data[0].URL)
}
}
75 changes: 64 additions & 11 deletions image.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,51 +13,101 @@ const (
CreateImageSize256x256 = "256x256"
CreateImageSize512x512 = "512x512"
CreateImageSize1024x1024 = "1024x1024"

// dall-e-3 supported only.
CreateImageSize1792x1024 = "1792x1024"
CreateImageSize1024x1792 = "1024x1792"

// gpt-image-1 supported only.
CreateImageSize1536x1024 = "1536x1024" // Landscape
CreateImageSize1024x1536 = "1024x1536" // Portrait
)

// Response format values; presumably intended for ImageRequest.ResponseFormat
// (JSON field "response_format") — confirm against callers.
const (
CreateImageResponseFormatURL = "url"
// dall-e-2 and dall-e-3 only.
CreateImageResponseFormatB64JSON = "b64_json"
CreateImageResponseFormatURL = "url"
)

// Model identifiers, used as ImageRequest.Model values.
const (
CreateImageModelDallE2 = "dall-e-2"
CreateImageModelDallE3 = "dall-e-3"
CreateImageModelDallE2 = "dall-e-2"
CreateImageModelDallE3 = "dall-e-3"
CreateImageModelGptImage1 = "gpt-image-1"
)

// Image quality levels, used as ImageRequest.Quality values.
const (
// NOTE(review): which models accept "hd" and "standard" is not stated here;
// presumably the DALL-E models — confirm against the API reference.
CreateImageQualityHD = "hd"
CreateImageQualityStandard = "standard"

// gpt-image-1 only.
CreateImageQualityHigh = "high"
CreateImageQualityMedium = "medium"
CreateImageQualityLow = "low"
)

// Image style values; presumably intended for ImageRequest.Style — confirm
// against callers.
const (
// dall-e-3 only.
CreateImageStyleVivid = "vivid"
CreateImageStyleNatural = "natural"
)

// Background modes, used as ImageRequest.Background values.
const (
// gpt-image-1 only.
CreateImageBackgroundTransparent = "transparent"
CreateImageBackgroundOpaque = "opaque"
)

// Moderation levels, used as ImageRequest.Moderation values. Only "low" is
// declared here.
const (
// gpt-image-1 only.
CreateImageModerationLow = "low"
)

// Output encodings, used as ImageRequest.OutputFormat values.
const (
// gpt-image-1 only.
CreateImageOutputFormatPNG = "png"
CreateImageOutputFormatJPEG = "jpeg"
CreateImageOutputFormatWEBP = "webp"
)

// ImageRequest represents the request structure for the image API.
//
// Every field is tagged omitempty, so zero values (empty strings and 0) are
// left out of the JSON body when the struct is marshalled by encoding/json.
type ImageRequest struct {
Prompt string `json:"prompt,omitempty"`
Model string `json:"model,omitempty"`
N int `json:"n,omitempty"`
Quality string `json:"quality,omitempty"`
Size string `json:"size,omitempty"`
Style string `json:"style,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
User string `json:"user,omitempty"`
Prompt string `json:"prompt,omitempty"`
Model string `json:"model,omitempty"`
N int `json:"n,omitempty"`
Quality string `json:"quality,omitempty"`
Size string `json:"size,omitempty"`
Style string `json:"style,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
User string `json:"user,omitempty"`
// Background takes one of the CreateImageBackground* values.
Background string `json:"background,omitempty"`
// Moderation takes one of the CreateImageModeration* values.
Moderation string `json:"moderation,omitempty"`
// NOTE(review): because of omitempty an explicit OutputCompression of 0 is
// never sent; confirm whether 0 is a meaningful value for the API.
OutputCompression int `json:"output_compression,omitempty"`
// OutputFormat takes one of the CreateImageOutputFormat* values.
OutputFormat string `json:"output_format,omitempty"`
}

// ImageResponse represents a response structure for image API.
type ImageResponse struct {
Created int64 `json:"created,omitempty"`
Data []ImageResponseDataInner `json:"data,omitempty"`
// Usage holds the token accounting for the request.
// NOTE(review): omitempty has no effect on a struct-typed field in
// encoding/json, so "usage" is always emitted when this type is marshalled.
Usage ImageResponseUsage `json:"usage,omitempty"`

// httpHeader is an embedded type declared elsewhere in the package.
httpHeader
}

// ImageResponseInputTokensDetails represents the token breakdown for input tokens.
// It is the type of ImageResponseUsage.InputTokensDetails.
type ImageResponseInputTokensDetails struct {
TextTokens int `json:"text_tokens,omitempty"`
ImageTokens int `json:"image_tokens,omitempty"`
}

// ImageResponseUsage represents the token usage information for image API.
// It is the type of ImageResponse.Usage.
type ImageResponseUsage struct {
TotalTokens int `json:"total_tokens,omitempty"`
InputTokens int `json:"input_tokens,omitempty"`
OutputTokens int `json:"output_tokens,omitempty"`
// NOTE(review): omitempty has no effect on a struct-typed field in
// encoding/json, so "input_tokens_details" is always emitted on marshal.
InputTokensDetails ImageResponseInputTokensDetails `json:"input_tokens_details,omitempty"`
}

// ImageResponseDataInner represents a response data structure for image API.
type ImageResponseDataInner struct {
URL string `json:"url,omitempty"`
Expand Down Expand Up @@ -91,6 +141,8 @@ type ImageEditRequest struct {
N int `json:"n,omitempty"`
Size string `json:"size,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
Quality string `json:"quality,omitempty"`
User string `json:"user,omitempty"`
}

// CreateEditImage - API call to create an image. This is the main endpoint of the DALL-E API.
Expand Down Expand Up @@ -159,6 +211,7 @@ type ImageVariRequest struct {
N int `json:"n,omitempty"`
Size string `json:"size,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
User string `json:"user,omitempty"`
}

// CreateVariImage - API call to create an image variation. This is the main endpoint of the DALL-E API.
Expand Down
Loading