From 8064e5136359e9b54612aa605bdec66f4406f491 Mon Sep 17 00:00:00 2001 From: Christopher Hertel Date: Fri, 14 Mar 2025 20:58:45 +0100 Subject: [PATCH] feat: add whisper azure support --- README.md | 2 +- src/Bridge/Azure/OpenAI/PlatformFactory.php | 6 +- .../Azure/OpenAI/WhisperModelClient.php | 58 +++++++++++++++++++ src/Bridge/OpenAI/PlatformFactory.php | 6 +- src/Bridge/OpenAI/Whisper/ModelClient.php | 14 +---- .../OpenAI/Whisper/ResponseConverter.php | 27 +++++++++ 6 files changed, 95 insertions(+), 18 deletions(-) create mode 100644 src/Bridge/Azure/OpenAI/WhisperModelClient.php create mode 100644 src/Bridge/OpenAI/Whisper/ResponseConverter.php diff --git a/README.md b/README.md index d6278dac..bdc4beac 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ $embeddings = new Embeddings(); * [Voyage's Embeddings](https://docs.voyageai.com/docs/embeddings) with [Voyage](https://www.voyageai.com/) as Platform * Other Models * [OpenAI's Dall·E](https://platform.openai.com/docs/guides/image-generation) with [OpenAI](https://platform.openai.com/docs/overview) as Platform - * [OpenAI's Whisper](https://platform.openai.com/docs/guides/speech-to-text) with [OpenAI](https://platform.openai.com/docs/overview) as Platform + * [OpenAI's Whisper](https://platform.openai.com/docs/guides/speech-to-text) with [OpenAI](https://platform.openai.com/docs/overview) and [Azure](https://learn.microsoft.com/azure/ai-services/openai/concepts/models) as Platform See [issue #28](https://github.com/php-llm/llm-chain/issues/28) for planned support of other models and platforms. 
diff --git a/src/Bridge/Azure/OpenAI/PlatformFactory.php b/src/Bridge/Azure/OpenAI/PlatformFactory.php index 13215171..25b8719f 100644 --- a/src/Bridge/Azure/OpenAI/PlatformFactory.php +++ b/src/Bridge/Azure/OpenAI/PlatformFactory.php @@ -6,6 +6,7 @@ use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings; use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ResponseConverter; +use PhpLlm\LlmChain\Bridge\OpenAI\Whisper; use PhpLlm\LlmChain\Platform; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -23,10 +24,11 @@ public static function create( $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); $embeddingsResponseFactory = new EmbeddingsModelClient($httpClient, $baseUrl, $deployment, $apiVersion, $apiKey); $GPTResponseFactory = new GPTModelClient($httpClient, $baseUrl, $deployment, $apiVersion, $apiKey); + $whisperResponseFactory = new WhisperModelClient($httpClient, $baseUrl, $deployment, $apiVersion, $apiKey); return new Platform( - [$GPTResponseFactory, $embeddingsResponseFactory], - [new ResponseConverter(), new Embeddings\ResponseConverter()], + [$GPTResponseFactory, $embeddingsResponseFactory, $whisperResponseFactory], + [new ResponseConverter(), new Embeddings\ResponseConverter(), new Whisper\ResponseConverter()], ); } } diff --git a/src/Bridge/Azure/OpenAI/WhisperModelClient.php b/src/Bridge/Azure/OpenAI/WhisperModelClient.php new file mode 100644 index 00000000..7e4fc18e --- /dev/null +++ b/src/Bridge/Azure/OpenAI/WhisperModelClient.php @@ -0,0 +1,58 @@ +httpClient = $httpClient instanceof EventSourceHttpClient ? 
$httpClient : new EventSourceHttpClient($httpClient); + Assert::notStartsWith($baseUrl, 'http://', 'The base URL must not contain the protocol.'); + Assert::notStartsWith($baseUrl, 'https://', 'The base URL must not contain the protocol.'); + Assert::stringNotEmpty($deployment, 'The deployment must not be empty.'); + Assert::stringNotEmpty($apiVersion, 'The API version must not be empty.'); + Assert::stringNotEmpty($apiKey, 'The API key must not be empty.'); + } + + public function supports(Model $model, object|array|string $input): bool + { + return $model instanceof Whisper && $input instanceof File; + } + + public function request(Model $model, object|array|string $input, array $options = []): ResponseInterface + { + assert($input instanceof File); + + $url = sprintf('https://%s/openai/deployments/%s/audio/translations', $this->baseUrl, $this->deployment); + + return $this->httpClient->request('POST', $url, [ + 'headers' => [ + 'api-key' => $this->apiKey, + 'Content-Type' => 'multipart/form-data', + ], + 'query' => ['api-version' => $this->apiVersion], + 'body' => array_merge($options, $model->getOptions(), [ + 'model' => $model->getVersion(), + 'file' => fopen($input->path, 'r'), + ]), + ]); + } +} diff --git a/src/Bridge/OpenAI/PlatformFactory.php b/src/Bridge/OpenAI/PlatformFactory.php index 7453bf4b..dbaf8dfc 100644 --- a/src/Bridge/OpenAI/PlatformFactory.php +++ b/src/Bridge/OpenAI/PlatformFactory.php @@ -10,6 +10,7 @@ use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ModelClient as GPTModelClient; use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ResponseConverter as GPTResponseConverter; use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\ModelClient as WhisperModelClient; +use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\ResponseConverter as WhisperResponseConverter; use PhpLlm\LlmChain\Platform; use Symfony\Component\HttpClient\EventSourceHttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -24,20 +25,19 @@ public static function create( $httpClient = $httpClient instanceof 
EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); $dallEModelClient = new DallEModelClient($httpClient, $apiKey); - $whisperModelClient = new WhisperModelClient($httpClient, $apiKey); return new Platform( [ new GPTModelClient($httpClient, $apiKey), new EmbeddingsModelClient($httpClient, $apiKey), $dallEModelClient, - $whisperModelClient, + new WhisperModelClient($httpClient, $apiKey), ], [ new GPTResponseConverter(), new EmbeddingsResponseConverter(), $dallEModelClient, - $whisperModelClient, + new WhisperResponseConverter(), ], ); } diff --git a/src/Bridge/OpenAI/Whisper/ModelClient.php b/src/Bridge/OpenAI/Whisper/ModelClient.php index 304674f8..93b174ef 100644 --- a/src/Bridge/OpenAI/Whisper/ModelClient.php +++ b/src/Bridge/OpenAI/Whisper/ModelClient.php @@ -6,15 +6,12 @@ use PhpLlm\LlmChain\Bridge\OpenAI\Whisper; use PhpLlm\LlmChain\Model\Model; -use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse; -use PhpLlm\LlmChain\Model\Response\TextResponse; -use PhpLlm\LlmChain\Platform\ModelClient as PlatformResponseFactory; -use PhpLlm\LlmChain\Platform\ResponseConverter as PlatformResponseConverter; +use PhpLlm\LlmChain\Platform\ModelClient as BaseModelClient; use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\ResponseInterface; use Webmozart\Assert\Assert; -final readonly class ModelClient implements PlatformResponseFactory, PlatformResponseConverter +final readonly class ModelClient implements BaseModelClient { public function __construct( private HttpClientInterface $httpClient, @@ -42,11 +39,4 @@ public function request(Model $model, object|array|string $input, array $options ]), ]); } - - public function convert(ResponseInterface $response, array $options = []): LlmResponse - { - $data = $response->toArray(); - - return new TextResponse($data['text']); - } } diff --git a/src/Bridge/OpenAI/Whisper/ResponseConverter.php b/src/Bridge/OpenAI/Whisper/ResponseConverter.php new file mode 
100644 index 00000000..747aa469 --- /dev/null +++ b/src/Bridge/OpenAI/Whisper/ResponseConverter.php @@ -0,0 +1,27 @@ +toArray(); + + return new TextResponse($data['text']); + } +}