diff --git a/examples/store/mariadb-similarity-search-gemini.php b/examples/store/mariadb-similarity-search-gemini.php
new file mode 100644
index 00000000..3c29374f
--- /dev/null
+++ b/examples/store/mariadb-similarity-search-gemini.php
@@ -0,0 +1,77 @@
+loadEnv(dirname(__DIR__, 2).'/.env');
+
+if (empty($_ENV['GOOGLE_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
+    echo 'Please set GOOGLE_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
+    exit(1);
+}
+
+// initialize the store
+$store = Store::fromDbal(
+    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
+    tableName: 'my_table',
+    indexName: 'my_index',
+    vectorFieldName: 'embedding',
+);
+
+// our data
+$movies = [
+    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
+    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
+    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
+];
+
+// create documents
+$documents = [];
+foreach ($movies as $movie) {
+    $documents[] = new TextDocument(
+        id: Uuid::v4(),
+        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
+        metadata: new Metadata($movie),
+    );
+}
+
+// initialize the table
+$store->initialize(['dimensions' => 768]);
+
+// create embeddings for documents
+$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
+$embeddings = new Embeddings(options: ['dimensions' => 768, 'task_type' => TaskType::SemanticSimilarity]);
+$indexer = new Indexer($platform, $embeddings, $store);
+$indexer->index($documents);
+
+$model = new Gemini(Gemini::GEMINI_2_FLASH_LITE);
+
+$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
+$toolbox = Toolbox::create($similaritySearch);
+$processor = new ChainProcessor($toolbox);
+$chain = new Chain($platform, $model, [$processor], [$processor]);
+
+$messages = new MessageBag(
+    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
+    Message::ofUser('Which movie fits the theme of the mafia?')
+);
+$response = $chain->call($messages);
+
+echo $response->getContent().\PHP_EOL;
diff --git a/src/Platform/Bridge/Google/Embeddings.php b/src/Platform/Bridge/Google/Embeddings.php
new file mode 100644
index 00000000..14f4053a
--- /dev/null
+++ b/src/Platform/Bridge/Google/Embeddings.php
@@ -0,0 +1,30 @@
+
+ */
+class Embeddings extends Model
+{
+    /** Supported dimensions: 3072, 1536, or 768 */
+    public const GEMINI_EMBEDDING_EXP_03_07 = 'gemini-embedding-exp-03-07';
+    /** Fixed 768 dimensions */
+    public const TEXT_EMBEDDING_004 = 'text-embedding-004';
+    /** Fixed 768 dimensions */
+    public const EMBEDDING_001 = 'embedding-001';
+
+    /**
+     * @param array{dimensions?: int, task_type?: TaskType|string} $options
+     */
+    public function __construct(string $name = self::GEMINI_EMBEDDING_EXP_03_07, array $options = [])
+    {
+        parent::__construct($name, [Capability::INPUT_MULTIPLE], $options);
+    }
+}
diff --git a/src/Platform/Bridge/Google/Embeddings/ModelClient.php b/src/Platform/Bridge/Google/Embeddings/ModelClient.php
new file mode 100644
index 00000000..400cfeb5
--- /dev/null
+++ b/src/Platform/Bridge/Google/Embeddings/ModelClient.php
@@ -0,0 +1,82 @@
+
+ */
+final readonly class ModelClient implements ModelClientInterface, ResponseConverterInterface
+{
+    public function __construct(
+        private HttpClientInterface $httpClient,
+        #[\SensitiveParameter]
+        private string $apiKey,
+    ) {
+    }
+
+    public function supports(Model $model): bool
+    {
+        return $model instanceof Embeddings;
+    }
+
+    /**
+     * Sends the payload to the batchEmbedContents endpoint, one request entry per input text.
+     *
+     * array_filter() is called without a callback, so optional fields that are null
+     * (or otherwise falsy) are left out of each entry.
+     */
+    public function request(Model $model, array|string $payload, array $options = []): ResponseInterface
+    {
+        $url = \sprintf('https://generativelanguage.googleapis.com/v1beta/models/%s:%s', $model->getName(), 'batchEmbedContents');
+        $modelOptions = $model->getOptions();
+
+        return $this->httpClient->request('POST', $url, [
+            'headers' => [
+                'x-goog-api-key' => $this->apiKey,
+            ],
+            'json' => [
+                'requests' => array_map(
+                    static fn (string $text) => array_filter([
+                        'model' => 'models/'.$model->getName(),
+                        'content' => ['parts' => [['text' => $text]]],
+                        'outputDimensionality' => $modelOptions['dimensions'] ?? null,
+                        'taskType' => $modelOptions['task_type'] ?? null,
+                        'title' => $options['title'] ?? null,
+                    ]),
+                    \is_array($payload) ? $payload : [$payload],
+                ),
+            ],
+        ]);
+    }
+
+    /**
+     * Turns the "embeddings" list of the decoded response into one Vector per entry.
+     *
+     * @throws RuntimeException when the decoded response has no non-null "embeddings" entry
+     */
+    public function convert(ResponseInterface $response, array $options = []): VectorResponse
+    {
+        $data = $response->toArray();
+
+        if (!isset($data['embeddings'])) {
+            throw new RuntimeException('Response does not contain data');
+        }
+
+        return new VectorResponse(
+            ...array_map(
+                static fn (array $item): Vector => new Vector($item['values']),
+                $data['embeddings'],
+            ),
+        );
+    }
+}
diff --git a/src/Platform/Bridge/Google/Embeddings/TaskType.php b/src/Platform/Bridge/Google/Embeddings/TaskType.php
new file mode 100644
index 00000000..29e9eff4
--- /dev/null
+++ b/src/Platform/Bridge/Google/Embeddings/TaskType.php
@@ -0,0 +1,25 @@
 $payload
-     * @param array $options
+     * @param array $payload
+     * @param array $options
      */
     public function request(Model $model, array|string $payload, array $options = []): ResponseInterface;
 }
diff --git a/tests/Platform/Bridge/Google/Embeddings/EmbeddingsModelClientTest.php b/tests/Platform/Bridge/Google/Embeddings/EmbeddingsModelClientTest.php
new file mode 100644
index 00000000..49fc8b81
--- /dev/null
+++ b/tests/Platform/Bridge/Google/Embeddings/EmbeddingsModelClientTest.php
@@ -0,0 +1,102 @@
+createStub(ResponseInterface::class);
+        $response
+            ->method('toArray')
+            ->willReturn(json_decode($this->getEmbeddingStub(), true));
+
+        $httpClient = $this->createMock(HttpClientInterface::class);
+        $httpClient->expects(self::once())
+            ->method('request')
+            ->with(
+                'POST',
+                'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-exp-03-07:batchEmbedContents',
+                [
+                    'headers' => ['x-goog-api-key' => 'test'],
+                    'json' => [
+                        'requests' => [
+                            [
+                                'model' => 'models/gemini-embedding-exp-03-07',
+                                'content' => ['parts' => [['text' => 'payload1']]],
+                                'outputDimensionality' => 1536,
+                                'taskType' => 'CLASSIFICATION',
+                            ],
+                            [
+                                'model' => 'models/gemini-embedding-exp-03-07',
+                                'content' => ['parts' => [['text' => 'payload2']]],
+                                'outputDimensionality' => 1536,
+                                'taskType' => 'CLASSIFICATION',
+                            ],
+                        ],
+                    ],
+                ],
+            )
+            ->willReturn($response);
+
+        $model = new Embeddings(Embeddings::GEMINI_EMBEDDING_EXP_03_07, ['dimensions' => 1536, 'task_type' => 'CLASSIFICATION']);
+
+        $httpResponse = (new ModelClient($httpClient, 'test'))->request($model, ['payload1', 'payload2']);
+        self::assertSame(json_decode($this->getEmbeddingStub(), true), $httpResponse->toArray());
+    }
+
+    #[Test]
+    public function itConvertsAResponseToAVectorResponse(): void
+    {
+        $response = $this->createStub(ResponseInterface::class);
+        $response
+            ->method('toArray')
+            ->willReturn(json_decode($this->getEmbeddingStub(), true));
+
+        $httpClient = $this->createMock(HttpClientInterface::class);
+
+        $vectorResponse = (new ModelClient($httpClient, 'test'))->convert($response);
+        $convertedContent = $vectorResponse->getContent();
+
+        self::assertCount(2, $convertedContent);
+
+        self::assertSame([0.3, 0.4, 0.4], $convertedContent[0]->getData());
+        self::assertSame([0.0, 0.0, 0.2], $convertedContent[1]->getData());
+    }
+
+    private function getEmbeddingStub(): string
+    {
+        return <<<'JSON'
+            {
+              "embeddings": [
+                {
+                  "values": [0.3, 0.4, 0.4]
+                },
+                {
+                  "values": [0.0, 0.0, 0.2]
+                }
+              ]
+            }
+            JSON;
+    }
+}