diff --git a/README.md b/README.md index 9613e67b..cc5997d0 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Supported Stores * [x] [ChromaDB](https://trychroma.com) * [x] [Azure AI Search](https://azure.microsoft.com/en-us/products/ai-services/ai-search) * [x] [MongoDB Atlas Search](https://mongodb.com/products/platform/atlas-vector-search) -* [ ] [Pinecone](https://pinecone.io) +* [x] [Pinecone](https://pinecone.io) Provided Tools -------------- diff --git a/composer.json b/composer.json index 83f1fc21..dbaf6029 100644 --- a/composer.json +++ b/composer.json @@ -38,8 +38,9 @@ "symfony/var-dumper": "^6.4 || ^7.1" }, "suggest": { - "mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.", "codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.", + "mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.", + "probots-io/pinecone-php": "For using the Pinecone as retrieval vector store.", "symfony/clock": "For using the clock tool.", "symfony/css-selector": "For using the YouTube transcription tool.", "symfony/dom-crawler": "For using the YouTube transcription tool." diff --git a/src/Document/Vector.php b/src/Document/Vector.php index 63370530..fdf7d12e 100644 --- a/src/Document/Vector.php +++ b/src/Document/Vector.php @@ -9,7 +9,7 @@ final class Vector /** * @param list $data */ - private function __construct( + public function __construct( private readonly array $data, private ?int $dimensions = null, ) { diff --git a/src/Store/MongoDB/Store.php b/src/Store/MongoDB/Store.php index 16836768..db8228dc 100644 --- a/src/Store/MongoDB/Store.php +++ b/src/Store/MongoDB/Store.php @@ -126,7 +126,7 @@ public function query(Vector $vector, array $options = []): array foreach ($results as $result) { $documents[] = Document::fromVector( - Vector::create1536($result[$this->vectorFieldName]), + new Vector($result[$this->vectorFieldName]), $this->toUuid($result['_id']), new Metadata($result['metadata'] ?? 
[]), );
diff --git a/src/Store/Pinecone/Store.php b/src/Store/Pinecone/Store.php
new file mode 100644
index 00000000..98be524f
--- /dev/null
+++ b/src/Store/Pinecone/Store.php
@@ -0,0 +1,80 @@
+ $filter
+     */
+    public function __construct(
+        private Client $pinecone,
+        private LoggerInterface $logger,
+        // Namespace used by addDocuments() and, unless $options['namespace'] is given, by query().
+        // null is handed to the client unchanged.
+        private ?string $namespace = null,
+        // Filter sent by query() unless $options['filter'] is given.
+        private array $filter = [],
+        // Number of matches requested by query() unless $options['topK'] is given.
+        private int $topK = 3,
+    ) {
+    }
+
+    /**
+     * Stores a single document by delegating to addDocuments().
+     */
+    public function addDocument(Document $document): void
+    {
+        $this->addDocuments([$document]);
+    }
+
+    /**
+     * Upserts every document that carries a vector.
+     *
+     * Documents without a vector are reported through the logger as a warning
+     * and left out of the request.
+     *
+     * @param Document[] $documents
+     */
+    public function addDocuments(array $documents): void
+    {
+        $vectors = [];
+        foreach ($documents as $document) {
+            if (!$document->hasVector()) {
+                $this->logger->warning('Document {id} does not have a vector', ['id' => $document->id]);
+                continue;
+            }
+
+            $vectors[] = [
+                'id' => (string) $document->id,
+                'values' => $document->vector->getData(),
+                'metadata' => $document->metadata->getArrayCopy(),
+            ];
+        }
+
+        // Every document was skipped (or none were given): do not send an empty upsert.
+        if ([] === $vectors) {
+            return;
+        }
+
+        // Write into the configured namespace; query() falls back to the same
+        // namespace, so omitting it here would make stored documents unreachable.
+        $this->getVectors()->upsert($vectors, $this->namespace);
+    }
+
+    /**
+     * Queries the vector resource with $vector and converts each returned match
+     * into a Document.
+     *
+     * Recognised $options keys are 'namespace', 'filter' and 'topK'; each one
+     * falls back to the value given to the constructor.
+     *
+     * @return Document[]
+     */
+    public function query(Vector $vector, array $options = []): array
+    {
+        $response = $this->getVectors()->query(
+            vector: $vector->getData(),
+            namespace: $options['namespace'] ?? $this->namespace,
+            filter: $options['filter'] ?? $this->filter,
+            topK: $options['topK'] ?? $this->topK,
+            // Values are requested because each match is rebuilt into a Vector below.
+            includeValues: true,
+        );
+
+        $documents = [];
+        foreach ($response->json()['matches'] as $match) {
+            $documents[] = Document::fromVector(
+                new Vector($match['values']),
+                Uuid::fromString($match['id']),
+                // A match may come back without metadata; fall back to an empty set.
+                new Metadata($match['metadata'] ?? []),
+            );
+        }
+
+        return $documents;
+    }
+
+    /**
+     * Returns the client's vector resource, which serves both upsert and query.
+     */
+    private function getVectors(): VectorResource
+    {
+        return $this->pinecone->data()->vectors();
+    }
+}