This repository was archived by the owner on Jul 16, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
feat: add MariaDB store #342
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,3 +64,6 @@ RUN_EXPENSIVE_EXAMPLES=false | |
|
||
# For using Gemini | ||
GOOGLE_API_KEY= | ||
|
||
# For MariaDB store. Server defined in compose.yaml | ||
MARIADB_URI=pdo-mysql://root@127.0.0.1:3309/my_database |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
services: | ||
mariadb: | ||
image: mariadb:11.7 | ||
environment: | ||
MARIADB_ALLOW_EMPTY_ROOT_PASSWORD: 1 | ||
MARIADB_DATABASE: my_database | ||
ports: | ||
- "3309:3306" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
<?php | ||
|
||
use Doctrine\DBAL\DriverManager; | ||
use Doctrine\DBAL\Tools\DsnParser; | ||
use PhpLlm\LlmChain\Chain\Chain; | ||
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor; | ||
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch; | ||
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox; | ||
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\Embeddings; | ||
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\GPT; | ||
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\PlatformFactory; | ||
use PhpLlm\LlmChain\Platform\Message\Message; | ||
use PhpLlm\LlmChain\Platform\Message\MessageBag; | ||
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store; | ||
use PhpLlm\LlmChain\Store\Document\Metadata; | ||
use PhpLlm\LlmChain\Store\Document\TextDocument; | ||
use PhpLlm\LlmChain\Store\Indexer; | ||
use Symfony\Component\Dotenv\Dotenv; | ||
use Symfony\Component\Uid\Uuid; | ||
|
||
// Bootstrap: Composer autoloader plus environment variables from the project's .env file.
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');

// Both the OpenAI key (embeddings + chat) and the MariaDB DSN are mandatory for this example.
if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
    echo 'Please set OPENAI_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = Store::fromDbal(
    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// create one text document per movie (embeddings are computed later by the indexer)
// Start from an explicit empty list so $documents is always defined, even if $movies is empty.
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table
$store->initialize();

// create embeddings for documents
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// expose the store to the model as a similarity-search tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new ChainProcessor($toolbox);
// the same processor instance is passed in both processor lists of the chain
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace PhpLlm\LlmChain\Store\Bridge\MariaDB; | ||
|
||
use Doctrine\DBAL\Connection; | ||
use Doctrine\DBAL\Exception as DBALException; | ||
use PhpLlm\LlmChain\Platform\Vector\Vector; | ||
use PhpLlm\LlmChain\Store\Document\Metadata; | ||
use PhpLlm\LlmChain\Store\Document\VectorDocument; | ||
use PhpLlm\LlmChain\Store\Exception\InvalidArgumentException; | ||
use PhpLlm\LlmChain\Store\InitializableStoreInterface; | ||
use PhpLlm\LlmChain\Store\VectorStoreInterface; | ||
use Symfony\Component\Uid\Uuid; | ||
|
||
/** | ||
* Requires MariaDB >=11.7. | ||
* | ||
* @see https://mariadb.org/rag-with-mariadb-vector/ | ||
* | ||
* @author Valtteri R <[email protected]> | ||
*/ | ||
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
{
    /**
     * The table, index and field names are interpolated verbatim (unquoted) into the SQL
     * statements built by this class, so they must be trusted, valid SQL identifiers.
     *
     * @param \PDO   $connection      PDO connection to the MariaDB server
     * @param string $tableName       The name of the table
     * @param string $indexName       The name of the vector search index
     * @param string $vectorFieldName The name of the field in the index that contains the vector
     */
    public function __construct(
        private \PDO $connection,
        private string $tableName,
        private string $indexName,
        private string $vectorFieldName,
    ) {
    }

    /**
     * Creates a store on top of an existing PDO connection.
     */
    public static function fromPdo(\PDO $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        return new self($connection, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Creates a store from a Doctrine DBAL connection by unwrapping its native PDO handle.
     *
     * @throws DBALException
     * @throws InvalidArgumentException When the DBAL connection is not backed by PDO
     */
    public static function fromDbal(Connection $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        $pdo = $connection->getNativeConnection();

        if (!$pdo instanceof \PDO) {
            throw new InvalidArgumentException('Only DBAL connections using PDO driver are supported.');
        }

        return self::fromPdo($pdo, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Inserts the given documents; a document whose id already exists has its metadata
     * and vector overwritten (upsert via ON DUPLICATE KEY UPDATE).
     *
     * @throws \JsonException When metadata or vector data cannot be encoded as JSON
     */
    public function add(VectorDocument ...$documents): void
    {
        // Each value is bound under two distinct placeholder names: PDO only allows reusing
        // a named placeholder within one statement when emulated prepares are enabled.
        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    INSERT INTO %1$s (id, metadata, %2$s)
                    VALUES (:id, :metadata, VEC_FromText(:vector))
                    ON DUPLICATE KEY UPDATE metadata = :metadataUpdate, %2$s = VEC_FromText(:vectorUpdate)
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
            ),
        );

        foreach ($documents as $document) {
            $metadata = json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR);
            $vector = json_encode($document->vector->getData(), \JSON_THROW_ON_ERROR);

            $statement->execute([
                'id' => $document->id->toBinary(),
                'metadata' => $metadata,
                'vector' => $vector,
                'metadataUpdate' => $metadata,
                'vectorUpdate' => $vector,
            ]);
        }
    }

    /**
     * Returns the stored documents ordered by ascending Euclidean distance to $vector.
     *
     * The "score" of each returned document is that distance. When $minScore is given, only
     * rows whose distance is >= $minScore are returned.
     * NOTE(review): since the score is a distance (smaller means closer), a lower bound
     * filters out the nearest matches - confirm this is the intended semantics.
     *
     * @param array{
     *     limit?: positive-int,
     * } $options "limit" caps the number of returned rows (default 5)
     *
     * @return VectorDocument[]
     *
     * @throws \JsonException When the query vector cannot be encoded or a stored row cannot be decoded
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        // The filter must be formatted on its own: a placeholder inside a sprintf *argument*
        // is never substituted, which previously sent a literal "%1$s" to the server.
        $where = '';
        if (null !== $minScore) {
            $where = \sprintf(
                'WHERE VEC_DISTANCE_EUCLIDEAN(%s, VEC_FromText(:filterEmbedding)) >= :minScore',
                $this->vectorFieldName,
            );
        }

        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    SELECT id, VEC_ToText(%1$s) embedding, metadata, VEC_DISTANCE_EUCLIDEAN(%1$s, VEC_FromText(:embedding)) AS score
                    FROM %2$s
                    %3$s
                    ORDER BY score ASC
                    LIMIT %4$d
                    SQL,
                $this->vectorFieldName,
                $this->tableName,
                $where,
                $options['limit'] ?? 5,
            ),
        );

        $encodedVector = json_encode($vector->getData(), \JSON_THROW_ON_ERROR);
        $params = ['embedding' => $encodedVector];

        if (null !== $minScore) {
            // Bound under a second name because PDO cannot reuse a named placeholder
            // within one statement unless emulated prepares are enabled.
            $params['filterEmbedding'] = $encodedVector;
            $params['minScore'] = $minScore;
        }

        $statement->execute($params);

        $documents = [];

        foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) {
            $documents[] = new VectorDocument(
                id: Uuid::fromBinary($result['id']),
                vector: new Vector(json_decode((string) $result['embedding'], true, flags: \JSON_THROW_ON_ERROR)),
                // The metadata column is nullable; treat NULL as an empty JSON object.
                metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, flags: \JSON_THROW_ON_ERROR)),
                // PDO can hand numeric columns back as strings (e.g. with stringified fetches).
                score: (float) $result['score'],
            );
        }

        return $documents;
    }

    /**
     * Creates the table and its vector index if the table does not exist yet.
     *
     * @param array{dimensions?: positive-int} $options "dimensions" sets the size of the VECTOR column (default 1536)
     *
     * @throws InvalidArgumentException When an unsupported option or an invalid dimension is given,
     *                                  or when the server is not MariaDB >=11.7
     */
    public function initialize(array $options = []): void
    {
        $dimensions = $options['dimensions'] ?? 1536;
        unset($options['dimensions']);

        if ([] !== $options) {
            throw new InvalidArgumentException('The only supported option is "dimensions"');
        }

        if (!\is_int($dimensions) || $dimensions < 1) {
            throw new InvalidArgumentException('The "dimensions" option must be a positive integer');
        }

        // Cast once so both checks below operate on a string even if the driver returns null.
        $serverVersion = (string) $this->connection->getAttribute(\PDO::ATTR_SERVER_VERSION);

        if (!str_contains($serverVersion, 'MariaDB') || version_compare($serverVersion, '11.7.0') < 0) {
            throw new InvalidArgumentException('You need MariaDB >=11.7 to use this feature');
        }

        $this->connection->exec(
            \sprintf(
                <<<'SQL'
                    CREATE TABLE IF NOT EXISTS %1$s (
                        id BINARY(16) NOT NULL PRIMARY KEY,
                        metadata JSON,
                        %2$s VECTOR(%4$d) NOT NULL,
                        VECTOR INDEX %3$s (%2$s)
                    )
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
                $this->indexName,
                $dimensions,
            ),
        );
    }
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider initializing the $documents array before the foreach loop to ensure clarity and avoid potential undefined variable issues.
Copilot uses AI. Check for mistakes.