Skip to content
This repository was archived by the owner on Jul 16, 2025. It is now read-only.

feat: add MariaDB store #342

Merged
merged 1 commit into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,6 @@ RUN_EXPENSIVE_EXAMPLES=false

# For using Gemini
GOOGLE_API_KEY=

# For MariaDB store. Server defined in compose.yaml
MARIADB_URI=pdo-mysql://root@127.0.0.1:3309/my_database
8 changes: 8 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
services:
  mariadb:
    image: mariadb:11.7
    environment:
      MARIADB_ALLOW_EMPTY_ROOT_PASSWORD: 1
      MARIADB_DATABASE: my_database
    ports:
      - "3309:3306"
4 changes: 4 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@
"webmozart/assert": "^1.11"
},
"require-dev": {
"ext-pdo": "*",
"codewithkyrian/chromadb-php": "^0.2.1 || ^0.3 || ^0.4",
"codewithkyrian/transformers": "^0.5.3",
"async-aws/bedrock-runtime": "^0.1.0",
"doctrine/dbal": "^3.0 || ^4.0",
"mongodb/mongodb": "^1.21 || ^2.0",
"php-cs-fixer/shim": "^3.70",
"phpstan/phpstan": "^2.0",
Expand All @@ -58,9 +60,11 @@
"symfony/var-dumper": "^6.4 || ^7.1"
},
"suggest": {
"ext-pdo": "For using MariaDB as retrieval vector store.",
"async-aws/bedrock-runtime": "For using the Bedrock platform.",
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
"codewithkyrian/transformers": "For using the TransformersPHP with FFI to run models in PHP.",
"doctrine/dbal": "For using MariaDB via Doctrine as retrieval vector store.",
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
"probots-io/pinecone-php": "For using the Pinecone as retrieval vector store.",
"symfony/css-selector": "For using the YouTube transcription tool.",
Expand Down
74 changes: 74 additions & 0 deletions examples/store/mariadb-similarity-search.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Tools\DsnParser;
use PhpLlm\LlmChain\Chain\Chain;
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch;
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\Embeddings;
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\GPT;
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\PlatformFactory;
use PhpLlm\LlmChain\Platform\Message\Message;
use PhpLlm\LlmChain\Platform\Message\MessageBag;
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store;
use PhpLlm\LlmChain\Store\Document\Metadata;
use PhpLlm\LlmChain\Store\Document\TextDocument;
use PhpLlm\LlmChain\Store\Indexer;
use Symfony\Component\Dotenv\Dotenv;
use Symfony\Component\Uid\Uuid;

// Resolve the project root once; both the autoloader and the .env file live there.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// The example needs both an OpenAI key and a MariaDB connection URI; bail out otherwise.
$isMissingConfiguration = empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MARIADB_URI']);

if ($isMissingConfiguration) {
    echo 'Please set OPENAI_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// Build the vector store on top of a DBAL connection parsed from the configured URI.
$connectionParams = (new DsnParser())->parse($_ENV['MARIADB_URI']);

$store = Store::fromDbal(
    connection: DriverManager::getConnection($connectionParams),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// Sample data set that gets indexed below.
$movies = [
    [
        'title' => 'Inception',
        'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.',
        'director' => 'Christopher Nolan',
    ],
    [
        'title' => 'The Matrix',
        'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.',
        'director' => 'The Wachowskis',
    ],
    [
        'title' => 'The Godfather',
        'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.',
        'director' => 'Francis Ford Coppola',
    ],
];

// create embeddings and documents
Copy link
Preview

Copilot AI Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider initializing the $documents array before the foreach loop to ensure clarity and avoid potential undefined variable issues.

Suggested change
// create embeddings and documents
// create embeddings and documents
$documents = [];

Copilot uses AI. Check for mistakes.

// Start from an explicit empty list so $documents is defined even when $movies is empty.
$documents = [];

foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table
$store->initialize();

// create embeddings for documents and persist them in the store
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$embeddings = new Embeddings();
$indexer = new Indexer($platform, $embeddings, $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// expose the store to the model through the SimilaritySearch tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new ChainProcessor($toolbox);
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;
159 changes: 159 additions & 0 deletions src/Store/Bridge/MariaDB/Store.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Store\Bridge\MariaDB;

use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Exception as DBALException;
use PhpLlm\LlmChain\Platform\Vector\Vector;
use PhpLlm\LlmChain\Store\Document\Metadata;
use PhpLlm\LlmChain\Store\Document\VectorDocument;
use PhpLlm\LlmChain\Store\Exception\InvalidArgumentException;
use PhpLlm\LlmChain\Store\InitializableStoreInterface;
use PhpLlm\LlmChain\Store\VectorStoreInterface;
use Symfony\Component\Uid\Uuid;

/**
* Requires MariaDB >=11.7.
*
* @see https://mariadb.org/rag-with-mariadb-vector/
*
* @author Valtteri R <[email protected]>
*/
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
{
    /**
     * The table, index and field names are interpolated into SQL statements as-is,
     * so they must be trusted identifiers and never come from user input.
     *
     * @param string $tableName       The name of the table
     * @param string $indexName       The name of the vector search index
     * @param string $vectorFieldName The name of the field in the index that contains the vector
     */
    public function __construct(
        private \PDO $connection,
        private string $tableName,
        private string $indexName,
        private string $vectorFieldName,
    ) {
    }

    /**
     * Creates a store on top of an existing PDO connection.
     */
    public static function fromPdo(\PDO $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        return new self($connection, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Creates a store from a Doctrine DBAL connection by unwrapping its native PDO handle.
     *
     * @throws DBALException
     * @throws InvalidArgumentException When the DBAL connection is not backed by PDO
     */
    public static function fromDbal(Connection $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        $pdo = $connection->getNativeConnection();

        if (!$pdo instanceof \PDO) {
            throw new InvalidArgumentException('Only DBAL connections using PDO driver are supported.');
        }

        return self::fromPdo($pdo, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Inserts the given documents, overwriting metadata and vector of rows whose id already exists.
     *
     * @throws \JsonException When metadata or vector data cannot be encoded as JSON
     */
    public function add(VectorDocument ...$documents): void
    {
        if ([] === $documents) {
            // Nothing to write; avoid a pointless round trip for preparing the statement.
            return;
        }

        // VALUES(col) refers to the value of the attempted insert. Using it keeps every named
        // placeholder unique, which PDO requires when emulated prepares are disabled.
        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    INSERT INTO %1$s (id, metadata, %2$s)
                    VALUES (:id, :metadata, VEC_FromText(:vector))
                    ON DUPLICATE KEY UPDATE metadata = VALUES(metadata), %2$s = VALUES(%2$s)
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
            ),
        );

        foreach ($documents as $document) {
            $statement->execute([
                'id' => $document->id->toBinary(),
                'metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR),
                'vector' => json_encode($document->vector->getData(), \JSON_THROW_ON_ERROR),
            ]);
        }
    }

    /**
     * Returns the stored documents ordered by ascending Euclidean distance to the given vector.
     *
     * The "score" of each returned document is that distance, so lower means closer.
     *
     * @param array{
     *     limit?: positive-int,
     * } $options Maximum number of rows to return; defaults to 5
     * @param float|null $minScore When given, only rows whose distance is >= this value are returned.
     *                             NOTE(review): on a distance this keeps the farther rows; confirm
     *                             that this is the intended semantic.
     *
     * @return list<VectorDocument>
     *
     * @throws \JsonException When the query vector cannot be encoded or a stored row cannot be decoded
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        $params = ['embedding' => json_encode($vector->getData(), \JSON_THROW_ON_ERROR)];
        $whereClause = '';

        if (null !== $minScore) {
            // The field name must be substituted here: the clause is handed to the outer
            // sprintf() as an argument, so placeholders inside it would not be expanded there.
            // A separate placeholder name is used because native prepared statements do not
            // allow the same named parameter twice.
            $whereClause = \sprintf(
                'WHERE VEC_DISTANCE_EUCLIDEAN(%1$s, VEC_FromText(:embeddingFilter)) >= :minScore',
                $this->vectorFieldName,
            );
            $params['embeddingFilter'] = $params['embedding'];
            $params['minScore'] = $minScore;
        }

        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    SELECT id, VEC_ToText(%1$s) embedding, metadata, VEC_DISTANCE_EUCLIDEAN(%1$s, VEC_FromText(:embedding)) AS score
                    FROM %2$s
                    %3$s
                    ORDER BY score ASC
                    LIMIT %4$d
                    SQL,
                $this->vectorFieldName,
                $this->tableName,
                $whereClause,
                $options['limit'] ?? 5,
            ),
        );

        $statement->execute($params);

        $documents = [];

        foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) {
            $documents[] = new VectorDocument(
                id: Uuid::fromBinary($result['id']),
                vector: new Vector(json_decode((string) $result['embedding'], true, flags: \JSON_THROW_ON_ERROR)),
                metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, flags: \JSON_THROW_ON_ERROR)),
                // The driver may hand numeric columns back as strings; normalize to float.
                score: (float) $result['score'],
            );
        }

        return $documents;
    }

    /**
     * Creates the table with its vector index if it does not exist yet.
     *
     * The vector column is created with a fixed dimension of 1536.
     *
     * @param array{} $options No options are supported; a non-empty array is rejected
     *
     * @throws InvalidArgumentException When options are passed or the server is not MariaDB >=11.7
     */
    public function initialize(array $options = []): void
    {
        if ([] !== $options) {
            throw new InvalidArgumentException('No supported options');
        }

        $serverVersion = (string) $this->connection->getAttribute(\PDO::ATTR_SERVER_VERSION);

        if (!self::isSupportedServerVersion($serverVersion)) {
            throw new InvalidArgumentException('You need MariaDB >=11.7 to use this feature');
        }

        $this->connection->exec(
            \sprintf(
                <<<'SQL'
                    CREATE TABLE IF NOT EXISTS %1$s (
                        id BINARY(16) NOT NULL PRIMARY KEY,
                        metadata JSON,
                        %2$s VECTOR(1536) NOT NULL,
                        VECTOR INDEX %3$s (%2$s)
                    )
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
                $this->indexName,
            ),
        );
    }

    /**
     * Tells whether the reported server version string belongs to MariaDB 11.7.0 or newer.
     */
    private static function isSupportedServerVersion(string $serverVersion): bool
    {
        if (!str_contains($serverVersion, 'MariaDB')) {
            return false;
        }

        // Compare only the numeric "major.minor.patch" part (optionally behind a legacy
        // "5.5.5-" prefix) so that the "-MariaDB..." suffix cannot influence version_compare().
        // Unexpected formats fall back to comparing the raw string.
        $numericVersion = 1 === preg_match('/^(?:5\.5\.5-)?(\d+\.\d+\.\d+)/', $serverVersion, $matches)
            ? $matches[1]
            : $serverVersion;

        return version_compare($numericVersion, '11.7.0', '>=');
    }
}