Skip to content

Commit 3d34260

Browse files
koteusKonstantine Kalbazovdluc
authored
Add AWS S3 Document Storage (WIP) (#596)
## Motivation and Context (Why the change? What's the scenario?) Allow KM to store documents/files in AWS S3 and compatible services. ## High level description (Approach, Design) Added new extension for AWS S3 storage, configuration settings, and nuget. --------- Co-authored-by: Konstantine Kalbazov <[email protected]> Co-authored-by: Devis Lucato <[email protected]> Co-authored-by: Devis Lucato <[email protected]>
1 parent 788f253 commit 3d34260

File tree

19 files changed

+547
-42
lines changed

19 files changed

+547
-42
lines changed

Directory.Packages.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
</PropertyGroup>
55
<ItemGroup>
66
<PackageVersion Include="Aspire.Npgsql.EntityFrameworkCore.PostgreSQL" Version="8.0.0-preview.7.24251.11" />
7+
<PackageVersion Include="AWSSDK.S3" Version="3.7.308.9" />
78
<PackageVersion Include="Azure.AI.FormRecognizer" Version="4.1.0" />
89
<PackageVersion Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.3.1" />
910
<PackageVersion Include="Azure.Identity" Version="1.12.0" />

KernelMemory.sln

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{CA49F1A1
116116
tools\run-redis.sh = tools\run-redis.sh
117117
tools\search.sh = tools\search.sh
118118
tools\upload-file.sh = tools\upload-file.sh
119+
tools\run-s3ninja.sh = tools\run-s3ninja.sh
119120
EndProjectSection
120121
EndProject
121122
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Abstractions", "service\Abstractions\Abstractions.csproj", "{8A9FA587-7EBA-4D43-BE47-38D798B1C74C}"
@@ -290,6 +291,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "209-dotnet-using-context-ov
290291
EndProject
291292
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "210-KM-without-builder", "examples\210-KM-without-builder\210-KM-without-builder.csproj", "{00A3DDF3-2230-4AEC-8B5B-B75F958D194B}"
292293
EndProject
294+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "S3", "extensions\AWS\S3\S3.csproj", "{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E}"
295+
EndProject
293296
Global
294297
GlobalSection(SolutionConfigurationPlatforms) = preSolution
295298
Debug|Any CPU = Debug|Any CPU
@@ -548,6 +551,10 @@ Global
548551
{00A3DDF3-2230-4AEC-8B5B-B75F958D194B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
549552
{00A3DDF3-2230-4AEC-8B5B-B75F958D194B}.Debug|Any CPU.Build.0 = Debug|Any CPU
550553
{00A3DDF3-2230-4AEC-8B5B-B75F958D194B}.Release|Any CPU.ActiveCfg = Release|Any CPU
554+
{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
555+
{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E}.Debug|Any CPU.Build.0 = Debug|Any CPU
556+
{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E}.Release|Any CPU.ActiveCfg = Release|Any CPU
557+
{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E}.Release|Any CPU.Build.0 = Release|Any CPU
551558
EndGlobalSection
552559
GlobalSection(SolutionProperties) = preSolution
553560
HideSolutionNode = FALSE
@@ -637,6 +644,7 @@ Global
637644
{C746CE00-8BAE-4B46-A757-FE85D68747CE} = {DBEA0A6B-474A-4E8C-BCC8-D5D43C063A54}
638645
{06A507C7-46B9-4D36-B88B-B4E4A0E8C0AC} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
639646
{00A3DDF3-2230-4AEC-8B5B-B75F958D194B} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
647+
{5A14582B-C6D0-459E-BBB8-EA46CE8DC52E} = {155DA079-E267-49AF-973A-D1D44681970F}
640648
EndGlobalSection
641649
GlobalSection(ExtensibilityGlobals) = postSolution
642650
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}

KernelMemory.sln.DotSettings

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ public void It$SOMENAME$()
219219
</s:String>
220220
<s:Boolean x:Key="/Default/UserDictionary/Words/=abcdefghijklmnopqrstuvwxyz/@EntryIndexedValue">True</s:Boolean>
221221
<s:Boolean x:Key="/Default/UserDictionary/Words/=AOAI/@EntryIndexedValue">True</s:Boolean>
222+
<s:Boolean x:Key="/Default/UserDictionary/Words/=AWSS3/@EntryIndexedValue">True</s:Boolean>
223+
<s:Boolean x:Key="/Default/UserDictionary/Words/=AWS/@EntryIndexedValue">True</s:Boolean>
222224
<s:Boolean x:Key="/Default/UserDictionary/Words/=AZAI/@EntryIndexedValue">True</s:Boolean>
223225
<s:Boolean x:Key="/Default/UserDictionary/Words/=AZDOCINTEL/@EntryIndexedValue">True</s:Boolean>
224226
<s:Boolean x:Key="/Default/UserDictionary/Words/=AZSEARCH/@EntryIndexedValue">True</s:Boolean>
@@ -299,4 +301,4 @@ public void It$SOMENAME$()
299301
<s:Boolean x:Key="/Default/UserDictionary/Words/=wellknown/@EntryIndexedValue">True</s:Boolean>
300302
<s:Boolean x:Key="/Default/UserDictionary/Words/=Wordprocessing/@EntryIndexedValue">True</s:Boolean>
301303
<s:Boolean x:Key="/Default/UserDictionary/Words/=xact/@EntryIndexedValue">True</s:Boolean>
302-
</wpf:ResourceDictionary>
304+
</wpf:ResourceDictionary>

README.md

Lines changed: 19 additions & 19 deletions
Large diffs are not rendered by default.

docs/index.md

Lines changed: 19 additions & 19 deletions
Large diffs are not rendered by default.

examples/002-dotnet-Serverless/Program.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public static async Task Main()
3131
var azureAISearchConfig = new AzureAISearchConfig();
3232
var postgresConfig = new PostgresConfig();
3333
var azureBlobConfig = new AzureBlobsConfig();
34+
var awsS3Config = new AWSS3Config();
3435

3536
new ConfigurationBuilder()
3637
.AddJsonFile("appsettings.json")
@@ -44,6 +45,7 @@ public static async Task Main()
4445
.BindSection("KernelMemory:Services:AzureAIDocIntel", azDocIntelConfig)
4546
.BindSection("KernelMemory:Services:AzureAISearch", azureAISearchConfig)
4647
.BindSection("KernelMemory:Services:AzureBlobs", azureBlobConfig)
48+
.BindSection("KernelMemory:Services:AWSS3", awsS3Config)
4749
.BindSection("KernelMemory:Services:Postgres", postgresConfig)
4850
.BindSection("KernelMemory:Retrieval:SearchClient", searchClientConfig);
4951

@@ -60,9 +62,10 @@ public static async Task Main()
6062
// .WithAzureAISearchMemoryDb(azureAISearchConfig) // Store memories in Azure AI Search
6163
// .WithPostgresMemoryDb(postgresConfig) // Store memories in Postgres
6264
// .WithQdrantMemoryDb("http://127.0.0.1:6333") // Store memories in Qdrant
63-
// .WithAzureBlobsDocumentStorage(azureBlobConfig) // Store files in Azure Blobs
6465
// .WithSimpleVectorDb(SimpleVectorDbConfig.Persistent) // Store memories on disk
66+
// .WithAzureBlobsDocumentStorage(azureBlobConfig) // Store files in Azure Blobs
6567
// .WithSimpleFileStorage(SimpleFileStorageConfig.Persistent) // Store files on disk
68+
// .WithAWSS3DocumentStorage(awsS3Config) // Store files on AWS S3
6669
.WithAzureOpenAITextGeneration(azureOpenAITextConfig, new DefaultGPTTokenizer())
6770
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig, new DefaultGPTTokenizer());
6871

examples/002-dotnet-Serverless/appsettings.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,23 @@
102102
// See https://platform.openai.com/docs/api-reference/embeddings/create
103103
"MaxEmbeddingBatchSize": 100
104104
},
105+
"AWSS3": {
106+
"Auth": "AccessKey",
107+
// AccessKey ID, required when using AccessKey auth
108+
// Note: you can use an env var 'KernelMemory__Services__AWSS3__AccessKey' to set this
109+
"AccessKey": "",
110+
// SecretAccessKey, required when using AccessKey auth
111+
// Note: you can use an env var 'KernelMemory__Services__AWSS3__SecretAccessKey' to set this
112+
"SecretAccessKey": "",
113+
// Required. Name of the bucket where directories are created and files are uploaded.
114+
// Note: you can use an env var 'KernelMemory__Services__AWSS3__BucketName' to set this
115+
"BucketName": ""
116+
// Optional. Custom AWS endpoint, or the endpoint of an S3-compatible service.
117+
// Examples: "https://s3.amazonaws.com", "https://s3.us-west-2.amazonaws.com", "http://127.0.0.1:9444"
118+
// Note: you can use an env var 'KernelMemory__Services__AWSS3__Endpoint' to set this
119+
// Note: you can test locally using S3 Ninja https://s3ninja.net
120+
// "Endpoint": "https://s3.amazonaws.com"
121+
},
105122
"LlamaSharp": {
106123
// path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
107124
"ModelPath": "",

extensions/AWS/S3/AWSS3Config.cs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System.Text.Json.Serialization;
4+
5+
#pragma warning disable IDE0130 // reduce number of "using" statements
6+
// ReSharper disable once CheckNamespace - reduce number of "using" statements
7+
namespace Microsoft.KernelMemory;
8+
9+
/// <summary>
/// Settings used to store documents/files in AWS S3 or an S3-compatible service.
/// Call <see cref="Validate"/> to check that all required values are present.
/// </summary>
public class AWSS3Config
{
    /// <summary>
    /// Supported authentication types. Values are (de)serialized from JSON by name.
    /// </summary>
    [JsonConverter(typeof(JsonStringEnumConverter))]
    public enum AuthTypes
    {
        /// <summary>
        /// No authentication type selected. This is the default and is rejected by validation.
        /// </summary>
        Unknown = -1,

        /// <summary>
        /// Authenticate with an Access Key and Secret Access Key pair.
        /// </summary>
        AccessKey,
    }

    /// <summary>
    /// Authentication type to use. Defaults to <see cref="AuthTypes.Unknown"/>,
    /// which must be replaced with a supported value before validation passes.
    /// </summary>
    public AuthTypes Auth { get; set; } = AuthTypes.Unknown;

    /// <summary>
    /// AWS IAM Access Key (aka Key Name)
    /// </summary>
    public string AccessKey { get; set; } = string.Empty;

    /// <summary>
    /// AWS IAM Secret Access Key (aka Password)
    /// </summary>
    public string SecretAccessKey { get; set; } = string.Empty;

    /// <summary>
    /// AWS S3 endpoint, e.g. https://s3.us-west-2.amazonaws.com
    /// You can use S3 compatible services and dev tools like S3 Ninja.
    /// </summary>
    public string Endpoint { get; set; } = "https://s3.amazonaws.com";

    /// <summary>
    /// S3 bucket name
    /// </summary>
    public string BucketName { get; set; } = string.Empty;

    /// <summary>
    /// Check that the authentication type is set and that the access key,
    /// secret access key, bucket name and endpoint are not null, empty or whitespace.
    /// Checks run in that order and the first failure is reported.
    /// </summary>
    /// <exception cref="ConfigurationException">A required setting is missing or unsupported.</exception>
    public void Validate()
    {
        if (this.Auth == AuthTypes.Unknown)
        {
            throw new ConfigurationException($"Authentication type '{this.Auth}' undefined or not supported");
        }

        if (string.IsNullOrWhiteSpace(this.AccessKey))
        {
            throw new ConfigurationException("S3 Access Key is undefined");
        }

        if (string.IsNullOrWhiteSpace(this.SecretAccessKey))
        {
            // Message wording fixed: it previously read "S3 Secret Key Access undefined"
            throw new ConfigurationException("S3 Secret Access Key is undefined");
        }

        if (string.IsNullOrWhiteSpace(this.BucketName))
        {
            throw new ConfigurationException("S3 bucket name undefined");
        }

        if (string.IsNullOrWhiteSpace(this.Endpoint))
        {
            throw new ConfigurationException("S3 endpoint name undefined");
        }
    }
}

0 commit comments

Comments
 (0)