Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: OpenSearch. added image search by text and image similarity. added integration tests #197

Merged
merged 21 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a0657ea
doc: Crew travel agent app
curlyfro Mar 23, 2024
26e8d51
fix: minor fixes
curlyfro Mar 26, 2024
076c36d
fix: minor updates
curlyfro Apr 1, 2024
a2b0249
Merge branch 'main' of https:/curlyfro/LangChain
curlyfro Apr 1, 2024
6480095
feat: updated Amazon Bedrock provider for accessKeyId and secretAcces…
curlyfro Apr 1, 2024
aa1b0b1
feat: added image search by text and image similarity. added integra…
curlyfro Apr 8, 2024
f3257bb
Merge branch 'main' of https:/curlyfro/LangChain
curlyfro Apr 8, 2024
f262d95
Update usingBedrock.md
HavenDV Apr 8, 2024
7220c64
Update usingBedrock.md
HavenDV Apr 8, 2024
1e65fcb
Update usingBedrock.md
HavenDV Apr 8, 2024
5125285
Update usingBedrock.md
HavenDV Apr 8, 2024
4ae4285
fix: removed test attribute. removed deprecated package and added st…
curlyfro Apr 8, 2024
d9f1c72
Merge branch 'main' of https:/curlyfro/LangChain
curlyfro Apr 8, 2024
66e463b
fix: removed nullable warning
curlyfro Apr 8, 2024
8f37069
Update src/Providers/Abstractions/src/Embedding/IEmbeddingModel.cs
HavenDV Apr 8, 2024
36f6b8f
Update src/Providers/Abstractions/src/Embedding/IEmbeddingModel.cs
HavenDV Apr 8, 2024
3972133
Update src/Directory.Packages.props
HavenDV Apr 8, 2024
2f59b28
Update src/Providers/Abstractions/src/Embedding/IEmbeddingModel.cs
HavenDV Apr 8, 2024
f5cc6dd
Update src/Providers/Abstractions/src/Embedding/IEmbeddingModel.cs
HavenDV Apr 8, 2024
d50f0a1
Update src/Providers/Amazon.Bedrock/src/Embedding/AmazonTitanEmbeddin…
HavenDV Apr 8, 2024
3d7b161
Update src/Providers/Amazon.Bedrock/src/Embedding/AmazonTitanImageEmb…
HavenDV Apr 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion src/Databases/OpenSearch/src/OpenSearchVectorStore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,46 @@ public override async Task<IEnumerable<string>> AddDocumentsAsync(IEnumerable<Do
return enumerable.Select(x => x.PageContent);
}

/// <summary>
/// Embeds every document together with its image and indexes the resulting vectors in a single bulk request.
/// </summary>
/// <param name="documents">
/// Documents to index. Each document's <c>Metadata</c> must contain the image as a
/// <see cref="BinaryData"/> value stored under the document's own <c>PageContent</c> as the key.
/// </param>
/// <param name="cancellationToken">Token forwarded to the embedding calls and to the bulk request.</param>
/// <returns>The page contents of the indexed documents, in input order (same shape as <c>AddDocumentsAsync</c>).</returns>
/// <exception cref="ArgumentNullException"><paramref name="documents"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">A document carries no <see cref="BinaryData"/> image under its page content key.</exception>
/// <exception cref="InvalidOperationException">The bulk request was not reported as valid by the client.</exception>
public async Task<IEnumerable<string>> AddImagesAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
{
    documents = documents ?? throw new ArgumentNullException(nameof(documents));

    // Materialize once so the input is not enumerated twice (loop + return value).
    var enumerable = documents as Document[] ?? documents.ToArray();
    if (enumerable.Length == 0)
    {
        // Nothing to index; avoid sending a bulk request with an empty body.
        return Array.Empty<string>();
    }

    var bulkDescriptor = new BulkDescriptor();

    // NOTE(review): ids restart at 1 on every call, so a second call presumably overwrites
    // records written by the first one. Kept as-is; confirm whether that is intended.
    var i = 1;

    foreach (var document in enumerable)
    {
        // The image travels in the metadata, keyed by the document's page content.
        if (!document.Metadata.TryGetValue(document.PageContent, out var value) || value is not BinaryData image)
        {
            throw new ArgumentException(
                "Every document must carry its image as BinaryData in Metadata, keyed by its PageContent.",
                nameof(documents));
        }

        var embeddingRequest = new EmbeddingRequest
        {
            Strings = new List<string> { document.PageContent },
            Images = new List<Data> { Data.FromBytes(image.ToArray()) }
        };
        var embed = await EmbeddingModel.CreateEmbeddingsAsync(embeddingRequest, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        var vectorRecord = new VectorRecord
        {
            Id = i++.ToString(CultureInfo.InvariantCulture),
            Text = document.PageContent,
            // All returned embedding rows are flattened into one vector.
            Vector = embed.Values.SelectMany(x => x).ToArray()
        };

        bulkDescriptor.Index<VectorRecord>(desc => desc
            .Document(vectorRecord)
            .Index(_indexName)
        );
    }

    var bulkResponse = await _client!.BulkAsync(bulkDescriptor, cancellationToken)
        .ConfigureAwait(false);

    // Surface indexing failures instead of silently dropping them.
    if (!bulkResponse.IsValid)
    {
        throw new InvalidOperationException(
            $"Bulk indexing into '{_indexName}' failed: {bulkResponse.DebugInformation}");
    }

    return enumerable.Select(x => x.PageContent);
}
HavenDV marked this conversation as resolved.
Show resolved Hide resolved


public override Task<IEnumerable<string>> AddTextsAsync(IEnumerable<string> texts, IEnumerable<Dictionary<string, object>>? metadatas = null, CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
Expand Down Expand Up @@ -153,7 +193,7 @@ private void CreateIndex(OpenSearchVectorStoreOptions options)
.Properties(p => p
.Keyword(k => k.Name(n => n.Id))
.Text(t => t.Name(n => n.Text))
.KnnVector(d => d.Name(n => n.Vector).Dimension(1536).Similarity("cosine"))
.KnnVector(d => d.Name(n => n.Vector).Dimension(options.Dimensions).Similarity("cosine"))
)
));
}
Expand Down
5 changes: 2 additions & 3 deletions src/Databases/OpenSearch/src/OpenSearchVectorStoreOptions.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
using OpenSearch.Client;

namespace LangChain.Databases.OpenSearch;
namespace LangChain.Databases.OpenSearch;

/// <summary>
/// Settings passed to <c>OpenSearchVectorStore</c> when it is constructed.
/// </summary>
public class OpenSearchVectorStoreOptions
{
/// <summary>Name of the index the store works against.</summary>
public string? IndexName { get; set; }
/// <summary>Address of the OpenSearch endpoint.</summary>
public Uri? ConnectionUri { get; set; }
/// <summary>User name; presumably used to authenticate against the endpoint (verify against the store's client setup).</summary>
public string? Username { get; set; }
/// <summary>Password; presumably paired with <see cref="Username"/> for authentication (verify against the store's client setup).</summary>
public string? Password { get; set; }
/// <summary>
/// Dimension of the k-NN vector field configured when the index mapping is created.
/// The tests use 1536 for the Titan text model and 1024 for the Titan image model.
/// </summary>
public int? Dimensions { get; set; }
}
157 changes: 148 additions & 9 deletions src/Databases/OpenSearch/test/OpenSearchTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using LangChain.Indexes;
using LangChain.Providers;
using LangChain.Providers.Amazon.Bedrock;
using LangChain.Providers.Amazon.Bedrock.Predefined.Amazon;
using LangChain.Providers.Amazon.Bedrock.Predefined.Anthropic;
Expand All @@ -18,34 +19,166 @@ public class OpenSearchTests
private OpenSearchVectorStoreOptions? _options;
private OpenSearchVectorStore? _vectorStore;
private BedrockProvider? _provider;
private IEmbeddingModel? _embeddings;

[SetUp]
public void Setup()
#region Query Images

[Test]
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
public Task setup_image_tests()
{
_indexName = "test-index";
_indexName = "images-index";
var username = Environment.GetEnvironmentVariable("OPENSEARCH_USERNAME");
var endpoint = Environment.GetEnvironmentVariable("OPENSEARCH_URI");
var uri = new Uri(endpoint);
var uri = new Uri(endpoint);
//var uri = new Uri("http://localhost:9200");
var password = Environment.GetEnvironmentVariable("OPENSEARCH_INITIAL_ADMIN_PASSWORD");
_options = new OpenSearchVectorStoreOptions
{
IndexName = _indexName,
ConnectionUri = uri,
Username = username,
Password = password
Password = password,
IndexName = _indexName,
Dimensions = 1024
};

_provider = new BedrockProvider();
var embeddings = new TitanEmbedTextV1Model(_provider!);
_vectorStore = new OpenSearchVectorStore(embeddings, _options);
_embeddings = new TitanEmbedImageV1Model(_provider)
{
Settings = new BedrockEmbeddingSettings
{
Dimensions = _options.Dimensions
}
};
_vectorStore = new OpenSearchVectorStore(_embeddings, _options);

return Task.CompletedTask;
}
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
HavenDV marked this conversation as resolved.
Show resolved Hide resolved

[Test]
public async Task index_test_images()
{
    await setup_image_tests();

    // Placeholder directory: point it at a folder of images before running this test.
    var imagePaths = Directory.EnumerateFiles(@"you image location");
    var documents = new List<Document>();

    foreach (BinaryData image in imagePaths.ToBinaryData())
    {
        // Ask the chat model to describe the picture; the description becomes the indexed text.
        var model = new Claude3HaikuModel(_provider);
        var prompt = new Message(" \"what's this a picture of and describe details?\"", MessageRole.Human);

        var chatRequest = ChatRequest.ToChatRequest(prompt);
        chatRequest.Image = image;

        var response = await model.GenerateAsync(chatRequest);

        // The description is both the page content and the metadata key under which
        // the raw image is stored, which is where AddImagesAsync looks it up.
        documents.Add(new Document
        {
            PageContent = response,
            Metadata = new Dictionary<string, object>
            {
                { response, image }
            }
        });
    }

    await _vectorStore!.AddImagesAsync(documents);
}

[Test]
public async Task can_query_image_against_images()
{
    await setup_image_tests();

    // Load the query image from the temp directory.
    var imagePath = Path.Combine(Path.GetTempPath(), "test_image.jpg");
    var queryImage = new BinaryData(await File.ReadAllBytesAsync(imagePath), "image/jpg");

    // Image-only request: no text is embedded alongside the picture.
    var request = new EmbeddingRequest
    {
        Strings = new List<string>(),
        Images = new List<Data> { Data.FromBytes(queryImage.ToArray()) }
    };
    var embedding = await _embeddings!.CreateEmbeddingsAsync(request)
        .ConfigureAwait(false);
    var queryVector = embedding.ToSingleArray();

    // Search the images index with the image's own embedding.
    IEnumerable<Document> matches = await _vectorStore!.SimilaritySearchByVectorAsync(queryVector)
        .ConfigureAwait(false);

    Console.WriteLine($"Count: {matches.Count()}");
}

// Runs a text question against the images index: the question is used to retrieve
// documents from the vector store, the retrieved documents are merged into the
// prompt's {context}, and the filled prompt is sent to the Claude 3 Sonnet model.
// The result is only printed; nothing is asserted.
[Test]
public async Task can_query_text_against_images()
{
await setup_image_tests();

var llm = new Claude3SonnetModel(_provider);
var index = new VectorStoreIndexWrapper(_vectorStore!);

// Prompt template; {context} and {question} are filled in by the chain below.
var promptText =
@"Use the following pieces of context to answer the question at the end. If the answer is not in context then just say that you don't know, don't try to make up an answer. Keep the answer as short as possible.

{context}

Question: {question}
Helpful Answer:";

var chain =
Set("tell me about the orange shirt", outputKey: "question") // set the question
| RetrieveDocuments(index, inputKey: "question", outputKey: "documents", amount: 10) // take the 10 most similar documents
| StuffDocuments(inputKey: "documents", outputKey: "context") // combine documents together and put them into context
| Template(promptText) // replace context and question in the prompt with their values
| LLM(llm); // send the result to the language model

// "text" is presumably the chain's output key holding the model's answer — confirm against the chain API.
var res = await chain.Run("text");
Console.WriteLine(res);
}

#endregion

#region Query Simple Documents

[Test]
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
/// <summary>
/// Configures the shared fields for the plain-document tests: the "test-index" index,
/// connection settings read from environment variables, a Bedrock provider and a
/// Titan text embedding model sized to match the index dimensions.
/// </summary>
/// <returns>A completed task; the method does no asynchronous work.</returns>
/// <exception cref="InvalidOperationException">The OPENSEARCH_URI environment variable is not set.</exception>
public Task setup_document_tests()
{
    _indexName = "test-index";

    // Fail fast with a message naming the variable instead of letting `new Uri(null)` throw.
    // For a local cluster, set OPENSEARCH_URI to http://localhost:9200.
    var endpoint = Environment.GetEnvironmentVariable("OPENSEARCH_URI");
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        throw new InvalidOperationException(
            "The OPENSEARCH_URI environment variable must be set to run the OpenSearch tests.");
    }

    _options = new OpenSearchVectorStoreOptions
    {
        ConnectionUri = new Uri(endpoint),
        Username = Environment.GetEnvironmentVariable("OPENSEARCH_USERNAME"),
        Password = Environment.GetEnvironmentVariable("OPENSEARCH_INITIAL_ADMIN_PASSWORD"),
        IndexName = _indexName,
        Dimensions = 1536
    };

    _provider = new BedrockProvider();

    // The embedding size must agree with the dimension the index is created with.
    var embeddings = new TitanEmbedTextV1Model(_provider)
    {
        Settings = new BedrockEmbeddingSettings
        {
            Dimensions = _options.Dimensions
        }
    };
    _vectorStore = new OpenSearchVectorStore(embeddings, _options);

    return Task.CompletedTask;
}
HavenDV marked this conversation as resolved.
Show resolved Hide resolved

[Test]
public async Task index_test_documents()
{
await setup_document_tests();

var documents = new[]
{
"I spent entire day watching TV",
Expand All @@ -62,7 +195,9 @@ public async Task index_test_documents()
[Test]
public async Task can_query_test_documents()
{
var llm = new TitanTextExpressV1Model(_provider!);
await setup_document_tests();

var llm = new Claude3SonnetModel(_provider!);
var index = new VectorStoreIndexWrapper(_vectorStore!);

const string question = "what color is the car?";
Expand Down Expand Up @@ -93,6 +228,8 @@ public async Task can_query_test_documents()
[Test]
public async Task index_harry_potter_book()
{
await setup_document_tests();

HavenDV marked this conversation as resolved.
Show resolved Hide resolved
var pdfSource = new PdfPigPdfSource("x:\\Harry-Potter-Book-1.pdf");
var documents = await pdfSource.LoadAsync();

Expand All @@ -103,6 +240,8 @@ public async Task index_harry_potter_book()
[Test]
public async Task can_query_harry_potter_book()
{
await setup_document_tests();

var llm = new Claude3SonnetModel(_provider);
var index = new VectorStoreIndexWrapper(_vectorStore!);

Expand Down
1 change: 1 addition & 0 deletions src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
<PackageVersion Include="LeonardoAi" Version="0.1.0" />
<PackageVersion Include="LLamaSharp" Version="0.10.0" />
<PackageVersion Include="LLamaSharp.Backend.Cpu" Version="0.10.0" />
<PackageVersion Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" />
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp" Version="4.8.0" />
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="4.8.0" />
<PackageVersion Include="Microsoft.CodeAnalysis.PublicApiAnalyzers" Version="3.3.4" />
Expand Down
5 changes: 0 additions & 5 deletions src/Providers/Abstractions/src/Embedding/IEmbeddingModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@ namespace LangChain.Providers;
/// </summary>
public interface IEmbeddingModel : IModel<EmbeddingSettings>
{
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
/// <summary>
HavenDV marked this conversation as resolved.
Show resolved Hide resolved
///
/// </summary>
public int MaximumInputLength { get; }

/// <summary>
///
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

namespace LangChain.Providers.Amazon.Bedrock;

internal static class BedrockModelStreamRequest
internal static class BedrockModelRequest
{
public static InvokeModelWithResponseStreamRequest Create(string modelId, JsonObject bodyJson)
public static InvokeModelWithResponseStreamRequest CreateStreamRequest(string modelId, JsonObject bodyJson)
{
bodyJson = bodyJson ?? throw new ArgumentNullException(nameof(bodyJson));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
InvokeModelWithResponseStreamResponse? response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public override async Task<ChatResponse> GenerateAsync(

if (usedSettings.UseStreaming == true)
{
var streamRequest = BedrockModelStreamRequest.Create(Id, bodyJson);
var streamRequest = BedrockModelRequest.CreateStreamRequest(Id, bodyJson);
var response = await provider.Api.InvokeModelWithResponseStreamAsync(streamRequest, cancellationToken).ConfigureAwait(false);

foreach (var payloadPart in response.Body)
Expand Down
Loading
Loading