Skip to content

Commit

Permalink
Add functional tests for the evaluation
Browse files Browse the repository at this point in the history
  • Loading branch information
kbeaugrand committed Jun 9, 2024
1 parent 837a6aa commit d17f431
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 26 deletions.
30 changes: 20 additions & 10 deletions KernelMemory.sln
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{6EF76FD8-4
Directory.Build.props = Directory.Build.props
Directory.Packages.props = Directory.Packages.props
Dockerfile = Dockerfile
KernelMemory.sln.DotSettings = KernelMemory.sln.DotSettings
LICENSE = LICENSE
nuget.config = nuget.config
README.md = README.md
SECURITY.md = SECURITY.md
KernelMemory.sln.DotSettings = KernelMemory.sln.DotSettings
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{B8976338-7CDC-47AE-8502-C2FBAFBEBD68}"
Expand All @@ -92,10 +92,10 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{48E79819-1E9E-4075-90DA-BAEC761C89B2}"
ProjectSection(SolutionItems) = preProject
.github\workflows\docker-build-push.yml = .github\workflows\docker-build-push.yml
.github\workflows\dotnet-build.yml = .github\workflows\dotnet-build.yml
.github\workflows\dotnet-unit-tests.yml = .github\workflows\dotnet-unit-tests.yml
.github\workflows\github-pages-jekyll.yml = .github\workflows\github-pages-jekyll.yml
.github\workflows\spell-check-with-typos.yml = .github\workflows\spell-check-with-typos.yml
.github\workflows\dotnet-unit-tests.yml = .github\workflows\dotnet-unit-tests.yml
.github\workflows\dotnet-build.yml = .github\workflows\dotnet-build.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "service", "service", "{87DEAE8D-138C-4FDD-B4C9-11C3A7817E8F}"
Expand Down Expand Up @@ -268,21 +268,25 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Elasticsearch", "extensions
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Elasticsearch.FunctionalTests", "extensions\Elasticsearch\Elasticsearch.FunctionalTests\Elasticsearch.FunctionalTests.csproj", "{C5E6B28C-F54D-423D-954D-A9EAEFB89732}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Discord", "extensions\Discord\Discord\Discord.csproj", "{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Discord", "extensions\Discord\Discord\Discord.csproj", "{43877864-6AE8-4B03-BEDA-6B6FA8BB1D8B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "301-discord-test-application", "examples\301-discord-test-application\301-discord-test-application.csproj", "{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "301-discord-test-application", "examples\301-discord-test-application\301-discord-test-application.csproj", "{FAE4C6B8-38B2-43E7-8881-99693C9CEDC6}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "applications", "applications", "{DBEA0A6B-474A-4E8C-BCC8-D5D43C063A54}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Evaluation", "applications\evaluation\Evaluation.csproj", "{432AC1B4-8275-4284-9A44-44988A6F0C24}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Evaluation", "applications\evaluation\Evaluation.csproj", "{432AC1B4-8275-4284-9A44-44988A6F0C24}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Anthropic", "extensions\Anthropic\Anthropic.csproj", "{A0C81A29-715F-463E-A243-7E45DB8AE53F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "110-dotnet-anthropic", "examples\110-dotnet-anthropic\110-dotnet-anthropic.csproj", "{EE0D8645-2770-4E12-8E18-019B30970FE6}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Anthropic", "extensions\Anthropic\Anthropic.csproj", "{A0C81A29-715F-463E-A243-7E45DB8AE53F}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "302-dotnet-sk-km-chat", "examples\302-dotnet-sk-km-chat\302-dotnet-sk-km-chat.csproj", "{37FA99CB-AD22-4BAC-B76F-961F84422DEE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "110-dotnet-anthropic", "examples\110-dotnet-anthropic\110-dotnet-anthropic.csproj", "{EE0D8645-2770-4E12-8E18-019B30970FE6}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "401-evaluation", "examples\401-evaluation\401-evaluation.csproj", "{D1308C73-79B6-4635-B50D-420742D09C20}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "302-dotnet-sk-km-chat", "examples\302-dotnet-sk-km-chat\302-dotnet-sk-km-chat.csproj", "{37FA99CB-AD22-4BAC-B76F-961F84422DEE}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "applications", "applications", "{0B980824-DDC3-4A02-B26B-562162D95206}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "401-evaluation", "examples\401-evaluation\401-evaluation.csproj", "{D1308C73-79B6-4635-B50D-420742D09C20}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Evaluation.FunctionalTests", "applications\tests\Evaluation.Tests\Evaluation.FunctionalTests.csproj", "{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -533,6 +537,10 @@ Global
{D1308C73-79B6-4635-B50D-420742D09C20}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D1308C73-79B6-4635-B50D-420742D09C20}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D1308C73-79B6-4635-B50D-420742D09C20}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -619,6 +627,8 @@ Global
{EE0D8645-2770-4E12-8E18-019B30970FE6} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{37FA99CB-AD22-4BAC-B76F-961F84422DEE} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{D1308C73-79B6-4635-B50D-420742D09C20} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{0B980824-DDC3-4A02-B26B-562162D95206} = {5E7DD43D-B5E7-4827-B57D-447E5B428589}
{A9A0BFC7-DFF9-40DB-B7A6-AE1236D00313} = {0B980824-DDC3-4A02-B26B-562162D95206}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
Expand Down
9 changes: 7 additions & 2 deletions applications/evaluation/TestSetEvaluatorBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public TestSetEvaluatorBuilder AddEvaluatorKernel(Kernel kernel)

public TestSetEvaluatorBuilder WithMemory(IKernelMemory memory)
{
this._serviceCollection.AddKeyedSingleton<IKernelMemory>(memory);
this._serviceCollection.AddSingleton<IKernelMemory>(memory);

return this;
}
Expand All @@ -39,7 +39,12 @@ public TestSetEvaluator Build()
throw new InvalidOperationException("Memory service is required to build the TestSetEvaluator");
}

this._serviceCollection.AddScoped<TestSetEvaluator>();
this._serviceCollection.AddScoped<TestSetEvaluator>(sp =>
{
return new TestSetEvaluator(
sp.GetKeyedService<Kernel>("evaluation")!,
sp.GetRequiredService<IKernelMemory>());
});

return this._serviceCollection.BuildServiceProvider()
.GetRequiredService<TestSetEvaluator>();
Expand Down
8 changes: 7 additions & 1 deletion applications/evaluation/TestSetGeneratorBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,13 @@ public TestSetGenerator Build()
throw new InvalidOperationException("MemoryDb service is required to build the TestSetGenerator");
}

this._serviceCollection.AddScoped<TestSetGenerator>();
this._serviceCollection.AddScoped<TestSetGenerator>(sp =>
{
return new TestSetGenerator(
sp.GetRequiredKeyedService<Kernel>("evaluation"),
sp.GetKeyedService<Kernel>("translation"),
sp.GetRequiredService<IMemoryDb>());
});

return this._serviceCollection.BuildServiceProvider()
.GetRequiredService<TestSetGenerator>();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<!-- Functional test project for the Evaluation application (referenced below via ProjectReference). -->
<!-- NOTE(review): the Web SDK is used even though this is a test project; presumably for its implicit usings, since Startup.cs uses IHostBuilder and ConfigurationBuilder without using directives. Confirm before changing the SDK. -->
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<AssemblyName>Evaluation.FunctionalTests</AssemblyName>
<RootNamespace>Evaluation.FunctionalTests</RootNamespace>
<TargetFramework>net8.0</TargetFramework>
<RollForward>LatestMajor</RollForward>
<IsTestProject>true</IsTestProject>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>

<!-- Test framework, runner and coverage packages. -->
<!-- No Version attributes are given here; presumably versions are managed centrally (the solution lists a Directory.Packages.props file). Confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.abstractions" />
<PackageReference Include="Xunit.DependencyInjection" />
<PackageReference Include="xunit.runner.visualstudio">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="coverlet.collector">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>

<!-- The project under test. -->
<ItemGroup>
<ProjectReference Include="..\..\evaluation\Evaluation.csproj" />
</ItemGroup>

<!-- Adds a global using for the Xunit namespace, so test files can use [Fact] and Assert without a using directive. -->
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>

<!-- Keeps launchSettings.json out of single-file bundles and publish output. -->
<ItemGroup>
<None Update="Properties\launchSettings.json">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToPublishDirectory>Never</CopyToPublishDirectory>
</None>
</ItemGroup>

</Project>
21 changes: 21 additions & 0 deletions applications/tests/Evaluation.Tests/Startup.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (c) Microsoft. All rights reserved.

/* IMPORTANT: the Startup class must be at the root of the namespace and
* the namespace must match exactly (required by Xunit.DependencyInjection) */

namespace Evaluation.FunctionalTests;

/// <summary>
/// Host bootstrap for the functional tests: supplies the configuration
/// that the test host exposes to the test classes.
/// </summary>
public class Startup
{
    /// <summary>
    /// Builds a configuration from JSON files, user secrets and environment
    /// variables, then adds it to the host configuration.
    /// </summary>
    /// <param name="hostBuilder">Host builder that receives the configuration.</param>
    public void ConfigureHost(IHostBuilder hostBuilder)
    {
        // Sources are registered in this order: appsettings.json,
        // the optional appsettings.development.json, user secrets, environment variables.
        var configuration = new ConfigurationBuilder()
            .AddJsonFile("appsettings.json")
            .AddJsonFile("appsettings.development.json", optional: true)
            .AddUserSecrets<Startup>()
            .AddEnvironmentVariables()
            .Build();

        hostBuilder.ConfigureHostConfiguration(hostConfig =>
        {
            hostConfig.AddConfiguration(configuration);
        });
    }
}
96 changes: 96 additions & 0 deletions applications/tests/Evaluation.Tests/TestsetGenerationTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Evaluation;

Check failure on line 5 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'Evaluation' does not exist in the namespace 'Microsoft.KernelMemory' (are you missing an assembly reference?)

Check failure on line 5 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'Evaluation' does not exist in the namespace 'Microsoft.KernelMemory' (are you missing an assembly reference?)
using Microsoft.KernelMemory.Evaluation.TestSet;

Check failure on line 6 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'Evaluation' does not exist in the namespace 'Microsoft.KernelMemory' (are you missing an assembly reference?)

Check failure on line 6 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'Evaluation' does not exist in the namespace 'Microsoft.KernelMemory' (are you missing an assembly reference?)
using Microsoft.SemanticKernel;
using Xunit.Abstractions;

namespace Microsoft.SQLServer.FunctionalTests;

public class TestsetGenerationTests
{
private readonly IKernelMemory _memory;
private readonly TestSetGenerator _testSetGenerator;

Check failure on line 15 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'TestSetGenerator' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 15 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'TestSetGenerator' could not be found (are you missing a using directive or an assembly reference?)
private readonly TestSetEvaluator _testSetEvaluator;

Check failure on line 16 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'TestSetEvaluator' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 16 in applications/tests/Evaluation.Tests/TestsetGenerationTests.cs

View workflow job for this annotation

GitHub Actions / Build (8.0.x, ubuntu-latest, Release)

The type or namespace name 'TestSetEvaluator' could not be found (are you missing a using directive or an assembly reference?)

private readonly Kernel _kernel;

/// <summary>
/// Creates the kernel memory instance, the semantic kernel, the test set generator
/// and the test set evaluator used by the tests, using the Azure OpenAI settings
/// read from <paramref name="cfg"/>.
/// </summary>
/// <param name="cfg">Configuration holding the "KernelMemory:Services:*" sections.</param>
/// <param name="output">xUnit output helper (not used by this constructor).</param>
public TestsetGenerationTests(IConfiguration cfg, ITestOutputHelper output)
{
    var textConfig = new AzureOpenAIConfig();
    var embeddingConfig = new AzureOpenAIConfig();

    // Fill both config objects from their configuration sections.
    cfg
        .BindSection("KernelMemory:Services:AzureOpenAIText", textConfig)
        .BindSection("KernelMemory:Services:AzureOpenAIEmbedding", embeddingConfig);

    var memoryBuilder = new KernelMemoryBuilder()
        .With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
        .WithAzureOpenAITextGeneration(textConfig, new DefaultGPTTokenizer())
        .WithAzureOpenAITextEmbeddingGeneration(embeddingConfig, new DefaultGPTTokenizer());

    // The kernel is configured with the same Azure OpenAI deployments as the memory.
    var kernel = Kernel
        .CreateBuilder()
        .AddAzureOpenAITextEmbeddingGeneration(
            deploymentName: embeddingConfig.Deployment,
            endpoint: embeddingConfig.Endpoint,
            apiKey: embeddingConfig.APIKey)
        .AddAzureOpenAIChatCompletion(
            deploymentName: textConfig.Deployment,
            endpoint: textConfig.Endpoint,
            apiKey: textConfig.APIKey)
        .Build();
    this._kernel = kernel;

    // The generator is created from the memory builder's service collection
    // before the memory itself is built; keep this order.
    this._testSetGenerator = new TestSetGeneratorBuilder(memoryBuilder.Services)
        .AddEvaluatorKernel(kernel)
        .Build();

    this._memory = memoryBuilder.Build();

    this._testSetEvaluator = new TestSetEvaluatorBuilder()
        .AddEvaluatorKernel(kernel)
        .WithMemory(this._memory)
        .Build();
}

/// <summary>
/// Imports a sample document, asks the generator for one test set on the
/// "default4tests" index, and checks that exactly one is returned.
/// </summary>
[Fact]
[Trait("Category", "Evaluation")]
public async Task ItGenerateTestSetAsync()
{
    // Arrange
    await this._memory
        .ImportDocumentAsync(
            "file1-NASA-news.pdf",
            documentId: "file1-NASA-news",
            steps: Constants.PipelineWithoutSummary);

    // Act
    var testSets = await this._testSetGenerator
        .GenerateTestSetsAsync("default4tests", retryCount: 5, count: 1)
        .ToArrayAsync();

    // Assert: Assert.Single fails on both an empty collection and on more than
    // one element, so it replaces the NotEmpty + Equal(1, Length) pair.
    Assert.Single(testSets);
}

/// <summary>
/// Imports a sample document, evaluates a single question/ground-truth pair
/// against the "default4tests" index, and checks that a result is produced.
/// </summary>
[Fact]
[Trait("Category", "Evaluation")]
public async Task ItEvaluateTestSetAsync()
{
    await this._memory.ImportDocumentAsync(
        "file1-NASA-news.pdf",
        documentId: "file1-NASA-news",
        steps: Constants.PipelineWithoutSummary);

    // The single item submitted for evaluation.
    var item = new TestSetItem
    {
        Question = "What is the role of the Department of Defense in the recovery operations for the Artemis II mission?",
        GroundTruth = "The Department of Defense personnel are involved in practicing recovery operations for the Artemis II mission. They use a crew module test article to help verify the recovery team's readiness to recover the Artemis II crew and the Orion spacecraft.",
    };

    var evaluation = await this._testSetEvaluator
        .EvaluateTestSetAsync("default4tests", new[] { item })
        .ToArrayAsync();

    Assert.NotEmpty(evaluation);
}
}
Loading

0 comments on commit d17f431

Please sign in to comment.