-
Notifications
You must be signed in to change notification settings - Fork 299
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add functional tests for the evaluation
- Loading branch information
1 parent
837a6aa
commit 2133866
Showing
7 changed files
with
319 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
applications/tests/Evaluation.Tests/Evaluation.FunctionalTests.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
<Project Sdk="Microsoft.NET.Sdk.Web">

    <!-- Identity, target runtime and language settings of the functional test assembly. -->
    <PropertyGroup>
        <TargetFramework>net8.0</TargetFramework>
        <RollForward>LatestMajor</RollForward>
        <AssemblyName>Evaluation.FunctionalTests</AssemblyName>
        <RootNamespace>Evaluation.FunctionalTests</RootNamespace>
        <Nullable>enable</Nullable>
        <ImplicitUsings>enable</ImplicitUsings>
        <IsTestProject>true</IsTestProject>
        <IsPackable>false</IsPackable>
    </PropertyGroup>

    <!-- Test framework and tooling packages.
         No versions are declared here; presumably they are managed centrally
         (e.g. Directory.Packages.props) - confirm against the repository root. -->
    <ItemGroup>
        <PackageReference Include="Microsoft.NET.Test.Sdk" />
        <PackageReference Include="xunit" />
        <PackageReference Include="xunit.abstractions" />
        <PackageReference Include="Xunit.DependencyInjection" />
        <PackageReference Include="xunit.runner.visualstudio"
                          PrivateAssets="all"
                          IncludeAssets="runtime; build; native; contentfiles; analyzers; buildtransitive" />
        <PackageReference Include="coverlet.collector"
                          PrivateAssets="all"
                          IncludeAssets="runtime; build; native; contentfiles; analyzers; buildtransitive" />
    </ItemGroup>

    <!-- Project under test. -->
    <ItemGroup>
        <ProjectReference Include="..\..\evaluation\Evaluation.csproj" />
    </ItemGroup>

    <!-- Makes the Xunit namespace available in every source file of this project. -->
    <ItemGroup>
        <Using Include="Xunit" />
    </ItemGroup>

    <!-- Launch settings are never published and are excluded from single-file bundles. -->
    <ItemGroup>
        <None Update="Properties\launchSettings.json"
              ExcludeFromSingleFile="true"
              CopyToPublishDirectory="Never" />
    </ItemGroup>

</Project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
/* IMPORTANT: the Startup class must be at the root of the namespace and | ||
* the namespace must match exactly (required by Xunit.DependencyInjection) */ | ||
|
||
namespace Evaluation.FunctionalTests; | ||
|
||
/// <summary>
/// Startup class consumed by Xunit.DependencyInjection; it supplies the
/// configuration used by the test host.
/// </summary>
public class Startup
{
    /// <summary>
    /// Builds the test configuration and registers it as the host configuration.
    /// Sources are added in this order, so later ones override earlier ones:
    /// appsettings.json (required), the optional development settings file,
    /// user secrets, then environment variables.
    /// </summary>
    /// <param name="hostBuilder">Host builder that receives the configuration.</param>
    public void ConfigureHost(IHostBuilder hostBuilder)
    {
        // The development file is probed with both casings: on case-sensitive
        // file systems "appsettings.Development.json" would otherwise be skipped
        // silently because the file is optional. On case-insensitive file systems
        // the same file is simply read twice, which yields the same values.
        //
        // NOTE(review): AddUserSecrets<T>() throws when the assembly has no
        // UserSecretsId; confirm one is defined for this project (e.g. in shared
        // build props).
        var config = new ConfigurationBuilder()
            .AddJsonFile("appsettings.json")
            .AddJsonFile("appsettings.development.json", optional: true)
            .AddJsonFile("appsettings.Development.json", optional: true)
            .AddUserSecrets<Startup>()
            .AddEnvironmentVariables()
            .Build();

        hostBuilder.ConfigureHostConfiguration(builder => builder.AddConfiguration(config));
    }
}
96 changes: 96 additions & 0 deletions
96
applications/tests/Evaluation.Tests/TestsetGenerationTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.KernelMemory; | ||
using Microsoft.KernelMemory.AI.OpenAI; | ||
using Microsoft.KernelMemory.Evaluation; | ||
using Microsoft.KernelMemory.Evaluation.TestSet; | ||
using Microsoft.SemanticKernel; | ||
using Xunit.Abstractions; | ||
|
||
// Matches the assembly's root namespace (Evaluation.FunctionalTests).
namespace Evaluation.FunctionalTests;
|
||
/// <summary>
/// Functional tests for test set generation and test set evaluation.
/// They use the Azure OpenAI text and embedding services configured under
/// "KernelMemory:Services".
/// </summary>
public class TestsetGenerationTests
{
    // Index configured as the memory's default index; documents imported without
    // an explicit index name are expected to land here, and the generator and
    // evaluator read from it.
    private const string IndexName = "default4tests";

    // Document imported before each test, and the id it is stored under.
    private const string DocumentFile = "file1-NASA-news.pdf";
    private const string DocumentId = "file1-NASA-news";

    private readonly IKernelMemory _memory;
    private readonly TestSetGenerator _testSetGenerator;
    private readonly TestSetEvaluator _testSetEvaluator;

    private readonly Kernel _kernel;

    /// <summary>
    /// Wires up the memory instance, the evaluator kernel, and the test set
    /// generator and evaluator from the supplied configuration.
    /// </summary>
    /// <param name="cfg">Configuration holding the Azure OpenAI settings.</param>
    /// <param name="output">xUnit output helper (currently not used by this class).</param>
    public TestsetGenerationTests(IConfiguration cfg, ITestOutputHelper output)
    {
        var azureOpenAITextConfig = new AzureOpenAIConfig();
        var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig();

        cfg
            .BindSection("KernelMemory:Services:AzureOpenAIText", azureOpenAITextConfig)
            .BindSection("KernelMemory:Services:AzureOpenAIEmbedding", azureOpenAIEmbeddingConfig);

        var memoryBuilder = new KernelMemoryBuilder()
            .With(new KernelMemoryConfig { DefaultIndexName = IndexName })
            .WithAzureOpenAITextGeneration(azureOpenAITextConfig, new DefaultGPTTokenizer())
            .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig, new DefaultGPTTokenizer());

        this._kernel = Kernel
            .CreateBuilder()
            .AddAzureOpenAITextEmbeddingGeneration(
                deploymentName: azureOpenAIEmbeddingConfig.Deployment,
                endpoint: azureOpenAIEmbeddingConfig.Endpoint,
                apiKey: azureOpenAIEmbeddingConfig.APIKey)
            .AddAzureOpenAIChatCompletion(
                deploymentName: azureOpenAITextConfig.Deployment,
                endpoint: azureOpenAITextConfig.Endpoint,
                apiKey: azureOpenAITextConfig.APIKey)
            .Build();

        // The generator is created from the builder's service collection before
        // the memory instance is built (same order as the original code).
        this._testSetGenerator = new TestSetGeneratorBuilder(memoryBuilder.Services)
            .AddEvaluatorKernel(this._kernel)
            .Build();

        this._memory = memoryBuilder.Build();

        this._testSetEvaluator = new TestSetEvaluatorBuilder()
            .AddEvaluatorKernel(this._kernel)
            .WithMemory(this._memory)
            .Build();
    }

    /// <summary>
    /// Imports the sample document and checks that requesting a single test set
    /// item yields exactly one.
    /// </summary>
    [Fact]
    [Trait("Category", "Evaluation")]
    public async Task ItGenerateTestSetAsync()
    {
        await this.ImportTestDocumentAsync();

        var testSets = await this._testSetGenerator
            .GenerateTestSetsAsync(IndexName, retryCount: 5, count: 1)
            .ToArrayAsync();

        // Assert.Single fails for both an empty result and more than one item,
        // replacing the NotEmpty + Equal(1, Length) pair (xUnit2013).
        Assert.Single(testSets);
    }

    /// <summary>
    /// Imports the sample document and checks that evaluating one
    /// question/ground-truth pair produces at least one result.
    /// </summary>
    [Fact]
    [Trait("Category", "Evaluation")]
    public async Task ItEvaluateTestSetAsync()
    {
        await this.ImportTestDocumentAsync();

        var evaluation = await this._testSetEvaluator.EvaluateTestSetAsync(IndexName, new[]
        {
            new TestSetItem
            {
                Question = "What is the role of the Department of Defense in the recovery operations for the Artemis II mission?",
                GroundTruth = "The Department of Defense personnel are involved in practicing recovery operations for the Artemis II mission. They use a crew module test article to help verify the recovery team's readiness to recover the Artemis II crew and the Orion spacecraft.",
            }
        }).ToArrayAsync();

        Assert.NotEmpty(evaluation);
    }

    /// <summary>
    /// Imports the sample document shared by the tests, using the pipeline
    /// steps in <c>Constants.PipelineWithoutSummary</c>.
    /// </summary>
    private async Task ImportTestDocumentAsync()
    {
        await this._memory
            .ImportDocumentAsync(
                DocumentFile,
                documentId: DocumentId,
                steps: Constants.PipelineWithoutSummary);
    }
}
Oops, something went wrong.