From e30448122618dc8cbcd2fd5cd7588844d4419a9e Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 10 May 2024 10:07:34 +0200 Subject: [PATCH 1/2] KernelMemory bug fix While using WithLLamaSharpDefaults it was not possible to dispose GPU memory because the embedding was not defined well. This PR fixes that issue and also fixes some small problems with not setting all important model parameters. --- LLama.KernelMemory/BuilderExtensions.cs | 5 ++--- .../LLamaSharpTextEmbeddingGenerator.cs | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/LLama.KernelMemory/BuilderExtensions.cs b/LLama.KernelMemory/BuilderExtensions.cs index 07770244b..32fcf1bc6 100644 --- a/LLama.KernelMemory/BuilderExtensions.cs +++ b/LLama.KernelMemory/BuilderExtensions.cs @@ -1,4 +1,4 @@ -using Microsoft.KernelMemory; +using Microsoft.KernelMemory; using System; using System.Collections.Generic; using System.Linq; @@ -96,8 +96,7 @@ public static IKernelMemoryBuilder WithLLamaSharpDefaults(this IKernelMemoryBuil } var executor = new StatelessExecutor(weights, parameters); - var embedder = new LLamaEmbedder(weights, parameters); - builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder)); + builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(config, weights)); builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams)); return builder; } diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index b72f49a0e..53e851a13 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -1,5 +1,6 @@ -using LLama; +using LLama; using LLama.Common; +using LLama.Native; using Microsoft.KernelMemory; using Microsoft.KernelMemory.AI; @@ -29,9 +30,12 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) this._config = config; var @params = new ModelParams(_config.ModelPath) { + ContextSize = config?.ContextSize ?? 2048, + Seed = config?.Seed ?? 0, + GpuLayerCount = config?.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = _config.MainGpu, - SplitMode = _config.SplitMode + MainGpu = _config?.MainGpu ?? 0, + SplitMode = _config?.SplitMode ?? GPUSplitMode.None }; _weights = LLamaWeights.LoadFromFile(@params); _embedder = new LLamaEmbedder(_weights, @params); @@ -49,9 +53,12 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we this._config = config; var @params = new ModelParams(_config.ModelPath) { + ContextSize = config?.ContextSize ?? 2048, + Seed = config?.Seed ?? 0, + GpuLayerCount = config?.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = _config.MainGpu, - SplitMode = _config.SplitMode + MainGpu = _config?.MainGpu ?? 0, + SplitMode = _config?.SplitMode ?? GPUSplitMode.None }; _weights = weights; _embedder = new LLamaEmbedder(_weights, @params); From 7b309d7bf63fa0e124d8914d1410cfd12973753e Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 10 May 2024 17:07:53 +0200 Subject: [PATCH 2/2] KernelMemory bug fix - cleanup nullable refs --- LLama.KernelMemory/BuilderExtensions.cs | 12 +++++------ .../LLamaSharpTextEmbeddingGenerator.cs | 20 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/LLama.KernelMemory/BuilderExtensions.cs b/LLama.KernelMemory/BuilderExtensions.cs index 32fcf1bc6..6b7412f43 100644 --- a/LLama.KernelMemory/BuilderExtensions.cs +++ b/LLama.KernelMemory/BuilderExtensions.cs @@ -81,15 +81,15 @@ public static IKernelMemoryBuilder WithLLamaSharpDefaults(this IKernelMemoryBuil { var parameters = new ModelParams(config.ModelPath) { - ContextSize = config?.ContextSize ?? 2048, - Seed = config?.Seed ?? 0, - GpuLayerCount = config?.GpuLayerCount ?? 20, + ContextSize = config.ContextSize ?? 2048, + Seed = config.Seed ?? 0, + GpuLayerCount = config.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = config?.MainGpu ?? 0, - SplitMode = config?.SplitMode ?? GPUSplitMode.None, + MainGpu = config.MainGpu, + SplitMode = config.SplitMode }; - if (weights == null) + if (weights == null || context == null) { weights = LLamaWeights.LoadFromFile(parameters); context = weights.CreateContext(parameters); diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index 53e851a13..42ba6dbc5 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -30,12 +30,12 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) this._config = config; var @params = new ModelParams(_config.ModelPath) { - ContextSize = config?.ContextSize ?? 2048, - Seed = config?.Seed ?? 0, - GpuLayerCount = config?.GpuLayerCount ?? 20, + ContextSize = config.ContextSize ?? 2048, + Seed = config.Seed ?? 0, + GpuLayerCount = config.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = _config?.MainGpu ?? 0, - SplitMode = _config?.SplitMode ?? GPUSplitMode.None + MainGpu = _config.MainGpu, + SplitMode = _config.SplitMode }; _weights = LLamaWeights.LoadFromFile(@params); _embedder = new LLamaEmbedder(_weights, @params); @@ -53,12 +53,12 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we this._config = config; var @params = new ModelParams(_config.ModelPath) { - ContextSize = config?.ContextSize ?? 2048, - Seed = config?.Seed ?? 0, - GpuLayerCount = config?.GpuLayerCount ?? 20, + ContextSize = config.ContextSize ?? 2048, + Seed = config.Seed ?? 0, + GpuLayerCount = config.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = _config?.MainGpu ?? 0, - SplitMode = _config?.SplitMode ?? GPUSplitMode.None + MainGpu = _config.MainGpu, + SplitMode = _config.SplitMode }; _weights = weights; _embedder = new LLamaEmbedder(_weights, @params);