diff --git a/models/llama-3.1-70b-instruct.yaml b/models/llama-3.1-70b-instruct.yaml
index 50db3322..b5d945da 100644
--- a/models/llama-3.1-70b-instruct.yaml
+++ b/models/llama-3.1-70b-instruct.yaml
@@ -4,8 +4,8 @@ debug: true
 runtime: cuda
 models:
   - name: llama-3.1-70b-instruct
-    source: https://huggingface.co/lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
-    sha256: "34d1d88b70a67dc19088ca84e226673962766607791882a7a85959b92857bcb3"
+    source: https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+    sha256: "3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab"
     promptTemplates:
       - name: chatMsg
         template: |
@@ -63,7 +63,7 @@ config: |
       response_regex:
       - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
     parameters:
-      model: Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
+      model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
       context_size: 8192
       f16: true
     template:
@@ -74,5 +74,5 @@ config: |
     stopwords:
    - <|im_end|>
    - <dummy32000>
-    - "<|eot_id|>"
+    - <|eot_id|>
     - <|end_of_text|>
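The change swaps the GGUF source repo and therefore re-pins `sha256`, and updates the filename everywhere it appears (the new quant uses a `.` before `Q4_K_M` instead of a `-`). As a minimal sketch of how the pinned checksum can be verified against a locally downloaded file — the file name and expected hash are copied from the diff above; the streaming-read helper itself is assumed, not part of this change:

```python
import hashlib

# Values taken from the updated YAML above.
EXPECTED_SHA256 = "3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab"
PATH = "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf"

h = hashlib.sha256()
with open(PATH, "rb") as f:
    # Stream in 1 MiB chunks: a 70B Q4_K_M quant is tens of gigabytes,
    # far too large to read into memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

digest = h.hexdigest()
print("OK" if digest == EXPECTED_SHA256 else f"MISMATCH: {digest}")
```

A mismatch here would mean the downloaded artifact does not match the hash pinned in the config, in which case the runtime should refuse to load it.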