feature: local llm support #16

Merged · 20 commits · Oct 9, 2023

Commits
f75a11e
refactor: creating an LLM module in libmemex and moving embedding mod…
a5huynh Sep 20, 2023
86b9a7c
adding a basic run prompt function for llm testing inside memex
a5huynh Sep 21, 2023
14baa52
wip: sherpa - guiding llms using logit biasing, templates, etc.
a5huynh Sep 21, 2023
ce15cd4
wip: tweaking sampler to output only what we want
a5huynh Sep 22, 2023
dc05694
Merge branch 'main' into feature/embedded-llm-support
a5huynh Oct 3, 2023
0bfc014
Merge branch 'main' into feature/embedded-llm-support
a5huynh Oct 4, 2023
224538a
updating .env.template file
a5huynh Oct 4, 2023
d6f6d65
removing sherpa stuff for now
a5huynh Oct 5, 2023
066baf3
Creating LLM trait and sharing structs between local LLM & OpenAI impls
a5huynh Oct 5, 2023
33fe800
Using LLM trait in API to switch between local/OpenAI when configured
a5huynh Oct 5, 2023
ec244d9
load llm client based on whether `OPENAI_API_KEY` or `LOCAL_LLM_CONFIG`
a5huynh Oct 6, 2023
3910cf1
update README to point that out
a5huynh Oct 6, 2023
6819059
Create samplers from config and pass into `LocalLLM` struct
a5huynh Oct 6, 2023
b1d4848
add basic support for other architectures, but focus on llama for now
a5huynh Oct 6, 2023
96130d1
add LLM ask example to README
a5huynh Oct 6, 2023
a1e7c6c
output debug messages for local llm responses
a5huynh Oct 9, 2023
e368e87
correctly capture server errors & send back the error messages
a5huynh Oct 9, 2023
3af2396
removing old dep
a5huynh Oct 9, 2023
d07b219
ignore model test
a5huynh Oct 9, 2023
13e2bb0
cargo fmt
a5huynh Oct 9, 2023
10 changes: 9 additions & 1 deletion .env.template
@@ -4,4 +4,12 @@ PORT=8181
# Use postgres for "production"
DATABASE_CONNECTION=sqlite://data/sqlite.db
# Use qdrant/etc. for "production"
VECTOR_CONNECTION=hnsw://data/vdb
VECTOR_CONNECTION=hnsw://data/vdb
# When using OpenSearch as the vector backend
# VECTOR_CONNECTION=opensearch+https://admin:admin@localhost:9200

# If using OpenAI, set up your API key here
OPENAI_API_KEY=
# Or point to a local LLM configuration file. By default, memex will use
# llama2
LOCAL_LLM_CONFIG=resources/config.llama2.toml
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

35 changes: 33 additions & 2 deletions README.md
@@ -23,7 +23,22 @@ since Linux ARM builds are very finicky.
2023-06-13T05:04:21.518732Z INFO memex: starting server with roles: [Api, Worker]
```

## Add a document
## Using an LLM
You can use either OpenAI or a local LLM for LLM-based functionality (such as the
summarization or extraction APIs).

Set `OPENAI_API_KEY` to your API key in the `.env` file, or set `LOCAL_LLM_CONFIG` to
an LLM configuration file. See `resources/config.llama2.toml` for an example. By
default, a base memex will use the llama-2 configuration file.

### Supported local models

The following models are currently supported (and have been tested):
- Llama-based models (Llama 1 & 2, Mistral, etc.) - *recommended*
- GPT-J (e.g. GPT4All)


## Adding a document

NOTE: If the `test` collection does not initially exist, it'll be created.

@@ -80,7 +95,7 @@ Or if it's finished, something like so:
Once the task is shown as "Completed", you can run a query against the doc(s)
you've just added.

## Run a query
## Run a search query

``` bash
> curl http://localhost:8181/api/collections/test/search \
@@ -100,6 +115,22 @@ you've just added.
}
```

## Ask a question
```bash
> curl http://localhost:8181/api/action/ask \
-H "Content-Type: application/json" \
-X POST \
-d "{\"text\": \"<context if any>\", \"query\": \"What is the airspeed velocity of an unladen swallow?\", "json_schema": { .. }}"
{
"time": 1.234,
"status": "ok",
"result": {
"answer": "The airspeed velocity of an unladen swallow is..."
}
}

```

## Env variables

- `HOST`: Defaults to `127.0.0.1`
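
A quick orientation for readers of this diff: the "Using an LLM" section above only documents the two environment variables, while the backend selection itself lands in the `lib/api/src/lib.rs` changes further down. Condensed into a standalone sketch (the surrounding server setup is omitted, and `load_from_cfg`'s exact return type is inferred from the diff), it looks roughly like this:

```rust
use std::sync::Arc;

use libmemex::llm::{local::load_from_cfg, openai::OpenAIClient, LLM};

// Condensed from the lib/api/src/lib.rs changes in this PR; error handling
// and the rest of ApiConfig are omitted for brevity.
async fn pick_llm(
    open_ai_key: Option<String>,
    local_llm_config: Option<String>,
) -> Arc<Box<dyn LLM>> {
    if let Some(key) = open_ai_key {
        // Hosted backend, keyed by OPENAI_API_KEY.
        Arc::new(Box::new(OpenAIClient::new(&key)))
    } else if let Some(path) = local_llm_config {
        // Local backend: the model and sampler settings come from the TOML config.
        let llm = load_from_cfg(path.into(), true)
            .await
            .expect("Unable to load local LLM");
        Arc::new(llm)
    } else {
        panic!("Please set OPENAI_API_KEY or LOCAL_LLM_CONFIG");
    }
}
```
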
19 changes: 18 additions & 1 deletion bin/memex/src/main.rs
@@ -1,3 +1,4 @@
use api::ApiConfig;
use clap::{Parser, Subcommand};
use futures::future::join_all;
use std::{net::Ipv4Addr, process::ExitCode};
@@ -25,6 +26,10 @@ pub struct Args {
database_connection: Option<String>,
#[clap(long, value_parser, value_name = "VECTOR_CONNECTION", env)]
vector_connection: Option<String>,
#[clap(long, value_parser, value_name = "OPENAI_API_KEY", env)]
openai_api_key: Option<String>,
#[clap(long, value_parser, value_name = "LOCAL_LLM_CONFIG", env)]
local_llm_config: Option<String>,
}

#[derive(Debug, Display, Clone, PartialEq, EnumString)]
@@ -100,9 +105,21 @@ async fn main() -> ExitCode {

let _vector_store_uri = args.vector_connection.expect("VECTOR_CONNECTION not set");

if args.openai_api_key.is_none() && args.local_llm_config.is_none() {
log::error!("Must set either OPENAI_API_KEY or LOCAL_LLM_CONFIG");
return ExitCode::FAILURE;
}

if roles.contains(&Roles::Api) {
let db_uri = db_uri.clone();
handles.push(tokio::spawn(api::start(host, port, db_uri)));
let cfg = ApiConfig {
host,
port,
db_uri,
open_ai_key: args.openai_api_key,
local_llm_config: args.local_llm_config,
};
handles.push(tokio::spawn(api::start(cfg)));
}

if roles.contains(&Roles::Worker) {
8 changes: 5 additions & 3 deletions lib/api/src/endpoints/actions/filters.rs
@@ -1,5 +1,7 @@
use std::sync::Arc;

use crate::{endpoints::json_body, with_db, with_llm};
use libmemex::llm::openai::OpenAIClient;
use libmemex::llm::LLM;
use sea_orm::DatabaseConnection;
use serde::{Deserialize, Serialize};
use serde_json::Value;
@@ -24,7 +26,7 @@ pub struct SummarizeRequest {
}

fn extract(
llm: &OpenAIClient,
llm: &Arc<Box<dyn LLM>>,
) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
warp::path!("action" / "ask")
.and(warp::post())
@@ -44,7 +46,7 @@ fn summarize(
}

pub fn build(
llm: &OpenAIClient,
llm: &Arc<Box<dyn LLM>>,
db: &DatabaseConnection,
) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
extract(llm).or(summarize(db))
14 changes: 7 additions & 7 deletions lib/api/src/endpoints/actions/handlers.rs
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::{
schema::{ApiResponse, TaskResult},
ServerError,
@@ -9,19 +11,16 @@ use warp::reject::Rejection;
use super::filters;
use libmemex::{
db::queue,
llm::{
openai::{truncate_text, OpenAIClient},
prompter,
},
llm::{prompter, LLM},
};

pub async fn handle_extract(
llm: OpenAIClient,
llm: Arc<Box<dyn LLM>>,
request: filters::AskRequest,
) -> Result<impl warp::Reply, Rejection> {
let time = std::time::Instant::now();

let (content, model) = truncate_text(&request.text);
let (content, model) = llm.truncate_text(&request.text);

// Build prompt
let prompt = if let Some(schema) = &request.json_schema {
@@ -34,10 +33,11 @@ pub async fn handle_extract(
};

let response = llm
.chat_completion(&model, &prompt)
.chat_completion(model.as_ref(), &prompt)
.await
.map_err(|err| ServerError::Other(err.to_string()))?;

log::debug!("llm response: {response}");
let val = serde_json::from_str::<serde_json::Value>(&response)
.map_err(|err| ServerError::Other(err.to_string()))?;

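
The handlers above call `llm.truncate_text(...)` and `llm.chat_completion(...).await` through the new `Arc<Box<dyn LLM>>` handle, but the trait definition itself (in `libmemex::llm`) is not part of the rendered diff. Reconstructed purely from those call sites, its shape is roughly the sketch below; the exact signatures and the use of `async-trait` are assumptions, not the project's actual code.

```rust
use anyhow::Result;
use async_trait::async_trait;

/// Assumed shape of `libmemex::llm::LLM`, inferred from the call sites in
/// lib/api/src/endpoints/actions/handlers.rs above.
#[async_trait]
pub trait LLM: Send + Sync {
    /// Trim the input to the backing model's context window and report which
    /// model was selected (the handler calls `.as_ref()` on the second
    /// element before passing it on, so a String is assumed here).
    fn truncate_text(&self, text: &str) -> (String, String);

    /// Run a chat-style completion and return the raw response text.
    async fn chat_completion(&self, model: &str, prompt: &str) -> Result<String>;
}
```

Per the commit messages, both `OpenAIClient` and the new local LLM type implement this trait, which is what lets the API pick either backend behind the same `Arc<Box<dyn LLM>>` handle.
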
2 changes: 1 addition & 1 deletion lib/api/src/endpoints/collections/handlers.rs
@@ -4,7 +4,7 @@ use crate::{
};
use libmemex::{
db::{embedding, queue},
embedding::{ModelConfig, SentenceEmbedder},
llm::embedding::{ModelConfig, SentenceEmbedder},
storage::get_vector_storage,
};
use sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter};
6 changes: 4 additions & 2 deletions lib/api/src/endpoints/mod.rs
@@ -1,4 +1,6 @@
use libmemex::llm::openai::OpenAIClient;
use std::sync::Arc;

use libmemex::llm::LLM;
use sea_orm::DatabaseConnection;
use serde::de::DeserializeOwned;
use warp::Filter;
@@ -24,7 +26,7 @@

pub fn build(
db: &DatabaseConnection,
llm: &OpenAIClient,
llm: &Arc<Box<dyn LLM>>,
) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
actions::filters::build(llm, db)
.or(collections::filters::build(db))
48 changes: 37 additions & 11 deletions lib/api/src/lib.rs
@@ -1,8 +1,11 @@
use dotenv_codegen::dotenv;
use libmemex::{db::create_connection_by_uri, llm::openai::OpenAIClient};
use libmemex::{
db::create_connection_by_uri,
llm::{local::load_from_cfg, openai::OpenAIClient, LLM},
};
use sea_orm::DatabaseConnection;
use serde_json::json;
use std::{convert::Infallible, net::Ipv4Addr, path::PathBuf};
use std::{convert::Infallible, net::Ipv4Addr, path::PathBuf, sync::Arc};
use thiserror::Error;
use warp::{hyper::StatusCode, reject::Reject, Filter, Rejection, Reply};

@@ -22,6 +25,14 @@ pub enum ServerError {

impl Reject for ServerError {}

pub struct ApiConfig {
pub host: Ipv4Addr,
pub port: u16,
pub db_uri: String,
pub open_ai_key: Option<String>,
pub local_llm_config: Option<String>,
}

// Handle custom errors/rejections
async fn handle_rejection(err: Rejection) -> Result<impl Reply, Infallible> {
let code;
@@ -35,6 +46,12 @@ async fn handle_rejection(err: Rejection) -> Result<impl Reply, Infallible> {
// and render it however we want
code = StatusCode::METHOD_NOT_ALLOWED;
message = "METHOD_NOT_ALLOWED".into();
} else if let Some(err) = err.find::<ServerError>() {
(code, message) = match err {
ServerError::ClientRequestError(err) => (StatusCode::BAD_REQUEST, err.to_string()),
ServerError::DatabaseError(err) => (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()),
ServerError::Other(err) => (StatusCode::BAD_REQUEST, err.to_string()),
};
} else {
// We should have expected this... Just log and say its a 500
eprintln!("unhandled rejection: {:?}", err);
@@ -59,8 +76,8 @@ pub fn health_check() -> impl Filter<Extract = (impl warp::Reply,), Error = warp
.map(move || warp::reply::json(&json!({ "version": version })))
}

pub async fn start(host: Ipv4Addr, port: u16, db_uri: String) {
log::info!("starting api server @ {}:{}", host, port);
pub async fn start(config: ApiConfig) {
log::info!("starting api server @ {}:{}", config.host, config.port);

log::info!("checking for upload directory...");
let data_dir_path: PathBuf = endpoints::UPLOAD_DATA_DIR.into();
@@ -70,12 +87,21 @@ pub async fn start(host: Ipv4Addr, port: u16, db_uri: String) {
}

// Attempt to connect to db
let db_connection = create_connection_by_uri(&db_uri, true)
let db_connection = create_connection_by_uri(&config.db_uri, true)
.await
.unwrap_or_else(|err| panic!("Unable to connect to database: {} - {err}", db_uri));
.unwrap_or_else(|err| panic!("Unable to connect to database: {} - {err}", config.db_uri));

let llm_client: Arc<Box<dyn LLM>> = if let Some(openai_key) = config.open_ai_key {
Arc::new(Box::new(OpenAIClient::new(&openai_key)))
} else if let Some(llm_config_path) = config.local_llm_config {
let llm = load_from_cfg(llm_config_path.into(), true)
.await
.expect("Unable to load local LLM");
Arc::new(llm)
} else {
panic!("Please setup OPENAI_API_KEY or LOCAL_LLM_CONFIG");
};

let llm_client =
OpenAIClient::new(&std::env::var("OPENAI_API_KEY").expect("OpenAI API key not set"));
let cors = warp::cors()
.allow_any_origin()
.allow_methods(vec!["GET", "POST", "PUT", "PATCH", "DELETE"])
@@ -88,7 +114,7 @@ pub async fn start(host: Ipv4Addr, port: u16, db_uri: String) {
let filters = health_check().or(api).with(cors).recover(handle_rejection);

let (_addr, handle) =
warp::serve(filters).bind_with_graceful_shutdown((host, port), async move {
warp::serve(filters).bind_with_graceful_shutdown((config.host, config.port), async move {
tokio::signal::ctrl_c()
.await
.expect("failed to listen to shutdown signal");
@@ -105,7 +131,7 @@ pub fn with_db(
}

pub fn with_llm(
llm: OpenAIClient,
) -> impl Filter<Extract = (OpenAIClient,), Error = std::convert::Infallible> + Clone {
llm: Arc<Box<dyn LLM>>,
) -> impl Filter<Extract = (Arc<Box<dyn LLM>>,), Error = std::convert::Infallible> + Clone {
warp::any().map(move || llm.clone())
}
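
As a usage note, the `with_llm` helper added above follows warp's standard pattern of cloning shared state into each request. The sketch below shows how such a filter typically feeds a handler; it is illustrative wiring only (the PR's real routes live in `lib/api/src/endpoints/` and are partially collapsed in this diff), and `handle_answer` plus the single-method stand-in trait are hypothetical.

```rust
use std::{convert::Infallible, sync::Arc};
use warp::Filter;

// Stand-in for libmemex::llm::LLM so this sketch compiles on its own.
#[async_trait::async_trait]
pub trait LLM: Send + Sync {
    async fn chat_completion(&self, model: &str, prompt: &str) -> anyhow::Result<String>;
}

// Same shape as the `with_llm` added in this file: clone the Arc per request
// while sharing the boxed trait object underneath.
fn with_llm(
    llm: Arc<Box<dyn LLM>>,
) -> impl Filter<Extract = (Arc<Box<dyn LLM>>,), Error = Infallible> + Clone {
    warp::any().map(move || llm.clone())
}

// Hypothetical handler; the real ones are in endpoints/actions/handlers.rs.
async fn handle_answer(
    llm: Arc<Box<dyn LLM>>,
    prompt: String,
) -> Result<impl warp::Reply, warp::Rejection> {
    match llm.chat_completion("llama-2", &prompt).await {
        Ok(answer) => Ok(warp::reply::json(&serde_json::json!({ "answer": answer }))),
        // The real handlers map errors into ServerError; a bare rejection keeps this short.
        Err(_) => Err(warp::reject::reject()),
    }
}

fn ask_route(
    llm: Arc<Box<dyn LLM>>,
) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
    warp::path!("action" / "ask")
        .and(warp::post())
        .and(with_llm(llm))
        .and(warp::body::json::<String>())
        .and_then(handle_answer)
}
```
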
6 changes: 5 additions & 1 deletion lib/libmemex/Cargo.toml
@@ -12,20 +12,24 @@ chrono = { workspace = true }
dotenv = { workspace = true }
handlebars = "4.4.0"
hnsw_rs = { git = "https:/jean-pierreBoth/hnswlib-rs", rev = "52a7f9174e002820d168fa65ca7303364ee3ac33" }
llm = { git = "https:/rustformers/llm.git", rev = "84800b02a7a96f62c0c9c03a38c36cb23bf4b2ec" }
log = { workspace = true }
migration ={ path = "../../migration" }
opensearch = "2.1.0"
qdrant-client = "1.2.0"
reqwest = { version = "0.11.16", features = ["stream" ] }
rand = "0.8.5"
rust-bert = { version = "0.21.0", features= ["download-libtorch"] }
sea-orm = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
strum = "0.25"
strum_macros = "0.25"
tera = "1.19.0"
thiserror = "1.0"
tiktoken-rs = "0.5.4"
tokenizers = { version = "0.14", features = ["http"] }
tokio = { workspace = true }
toml = "0.7.4"
url = "2.4.0"
uuid = { workspace = true }
uuid = { workspace = true }
1 change: 0 additions & 1 deletion lib/libmemex/src/lib.rs
@@ -1,5 +1,4 @@
pub mod db;
pub mod embedding;
pub mod llm;
pub mod storage;

File renamed without changes.