From cfcfd3a2ff8673177cac2489b9d83fbcf2388a81 Mon Sep 17 00:00:00 2001
From: Utkarsh <49331882+uppalutkarsh@users.noreply.github.com>
Date: Tue, 9 Jul 2024 20:00:50 +0530
Subject: [PATCH 1/2] Add support for custom tokenizer

---
 examples/apps/fastapi_server.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/apps/fastapi_server.py b/examples/apps/fastapi_server.py
index 972cb90d6..777e27a1e 100755
--- a/examples/apps/fastapi_server.py
+++ b/examples/apps/fastapi_server.py
@@ -79,11 +79,13 @@ async def __call__(self, host, port):
 
 @click.command()
 @click.argument("model_dir")
+@click.argument("tokenizer_path")
 @click.option("--host", type=str, default=None)
 @click.option("--port", type=int, default=8000)
 @click.option("--max_beam_width", type=int, default=1)
 @click.option("--tp_size", type=int, default=1)
 def entrypoint(model_dir: str,
+               tokenizer_path: str,
                host: Optional[str] = None,
                port: int = 8000,
                max_beam_width: int = 1,
@@ -95,6 +97,7 @@ def entrypoint(model_dir: str,
 
     build_config = BuildConfig(max_batch_size=10, max_beam_width=max_beam_width)
     llm = LLM(model_dir,
+              tokenizer_path,
               tensor_parallel_size=tp_size,
               build_config=build_config)
 

From 6e6a1fc5b76db0873ece474a25f7ac74ae546256 Mon Sep 17 00:00:00 2001
From: Utkarsh <49331882+uppalutkarsh@users.noreply.github.com>
Date: Tue, 9 Jul 2024 20:22:18 +0530
Subject: [PATCH 2/2] Add support for custom tokenizer and max_batch_size

---
 examples/apps/fastapi_server.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/apps/fastapi_server.py b/examples/apps/fastapi_server.py
index 777e27a1e..59d0226bf 100755
--- a/examples/apps/fastapi_server.py
+++ b/examples/apps/fastapi_server.py
@@ -84,17 +84,19 @@ async def __call__(self, host, port):
 @click.option("--port", type=int, default=8000)
 @click.option("--max_beam_width", type=int, default=1)
 @click.option("--tp_size", type=int, default=1)
+@click.option("--max_batch_size", type=int, default=10)
 def entrypoint(model_dir: str,
                tokenizer_path: str,
                host: Optional[str] = None,
                port: int = 8000,
                max_beam_width: int = 1,
-               tp_size: int = 1):
+               tp_size: int = 1,
+               max_batch_size: int = 10):
     host = host or "0.0.0.0"
     port = port or 8000
 
     logging.info(f"Starting server at {host}:{port}")
 
-    build_config = BuildConfig(max_batch_size=10, max_beam_width=max_beam_width)
+    build_config = BuildConfig(max_batch_size=max_batch_size, max_beam_width=max_beam_width)
     llm = LLM(model_dir,
               tokenizer_path,
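
Note: after both patches, the server takes the tokenizer path as a second required
positional argument and exposes the batch size as a flag. A hypothetical invocation
(both directory paths below are placeholders, not from the patch) might look like:

    python3 examples/apps/fastapi_server.py /path/to/engine_dir /path/to/tokenizer_dir \
        --host 0.0.0.0 --port 8000 --tp_size 1 --max_batch_size 16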