Keep model loaded in memory, saves 5 seconds

2025-01-20 18:24:59 +00:00 · 2025-01-20 18:24:59 +00:00 · dcd8af2895
commit dcd8af2895
parent 587bc67416
1 changed file with 1 addition and 1 deletion
--- a/main.py
+++ b/main.py
@@ -23,7 +23,7 @@ def controller_message(message):
        return False
 def llama(prompt):
-    data = dict(model='llama3.1', prompt=prompt, stream=False)
+    data = dict(model='llama3.1', prompt=prompt, stream=False, keep_alive=-1)
    try:
        r = requests.post(LLAMA_URL, json=data, timeout=20)
        r.raise_for_status()