Keep model loaded in memory, saving 5 seconds
This commit is contained in:
parent
587bc67416
commit
dcd8af2895
2
main.py
2
main.py
|
@@ -23,7 +23,7 @@ def controller_message(message):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def llama(prompt):
|
def llama(prompt):
|
||||||
data = dict(model='llama3.1', prompt=prompt, stream=False)
|
data = dict(model='llama3.1', prompt=prompt, stream=False, keep_alive=-1)
|
||||||
try:
|
try:
|
||||||
r = requests.post(LLAMA_URL, json=data, timeout=20)
|
r = requests.post(LLAMA_URL, json=data, timeout=20)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user