Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -48,7 +48,7 @@ def chat(message: str, history: list, stm_state: torch.Tensor, llm_history: list
|
|
| 48 |
|
| 49 |
with torch.amp.autocast(device_type=device.type, dtype=torch.bfloat16):
|
| 50 |
for token_id in llm_model.generate(**llm_chat_history, max_seq_len=llm_seq_len, temperature=temperature, top_p=top_p):
|
| 51 |
-
llm_response +=
|
| 52 |
yield history + [[message, response]], stm_state, llm_history + [[message, llm_response]]
|
| 53 |
|
| 54 |
return history + [[message, response]], model.export_stm_state().cpu(), llm_history + [[message, llm_response]]
|
|
|
|
| 48 |
|
| 49 |
with torch.amp.autocast(device_type=device.type, dtype=torch.bfloat16):
|
| 50 |
for token_id in llm_model.generate(**llm_chat_history, max_seq_len=llm_seq_len, temperature=temperature, top_p=top_p):
|
| 51 |
+
llm_response += llm_model.stringify_token(llm_tokenizer, token_id)
|
| 52 |
yield history + [[message, response]], stm_state, llm_history + [[message, llm_response]]
|
| 53 |
|
| 54 |
return history + [[message, response]], model.export_stm_state().cpu(), llm_history + [[message, llm_response]]
|