语音实时交互数字人,采用ollama的本地Qwen3:4B做LLM的代码
发布日期:2025/5/7 7:40:59
语音实时交互数字人,采用ollama的本地Qwen3:4B做LLM代码参考,以及设置人设Prompt
import time
import os
import requests
import json
from basereal import BaseReal
from logger import logger
def llm_response(message, nerfreal: BaseReal):
    """Stream an answer for *message* from a local Ollama server to the avatar.

    Sends the prompt to Ollama's ``/api/generate`` endpoint with streaming
    enabled, splits the incoming tokens on sentence-ending punctuation, and
    pushes each buffered chunk (>10 chars) to the digital human via
    ``nerfreal.put_msg_txt`` so speech synthesis can start before the full
    reply is complete.

    Args:
        message: The user's question (plain text).
        nerfreal: Digital-human session object; receives text chunks
            through its ``put_msg_txt`` method.

    Raises:
        requests.HTTPError: If the Ollama server returns an error status.
    """
    start = time.perf_counter()
    # Build a single prompt string that prepends a fixed system instruction
    # to the user's message (/api/generate takes one prompt, not a chat list).
    full_prompt = f"""你是一个乐于助人的助手。请用中文回答用户的问题。
用户: {message}
助手:"""
    request_data = {
        # FIX: the article targets Qwen3:4B, but "qwen:4b" is the Qwen 1
        # tag on Ollama — use the matching Qwen3 model tag.
        "model": "qwen3:4b",
        "prompt": full_prompt,
        "stream": True,  # stream partial tokens so speech can start early
        "options": {
            "temperature": 0.7,
            "top_p": 0.9,
        },
    }
    end = time.perf_counter()
    logger.info(f"llm Time init: {end-start}s")

    # Punctuation that ends a clause/sentence (ASCII + full-width CJK forms).
    sentence_ends = ",.!;:,。!?:;"

    result = ""
    first = True
    # FIX: use a context manager so the streaming HTTP connection is always
    # released, and add a timeout so a dead server cannot hang us forever.
    with requests.post(
        "http://localhost:11434/api/generate",
        json=request_data,
        stream=True,
        timeout=(5, 120),  # (connect, read) seconds
    ) as response:
        # FIX: surface HTTP errors instead of trying to parse an error body
        # as model output.
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:  # keep-alive / blank lines between JSON objects
                continue
            decoded_line = line.decode("utf-8")
            try:
                chunk = json.loads(decoded_line)
            except json.JSONDecodeError:
                logger.error(f"Failed to parse JSON: {decoded_line}")
                continue
            if "response" not in chunk:
                continue
            msg = chunk["response"]
            if first:
                end = time.perf_counter()
                logger.info(f"llm Time to first chunk: {end-start}s")
                first = False
            # Accumulate text and flush to the avatar at sentence boundaries
            # once more than 10 characters have been buffered.
            lastpos = 0
            for i, char in enumerate(msg):
                if char in sentence_ends:
                    result += msg[lastpos:i + 1]
                    lastpos = i + 1
                    if len(result) > 10:
                        logger.info(result)
                        nerfreal.put_msg_txt(result)
                        result = ""
            result += msg[lastpos:]
    end = time.perf_counter()
    logger.info(f"llm Time to last chunk: {end-start}s")
    if result:  # flush any trailing text without a closing punctuation mark
        nerfreal.put_msg_txt(result)

马上咨询: 如果您有业务方面的问题或者需求,欢迎您咨询!我们带来的不仅仅是技术,还有行业经验积累。
QQ: 39764417/308460098 Phone: 13980019844 / 13568879550 联系人:石先生/雷先生