prompt = "用户输入"
'''========== 单轮对话 =========='''
# Single-turn chat: start from an empty history.
response, history = model.chat(tokenizer, prompt, history=[])
'''========== 多轮对话不限制显存 =========='''
# Multi-turn chat with NO memory cap: pass the full accumulated history,
# so GPU memory grows with every turn.
response, history = model.chat(tokenizer, prompt, history=history)
'''========== 多轮对话限制显存 =========='''
# Multi-turn chat WITH a memory cap: evict the oldest turns and keep only the
# most recent three (question, answer) pairs — not counting the current turn —
# so after this call len(history) == 4.
response, history = model.chat(tokenizer, prompt, history if len(history) <= 3 else history[-3:])
# Keep the first turn plus the most recent two turns (not counting the
# current turn); after the call len(history) == 4.
first_ans = ('', '')  # placeholder for the first (question, answer) pair


def chat_behind(tokenizer, prompt, history=None):
    """Chat while capping the model context to first turn + last two turns.

    The full conversation exceeds GPU memory over time, so when the history
    grows past three turns we send the model only ``[first_ans] + history[-2:]``
    (the opening turn for long-range context, plus the two most recent turns).

    Args:
        tokenizer: tokenizer matching ``model``.
        prompt: the user's current input string.
        history: list of (question, answer) tuples from previous turns,
            or None for a fresh conversation.

    Returns:
        (response, history): the model's reply and the updated full history
        as returned by ``model.chat``.
    """
    global first_ans
    if history is None:
        history = []
    # Fix: if a long history is supplied before first_ans was ever captured,
    # record its first turn now — otherwise the ('', '') placeholder would be
    # injected into the model context.
    if history and first_ans == ('', ''):
        first_ans = history[0]
    trimmed = history if len(history) <= 3 else [first_ans] + history[-2:]
    response, history = model.chat(tokenizer, prompt, trimmed)
    if len(history) == 1:
        # First turn of the conversation: remember it so it survives
        # later truncation.
        first_ans = history[0]
    return response, history


response, history = chat_behind(tokenizer, prompt, history)
# chatglm: multi-turn chat with limited GPU memory
# Reposted from blog.csdn.net/qq_42363032/article/details/130824002
# (blog page navigation text removed: 猜你喜欢 / 今日推荐 / 周排行)