Add LLM NPC chat memory; fix thinking timeout issue
This commit is contained in:
parent
20273ec761
commit
046fcc6356
2 changed files with 80 additions and 22 deletions
|
|
@ -136,6 +136,9 @@ class LLMClient:
|
|||
"""Call the LLM server and handle the response/failure"""
|
||||
request_body = self._format_request_body(prompt)
|
||||
|
||||
if settings.DEBUG:
|
||||
logger.log_info(f"LLM request body: {request_body}")
|
||||
|
||||
d = self.agent.request(
|
||||
b"POST",
|
||||
bytes(self.hostname + self.pathname, "utf-8"),
|
||||
|
|
@ -164,6 +167,8 @@ class LLMClient:
|
|||
"""
|
||||
status_code, response = yield self._get_response_from_llm_server(prompt)
|
||||
if status_code == 200:
|
||||
if settings.DEBUG:
|
||||
logger.log_info(f"LLM response: {response}")
|
||||
return json.loads(response)["results"][0]["text"]
|
||||
else:
|
||||
logger.log_err(f"LLM API error (status {status_code}): {response}")
|
||||
|
|
|
|||
|
|
@ -10,13 +10,14 @@ echo a 'thinking...' message if the LLM server takes too long to respond.
|
|||
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from random import choice
|
||||
|
||||
from django.conf import settings
|
||||
from evennia import Command, DefaultCharacter
|
||||
from evennia import AttributeProperty, Command, DefaultCharacter
|
||||
from evennia.utils.utils import make_iter
|
||||
from twisted.internet import reactor, task
|
||||
from twisted.internet.defer import inlineCallbacks
|
||||
from twisted.internet.defer import CancelledError, inlineCallbacks
|
||||
|
||||
from .llm_client import LLMClient
|
||||
|
||||
|
|
@ -24,7 +25,7 @@ from .llm_client import LLMClient
|
|||
# npc.db.prompt_prefix, npcClass.prompt_prefix, then settings.LLM_PROMPT_PREFIX, then this
|
||||
DEFAULT_PROMPT_PREFIX = (
|
||||
"You are roleplaying that your name is {name}, a {desc} existing in {location}. "
|
||||
"Roleplay a suitable response to the following input only: "
|
||||
"From here on, the conversation between {character} and {name} begins."
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -32,16 +33,25 @@ class LLMNPC(DefaultCharacter):
|
|||
"""An NPC that uses the LLM server to generate its responses. If the server is slow, it will
|
||||
echo a thinking message to the character while it waits for a response."""
|
||||
|
||||
# use this to override the prefix per class
|
||||
# use this to override the prefix per class. Assign an Attribute to override per-instance.
|
||||
prompt_prefix = None
|
||||
|
||||
response_template = "$You() $conj(say) (to $You(character)): {response}"
|
||||
thinking_timeout = 2 # seconds
|
||||
thinking_messages = [
|
||||
"{name} thinks about what you said ...",
|
||||
"{name} ponders your words ...",
|
||||
"{name} ponders ...",
|
||||
]
|
||||
response_template = AttributeProperty(
|
||||
"$You() $conj(say) (to $You(character)): {response}", autocreate=False
|
||||
)
|
||||
thinking_timeout = AttributeProperty(2, autocreate=False) # seconds
|
||||
thinking_messages = AttributeProperty(
|
||||
[
|
||||
"{name} thinks about what you said ...",
|
||||
"{name} ponders your words ...",
|
||||
"{name} ponders ...",
|
||||
],
|
||||
autocreate=False,
|
||||
)
|
||||
|
||||
max_chat_memory_size = AttributeProperty(25, autocreate=False)
|
||||
# this is a store of {character: [chat, chat, ...]}
|
||||
chat_memory = AttributeProperty(defaultdict(list))
|
||||
|
||||
@property
|
||||
def llm_client(self):
|
||||
|
|
@ -60,6 +70,41 @@ class LLMNPC(DefaultCharacter):
|
|||
),
|
||||
)
|
||||
|
||||
def _add_to_memory(self, character, who_talked, speech):
    """
    Record one line of conversation in this NPC's per-character chat memory.

    The line is stored as "<display name>: <speech>", i.e. in the same
    "name: chat" form that is fed to the LLM.

    Args:
        character (Object): The one conversing with the NPC; used as the key
            into `self.chat_memory`.
        who_talked (Object): Whoever spoke this line (the character or the
            NPC itself). Its display name, as seen by this NPC, prefixes
            the stored line.
        speech (str): What was said.

    Notes:
        At most `self.max_chat_memory_size` lines are kept per character,
        oldest lines being dropped first. A limit below 1 is treated as 1,
        so the line just added is always retained.

    """
    memory = self.chat_memory[character]
    memory.append(f"{who_talked.get_display_name(self)}: {speech}")

    # Trim the memory if it's getting too long in order to save space.
    # A bare `memory[-limit:]` would keep the *entire* list for limit == 0
    # (because -0 == 0) and would drop the oldest `abs(limit)` entries for a
    # negative limit, so clamp the limit to at least one line first.
    keep = max(self.max_chat_memory_size, 1)
    self.chat_memory[character] = memory[-keep:]
|
||||
|
||||
def build_prompt(self, character, speech):
    """
    Assemble the full prompt text that is sent to the LLM server.

    The prompt is the formatted prompt prefix followed by a newline and the
    stored conversation with `character`, one remembered line per row.

    Args:
        character (Object): The one talking to the NPC.
        speech (str): The latest speech from the character. It is not read
            here directly; only lines already stored in
            `self.chat_memory[character]` end up in the prompt.

    Returns:
        str: The prompt to send.

    """
    npc_name = self.get_display_name(character)
    speaker_name = character.get_display_name(self)
    history = self.chat_memory[character]

    # fill in the starting prompt
    template = self.llm_prompt_prefix
    description = self.db.desc or "someone"
    if self.location:
        where = self.location.key
    else:
        where = "the void"
    header = template.format(
        name=npc_name,
        desc=description,
        location=where,
        character=speaker_name,
    )

    # append the remembered conversation, one line per entry
    conversation = "\n".join(history)
    return header + "\n" + conversation
|
||||
|
||||
@inlineCallbacks
|
||||
def at_talked_to(self, speech, character):
|
||||
"""Called when this NPC is talked to by a character."""
|
||||
|
|
@ -71,9 +116,14 @@ class LLMNPC(DefaultCharacter):
|
|||
# abort the thinking message if we were fast enough
|
||||
thinking_defer.cancel()
|
||||
|
||||
# remember this response
|
||||
self._add_to_memory(character, self, response)
|
||||
|
||||
response = self.response_template.format(
|
||||
name=self.get_display_name(character), response=response
|
||||
)
|
||||
|
||||
# tell the character about it
|
||||
if character.location:
|
||||
character.location.msg_contents(
|
||||
response,
|
||||
|
|
@ -84,6 +134,8 @@ class LLMNPC(DefaultCharacter):
|
|||
# fallback if character is not in a location
|
||||
character.msg(f"{self.get_display_name(character)} says, {response}")
|
||||
|
||||
# if response takes too long, note that the NPC is thinking.
|
||||
|
||||
def _echo_thinking_message():
|
||||
"""Echo a random thinking message to the character"""
|
||||
thinking_message = choice(
|
||||
|
|
@ -96,18 +148,19 @@ class LLMNPC(DefaultCharacter):
|
|||
thinking_message = thinking_message.format(name=self.get_display_name(character))
|
||||
character.msg(thinking_message)
|
||||
|
||||
# if response takes too long, note that the NPC is thinking.
|
||||
thinking_defer = task.deferLater(reactor, self.thinking_timeout, _echo_thinking_message)
|
||||
def _handle_cancel_error(failure):
|
||||
"""Suppress task-cancel errors only"""
|
||||
failure.trap(CancelledError)
|
||||
|
||||
prompt = (
|
||||
self.llm_prompt_prefix.format(
|
||||
name=self.key,
|
||||
desc=self.db.desc or "commoner",
|
||||
location=self.location.key if self.location else "the void",
|
||||
)
|
||||
+ " "
|
||||
+ speech
|
||||
)
|
||||
thinking_defer = task.deferLater(
|
||||
reactor, self.thinking_timeout, _echo_thinking_message
|
||||
).addErrback(_handle_cancel_error)
|
||||
|
||||
# remember latest input in memory, so it's included in the prompt
|
||||
self._add_to_memory(character, character, speech)
|
||||
|
||||
# build the prompt
|
||||
prompt = self.build_prompt(character, speech)
|
||||
|
||||
# get the response from the LLM server
|
||||
yield self.llm_client.get_response(prompt).addCallback(_respond)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue