Add LLM NPC chat memory; fix thinking timeout issue
This commit is contained in:
parent
20273ec761
commit
046fcc6356
2 changed files with 80 additions and 22 deletions
|
|
@ -136,6 +136,9 @@ class LLMClient:
|
||||||
"""Call the LLM server and handle the response/failure"""
|
"""Call the LLM server and handle the response/failure"""
|
||||||
request_body = self._format_request_body(prompt)
|
request_body = self._format_request_body(prompt)
|
||||||
|
|
||||||
|
if settings.DEBUG:
|
||||||
|
logger.log_info(f"LLM request body: {request_body}")
|
||||||
|
|
||||||
d = self.agent.request(
|
d = self.agent.request(
|
||||||
b"POST",
|
b"POST",
|
||||||
bytes(self.hostname + self.pathname, "utf-8"),
|
bytes(self.hostname + self.pathname, "utf-8"),
|
||||||
|
|
@ -164,6 +167,8 @@ class LLMClient:
|
||||||
"""
|
"""
|
||||||
status_code, response = yield self._get_response_from_llm_server(prompt)
|
status_code, response = yield self._get_response_from_llm_server(prompt)
|
||||||
if status_code == 200:
|
if status_code == 200:
|
||||||
|
if settings.DEBUG:
|
||||||
|
logger.log_info(f"LLM response: {response}")
|
||||||
return json.loads(response)["results"][0]["text"]
|
return json.loads(response)["results"][0]["text"]
|
||||||
else:
|
else:
|
||||||
logger.log_err(f"LLM API error (status {status_code}): {response}")
|
logger.log_err(f"LLM API error (status {status_code}): {response}")
|
||||||
|
|
|
||||||
|
|
@ -10,13 +10,14 @@ echo a 'thinking...' message if the LLM server takes too long to respond.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
from random import choice
|
from random import choice
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from evennia import Command, DefaultCharacter
|
from evennia import AttributeProperty, Command, DefaultCharacter
|
||||||
from evennia.utils.utils import make_iter
|
from evennia.utils.utils import make_iter
|
||||||
from twisted.internet import reactor, task
|
from twisted.internet import reactor, task
|
||||||
from twisted.internet.defer import inlineCallbacks
|
from twisted.internet.defer import CancelledError, inlineCallbacks
|
||||||
|
|
||||||
from .llm_client import LLMClient
|
from .llm_client import LLMClient
|
||||||
|
|
||||||
|
|
@ -24,7 +25,7 @@ from .llm_client import LLMClient
|
||||||
# npc.db.prompt_prefix, npcClass.prompt_prefix, then settings.LLM_PROMPT_PREFIX, then this
|
# npc.db.prompt_prefix, npcClass.prompt_prefix, then settings.LLM_PROMPT_PREFIX, then this
|
||||||
DEFAULT_PROMPT_PREFIX = (
|
DEFAULT_PROMPT_PREFIX = (
|
||||||
"You are roleplaying that your name is {name}, a {desc} existing in {location}. "
|
"You are roleplaying that your name is {name}, a {desc} existing in {location}. "
|
||||||
"Roleplay a suitable response to the following input only: "
|
"From here on, the conversation between {character} and {name} begins."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -32,16 +33,25 @@ class LLMNPC(DefaultCharacter):
|
||||||
"""An NPC that uses the LLM server to generate its responses. If the server is slow, it will
|
"""An NPC that uses the LLM server to generate its responses. If the server is slow, it will
|
||||||
echo a thinking message to the character while it waits for a response."""
|
echo a thinking message to the character while it waits for a response."""
|
||||||
|
|
||||||
# use this to override the prefix per class
|
# use this to override the prefix per class. Assign an Attribute to override per-instance.
|
||||||
prompt_prefix = None
|
prompt_prefix = None
|
||||||
|
|
||||||
response_template = "$You() $conj(say) (to $You(character)): {response}"
|
response_template = AttributeProperty(
|
||||||
thinking_timeout = 2 # seconds
|
"$You() $conj(say) (to $You(character)): {response}", autocreate=False
|
||||||
thinking_messages = [
|
)
|
||||||
|
thinking_timeout = AttributeProperty(2, autocreate=False) # seconds
|
||||||
|
thinking_messages = AttributeProperty(
|
||||||
|
[
|
||||||
"{name} thinks about what you said ...",
|
"{name} thinks about what you said ...",
|
||||||
"{name} ponders your words ...",
|
"{name} ponders your words ...",
|
||||||
"{name} ponders ...",
|
"{name} ponders ...",
|
||||||
]
|
],
|
||||||
|
autocreate=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
max_chat_memory_size = AttributeProperty(25, autocreate=False)
|
||||||
|
# this is a store of {character: [chat, chat, ...]}
|
||||||
|
chat_memory = AttributeProperty(defaultdict(list))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def llm_client(self):
|
def llm_client(self):
|
||||||
|
|
@ -60,6 +70,41 @@ class LLMNPC(DefaultCharacter):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _add_to_memory(self, character, who_talked, speech):
    """
    Add a person's speech to the memory. This is stored as name: chat for the LLM.

    Args:
        character (Object): The conversation partner; used as the key into
            `self.chat_memory`.
        who_talked (Object): The one who said `speech`. Its display name, as
            seen by this NPC, prefixes the stored line.
        speech (str): What was said.

    Notes:
        Only the most recent `self.max_chat_memory_size` lines are kept per
        character. The newest line is always retained, even if the configured
        limit is zero or negative.

    """
    memory = self.chat_memory[character]
    memory.append(f"{who_talked.get_display_name(self)}: {speech}")

    # Clamp the limit to at least 1. A limit of 0 would turn the slice into
    # `memory[-0:]`, which is the *whole* list (no trimming, unbounded growth),
    # and a negative limit would cut entries from the wrong end.
    keep = max(1, self.max_chat_memory_size)

    # trim the memory if it's getting too long in order to save space, and
    # assign the result back so the trimmed list is what gets stored
    self.chat_memory[character] = memory[-keep:]
|
||||||
|
|
||||||
|
def build_prompt(self, character, speech):
    """
    Assemble the prompt text to send to the LLM server.

    The result is the formatted prompt prefix, a newline, and then every
    remembered line of the conversation with `character`, one per row.

    Args:
        character (Object): The one talking to the NPC.
        speech (str): The latest speech from the character. It is not read
            by this method; the conversation part of the prompt comes from
            what is stored in `self.chat_memory`.

    Returns:
        str: The finished prompt.

    """
    npc_name = self.get_display_name(character)
    talker_name = character.get_display_name(self)
    history = self.chat_memory[character]

    # values substituted into the prompt prefix template
    if self.location:
        where = self.location.key
    else:
        where = "the void"
    description = self.db.desc or "someone"

    header = self.llm_prompt_prefix.format(
        name=npc_name,
        desc=description,
        location=where,
        character=talker_name,
    )

    # conversation so far, one remembered line per row
    return header + "\n" + "\n".join(history)
|
||||||
|
|
||||||
@inlineCallbacks
|
@inlineCallbacks
|
||||||
def at_talked_to(self, speech, character):
|
def at_talked_to(self, speech, character):
|
||||||
"""Called when this NPC is talked to by a character."""
|
"""Called when this NPC is talked to by a character."""
|
||||||
|
|
@ -71,9 +116,14 @@ class LLMNPC(DefaultCharacter):
|
||||||
# abort the thinking message if we were fast enough
|
# abort the thinking message if we were fast enough
|
||||||
thinking_defer.cancel()
|
thinking_defer.cancel()
|
||||||
|
|
||||||
|
# remember this response
|
||||||
|
self._add_to_memory(character, self, response)
|
||||||
|
|
||||||
response = self.response_template.format(
|
response = self.response_template.format(
|
||||||
name=self.get_display_name(character), response=response
|
name=self.get_display_name(character), response=response
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# tell the character about it
|
||||||
if character.location:
|
if character.location:
|
||||||
character.location.msg_contents(
|
character.location.msg_contents(
|
||||||
response,
|
response,
|
||||||
|
|
@ -84,6 +134,8 @@ class LLMNPC(DefaultCharacter):
|
||||||
# fallback if character is not in a location
|
# fallback if character is not in a location
|
||||||
character.msg(f"{self.get_display_name(character)} says, {response}")
|
character.msg(f"{self.get_display_name(character)} says, {response}")
|
||||||
|
|
||||||
|
# if response takes too long, note that the NPC is thinking.
|
||||||
|
|
||||||
def _echo_thinking_message():
|
def _echo_thinking_message():
|
||||||
"""Echo a random thinking message to the character"""
|
"""Echo a random thinking message to the character"""
|
||||||
thinking_message = choice(
|
thinking_message = choice(
|
||||||
|
|
@ -96,18 +148,19 @@ class LLMNPC(DefaultCharacter):
|
||||||
thinking_message = thinking_message.format(name=self.get_display_name(character))
|
thinking_message = thinking_message.format(name=self.get_display_name(character))
|
||||||
character.msg(thinking_message)
|
character.msg(thinking_message)
|
||||||
|
|
||||||
# if response takes too long, note that the NPC is thinking.
|
def _handle_cancel_error(failure):
|
||||||
thinking_defer = task.deferLater(reactor, self.thinking_timeout, _echo_thinking_message)
|
"""Suppress task-cancel errors only"""
|
||||||
|
failure.trap(CancelledError)
|
||||||
|
|
||||||
prompt = (
|
thinking_defer = task.deferLater(
|
||||||
self.llm_prompt_prefix.format(
|
reactor, self.thinking_timeout, _echo_thinking_message
|
||||||
name=self.key,
|
).addErrback(_handle_cancel_error)
|
||||||
desc=self.db.desc or "commoner",
|
|
||||||
location=self.location.key if self.location else "the void",
|
# remember latest input in memory, so it's included in the prompt
|
||||||
)
|
self._add_to_memory(character, character, speech)
|
||||||
+ " "
|
|
||||||
+ speech
|
# build the prompt
|
||||||
)
|
prompt = self.build_prompt(character, speech)
|
||||||
|
|
||||||
# get the response from the LLM server
|
# get the response from the LLM server
|
||||||
yield self.llm_client.get_response(prompt).addCallback(_respond)
|
yield self.llm_client.get_response(prompt).addCallback(_respond)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue