Add LLM NPC chat memory; fix thinking timeout issue

2023-07-16 11:08:07 +02:00 · 2023-07-16 11:08:07 +02:00 · 046fcc6356
commit 046fcc6356
parent 20273ec761
2 changed files with 80 additions and 22 deletions
--- a/evennia/contrib/rpg/llm/llm_client.py
+++ b/evennia/contrib/rpg/llm/llm_client.py
@@ -136,6 +136,9 @@ class LLMClient:
        """Call the LLM server and handle the response/failure"""
        request_body = self._format_request_body(prompt)

+        if settings.DEBUG:
+            logger.log_info(f"LLM request body: {request_body}")
+
        d = self.agent.request(
            b"POST",
            bytes(self.hostname + self.pathname, "utf-8"),
@@ -164,6 +167,8 @@ class LLMClient:
        """
        status_code, response = yield self._get_response_from_llm_server(prompt)
        if status_code == 200:
+            if settings.DEBUG:
+                logger.log_info(f"LLM response: {response}")
            return json.loads(response)["results"][0]["text"]
        else:
            logger.log_err(f"LLM API error (status {status_code}): {response}")
--- a/evennia/contrib/rpg/llm/llm_npc.py
+++ b/evennia/contrib/rpg/llm/llm_npc.py
@@ -10,13 +10,14 @@ echo a 'thinking...' message if the LLM server takes too long to respond.

 """

+from collections import defaultdict
 from random import choice

 from django.conf import settings
-from evennia import Command, DefaultCharacter
+from evennia import AttributeProperty, Command, DefaultCharacter
 from evennia.utils.utils import make_iter
 from twisted.internet import reactor, task
-from twisted.internet.defer import inlineCallbacks
+from twisted.internet.defer import CancelledError, inlineCallbacks

 from .llm_client import LLMClient

@@ -24,7 +25,7 @@ from .llm_client import LLMClient
 # npc.db.prompt_prefix, npcClass.prompt_prefix, then settings.LLM_PROMPT_PREFIX, then this
 DEFAULT_PROMPT_PREFIX = (
    "You are roleplaying that your name is {name}, a {desc} existing in {location}. "
-    "Roleplay a suitable response to the following input only: "
+    "From here on, the conversation between {character} and {name} begins."
 )


@@ -32,16 +33,25 @@ class LLMNPC(DefaultCharacter):
    """An NPC that uses the LLM server to generate its responses. If the server is slow, it will
    echo a thinking message to the character while it waits for a response."""

-    # use this to override the prefix per class
+    # use this to override the prefix per class. Assign an Attribute to override per-instance.
    prompt_prefix = None

-    response_template = "$You() $conj(say) (to $You(character)): {response}"
-    thinking_timeout = 2  # seconds
-    thinking_messages = [
-        "{name} thinks about what you said ...",
-        "{name} ponders your words ...",
-        "{name} ponders ...",
-    ]
+    response_template = AttributeProperty(
+        "$You() $conj(say) (to $You(character)): {response}", autocreate=False
+    )
+    thinking_timeout = AttributeProperty(2, autocreate=False)  # seconds
+    thinking_messages = AttributeProperty(
+        [
+            "{name} thinks about what you said ...",
+            "{name} ponders your words ...",
+            "{name} ponders ...",
+        ],
+        autocreate=False,
+    )
+
+    max_chat_memory_size = AttributeProperty(25, autocreate=False)
+    # this is a store of {character: [chat, chat, ...]}
+    chat_memory = AttributeProperty(defaultdict(list))

    @property
    def llm_client(self):
@@ -60,6 +70,41 @@ class LLMNPC(DefaultCharacter):
            ),
        )

+    def _add_to_memory(self, character, who_talked, speech):
+        """Add a person's speech to the memory. This is stored as name: chat for the LLM."""
+        memory = self.chat_memory[character]
+        memory.append(f"{who_talked.get_display_name(self)}: {speech}")
+
+        # trim the memory if it's getting too long in order to save space
+        memory = memory[-self.max_chat_memory_size :]
+        self.chat_memory[character] = memory
+
+    def build_prompt(self, character, speech):
+        """
+        Build the prompt to send to the LLM server.
+
+        Args:
+            character (Object): The one talking to the NPC.
+            speech (str): The latest speech from the character.
+
+        Returns:
+            str: The prompt to return.
+
+        """
+        name = self.get_display_name(character)
+        charname = character.get_display_name(self)
+        memory = self.chat_memory[character]
+
+        # get starting prompt
+        prompt = self.llm_prompt_prefix.format(
+            name=name,
+            desc=self.db.desc or "someone",
+            location=self.location.key if self.location else "the void",
+            character=charname,
+        )
+        prompt += "\n" + "\n".join(mem for mem in memory)
+        return prompt
+
    @inlineCallbacks
    def at_talked_to(self, speech, character):
        """Called when this NPC is talked to by a character."""
@@ -71,9 +116,14 @@ class LLMNPC(DefaultCharacter):
                # abort the thinking message if we were fast enough
                thinking_defer.cancel()

+            # remember this response
+            self._add_to_memory(character, self, response)
+
            response = self.response_template.format(
                name=self.get_display_name(character), response=response
            )
+
+            # tell the character about it
            if character.location:
                character.location.msg_contents(
                    response,
@@ -84,6 +134,8 @@ class LLMNPC(DefaultCharacter):
                # fallback if character is not in a location
                character.msg(f"{self.get_display_name(character)} says, {response}")

+        # if response takes too long, note that the NPC is thinking.
+
        def _echo_thinking_message():
            """Echo a random thinking message to the character"""
            thinking_message = choice(
@@ -96,18 +148,19 @@ class LLMNPC(DefaultCharacter):
                thinking_message = thinking_message.format(name=self.get_display_name(character))
                character.msg(thinking_message)

-        # if response takes too long, note that the NPC is thinking.
-        thinking_defer = task.deferLater(reactor, self.thinking_timeout, _echo_thinking_message)
+        def _handle_cancel_error(failure):
+            """Suppress task-cancel errors only"""
+            failure.trap(CancelledError)

-        prompt = (
-            self.llm_prompt_prefix.format(
-                name=self.key,
-                desc=self.db.desc or "commoner",
-                location=self.location.key if self.location else "the void",
-            )
-            + " "
-            + speech
-        )
+        thinking_defer = task.deferLater(
+            reactor, self.thinking_timeout, _echo_thinking_message
+        ).addErrback(_handle_cancel_error)
+
+        # remember latest input in memory, so it's included in the prompt
+        self._add_to_memory(character, character, speech)
+
+        # build the prompt
+        prompt = self.build_prompt(character, speech)

        # get the response from the LLM server
        yield self.llm_client.get_response(prompt).addCallback(_respond)