From 046fcc63562e2950408ef4c75c928867b872c3d5 Mon Sep 17 00:00:00 2001
From: Griatch <griatch@gmail.com>
Date: Sun, 16 Jul 2023 11:08:07 +0200
Subject: [PATCH] Add LLM NPC chat memory; fix thinking timeout issue

---
 evennia/contrib/rpg/llm/llm_client.py |  5 ++
 evennia/contrib/rpg/llm/llm_npc.py    | 97 +++++++++++++++++++++------
 2 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/evennia/contrib/rpg/llm/llm_client.py b/evennia/contrib/rpg/llm/llm_client.py
index d75c7939c..3284d5c1c 100644
--- a/evennia/contrib/rpg/llm/llm_client.py
+++ b/evennia/contrib/rpg/llm/llm_client.py
@@ -136,6 +136,9 @@ class LLMClient:
         """Call the LLM server and handle the response/failure"""
         request_body = self._format_request_body(prompt)
 
+        if settings.DEBUG:
+            logger.log_info(f"LLM request body: {request_body}")
+
         d = self.agent.request(
             b"POST",
             bytes(self.hostname + self.pathname, "utf-8"),
@@ -164,6 +167,8 @@ class LLMClient:
         """
         status_code, response = yield self._get_response_from_llm_server(prompt)
         if status_code == 200:
+            if settings.DEBUG:
+                logger.log_info(f"LLM response: {response}")
             return json.loads(response)["results"][0]["text"]
         else:
             logger.log_err(f"LLM API error (status {status_code}): {response}")
diff --git a/evennia/contrib/rpg/llm/llm_npc.py b/evennia/contrib/rpg/llm/llm_npc.py
index 85db3cc6e..ca5ca7943 100644
--- a/evennia/contrib/rpg/llm/llm_npc.py
+++ b/evennia/contrib/rpg/llm/llm_npc.py
@@ -10,13 +10,14 @@ echo a 'thinking...' message if the LLM server takes too long to respond.
 
 """
 
+from collections import defaultdict
 from random import choice
 
 from django.conf import settings
-from evennia import Command, DefaultCharacter
+from evennia import AttributeProperty, Command, DefaultCharacter
 from evennia.utils.utils import make_iter
 from twisted.internet import reactor, task
-from twisted.internet.defer import inlineCallbacks
+from twisted.internet.defer import CancelledError, inlineCallbacks
 
 from .llm_client import LLMClient
 
@@ -24,7 +25,7 @@ from .llm_client import LLMClient
 # npc.db.prompt_prefix, npcClass.prompt_prefix, then settings.LLM_PROMPT_PREFIX, then this
 DEFAULT_PROMPT_PREFIX = (
     "You are roleplaying that your name is {name}, a {desc} existing in {location}. "
-    "Roleplay a suitable response to the following input only: "
+    "From here on, the conversation between {character} and {name} begins."
 )
 
 
@@ -32,16 +33,25 @@ class LLMNPC(DefaultCharacter):
     """An NPC that uses the LLM server to generate its responses. If the server is slow, it will
     echo a thinking message to the character while it waits for a response."""
 
-    # use this to override the prefix per class
+    # use this to override the prefix per class. Assign an Attribute to override per-instance.
     prompt_prefix = None
 
-    response_template = "$You() $conj(say) (to $You(character)): {response}"
-    thinking_timeout = 2  # seconds
-    thinking_messages = [
-        "{name} thinks about what you said ...",
-        "{name} ponders your words ...",
-        "{name} ponders ...",
-    ]
+    response_template = AttributeProperty(
+        "$You() $conj(say) (to $You(character)): {response}", autocreate=False
+    )
+    thinking_timeout = AttributeProperty(2, autocreate=False)  # seconds
+    thinking_messages = AttributeProperty(
+        [
+            "{name} thinks about what you said ...",
+            "{name} ponders your words ...",
+            "{name} ponders ...",
+        ],
+        autocreate=False,
+    )
+
+    max_chat_memory_size = AttributeProperty(25, autocreate=False)
+    # this is a store of {character: [chat, chat, ...]}
+    chat_memory = AttributeProperty(defaultdict(list))
 
     @property
     def llm_client(self):
@@ -60,6 +70,41 @@ class LLMNPC(DefaultCharacter):
             ),
         )
 
+    def _add_to_memory(self, character, who_talked, speech):
+        """Add a person's speech to the memory. This is stored as name: chat for the LLM."""
+        memory = self.chat_memory[character]
+        memory.append(f"{who_talked.get_display_name(self)}: {speech}")
+
+        # trim the memory if it's getting too long in order to save space
+        memory = memory[-self.max_chat_memory_size :]
+        self.chat_memory[character] = memory
+
+    def build_prompt(self, character, speech):
+        """
+        Build the prompt to send to the LLM server.
+
+        Args:
+            character (Object): The one talking to the NPC.
+            speech (str): The latest speech from the character.
+
+        Returns:
+            str: The prompt to return.
+
+        """
+        name = self.get_display_name(character)
+        charname = character.get_display_name(self)
+        memory = self.chat_memory[character]
+
+        # get starting prompt
+        prompt = self.llm_prompt_prefix.format(
+            name=name,
+            desc=self.db.desc or "someone",
+            location=self.location.key if self.location else "the void",
+            character=charname,
+        )
+        prompt += "\n" + "\n".join(mem for mem in memory)
+        return prompt
+
     @inlineCallbacks
     def at_talked_to(self, speech, character):
         """Called when this NPC is talked to by a character."""
@@ -71,9 +116,14 @@ class LLMNPC(DefaultCharacter):
                 # abort the thinking message if we were fast enough
                 thinking_defer.cancel()
 
+            # remember this response
+            self._add_to_memory(character, self, response)
+
             response = self.response_template.format(
                 name=self.get_display_name(character), response=response
             )
+
+            # tell the character about it
             if character.location:
                 character.location.msg_contents(
                     response,
@@ -84,6 +134,8 @@ class LLMNPC(DefaultCharacter):
                 # fallback if character is not in a location
                 character.msg(f"{self.get_display_name(character)} says, {response}")
 
+        # if response takes too long, note that the NPC is thinking.
+
         def _echo_thinking_message():
             """Echo a random thinking message to the character"""
             thinking_message = choice(
@@ -96,18 +148,19 @@ class LLMNPC(DefaultCharacter):
                 thinking_message = thinking_message.format(name=self.get_display_name(character))
                 character.msg(thinking_message)
 
-        # if response takes too long, note that the NPC is thinking.
-        thinking_defer = task.deferLater(reactor, self.thinking_timeout, _echo_thinking_message)
+        def _handle_cancel_error(failure):
+            """Suppress task-cancel errors only"""
+            failure.trap(CancelledError)
 
-        prompt = (
-            self.llm_prompt_prefix.format(
-                name=self.key,
-                desc=self.db.desc or "commoner",
-                location=self.location.key if self.location else "the void",
-            )
-            + " "
-            + speech
-        )
+        thinking_defer = task.deferLater(
+            reactor, self.thinking_timeout, _echo_thinking_message
+        ).addErrback(_handle_cancel_error)
+
+        # remember latest input in memory, so it's included in the prompt
+        self._add_to_memory(character, character, speech)
+
+        # build the prompt
+        prompt = self.build_prompt(character, speech)
 
         # get the response from the LLM server
         yield self.llm_client.get_response(prompt).addCallback(_respond)