Correct help lunr search boosts

This commit is contained in:
Griatch 2021-05-08 18:18:15 +02:00
parent a95d801b1e
commit 88a48e6842
8 changed files with 310 additions and 60 deletions

View file

@ -119,9 +119,11 @@ class CommandMeta(type):
# parsing errors. # parsing errors.
class Command(object, metaclass=CommandMeta): class Command(metaclass=CommandMeta):
""" """
Base command ## Base command
(you may see this if a child command had no help text defined)
Usage: Usage:
command [args] command [args]

View file

@ -1882,7 +1882,7 @@ class CmdIRCStatus(COMMAND_DEFAULT_CLASS):
Check and reboot IRC bot. Check and reboot IRC bot.
Usage: Usage:
ircstatus [#dbref ping||nicklist||reconnect] ircstatus [#dbref ping | nicklist | reconnect]
If not given arguments, will return a list of all bots (like If not given arguments, will return a list of all bots (like
irc2chan/list). The 'ping' argument will ping the IRC network to irc2chan/list). The 'ping' argument will ping the IRC network to

View file

@ -1,9 +1,10 @@
""" """
The help command. The basic idea is that help texts for commands The help command. The basic idea is that help texts for commands are best
are best written by those that write the commands - the admins. So written by those that write the commands - the developers. So command-help is
command-help is all auto-loaded and searched from the current command all auto-loaded and searched from the current command set. The normal,
set. The normal, database-tied help system is used for collaborative database-tied help system is used for collaborative creation of other help
creation of other help topics such as RP help or game-world aides. topics such as RP help or game-world aides. Help entries can also be created
outside the game in modules given by ``settings.FILE_HELP_ENTRY_MODULES``.
""" """
@ -34,7 +35,6 @@ _DEFAULT_WIDTH = settings.CLIENT_DEFAULT_WIDTH
_SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n" _SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n"
@dataclass @dataclass
class HelpCategory: class HelpCategory:
""" """
@ -144,7 +144,7 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
""" """
start = f"{_SEP}\n" start = f"{_SEP}\n"
title = f"|CHelp for |w{topic}|n" if topic else "" title = f"|CHelp for |w{topic}|n" if topic else "|rNo help found|n"
if aliases: if aliases:
aliases = ( aliases = (
@ -165,7 +165,7 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
if suggested: if suggested:
suggested = ( suggested = (
"\n\n|CSuggested other topics:|n\n{}".format( "\n|CSuggestions:|n\n{}".format(
fill("|C,|n ".join(f"|w{sug}|n" for sug in suggested), indent=2)) fill("|C,|n ".join(f"|w{sug}|n" for sug in suggested), indent=2))
) )
else: else:
@ -175,7 +175,8 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
return "".join((start, title, aliases, help_text, subtopics, suggested, end)) return "".join((start, title, aliases, help_text, subtopics, suggested, end))
def format_help_index(self, cmd_help_dict=None, db_help_dict=None):
def format_help_index(self, cmd_help_dict=None, db_help_dict=None, title_lone_category=False):
""" """
Output a category-ordered g for displaying the main help, grouped by Output a category-ordered g for displaying the main help, grouped by
category. category.
@ -185,6 +186,10 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
command-based help. command-based help.
db_help_dict (dict): A dict `{"category": [topic, topic], ...]}` for db_help_dict (dict): A dict `{"category": [topic, topic], ...]}` for
database-based help. database-based help.
title_lone_category (bool, optional): If a lone category should
be titled with the category name or not. While pointless in a
general index, the title should probably show when explicitly
listing the category itself.
Returns: Returns:
str: The help index organized into a grid. str: The help index organized into a grid.
@ -199,7 +204,7 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
grid = [] grid = []
verbatim_elements = [] verbatim_elements = []
if len(help_dict) == 1: if len(help_dict) == 1 and not title_lone_category:
# don't list categories if there is only one # don't list categories if there is only one
for category in help_dict: for category in help_dict:
entries = sorted(set(help_dict.get(category, []))) entries = sorted(set(help_dict.get(category, [])))
@ -226,22 +231,25 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
width = self.client_width() width = self.client_width()
grid = [] grid = []
verbatim_elements = [] verbatim_elements = []
cmd_grid, db_grid = "", ""
# get the command-help entries by-category if any(cmd_help_dict.values()):
sep1 = (self.index_type_separator_clr # get the command-help entries by-category
+ pad("Commands", width=width, fillchar='-') sep1 = (self.index_type_separator_clr
+ self.index_topic_clr) + pad("Commands", width=width, fillchar='-')
grid, verbatim_elements = _group_by_category(cmd_help_dict) + self.index_topic_clr)
gridrows = format_grid(grid, width, sep=" ", verbatim_elements=verbatim_elements) grid, verbatim_elements = _group_by_category(cmd_help_dict)
cmd_grid = ANSIString("\n").join(gridrows) if gridrows else "" gridrows = format_grid(grid, width, sep=" ", verbatim_elements=verbatim_elements)
cmd_grid = ANSIString("\n").join(gridrows) if gridrows else ""
# get db-based help entries by-category if any(db_help_dict.values()):
sep2 = (self.index_type_separator_clr # get db-based help entries by-category
+ pad("Game & World", width=width, fillchar='-') sep2 = (self.index_type_separator_clr
+ self.index_topic_clr) + pad("Game & World", width=width, fillchar='-')
grid, verbatim_elements = _group_by_category(db_help_dict) + self.index_topic_clr)
gridrows = format_grid(grid, width, sep=" ", verbatim_elements=verbatim_elements) grid, verbatim_elements = _group_by_category(db_help_dict)
db_grid = ANSIString("\n").join(gridrows) if gridrows else "" gridrows = format_grid(grid, width, sep=" ", verbatim_elements=verbatim_elements)
db_grid = ANSIString("\n").join(gridrows) if gridrows else ""
# only show the main separators if there are actually both cmd and db-based help # only show the main separators if there are actually both cmd and db-based help
if cmd_grid and db_grid: if cmd_grid and db_grid:
@ -328,6 +336,7 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
cmdset.make_unique(caller) cmdset.make_unique(caller)
# retrieve all available commands and database / file-help topics # retrieve all available commands and database / file-help topics
from evennia.commands.default.system import CmdAbout
all_cmds = [cmd for cmd in cmdset if self.check_show_help(cmd, caller)] all_cmds = [cmd for cmd in cmdset if self.check_show_help(cmd, caller)]
# we group the file-help topics with the db ones, giving the db ones priority # we group the file-help topics with the db ones, giving the db ones priority
@ -370,30 +379,67 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
# all available options # all available options
entries = [cmd for cmd in all_cmds if cmd] + all_db_topics + all_categories entries = [cmd for cmd in all_cmds if cmd] + all_db_topics + all_categories
print("CmdAbout in entries: ", CmdAbout in entries)
# lunr search fields/boosts
search_fields=[
{"field_name": "key", "boost": 10},
{"field_name": "aliases", "boost": 9},
{"field_name": "category", "boost": 8},
{"field_name": "tags", "boost": 1}, # tags are not used by default
]
match, suggestions = None, None match, suggestions = None, None
for match_query in [f"{query}~1", f"{query}*"]: for match_query in (query, f"{query}*"):
# We first do an exact word-match followed by a start-by query # We first do an exact word-match followed by a start-by query. The
# the return of this will either be a HelpCategory, a Command or a HelpEntry. # return of this will either be a HelpCategory, a Command or a
# HelpEntry/FileHelpEntry.
matches, suggestions = help_search_with_index( matches, suggestions = help_search_with_index(
match_query, entries, suggestion_maxnum=self.suggestion_maxnum match_query, entries,
suggestion_maxnum=self.suggestion_maxnum,
fields=search_fields
) )
if matches: if matches:
match = matches[0] match = matches[0]
break break
if not match: if not match:
# no exact matches found. Just give suggestions. # no topic matches found. Only give suggestions.
help_text = f"There is no help topic matching '{query}'."
if not suggestions:
# we don't even have a good suggestion. Run a second search,
# doing a full-text search in the actual texts of the help
# entries
search_fields=[
{"field_name": "text", "boost": 1},
]
for match_query in [query, f"{query}*"]:
_, suggestions = help_search_with_index(
match_query, entries,
suggestion_maxnum=self.suggestion_maxnum,
fields=search_fields
)
if suggestions:
help_text += "\n... But matches where found within the help texts of the suggestions below."
break
output = self.format_help_entry( output = self.format_help_entry(
topic="", topic=None, # this will give a no-match style title
help_text=f"No help entry found for '{query}'", help_text=help_text,
suggested=suggestions suggested=suggestions
) )
self.msg_help(output) self.msg_help(output)
return return
if isinstance(match, HelpCategory): if isinstance(match, HelpCategory):
# no subtopics for categories - these are just lists of topics # no subtopics for categories - these are just lists of topics
output = self.format_help_index( output = self.format_help_index(
{ {
match.key: [ match.key: [
@ -409,6 +455,7 @@ class CmdHelp(COMMAND_DEFAULT_CLASS):
if match.key.lower() == topic.help_category if match.key.lower() == topic.help_category
] ]
}, },
title_lone_category=True
) )
self.msg_help(output) self.msg_help(output)
return return
@ -655,6 +702,7 @@ class CmdSetHelp(COMMAND_DEFAULT_CLASS):
old_entry.aliases.add(aliases) old_entry.aliases.add(aliases)
self.msg("Entry updated:\n%s%s" % (old_entry.entrytext, aliastxt)) self.msg("Entry updated:\n%s%s" % (old_entry.entrytext, aliastxt))
return return
if "delete" in switches or "del" in switches: if "delete" in switches or "del" in switches:
# delete the help entry # delete the help entry
if not old_entry: if not old_entry:

View file

@ -12,25 +12,23 @@ from evennia.commands.command import Command as BaseCommand
class Command(BaseCommand): class Command(BaseCommand):
""" """
Inherit from this if you want to create your own command styles Base command (you may see this if a child command had no help text defined)
from scratch. Note that Evennia's default commands inherits from
MuxCommand instead.
Note that the class's `__doc__` string (this text) is Note that the class's `__doc__` string is used by Evennia to create the
used by Evennia to create the automatic help entry for automatic help entry for the command, so make sure to document consistently
the command, so make sure to document consistently here. here. Without setting one, the parent's docstring will show (like now).
Each Command implements the following methods, called
in this order (only func() is actually required):
- at_pre_cmd(): If this returns anything truthy, execution is aborted.
- parse(): Should perform any extra parsing needed on self.args
and store the result on self.
- func(): Performs the actual work.
- at_post_cmd(): Extra actions, often things done after
every command, like prompts.
""" """
# Each Command class implements the following methods, called in this order
# (only func() is actually required):
#
# - at_pre_cmd(): If this returns anything truthy, execution is aborted.
# - parse(): Should perform any extra parsing needed on self.args
# and store the result on self.
# - func(): Performs the actual work.
# - at_post_cmd(): Extra actions, often things done after
# every command, like prompts.
#
pass pass

View file

@ -109,11 +109,8 @@ class HelpEntry(SharedMemoryModel):
# #
# #
def __str__(self):
return self.key
def __repr__(self): def __repr__(self):
return "%s" % self.key return f"<HelpEntry {self.key}>"
def access(self, accessing_obj, access_type="read", default=False): def access(self, accessing_obj, access_type="read", default=False):
""" """

203
evennia/help/utils.py Normal file
View file

@ -0,0 +1,203 @@
"""
Resources for indexing help entries and for splitting help entries into
sub-categories.
This is used primarily by the default `help` command.
"""
import re
# Lazily populated by `help_search_with_index` on first use; they stay `None`
# until the lunr package has been imported there.
_LUNR = None
_LUNR_EXCEPTION = None

# A line holding only `# subtopics` (any case, optional surrounding
# whitespace). It separates the main help text from the subtopic section.
_RE_HELP_SUBTOPICS_START = re.compile(
    r"^\s*?#\s*?subtopics\s*?$", re.IGNORECASE | re.MULTILINE)

# A subtopic header line: 2-6 `#` characters followed by the subtopic name.
# The header is captured so that `split` keeps it in the resulting list.
_RE_HELP_SUBTOPIC_SPLIT = re.compile(
    r"^\s*?(\#{2,6}\s*?\w+?[a-z0-9 \-\?!,\.]*?)$", re.MULTILINE | re.IGNORECASE)

# Breaks a header line into its `#`-run (`nesting`) and the rest (`name`).
_RE_HELP_SUBTOPIC_PARSE = re.compile(
    r"^(?P<nesting>\#{2,6})\s*?(?P<name>.*?)$", re.IGNORECASE | re.MULTILINE)

# Deepest subtopic level accepted by `parse_entry_for_subcategories`.
MAX_SUBTOPIC_NESTING = 5
def help_search_with_index(query, candidate_entries, suggestion_maxnum=5, fields=None):
    """
    Search help candidates using a Lunr index and return matches plus suggestions.

    See https://lunrjs.com/ for the query syntax.

    Args:
        query (str): The query to search for.
        candidate_entries (list): The entities to search among. Each must
            have a property `.search_index_entry` returning a dict that
            holds a `"key"` entry as well as every field named in `fields`.
        suggestion_maxnum (int): The maximum number of hits to return.
        fields (list, optional): A list of Lunr field mappings
            ``{"field_name": str, "boost": int}``. Each field name must exist
            in the dicts returned by `.search_index_entry`. If not given, a
            default setup is used, preferring key > aliases > category > tags.

    Returns:
        tuple: A tuple `(matches, suggestions)`. Both are lists built from the
        same (at most `suggestion_maxnum`) hits: `matches` holds the candidate
        objects and `suggestions` holds their keys as strings.

    Notes:
        A query that Lunr fails to parse is treated as a user-input problem
        and gives two empty lists rather than an error.

    """
    global _LUNR, _LUNR_EXCEPTION
    if not _LUNR:
        # we have to delay-load lunr because it messes with logging if it's imported
        # before twisted's logging has been set up
        from lunr import lunr as _LUNR
        from lunr.exceptions import QueryParseError as _LUNR_EXCEPTION

    documents = [candidate.search_index_entry for candidate in candidate_entries]
    # lunr only hands back the document ref (the key), so keep a way to get
    # from that key back to the object it came from
    candidate_by_key = {
        document["key"]: candidate
        for document, candidate in zip(documents, candidate_entries)
    }

    if not fields:
        fields = [
            {"field_name": "key", "boost": 10},
            {"field_name": "aliases", "boost": 9},
            {"field_name": "category", "boost": 8},
            {"field_name": "tags", "boost": 5},
        ]

    search_index = _LUNR(ref="key", fields=fields, documents=documents)

    try:
        hits = search_index.search(query)[:suggestion_maxnum]
    except _LUNR_EXCEPTION:
        # this is a user-input problem
        hits = []

    refs = [hit["ref"] for hit in hits]
    # matches (objs), suggestions (strs)
    return [candidate_by_key[ref] for ref in refs], [str(ref) for ref in refs]
def parse_entry_for_subcategories(entry):
    """
    Split a help text into its main text and a nested tree of subtopics.

    Everything before a line reading `# subtopics` is the main text. After
    that line, header lines starting with `##` up to `######` open subtopics,
    where each extra `#` means one level deeper nesting.

    Args:
        entry (str): A help entry to parse.

    Returns:
        dict: A mapping that always holds the key `None` for the (stripped)
        main help text and zero or more keys for the subtopics. Each subtopic
        is itself a dict with the key `None` for its own text followed by
        any sub-subtopics. Subtopic names are lower-cased; subtopic texts are
        stored as they were split out, without stripping.

    Raises:
        RuntimeError: If a header nests deeper than `MAX_SUBTOPIC_NESTING`.

    Example:
        ::

            '''
            Main topic text

            # SUBTOPICS

            ## foo

            A subcategory of the main entry, accessible as `help topic foo`
            (or using /, like `help topic/foo`)

            ## bar

            Another subcategory, accessed as `help topic bar`
            (or `help topic/bar`)

            ### moo

            A subcategory of bar

            #### dum

            A subcategory of moo

            '''

        This will result in this returned entry structure:
        ::

            {
                None: "Main topic text",
                "foo": {
                    None: "main topic/foo text"
                },
                "bar": {
                    None: "Main topic/bar text",
                    "moo": {
                        None: "topic/bar/moo text",
                        "dum": {
                            None: "topic/bar/moo/dum text"
                        }
                    }
                }
            }

    """
    main_text, *remainder = _RE_HELP_SUBTOPICS_START.split(entry, maxsplit=1)
    structure = {None: main_text.strip()}
    if not remainder:
        # there is no subtopic section at all
        return structure

    keypath = []
    current_nesting = 0
    subtopic = None

    # the split keeps the header lines, so the parts alternate between body
    # texts and the headers that introduce them
    for part in _RE_HELP_SUBTOPIC_SPLIT.split(remainder[0].strip()):
        header = _RE_HELP_SUBTOPIC_PARSE.match(part.strip())

        if header is None:
            # a text belonging to the subtopic the keypath currently points at
            if not keypath and subtopic is not None:
                # keypath is only empty before the first header, when subtopic
                # is still None, so this is not expected to trigger
                structure[subtopic] = part
                continue
            node = structure
            for key in keypath:
                if key in node:
                    node = node[key]
                else:
                    node[key] = {None: part}
            continue

        # a new sub(-sub..) category starts
        subtopic = header.group("name").lower().strip()
        new_nesting = len(header.group("nesting")) - 1
        if new_nesting > MAX_SUBTOPIC_NESTING:
            raise RuntimeError(
                f"Can have max {MAX_SUBTOPIC_NESTING} levels of nested help subtopics.")

        nestdiff = new_nesting - current_nesting
        if nestdiff <= 0:
            # Same level: replace the last key. Shallower level: also drop one
            # key per level we jump back up. Slicing removes at most what is
            # there, so a too-short keypath is simply emptied.
            del keypath[nestdiff - 1:]
        keypath.append(subtopic)
        current_nesting = new_nesting

    return structure

View file

@ -117,7 +117,7 @@ def check_errors(settings):
"Use PORTAL/SERVER_LOG_DAY_ROTATION and PORTAL/SERVER_LOG_MAX_SIZE " "Use PORTAL/SERVER_LOG_DAY_ROTATION and PORTAL/SERVER_LOG_MAX_SIZE "
"to control log cycling." "to control log cycling."
) )
if hasattr(settings, "CHANNEL_COMMAND_CLASS") or hasaattr(settings, "CHANNEL_HANDLER_CLASS"): if hasattr(settings, "CHANNEL_COMMAND_CLASS") or hasattr(settings, "CHANNEL_HANDLER_CLASS"):
raise DeprecationWarning( raise DeprecationWarning(
"settings.CHANNEL_HANDLER_CLASS and CHANNEL COMMAND_CLASS are " "settings.CHANNEL_HANDLER_CLASS and CHANNEL COMMAND_CLASS are "
"unused and should be removed. The ChannelHandler is no more; " "unused and should be removed. The ChannelHandler is no more; "

View file

@ -1868,12 +1868,14 @@ def format_grid(elements, width=78, sep=" ", verbatim_elements=None):
elements = [elements[ie] + sep for ie in range(nelements - 1)] + [elements[-1]] elements = [elements[ie] + sep for ie in range(nelements - 1)] + [elements[-1]]
wls = [len(elem) for elem in elements] wls = [len(elem) for elem in elements]
wls_percentile = [wl for iw, wl in enumerate(wls) if iw not in verbatim_elements] wls_percentile = [wl for iw, wl in enumerate(wls) if iw not in verbatim_elements]
# from pudb import debugger
# debugger.Debugger().set_trace()
# get the nth percentile as a good representation of average width if wls_percentile:
averlen = int(percentile(sorted(wls_percentile), 0.9)) + 2 # include extra space # get the nth percentile as a good representation of average width
aver_per_row = width // averlen + 1 averlen = int(percentile(sorted(wls_percentile), 0.9)) + 2 # include extra space
aver_per_row = width // averlen + 1
else:
# no adjustable rows, just keep all as-is
aver_per_row = 1
if aver_per_row == 1: if aver_per_row == 1:
# one line per row, output directly since this is trivial # one line per row, output directly since this is trivial