Changed how "command not found" errors are handled by default: implemented a cosine-similarity algorithm (Coling 2008) for comparing strings, which gives decent suggestions at a reasonable speed.

This commit is contained in:
Griatch 2012-04-22 16:36:31 +02:00
parent 4678234e9a
commit 0c292b5ff2
4 changed files with 76 additions and 12 deletions

View file

@ -42,6 +42,7 @@ from django.conf import settings
from src.comms.channelhandler import CHANNELHANDLER from src.comms.channelhandler import CHANNELHANDLER
from src.utils import logger, utils from src.utils import logger, utils
from src.commands.cmdparser import at_multimatch_cmd from src.commands.cmdparser import at_multimatch_cmd
from src.utils.utils import string_suggestions
__all__ = ("cmdhandler",) __all__ = ("cmdhandler",)
@ -191,7 +192,12 @@ def cmdhandler(caller, raw_string, testing=False):
if syscmd: if syscmd:
sysarg = raw_string sysarg = raw_string
else: else:
sysarg = "Huh? (Type \"help\" for help)" sysarg = "Command '%s' is not available." % raw_string
suggestions = string_suggestions(raw_string, cmdset.get_all_cmd_keys_and_aliases(), cutoff=0.7, maxnum=3)
if suggestions:
sysarg += " Did you maybe mean %s?" % utils.list_to_string(suggestions, 'or', addquote=True)
else:
sysarg += " Type \"help\" for help."
raise ExecSystemCommand(syscmd, sysarg) raise ExecSystemCommand(syscmd, sysarg)
if len(matches) > 1: if len(matches) > 1:

View file

@ -382,3 +382,12 @@ class CmdSet(object):
by use of self.add(). by use of self.add().
""" """
pass pass
def get_all_cmd_keys_and_aliases(self):
    """
    Returns a list of all command keys and aliases
    available in this cmdset.

    The list holds the key of every command in self.commands first
    (in command order), followed by the aliases of every command
    (again in command order). Duplicates are not removed.
    """
    names = [cmd.key for cmd in self.commands]
    # plain loop instead of a comprehension: extend() is called only for
    # its side effect, so there is no point building a list of Nones.
    for cmd in self.commands:
        names.extend(cmd.aliases)
    return names

View file

@ -6,6 +6,7 @@ set. The normal, database-tied help system is used for collaborative
creation of other help topics such as RP help or game-world aides. creation of other help topics such as RP help or game-world aides.
""" """
from collections import defaultdict
from src.utils.utils import fill, dedent from src.utils.utils import fill, dedent
from src.commands.command import Command from src.commands.command import Command
from src.help.models import HelpEntry from src.help.models import HelpEntry
@ -100,20 +101,14 @@ class CmdHelp(Command):
if query in LIST_ARGS: if query in LIST_ARGS:
# we want to list all available help entries, grouped by category. # we want to list all available help entries, grouped by category.
hdict_cmd = {} hdict_cmd = defaultdict(list)
for cmd in (cmd for cmd in cmdset if cmd.auto_help and not cmd.is_exit for cmd in (cmd for cmd in cmdset if cmd.auto_help and not cmd.is_exit
and not cmd.key.startswith('__') and cmd.access(caller)): and not cmd.key.startswith('__') and cmd.access(caller)):
try:
hdict_cmd[cmd.help_category].append(cmd.key) hdict_cmd[cmd.help_category].append(cmd.key)
except KeyError: hdict_db = defaultdict(list)
hdict_cmd[cmd.help_category] = [cmd.key]
hdict_db = {}
for topic in (topic for topic in HelpEntry.objects.get_all_topics() for topic in (topic for topic in HelpEntry.objects.get_all_topics()
if topic.access(caller, 'view', default=True)): if topic.access(caller, 'view', default=True)):
try:
hdict_db[topic.help_category].append(topic.key) hdict_db[topic.help_category].append(topic.key)
except KeyError:
hdict_db[topic.help_category] = [topic.key]
help_entry = format_help_list(hdict_cmd, hdict_db) help_entry = format_help_list(hdict_cmd, hdict_db)
caller.msg(help_entry) caller.msg(help_entry)
return return

View file

@ -7,7 +7,7 @@ be of use when designing your own game.
""" """
from inspect import ismodule from inspect import ismodule
import os, sys, imp, types import os, sys, imp, types, math
import textwrap import textwrap
import datetime import datetime
import random import random
@ -70,6 +70,25 @@ def dedent(text):
return "" return ""
return textwrap.dedent(text) return textwrap.dedent(text)
def list_to_string(inlist, endsep="and", addquote=False):
    """
    Pretty-format an iterable as a single string. Entries are separated
    by ", " except for the last two, which are joined by endsep instead.

    inlist - any iterable of entries (list, tuple, generator ...); each
             entry is converted with %s-formatting. None or an empty
             iterable gives "".
    endsep - the word placed between the second to last and last entry.
    addquote - if True, every entry is surrounded by double quotes.

    Examples:
      [1,2,3] -> '1, 2 and 3'
      ['a','b'] with endsep='or', addquote=True -> '"a" or "b"'
    """
    template = "\"%s\"" if addquote else "%s"
    # Wrap each entry in a one-element tuple so an entry that is itself a
    # tuple is formatted as a whole rather than unpacked as format arguments.
    # Building a real list first also allows one-shot iterables, which can
    # neither be sliced nor tested for emptiness.
    entries = [template % (entry,) for entry in inlist or ()]
    if not entries:
        return ""
    if len(entries) == 1:
        return entries[0]
    return "%s %s %s" % (", ".join(entries[:-1]), endsep, entries[-1])
def wildcard_to_regexp(instring): def wildcard_to_regexp(instring):
""" """
Converts a player-supplied string that may have wildcards in it to regular Converts a player-supplied string that may have wildcards in it to regular
@ -664,3 +683,38 @@ def init_new_player(player):
if player.character: if player.character:
player.character.db.FIRST_LOGIN = True player.character.db.FIRST_LOGIN = True
player.db.FIRST_LOGIN = True player.db.FIRST_LOGIN = True
def string_similarity(string1, string2):
    """
    This implements a "cosine-similarity" algorithm as described for example in
    Proceedings of the 22nd International Conference on Computation Linguistics
    (Coling 2008), pages 593-600, Manchester, August 2008

    The measure vectors used is simply a "bag of words" type histogram (but for
    letters): each string is turned into a vector of per-character counts over
    the characters occurring in either string, and the cosine of the angle
    between the two vectors is returned.

    The function returns a float 0...1 rating how similar the two strings are
    (small floating-point rounding may put it marginally above 1). The strings
    can contain multiple words. If either string is empty its vector has zero
    length and the cosine is undefined; 0.0 is returned in that case.
    """
    vocabulary = set(string1 + string2)
    vec1 = [string1.count(char) for char in vocabulary]
    vec2 = [string2.count(char) for char in vocabulary]
    norm = math.sqrt(sum(num ** 2 for num in vec1)) * math.sqrt(sum(num ** 2 for num in vec2))
    if not norm:
        # at least one string is empty - avoid dividing by zero
        return 0.0
    return float(sum(num1 * num2 for num1, num2 in zip(vec1, vec2))) / norm
def string_suggestions(string, vocabulary, cutoff=0.6, maxnum=3):
    """
    Given a string and a vocabulary, return a match or a list of suggestions
    based on string similarity.

    Args:
        string (str) - a string to search for
        vocabulary (iterable) - a list of available strings
        cutoff (float, 0-1) - lowest similarity rating accepted as a suggestion
                              (the higher, the more exact a match is required)
        maxnum (int) - maximum number of suggestions to return
    Returns:
        list of suggestions from vocabulary (could be empty if there are no
        matches). If string itself is in vocabulary, the list holds only string.
    """
    if string in vocabulary:
        # exact hit - no ranking needed
        return [string]
    # no exact match. Rate every candidate, keeping those that reach the cutoff.
    rated = []
    for candidate in vocabulary:
        rating = string_similarity(string, candidate)
        if rating >= cutoff:
            rated.append((rating, candidate))
    # highest rating first; the sort is stable so ties keep vocabulary order
    rated.sort(key=lambda pair: pair[0], reverse=True)
    return [pair[1] for pair in rated[:maxnum]]