Fix and cleanup the rplanguage contrib a bit
This commit is contained in:
parent
25bfc8f9e8
commit
5111f195a9
2 changed files with 83 additions and 28 deletions
|
|
@ -96,6 +96,7 @@ import re
|
||||||
from random import choice, randint
|
from random import choice, randint
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from evennia import DefaultScript
|
from evennia import DefaultScript
|
||||||
|
from evennia.utils import logger
|
||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------
|
#------------------------------------------------------------
|
||||||
|
|
@ -105,7 +106,8 @@ from evennia import DefaultScript
|
||||||
#------------------------------------------------------------
|
#------------------------------------------------------------
|
||||||
|
|
||||||
# default language grammar
|
# default language grammar
|
||||||
_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh s z sh zh ch jh k ng g m n l r w"
|
_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh " \
|
||||||
|
"s z sh zh ch jh k ng g m n l r w"
|
||||||
_VOWELS = "eaoiuy"
|
_VOWELS = "eaoiuy"
|
||||||
# these must be able to be constructed from phonemes (so for example,
|
# these must be able to be constructed from phonemes (so for example,
|
||||||
# if you have v here, there must exist at least one single-character
|
# if you have v here, there must exist at least one single-character
|
||||||
|
|
@ -115,12 +117,16 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
|
||||||
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
|
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
|
||||||
_RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
|
_RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
|
||||||
_RE_WORD = re.compile(r'\w+', _RE_FLAGS)
|
_RE_WORD = re.compile(r'\w+', _RE_FLAGS)
|
||||||
|
_RE_EXTRA_CHARS = re.compile(r'\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])', _RE_FLAGS)
|
||||||
|
|
||||||
|
|
||||||
class LanguageExistsError(Exception):
|
class LanguageExistsError(Exception):
|
||||||
message = "Language is already created. Re-adding it will re-build" \
|
message = "Language is already created. Re-adding it will re-build" \
|
||||||
" its dictionary map. Use 'force=True' keyword if you are sure."
|
" its dictionary map. Use 'force=True' keyword if you are sure."
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.message
|
||||||
|
|
||||||
|
|
||||||
class LanguageHandler(DefaultScript):
|
class LanguageHandler(DefaultScript):
|
||||||
"""
|
"""
|
||||||
|
|
@ -156,8 +162,11 @@ class LanguageHandler(DefaultScript):
|
||||||
self.db.language_storage = {}
|
self.db.language_storage = {}
|
||||||
|
|
||||||
def add(self, key="default", phonemes=_PHONEMES,
|
def add(self, key="default", phonemes=_PHONEMES,
|
||||||
grammar=_GRAMMAR, word_length_variance=0, noun_prefix="",
|
grammar=_GRAMMAR, word_length_variance=0,
|
||||||
noun_postfix="", vowels=_VOWELS, manual_translations=None,
|
noun_translate=False,
|
||||||
|
noun_prefix="",
|
||||||
|
noun_postfix="",
|
||||||
|
vowels=_VOWELS, manual_translations=None,
|
||||||
auto_translations=None, force=False):
|
auto_translations=None, force=False):
|
||||||
"""
|
"""
|
||||||
Add a new language. Note that you generally only need to do
|
Add a new language. Note that you generally only need to do
|
||||||
|
|
@ -170,14 +179,21 @@ class LanguageHandler(DefaultScript):
|
||||||
will be used as an identifier for the language so it
|
will be used as an identifier for the language so it
|
||||||
should be short and unique.
|
should be short and unique.
|
||||||
phonemes (str, optional): Space-separated string of all allowed
|
phonemes (str, optional): Space-separated string of all allowed
|
||||||
phonemes in this language.
|
phonemes in this language. If any of the base phonemes
|
||||||
|
(c, v, cc, vv) are present in the grammar, the phoneme list must
|
||||||
|
at least include one example of each.
|
||||||
grammar (str): All allowed consonant (c) and vowel (v) combinations
|
grammar (str): All allowed consonant (c) and vowel (v) combinations
|
||||||
allowed to build up words. For example cvv would be a consonant
|
allowed to build up words. Grammars are broken into the base phonemes
|
||||||
followed by two vowels (would allow for a word like 'die').
|
(c, v, cc, vv) prioritizing the longer bases. So cvv would be
|
||||||
|
c + vv (would allow for a word like 'die'), whereas
|
||||||
|
cvcvccc would be c+v+c+v+cc+c (a word like 'galosch').
|
||||||
word_length_variance (real): The variation of length of words.
|
word_length_variance (real): The variation of length of words.
|
||||||
0 means a minimal variance, higher variance may mean words
|
0 means a minimal variance, higher variance may mean words
|
||||||
have wildly varying length; this strongly affects how the
|
have wildly varying length; this strongly affects how the
|
||||||
language "looks".
|
language "looks".
|
||||||
|
noun_translate (bool, optional): Whether a proper noun, identified as a
|
||||||
|
capitalized word, should be translated or not. By default they
|
||||||
|
will not, allowing for e.g. the names of characters to be understandable.
|
||||||
noun_prefix (str, optional): A prefix to go before every noun
|
noun_prefix (str, optional): A prefix to go before every noun
|
||||||
in this language (if any).
|
in this language (if any).
|
||||||
noun_postfix (str, optional): A postfix to go after every noun
|
noun_postfix (str, optional): A postfix to go after every noun
|
||||||
|
|
@ -261,6 +277,7 @@ class LanguageHandler(DefaultScript):
|
||||||
"grammar": grammar,
|
"grammar": grammar,
|
||||||
"grammar2phonemes": dict(grammar2phonemes),
|
"grammar2phonemes": dict(grammar2phonemes),
|
||||||
"word_length_variance": word_length_variance,
|
"word_length_variance": word_length_variance,
|
||||||
|
"noun_translate": noun_translate,
|
||||||
"noun_prefix": noun_prefix,
|
"noun_prefix": noun_prefix,
|
||||||
"noun_postfix": noun_postfix}
|
"noun_postfix": noun_postfix}
|
||||||
self.db.language_storage[key] = storage
|
self.db.language_storage[key] = storage
|
||||||
|
|
@ -282,34 +299,63 @@ class LanguageHandler(DefaultScript):
|
||||||
"""
|
"""
|
||||||
word = match.group()
|
word = match.group()
|
||||||
lword = len(word)
|
lword = len(word)
|
||||||
|
|
||||||
if len(word) <= self.level:
|
if len(word) <= self.level:
|
||||||
# below level. Don't translate
|
# below level. Don't translate
|
||||||
new_word = word
|
new_word = word
|
||||||
else:
|
else:
|
||||||
# translate the word
|
# try to translate the word from dictionary
|
||||||
new_word = self.language["translation"].get(word.lower(), "")
|
new_word = self.language["translation"].get(word.lower(), "")
|
||||||
if not new_word:
|
if not new_word:
|
||||||
if word.istitle():
|
# no dictionary translation. Generate one
|
||||||
# capitalized word we don't have a translation for -
|
|
||||||
# treat as a name (don't translate)
|
# find out what preceded this word
|
||||||
new_word = "%s%s%s" % (self.language["noun_prefix"], word, self.language["noun_postfix"])
|
wpos = match.start()
|
||||||
else:
|
preceeding = match.string[:wpos].strip()
|
||||||
# make up translation on the fly. Length can
|
start_sentence = preceeding.endswith(".") or not preceeding
|
||||||
# vary from un-translated word.
|
|
||||||
wlen = max(0, lword + sum(randint(-1, 1) for i
|
# make up translation on the fly. Length can
|
||||||
in range(self.language["word_length_variance"])))
|
# vary from un-translated word.
|
||||||
grammar = self.language["grammar"]
|
wlen = max(0, lword + sum(randint(-1, 1) for i
|
||||||
if wlen not in grammar:
|
in range(self.language["word_length_variance"])))
|
||||||
|
grammar = self.language["grammar"]
|
||||||
|
if wlen not in grammar:
|
||||||
|
if randint(0, 1) == 0:
|
||||||
# this word has no direct translation!
|
# this word has no direct translation!
|
||||||
return ""
|
wlen = 0
|
||||||
|
new_word = ''
|
||||||
|
else:
|
||||||
|
# use random word length
|
||||||
|
wlen = choice(grammar.keys())
|
||||||
|
|
||||||
|
if wlen:
|
||||||
structure = choice(grammar[wlen])
|
structure = choice(grammar[wlen])
|
||||||
grammar2phonemes = self.language["grammar2phonemes"]
|
grammar2phonemes = self.language["grammar2phonemes"]
|
||||||
for match in _RE_GRAMMAR.finditer(structure):
|
for match in _RE_GRAMMAR.finditer(structure):
|
||||||
# there are only four combinations: vv,cc,c,v
|
# there are only four combinations: vv,cc,c,v
|
||||||
new_word += choice(grammar2phonemes[match.group()])
|
try:
|
||||||
if word.istitle():
|
new_word += choice(grammar2phonemes[match.group()])
|
||||||
# capitalize words the same way
|
except KeyError:
|
||||||
new_word = new_word.capitalize()
|
logger.log_trace("You need to supply at least one example of each of "
|
||||||
|
"the four base phonemes (c, v, cc, vv)")
|
||||||
|
# abort translation here
|
||||||
|
new_word = ''
|
||||||
|
break
|
||||||
|
|
||||||
|
if word.istitle():
|
||||||
|
title_word = ''
|
||||||
|
if not start_sentence and not self.language.get("noun_translate", False):
|
||||||
|
# don't translate what we identify as proper nouns (names)
|
||||||
|
title_word = word
|
||||||
|
elif new_word:
|
||||||
|
title_word = new_word
|
||||||
|
|
||||||
|
if title_word:
|
||||||
|
# Whether or not we translated, add the custom prefix/postfix
|
||||||
|
new_word = "%s%s%s" % (self.language["noun_prefix"],
|
||||||
|
title_word.capitalize(),
|
||||||
|
self.language["noun_postfix"])
|
||||||
|
|
||||||
if len(word) > 1 and word.isupper():
|
if len(word) > 1 and word.isupper():
|
||||||
# keep LOUD words loud also when translated
|
# keep LOUD words loud also when translated
|
||||||
new_word = new_word.upper()
|
new_word = new_word.upper()
|
||||||
|
|
@ -341,7 +387,9 @@ class LanguageHandler(DefaultScript):
|
||||||
|
|
||||||
# configuring the translation
|
# configuring the translation
|
||||||
self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
|
self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
|
||||||
return _RE_WORD.sub(self._translate_sub, text)
|
translation = _RE_WORD.sub(self._translate_sub, text)
|
||||||
|
# the substitution may leave stray whitespace before punctuation and doubled punctuation; remove those
|
||||||
|
return _RE_EXTRA_CHARS.sub("", translation)
|
||||||
|
|
||||||
|
|
||||||
# Language access functions
|
# Language access functions
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ from evennia.contrib import rplanguage
|
||||||
mtrans = {"testing": "1", "is": "2", "a": "3", "human": "4"}
|
mtrans = {"testing": "1", "is": "2", "a": "3", "human": "4"}
|
||||||
atrans = ["An", "automated", "advantageous", "repeatable", "faster"]
|
atrans = ["An", "automated", "advantageous", "repeatable", "faster"]
|
||||||
|
|
||||||
text = "Automated testing is advantageous for a number of reasons:" \
|
text = "Automated testing is advantageous for a number of reasons: " \
|
||||||
"tests may be executed Continuously without the need for human " \
|
"tests may be executed Continuously without the need for human " \
|
||||||
"intervention, They are easily repeatable, and often faster."
|
"intervention, They are easily repeatable, and often faster."
|
||||||
|
|
||||||
|
|
@ -33,6 +33,12 @@ class TestLanguage(EvenniaTest):
|
||||||
manual_translations=mtrans,
|
manual_translations=mtrans,
|
||||||
auto_translations=atrans,
|
auto_translations=atrans,
|
||||||
force=True)
|
force=True)
|
||||||
|
rplanguage.add_language(key="binary",
|
||||||
|
phonemes="oo ii ck w b d t",
|
||||||
|
grammar="cvvv cvv cvvcv cvvcvv cvvvc cvvvcvv cvvc",
|
||||||
|
vowels="oei",
|
||||||
|
noun_prefix='beep-',
|
||||||
|
word_length_variance=4)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
super(TestLanguage, self).tearDown()
|
super(TestLanguage, self).tearDown()
|
||||||
|
|
@ -50,16 +56,17 @@ class TestLanguage(EvenniaTest):
|
||||||
self.assertEqual(result1[1], "1")
|
self.assertEqual(result1[1], "1")
|
||||||
self.assertEqual(result1[2], "2")
|
self.assertEqual(result1[2], "2")
|
||||||
self.assertEqual(result2[-1], result2[-1])
|
self.assertEqual(result2[-1], result2[-1])
|
||||||
|
print(rplanguage.obfuscate_language(text, level=1.0, language='binary'))
|
||||||
|
|
||||||
def test_available_languages(self):
|
def test_available_languages(self):
|
||||||
self.assertEqual(rplanguage.available_languages(), ["testlang"])
|
self.assertEqual(rplanguage.available_languages(), ["testlang", "binary"])
|
||||||
|
|
||||||
def test_obfuscate_whisper(self):
|
def test_obfuscate_whisper(self):
|
||||||
self.assertEqual(rplanguage.obfuscate_whisper(text, level=0.0), text)
|
self.assertEqual(rplanguage.obfuscate_whisper(text, level=0.0), text)
|
||||||
assert (rplanguage.obfuscate_whisper(text, level=0.1).startswith(
|
assert (rplanguage.obfuscate_whisper(text, level=0.1).startswith(
|
||||||
'-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons:t-sts m-y b- -x-cut-d Continuously'))
|
'-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons: t-sts m-y b- -x-cut-d Continuously'))
|
||||||
assert(rplanguage.obfuscate_whisper(text, level=0.5).startswith(
|
assert(rplanguage.obfuscate_whisper(text, level=0.5).startswith(
|
||||||
'--------- --s---- -s -----------s f-- - ------ -f ---s--s:--s-s '))
|
'--------- --s---- -s -----------s f-- - ------ -f ---s--s: --s-s '))
|
||||||
self.assertEqual(rplanguage.obfuscate_whisper(text, level=1.0), "...")
|
self.assertEqual(rplanguage.obfuscate_whisper(text, level=1.0), "...")
|
||||||
|
|
||||||
# Testing of emoting / sdesc / recog system
|
# Testing of emoting / sdesc / recog system
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue