Fix and cleanup the rplanguage contrib a bit

2017-12-05 19:54:40 +01:00 · 2017-12-05 19:54:40 +01:00 · 5111f195a9
commit 5111f195a9
parent 25bfc8f9e8
2 changed files with 83 additions and 28 deletions
--- a/evennia/contrib/rplanguage.py
+++ b/evennia/contrib/rplanguage.py
@ -96,6 +96,7 @@ import re
 from random import choice, randint
 from collections import defaultdict
 from evennia import DefaultScript
 from evennia.utils import logger
 #------------------------------------------------------------
@ -105,7 +106,8 @@ from evennia import DefaultScript
 #------------------------------------------------------------
 # default language grammar
-_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh s z sh zh ch jh k ng g m n l r w"
+_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh " \
            "s z sh zh ch jh k ng g m n l r w"
 _VOWELS = "eaoiuy"
 # these must be able to be constructed from phonemes (so for example,
 # if you have v here, there must exist at least one single-character
@ -115,12 +117,16 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
 _RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
 _RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
 _RE_WORD = re.compile(r'\w+', _RE_FLAGS)
 _RE_EXTRA_CHARS = re.compile(r'\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])', _RE_FLAGS)
 class LanguageExistsError(Exception):
    message = "Language is already created. Re-adding it will re-build" \
              " its dictionary map. Use 'force=True' keyword if you are sure."
    def __str__(self):
        return self.message
 class LanguageHandler(DefaultScript):
    """
@ -156,8 +162,11 @@ class LanguageHandler(DefaultScript):
        self.db.language_storage = {}
    def add(self, key="default", phonemes=_PHONEMES,
-            grammar=_GRAMMAR, word_length_variance=0, noun_prefix="",
+            grammar=_GRAMMAR, word_length_variance=0,
-            noun_postfix="", vowels=_VOWELS, manual_translations=None,
+            noun_translate=False,
            noun_prefix="",
            noun_postfix="",
            vowels=_VOWELS, manual_translations=None,
            auto_translations=None, force=False):
        """
        Add a new language. Note that you generally only need to do
@ -170,14 +179,21 @@ class LanguageHandler(DefaultScript):
                will be used as an identifier for the language so it
                should be short and unique.
            phonemes (str, optional): Space-separated string of all allowed
-                phonemes in this language.
+                phonemes in this language. If any of the base phonemes
                (c, v, cc, vv) are present in the grammar, the phoneme list must
                at least include one example of each.
            grammar (str): All allowed consonant (c) and vowel (v) combinations
-                allowed to build up words. For example cvv would be a consonant
+                allowed to build up words. Grammars are broken into the base phonemes
-                followed by two vowels (would allow for a word like 'die').
+                (c, v, cc, vv) prioritizing the longer bases. So cvv would be
                c + vv (would allow for a word like 'die'), whereas
                cvcvccc would be c+v+c+v+cc+c (a word like 'galosch').
            word_length_variance (real): The variation of length of words.
                0 means a minimal variance, higher variance may mean words
                have wildly varying length; this strongly affects how the
                language "looks".
            noun_translate (bool, optional): If a proper noun, identified as a
                capitalized word, should be translated or not. By default they
                will not, allowing for e.g. the names of characters to be understandable.
            noun_prefix (str, optional): A prefix to go before every noun
                in this language (if any).
            noun_postfix (str, optional): A postfix to go after every noun
@ -261,6 +277,7 @@ class LanguageHandler(DefaultScript):
                   "grammar": grammar,
                   "grammar2phonemes": dict(grammar2phonemes),
                   "word_length_variance": word_length_variance,
                   "noun_translate": noun_translate,
                   "noun_prefix": noun_prefix,
                   "noun_postfix": noun_postfix}
        self.db.language_storage[key] = storage
@ -282,34 +299,63 @@ class LanguageHandler(DefaultScript):
        """
        word = match.group()
        lword = len(word)
        if len(word) <= self.level:
            # below level. Don't translate
            new_word = word
        else:
-            # translate the word
+            # try to translate the word from dictionary
            new_word = self.language["translation"].get(word.lower(), "")
            if not new_word:
-                if word.istitle():
+                # no dictionary translation. Generate one
-                    # capitalized word we don't have a translation for -
+
-                    # treat as a name (don't translate)
+                # find out what preceded this word
-                    new_word = "%s%s%s" % (self.language["noun_prefix"], word, self.language["noun_postfix"])
+                wpos = match.start()
-                else:
+                preceeding = match.string[:wpos].strip()
-                    # make up translation on the fly. Length can
+                start_sentence = preceeding.endswith(".") or not preceeding
-                    # vary from un-translated word.
+
-                    wlen = max(0, lword + sum(randint(-1, 1) for i
+                # make up translation on the fly. Length can
-                                              in range(self.language["word_length_variance"])))
+                # vary from un-translated word.
-                    grammar = self.language["grammar"]
+                wlen = max(0, lword + sum(randint(-1, 1) for i
-                    if wlen not in grammar:
+                                          in range(self.language["word_length_variance"])))
                grammar = self.language["grammar"]
                if wlen not in grammar:
                    if randint(0, 1) == 0:
                        # this word has no direct translation!
-                        return ""
+                        wlen = 0
                        new_word = ''
                    else:
                        # use random word length
                        wlen = choice(grammar.keys())
                if wlen:
                    structure = choice(grammar[wlen])
                    grammar2phonemes = self.language["grammar2phonemes"]
                    for match in _RE_GRAMMAR.finditer(structure):
                        # there are only four combinations: vv,cc,c,v
-                        new_word += choice(grammar2phonemes[match.group()])
+                        try:
-            if word.istitle():
+                            new_word += choice(grammar2phonemes[match.group()])
-                # capitalize words the same way
+                        except KeyError:
-                new_word = new_word.capitalize()
+                            logger.log_trace("You need to supply at least one example of each of "
                                             "the four base phonemes (c, v, cc, vv)")
                            # abort translation here
                            new_word = ''
                            break
                if word.istitle():
                    title_word = ''
                    if not start_sentence and not self.language.get("noun_translate", False):
                        # don't translate what we identify as proper nouns (names)
                        title_word = word
                    elif new_word:
                        title_word = new_word
                    if title_word:
                        # Regardless of if we translate or not, we will add the custom prefix/postfixes
                        new_word = "%s%s%s" % (self.language["noun_prefix"],
                                               title_word.capitalize(),
                                               self.language["noun_postfix"])
            if len(word) > 1 and word.isupper():
                # keep LOUD words loud also when translated
                new_word = new_word.upper()
@ -341,7 +387,9 @@ class LanguageHandler(DefaultScript):
        # configuring the translation
        self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
-        return _RE_WORD.sub(self._translate_sub, text)
+        translation = _RE_WORD.sub(self._translate_sub, text)
        # the substitution may create too long empty spaces, remove those
        return _RE_EXTRA_CHARS.sub("", translation)
 # Language access functions
--- a/evennia/contrib/tests.py
+++ b/evennia/contrib/tests.py
@ -18,7 +18,7 @@ from evennia.contrib import rplanguage
 mtrans = {"testing": "1", "is": "2", "a": "3", "human": "4"}
 atrans = ["An", "automated", "advantageous", "repeatable", "faster"]
-text = "Automated testing is advantageous for a number of reasons:" \
+text = "Automated testing is advantageous for a number of reasons: " \
       "tests may be executed Continuously without the need for human " \
       "intervention, They are easily repeatable, and often faster."
@ -33,6 +33,12 @@ class TestLanguage(EvenniaTest):
                                manual_translations=mtrans,
                                auto_translations=atrans,
                                force=True)
        rplanguage.add_language(key="binary",
                                phonemes="oo ii ck w b d t",
                                grammar="cvvv cvv cvvcv cvvcvv cvvvc cvvvcvv cvvc",
                                vowels="oei",
                                noun_prefix='beep-',
                                word_length_variance=4)
    def tearDown(self):
        super(TestLanguage, self).tearDown()
@ -50,16 +56,17 @@ class TestLanguage(EvenniaTest):
        self.assertEqual(result1[1], "1")
        self.assertEqual(result1[2], "2")
        self.assertEqual(result2[-1], result2[-1])
        print(rplanguage.obfuscate_language(text, level=1.0, language='binary'))
    def test_available_languages(self):
-        self.assertEqual(rplanguage.available_languages(), ["testlang"])
+        self.assertEqual(rplanguage.available_languages(), ["testlang", "binary"])
    def test_obfuscate_whisper(self):
        self.assertEqual(rplanguage.obfuscate_whisper(text, level=0.0), text)
        assert (rplanguage.obfuscate_whisper(text, level=0.1).startswith(
-            '-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons:t-sts m-y b- -x-cut-d Continuously'))
+            '-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons: t-sts m-y b- -x-cut-d Continuously'))
        assert(rplanguage.obfuscate_whisper(text, level=0.5).startswith(
-            '--------- --s---- -s -----------s f-- - ------ -f ---s--s:--s-s '))
+            '--------- --s---- -s -----------s f-- - ------ -f ---s--s: --s-s '))
        self.assertEqual(rplanguage.obfuscate_whisper(text, level=1.0), "...")
 # Testing of emoting / sdesc / recog system