Changes to noun-handling and retain ellipsis in rplanguage contrib

2020-12-05 11:24:10 +01:00 · 2020-12-05 11:24:10 +01:00 · 806b020da4
commit 806b020da4
parent 99cd66e99c
2 changed files with 72 additions and 35 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -25,6 +25,8 @@
 - Renamed Tutorial classes "Weapon" and "WeaponRack" to "TutorialWeapon" and
  "TutorialWeaponRack" to prevent collisions with classes in mygame
 - New `crafting` contrib, adding a full crafting subsystem (Griatch 2020)
 - The `rplanguage` contrib now auto-capitalizes sentences and retains ellipsis (...). This
  change means that proper nouns at the start of sentences will not be treated as nouns.
 ### Evennia 0.9.5 (2019-2020)
--- a/evennia/contrib/rplanguage.py
+++ b/evennia/contrib/rplanguage.py
@ -60,19 +60,44 @@ Usage:
    Below is an example of "elvish", using "rounder" vowels and sounds:
    ```python
    # vowel/consonant grammar possibilities
    grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc "
               "vcvvccvvc cvcvvcvvcc vcvcvvccvcvv")
    # all not in this group is considered a consonant
    vowels = "eaoiuy"
    # you need a representative of all of the minimal grammars here, so if a
    # grammar v exists, there must be at least one phoneme available with only
    # one vowel in it
    phonemes = ("oi oh ee ae aa eh ah ao aw ay er ey ow ia ih iy "
                "oy ua uh uw y p b t d f v t dh s z sh zh ch jh k "
                "ng g m n l r w")
-    vowels = "eaoiuy"
+
-    grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc "
+    # how much the translation varies in length compared to the original. 0 is
-               "vcvvccvvc cvcvvcvvcc vcvcvvccvcvv")
+    # smallest, higher values give ever bigger randomness (including removing
    # short words entirely)
    word_length_variance = 1
    # if a proper noun (word starting with capitalized letter) should be
    # translated or not. If not (default) it means e.g. names will remain
    # unchanged across languages.
    noun_translate = False
    # all proper nouns (words starting with a capital letter not at the beginning
    # of a sentence) can have either a postfix or -prefix added at all times
    noun_postfix = "'la"
    # words in dict will always be translated this way. The 'auto_translations'
    # is instead a list or filename to file with words to use to help build a
    # bigger dictionary by creating random translations of each word in the
    # list *once* and saving the result for subsequent use.
    manual_translations = {"the":"y'e", "we":"uyi", "she":"semi", "he":"emi",
                          "you": "do", 'me':'mi','i':'me', 'be':"hy'e", 'and':'y'}
    rplanguage.add_language(key="elvish", phonemes=phonemes, grammar=grammar,
                             word_length_variance=word_length_variance,
                             noun_translate=noun_translate,
                             noun_postfix=noun_postfix, vowels=vowels,
                             manual_translations=manual_translations,
                             auto_translations="my_word_file.txt")
@ -117,7 +142,8 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
 _RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.DOTALL + re.UNICODE
 _RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
 _RE_WORD = re.compile(r"\w+", _RE_FLAGS)
-_RE_EXTRA_CHARS = re.compile(r"\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])", _RE_FLAGS)
+# superfluous chars, except ` ... `
 _RE_EXTRA_CHARS = re.compile(r"\s+(?!... )(?=\W)|[,.?;](?!.. )(?=[,?;]|\s+[,.?;])", _RE_FLAGS)
 class LanguageError(RuntimeError):
@ -198,9 +224,13 @@ class LanguageHandler(DefaultScript):
                0 means a minimal variance, higher variance may mean words
                have wildly varying length; this strongly affects how the
                language "looks".
-            noun_translate (bool, optional): If a proper noun, identified as a
+            noun_translate (bool, optional): If a proper noun should be translated or
-                capitalized word, should be translated or not. By default they
+                not. By default they will not, allowing for e.g. the names of characters
-                will not, allowing for e.g. the names of characters to be understandable.
+                to be understandable. A 'noun' is identified as a capitalized word
                *not at the start of a sentence*. This simple metric means that names
                starting a sentence always will be translated (- but hey, maybe
                the fantasy language just never uses a noun at the beginning of
                sentences, who knows?)
            noun_prefix (str, optional): A prefix to go before every noun
                in this language (if any).
            noun_postfix (str, optional): A postfix to go after every noun
@ -245,7 +275,7 @@ class LanguageHandler(DefaultScript):
        # {"vv": ["ea", "oh", ...], ...}
        grammar2phonemes = defaultdict(list)
        for phoneme in phonemes.split():
-            if re.search("\W", phoneme):
+            if re.search(r"\W", phoneme):
                raise LanguageError("The phoneme '%s' contains an invalid character" % phoneme)
            gram = "".join(["v" if char in vowels else "c" for char in phoneme])
            grammar2phonemes[gram].append(phoneme)
@ -253,7 +283,7 @@ class LanguageHandler(DefaultScript):
        # allowed grammar are grouped by length
        gramdict = defaultdict(list)
        for gram in grammar.split():
-            if re.search("\W|(!=[cv])", gram):
+            if re.search(r"\W|(!=[cv])", gram):
                raise LanguageError(
                    "The grammar '%s' is invalid (only 'c' and 'v' are allowed)" % gram
                )
@ -325,6 +355,11 @@ class LanguageHandler(DefaultScript):
        word = match.group()
        lword = len(word)
        # find out what preceded this word
        wpos = match.start()
        preceeding = match.string[:wpos].strip()
        start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding
        if len(word) <= self.level:
            # below level. Don't translate
            new_word = word
@ -334,11 +369,6 @@ class LanguageHandler(DefaultScript):
            if not new_word:
                # no dictionary translation. Generate one
                # find out what preceded this word
                wpos = match.start()
                preceeding = match.string[:wpos].strip()
                start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding
                # make up translation on the fly. Length can
                # vary from un-translated word.
                wlen = max(
@ -373,24 +403,30 @@ class LanguageHandler(DefaultScript):
                            break
                if word.istitle():
-                    title_word = ""
+                    if not start_sentence:
-                    if not start_sentence and not self.language.get("noun_translate", False):
+                        # this is a noun. We miss nouns at the start of
-                        # don't translate what we identify as proper nouns (names)
+                        # sentences this way, but it's as good as we can get
-                        title_word = word
+                        # with this simple analysis. Maybe the fantasy language
-                    elif new_word:
+                        # just doesn't consider nouns at the beginning of
-                        title_word = new_word
+                        # sentences, who knows?
                        if not self.language.get("noun_translate", False):
                            # don't translate what we identify as proper nouns (names)
                            new_word = word
-                    if title_word:
+                        # add noun prefix and/or postfix
-                        # Regardless of if we translate or not, we will add the custom prefix/postfixes
+                        new_word = "{prefix}{word}{postfix}".format(
-                        new_word = "%s%s%s" % (
+                            prefix=self.language["noun_prefix"],
-                            self.language["noun_prefix"],
+                            word=new_word.capitalize(),
-                            title_word.capitalize(),
+                            postfix=self.language["noun_postfix"],
                            self.language["noun_postfix"],
                        )
            if len(word) > 1 and word.isupper():
                # keep LOUD words loud also when translated
                new_word = new_word.upper()
            if start_sentence:
                new_word = new_word.capitalize()
        return new_word
    def translate(self, text, level=0.0, language="default"):
@ -497,19 +533,18 @@ def available_languages():
    return list(_LANGUAGE_HANDLER.attributes.get("language_storage", {}))
-# ------------------------------------------------------------
+# -----------------------------------------------------------------------------
 #
 # Whisper obscuration
 #
-# This obsucration table is designed by obscuring certain
+# This obscuration table is designed by obscuring certain vowels first,
-# vowels first, following by consonants that tend to be
+# followed by consonants that tend to be more audible over long distances,
-# more audible over long distances, like s. Finally it
+# like s. Finally it does non-auditory replacements, like exclamation marks and
-# does non-auditory replacements, like exclamation marks
+# capitalized letters (assumed to be spoken louder) that may still give a user
-# and capitalized letters (assumed to be spoken louder) that may still
+# some idea of the sentence structure. Then the word lengths are also
-# give a user some idea of the sentence structure. Then the  word
+# obfuscated and finally the whisper length itself.
 # lengths are also obfuscated and finally the whisper length itself.
 #
-# ------------------------------------------------------------
+# ------------------------------------------------------------------------------
 _RE_WHISPER_OBSCURE = [