Changes to noun-handling and retain ellipsis in rplanguage contrib

This commit is contained in:
Griatch 2020-12-05 11:24:10 +01:00
parent 99cd66e99c
commit 806b020da4
2 changed files with 72 additions and 35 deletions

View file

@ -25,6 +25,8 @@
- Renamed Tutorial classes "Weapon" and "WeaponRack" to "TutorialWeapon" and - Renamed Tutorial classes "Weapon" and "WeaponRack" to "TutorialWeapon" and
"TutorialWeaponRack" to prevent collisions with classes in mygame "TutorialWeaponRack" to prevent collisions with classes in mygame
- New `crafting` contrib, adding a full crafting subsystem (Griatch 2020) - New `crafting` contrib, adding a full crafting subsystem (Griatch 2020)
- The `rplanguage` contrib now auto-capitalizes sentences and retains ellipses (...). This
change means that a proper noun at the start of a sentence is not recognized as a noun
and is handled like any other word.
### Evennia 0.9.5 (2019-2020) ### Evennia 0.9.5 (2019-2020)

View file

@ -60,19 +60,44 @@ Usage:
Below is an example of "elvish", using "rounder" vowels and sounds: Below is an example of "elvish", using "rounder" vowels and sounds:
```python ```python
# vowel/consonant grammar possibilities
grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc "
"vcvvccvvc cvcvvcvvcc vcvcvvccvcvv")
# everything not in this group is considered a consonant
vowels = "eaoiuy"
# you need a representative of all of the minimal grammars here, so if a
# grammar v exists, there must be at least one phoneme available with only
# one vowel in it
phonemes = ("oi oh ee ae aa eh ah ao aw ay er ey ow ia ih iy " phonemes = ("oi oh ee ae aa eh ah ao aw ay er ey ow ia ih iy "
"oy ua uh uw y p b t d f v t dh s z sh zh ch jh k " "oy ua uh uw y p b t d f v t dh s z sh zh ch jh k "
"ng g m n l r w") "ng g m n l r w")
vowels = "eaoiuy"
grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc " # how much the translation varies in length compared to the original. 0 is
"vcvvccvvc cvcvvcvvcc vcvcvvccvcvv") # smallest, higher values give ever bigger randomness (including removing
# short words entirely)
word_length_variance = 1 word_length_variance = 1
# if a proper noun (word starting with capitalized letter) should be
# translated or not. If not (default) it means e.g. names will remain
# unchanged across languages.
noun_translate = False
# all proper nouns (words starting with a capital letter not at the beginning
# of a sentence) can have a prefix and/or postfix added at all times
noun_postfix = "'la" noun_postfix = "'la"
# words in dict will always be translated this way. The 'auto_translations'
# is instead a list or filename to file with words to use to help build a
# bigger dictionary by creating random translations of each word in the
# list *once* and saving the result for subsequent use.
manual_translations = {"the":"y'e", "we":"uyi", "she":"semi", "he":"emi", manual_translations = {"the":"y'e", "we":"uyi", "she":"semi", "he":"emi",
"you": "do", 'me':'mi','i':'me', 'be':"hy'e", 'and':'y'} "you": "do", 'me':'mi','i':'me', 'be':"hy'e", 'and':'y'}
rplanguage.add_language(key="elvish", phonemes=phonemes, grammar=grammar, rplanguage.add_language(key="elvish", phonemes=phonemes, grammar=grammar,
word_length_variance=word_length_variance, word_length_variance=word_length_variance,
noun_translate=noun_translate,
noun_postfix=noun_postfix, vowels=vowels, noun_postfix=noun_postfix, vowels=vowels,
manual_translations=manual_translations, manual_translations=manual_translations,
auto_translations="my_word_file.txt") auto_translations="my_word_file.txt")
@ -117,7 +142,8 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.DOTALL + re.UNICODE _RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.DOTALL + re.UNICODE
_RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS) _RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
_RE_WORD = re.compile(r"\w+", _RE_FLAGS) _RE_WORD = re.compile(r"\w+", _RE_FLAGS)
_RE_EXTRA_CHARS = re.compile(r"\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])", _RE_FLAGS) # superfluous chars, except ` ... `
_RE_EXTRA_CHARS = re.compile(r"\s+(?!... )(?=\W)|[,.?;](?!.. )(?=[,?;]|\s+[,.?;])", _RE_FLAGS)
class LanguageError(RuntimeError): class LanguageError(RuntimeError):
@ -198,9 +224,13 @@ class LanguageHandler(DefaultScript):
0 means a minimal variance, higher variance may mean words 0 means a minimal variance, higher variance may mean words
have wildly varying length; this strongly affects how the have wildly varying length; this strongly affects how the
language "looks". language "looks".
noun_translate (bool, optional): If a proper noun, identified as a noun_translate (bool, optional): If a proper noun should be translated or
capitalized word, should be translated or not. By default they not. By default they will not, allowing for e.g. the names of characters
will not, allowing for e.g. the names of characters to be understandable. to be understandable. A 'noun' is identified as a capitalized word
*not at the start of a sentence*. This simple metric means that names
starting a sentence will always be translated (but hey, maybe
the fantasy language just never uses a noun at the beginning of
sentences, who knows?)
noun_prefix (str, optional): A prefix to go before every noun noun_prefix (str, optional): A prefix to go before every noun
in this language (if any). in this language (if any).
noun_postfix (str, optional): A postfix to go after every noun noun_postfix (str, optional): A postfix to go after every noun
@ -245,7 +275,7 @@ class LanguageHandler(DefaultScript):
# {"vv": ["ea", "oh", ...], ...} # {"vv": ["ea", "oh", ...], ...}
grammar2phonemes = defaultdict(list) grammar2phonemes = defaultdict(list)
for phoneme in phonemes.split(): for phoneme in phonemes.split():
if re.search("\W", phoneme): if re.search(r"\W", phoneme):
raise LanguageError("The phoneme '%s' contains an invalid character" % phoneme) raise LanguageError("The phoneme '%s' contains an invalid character" % phoneme)
gram = "".join(["v" if char in vowels else "c" for char in phoneme]) gram = "".join(["v" if char in vowels else "c" for char in phoneme])
grammar2phonemes[gram].append(phoneme) grammar2phonemes[gram].append(phoneme)
@ -253,7 +283,7 @@ class LanguageHandler(DefaultScript):
# allowed grammar are grouped by length # allowed grammar are grouped by length
gramdict = defaultdict(list) gramdict = defaultdict(list)
for gram in grammar.split(): for gram in grammar.split():
if re.search("\W|(!=[cv])", gram): if re.search(r"\W|(!=[cv])", gram):
raise LanguageError( raise LanguageError(
"The grammar '%s' is invalid (only 'c' and 'v' are allowed)" % gram "The grammar '%s' is invalid (only 'c' and 'v' are allowed)" % gram
) )
@ -325,6 +355,11 @@ class LanguageHandler(DefaultScript):
word = match.group() word = match.group()
lword = len(word) lword = len(word)
# find out what preceded this word
wpos = match.start()
preceeding = match.string[:wpos].strip()
start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding
if len(word) <= self.level: if len(word) <= self.level:
# below level. Don't translate # below level. Don't translate
new_word = word new_word = word
@ -334,11 +369,6 @@ class LanguageHandler(DefaultScript):
if not new_word: if not new_word:
# no dictionary translation. Generate one # no dictionary translation. Generate one
# find out what preceeded this word
wpos = match.start()
preceeding = match.string[:wpos].strip()
start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding
# make up translation on the fly. Length can # make up translation on the fly. Length can
# vary from un-translated word. # vary from un-translated word.
wlen = max( wlen = max(
@ -373,24 +403,30 @@ class LanguageHandler(DefaultScript):
break break
if word.istitle(): if word.istitle():
title_word = "" if not start_sentence:
if not start_sentence and not self.language.get("noun_translate", False): # this is a noun. We miss nouns at the start of
# don't translate what we identify as proper nouns (names) # sentences this way, but it's as good as we can get
title_word = word # with this simple analysis. Maybe the fantasy language
elif new_word: # just don't consider nouns at the beginning of
title_word = new_word # sentences, who knows?
if not self.language.get("noun_translate", False):
# don't translate what we identify as proper nouns (names)
new_word = word
if title_word: # add noun prefix and/or postfix
# Regardless of if we translate or not, we will add the custom prefix/postfixes new_word = "{prefix}{word}{postfix}".format(
new_word = "%s%s%s" % ( prefix=self.language["noun_prefix"],
self.language["noun_prefix"], word=new_word.capitalize(),
title_word.capitalize(), postfix=self.language["noun_postfix"],
self.language["noun_postfix"],
) )
if len(word) > 1 and word.isupper(): if len(word) > 1 and word.isupper():
# keep LOUD words loud also when translated # keep LOUD words loud also when translated
new_word = new_word.upper() new_word = new_word.upper()
if start_sentence:
new_word = new_word.capitalize()
return new_word return new_word
def translate(self, text, level=0.0, language="default"): def translate(self, text, level=0.0, language="default"):
@ -497,19 +533,18 @@ def available_languages():
return list(_LANGUAGE_HANDLER.attributes.get("language_storage", {})) return list(_LANGUAGE_HANDLER.attributes.get("language_storage", {}))
# ------------------------------------------------------------ # -----------------------------------------------------------------------------
# #
# Whisper obscuration # Whisper obscuration
# #
# This obscuration table is designed by obscuring certain # This obscuration table is designed by obscuring certain vowels first,
# vowels first, followed by consonants that tend to be # followed by consonants that tend to be more audible over long distances,
# more audible over long distances, like s. Finally it # like s. Finally it does non-auditory replacements, like exclamation marks and
# does non-auditory replacements, like exclamation marks # capitalized letters (assumed to be spoken louder) that may still give a user
# and capitalized letters (assumed to be spoken louder) that may still # some idea of the sentence structure. Then the word lengths are also
# give a user some idea of the sentence structure. Then the word # obfuscated and finally the whisper length itself.
# lengths are also obfuscated and finally the whisper # length itself.
# #
# ------------------------------------------------------------ # ------------------------------------------------------------------------------
_RE_WHISPER_OBSCURE = [ _RE_WHISPER_OBSCURE = [