Much improved inlinefunc regex; resolving #1498

This commit is contained in:
Griatch 2018-06-17 01:22:24 +02:00
parent d047f2b919
commit c5f1ea5781

View file

@@ -188,17 +188,20 @@ except AttributeError:
# regex definitions # regex definitions
_RE_STARTTOKEN = re.compile(r"(?<!\\)\$(\w+)\(") # unescaped $funcname{ (start of function call) _RE_STARTTOKEN = re.compile(r"(?<!\\)\$(\w+)\(") # unescaped $funcname( (start of function call)
# note: this regex can be experimented with at https://regex101.com/r/kGR3vE/1
_RE_TOKEN = re.compile(r""" _RE_TOKEN = re.compile(r"""
(?<!\\)\'\'\'(?P<singlequote>.*?)(?<!\\)\'\'\'| # unescaped single-triples (escapes all inside them) (?<!\\)\'\'\'(?P<singlequote>.*?)(?<!\\)\'\'\'| # single-triplets escape all inside
(?<!\\)\"\"\"(?P<doublequote>.*?)(?<!\\)\"\"\"| # unescaped normal triple quotes (escapes all inside them) (?<!\\)\"\"\"(?P<doublequote>.*?)(?<!\\)\"\"\"| # double-triplets escape all inside
(?P<comma>(?<!\\)\,)| # unescaped , (argument separator) (?P<comma>(?<!\\)\,)| # , (argument sep)
(?P<end>(?<!\\)\))| # unescaped ) (end of function call) (?P<end>(?<!\\)\))| # ) (end of func call)
(?P<start>(?<!\\)\$\w+\()| # unescaped $funcname( (start of function call) (?P<start>(?<!\\)\$\w+\()| # $funcname (start of func call)
(?P<escaped>\\'|\\"|\\\)|\\$\w+\()| # escaped tokens should re-appear in text (?P<escaped> # escaped tokens to re-insert sans backslash
(?P<rest>[\w\s.-\/#!%\^&\*;:=\-_`~\|\(}{\[\]@\$\\\+\<\>?]+|\"{1}|\'{1}) # everything else """, \\\'|\\\"|\\\)|\\\$\w+\()|
re.UNICODE + re.IGNORECASE + re.VERBOSE + re.DOTALL) (?P<rest> # everything else to re-insert verbatim
\$(?!\w+\()|\'{1}|\"{1}|\\{1}|[^),$\'\"\\]+)""",
re.UNICODE | re.IGNORECASE | re.VERBOSE | re.DOTALL)
# Cache for function lookups. # Cache for function lookups.
_PARSING_CACHE = utils.LimitedSizeOrderedDict(size_limit=1000) _PARSING_CACHE = utils.LimitedSizeOrderedDict(size_limit=1000)
@@ -293,6 +296,7 @@ def parse_inlinefunc(string, strip=False, available_funcs=None, **kwargs):
ncallable = 0 ncallable = 0
for match in _RE_TOKEN.finditer(string): for match in _RE_TOKEN.finditer(string):
gdict = match.groupdict() gdict = match.groupdict()
print("match: {}".format({key: val for key, val in gdict.items() if val}))
if gdict["singlequote"]: if gdict["singlequote"]:
stack.append(gdict["singlequote"]) stack.append(gdict["singlequote"])
elif gdict["doublequote"]: elif gdict["doublequote"]: