optimized ansi parsing a bit as well as did some general cleanup.

This commit is contained in:
Griatch 2014-03-01 17:06:04 +01:00
parent 2aa9ef9997
commit ef8755581c
3 changed files with 68 additions and 58 deletions

View file

@ -217,7 +217,7 @@ def format_script_list(scripts):
table = EvTable("{wdbref{n", "{wobj{n", "{wkey{n", "{wintval{n", "{wnext{n", table = EvTable("{wdbref{n", "{wobj{n", "{wkey{n", "{wintval{n", "{wnext{n",
"{wrept{n", "{wdb", "{wtypeclass{n", "{wdesc{n", "{wrept{n", "{wdb", "{wtypeclass{n", "{wdesc{n",
align='r', border="cells") align='r', border="tablecols")
for script in scripts: for script in scripts:
nextrep = script.time_until_next_repeat() nextrep = script.time_until_next_repeat()
if nextrep is None: if nextrep is None:

View file

@ -60,18 +60,9 @@ ANSI_SPACE = " "
# Escapes # Escapes
ANSI_ESCAPES = ("{{", "%%", "\\\\") ANSI_ESCAPES = ("{{", "%%", "\\\\")
from collections import OrderedDict
def sub_meth(obj, function): _PARSE_CACHE = OrderedDict()
""" _PARSE_CACHE_SIZE = 10000
RegexObject.sub() allows for the 'repl' argument to be a function.
However, it doesn't call bound methods correctly. This forces 'self'
to be passed.
"""
if isinstance(function, basestring):
return function
def wrapped(*args, **kwargs):
return function(obj, *args, **kwargs)
return wrapped
class ANSIParser(object): class ANSIParser(object):
@ -84,7 +75,14 @@ class ANSIParser(object):
an extra { for Merc-style codes an extra { for Merc-style codes
""" """
def parse_rgb(self, rgbmatch): def sub_ansi(self, ansimatch):
"""
Replacer used by re.sub to replace ansi
markers with correct ansi sequences
"""
return self.ansi_map.get(ansimatch.group(), "")
def sub_xterm256(self, rgbmatch):
""" """
This is a replacer method called by re.sub with the matched This is a replacer method called by re.sub with the matched
tag. It must return the correct ansi sequence. tag. It must return the correct ansi sequence.
@ -94,7 +92,9 @@ class ANSIParser(object):
""" """
if not rgbmatch: if not rgbmatch:
return "" return ""
rgbtag = rgbmatch.groups()[0]
# get tag, stripping the initial marker
rgbtag = rgbmatch.group()[1:]
background = rgbtag[0] == '[' background = rgbtag[0] == '['
if background: if background:
@ -179,23 +179,34 @@ class ANSIParser(object):
return string.clean() return string.clean()
else: else:
return string.raw() return string.raw()
if not string: if not string:
return '' return ''
self.do_xterm256 = xterm256
string = utils.to_str(string)
# go through all available mappings and translate them # check cached parsings
parts = self.ansi_escapes.split(string) + [" "] global _PARSE_CACHE
string = "" cachekey = "%s-%s-%s" % (string, strip_ansi, xterm256)
for part, sep in zip(parts[::2], parts[1::2]): if cachekey in _PARSE_CACHE:
for sub in self.ansi_sub: return _PARSE_CACHE[cachekey]
part = sub[0].sub(sub_meth(self, sub[1]), part)
string += "%s%s" % (part, sep[0].strip()) self.do_xterm256 = xterm256
in_string = utils.to_str(string)
# do string replacement
parsed_string = self.xterm256_sub.sub(self.sub_xterm256, in_string)
parsed_string = self.ansi_sub.sub(self.sub_ansi, parsed_string)
if strip_ansi: if strip_ansi:
# remove all ansi codes (including those manually # remove all ansi codes (including those manually
# inserted in string) # inserted in string)
string = self.ansi_regex.sub("", string) parsed_string = self.ansi_regex.sub("", parsed_string)
return string
# cache and crop old cache
_PARSE_CACHE[cachekey] = parsed_string
if len(_PARSE_CACHE) > _PARSE_CACHE_SIZE:
_PARSE_CACHE.popitem(last=False)
return parsed_string
# MUX-style mappings %cr %cn etc # MUX-style mappings %cr %cn etc
@ -237,8 +248,8 @@ class ANSIParser(object):
(r'{/', ANSI_RETURN), # line break (r'{/', ANSI_RETURN), # line break
(r'{-', ANSI_TAB), # tab (r'{-', ANSI_TAB), # tab
(r'{_', ANSI_SPACE), # space (r'{_', ANSI_SPACE), # space
(r'{\*', ANSI_INVERSE), # invert (r'{*', ANSI_INVERSE), # invert
(r'{\^', ANSI_BLINK), # blinking text (very annoying and not supported by all clients) (r'{^', ANSI_BLINK), # blinking text (very annoying and not supported by all clients)
(r'{r', hilite + ANSI_RED), (r'{r', hilite + ANSI_RED),
(r'{g', hilite + ANSI_GREEN), (r'{g', hilite + ANSI_GREEN),
@ -258,32 +269,35 @@ class ANSIParser(object):
(r'{W', normal + ANSI_WHITE), # light grey (r'{W', normal + ANSI_WHITE), # light grey
(r'{X', normal + ANSI_BLACK), # pure black (r'{X', normal + ANSI_BLACK), # pure black
(r'{\[r', ANSI_BACK_RED), (r'{[r', ANSI_BACK_RED),
(r'{\[g', ANSI_BACK_GREEN), (r'{[g', ANSI_BACK_GREEN),
(r'{\[y', ANSI_BACK_YELLOW), (r'{[y', ANSI_BACK_YELLOW),
(r'{\[b', ANSI_BACK_BLUE), (r'{[b', ANSI_BACK_BLUE),
(r'{\[m', ANSI_BACK_MAGENTA), (r'{[m', ANSI_BACK_MAGENTA),
(r'{\[c', ANSI_BACK_CYAN), (r'{[c', ANSI_BACK_CYAN),
(r'{\[w', ANSI_BACK_WHITE), # light grey background (r'{[w', ANSI_BACK_WHITE), # light grey background
(r'{\[x', ANSI_BACK_BLACK) # pure black background (r'{[x', ANSI_BACK_BLACK) # pure black background
] ]
# xterm256 {123, %c134, ansi_map = mux_ansi_map + ext_ansi_map
# xterm256 {123, %c134. These are replaced directly by
# the sub_xterm256 method
xterm256_map = [ xterm256_map = [
(r'%([0-5]{3})', parse_rgb), # %123 - foreground colour (r'%[0-5]{3}', ""), # %123 - foreground colour
(r'%(\[[0-5]{3})', parse_rgb), # %-123 - background colour (r'%\[[0-5]{3}', ""), # %[123 - background colour
(r'{([0-5]{3})', parse_rgb), # {123 - foreground colour (r'\{[0-5]{3}', ""), # {123 - foreground colour
(r'{(\[[0-5]{3})', parse_rgb) # {-123 - background colour (r'\{\[[0-5]{3}', "") # {[123 - background colour
] ]
# obs - order matters here, we want to do the xterms first since
# they collide with some of the other mappings otherwise.
ansi_map = xterm256_map + mux_ansi_map + ext_ansi_map
# prepare regex matching # prepare regex matching
ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1]) #ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
for sub in ansi_map] # for sub in ansi_map]
xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in ansi_map]), re.DOTALL)
ansi_map = dict(ansi_map)
# prepare matching ansi codes overall # prepare matching ansi codes overall
ansi_regex = re.compile("\033\[[0-9;]+m") ansi_regex = re.compile("\033\[[0-9;]+m")
@ -477,7 +491,7 @@ class ANSIString(unicode):
""" """
return "ANSIString(%s, decoded=True)" % repr(self._raw_string) return "ANSIString(%s, decoded=True)" % repr(self._raw_string)
def __init__(self, *args, **kwargs): def __init__(self, text="", parser=ANSI_PARSER, **kwargs):
""" """
When the ANSIString is first initialized, a few internal variables When the ANSIString is first initialized, a few internal variables
have to be set. have to be set.
@ -503,8 +517,8 @@ class ANSIString(unicode):
tables for which characters in the raw string are related to ANSI tables for which characters in the raw string are related to ANSI
escapes, and which are for the readable text. escapes, and which are for the readable text.
""" """
self.parser = kwargs.pop('parser', ANSI_PARSER) self.parser = parser
super(ANSIString, self).__init__(*args, **kwargs) super(ANSIString, self).__init__(text)
self._code_indexes, self._char_indexes = self._get_indexes() self._code_indexes, self._char_indexes = self._get_indexes()
def __add__(self, other): def __add__(self, other):
@ -657,14 +671,10 @@ class ANSIString(unicode):
It's possible that only one of these tables is actually needed, the It's possible that only one of these tables is actually needed, the
other assumed to be what isn't in the first. other assumed to be what isn't in the first.
""" """
matches = [
(match.start(), match.end())
for match in self.parser.ansi_regex.finditer(self._raw_string)]
code_indexes = []
# These are all the indexes which hold code characters. # These are all the indexes which hold code characters.
for start, end in matches: code_indexes = []
code_indexes.extend(range(start, end)) for match in self.parser.ansi_regex.finditer(self._raw_string):
code_indexes.extend(range(match.start(), match.end()))
if not code_indexes: if not code_indexes:
# Plain string, no ANSI codes. # Plain string, no ANSI codes.
return code_indexes, range(0, len(self._raw_string)) return code_indexes, range(0, len(self._raw_string))

View file

@ -721,7 +721,7 @@ class EvTable(object):
hchar = kwargs.pop("header_line_char", "~") hchar = kwargs.pop("header_line_char", "~")
self.header_line_char = hchar[0] if hchar else "~" self.header_line_char = hchar[0] if hchar else "~"
border = kwargs.pop("border", "none") border = kwargs.pop("border", "tablecols")
if border is None: if border is None:
border = "none" border = "none"
if not border in ("none", "table", "tablecols", if not border in ("none", "table", "tablecols",