optimized ansi parsing a bit as well as did some general cleanup.

This commit is contained in:
Griatch 2014-03-01 17:06:04 +01:00
parent 2aa9ef9997
commit ef8755581c
3 changed files with 68 additions and 58 deletions

View file

@ -60,18 +60,9 @@ ANSI_SPACE = " "
# Escapes
ANSI_ESCAPES = ("{{", "%%", "\\\\")
def sub_meth(obj, function):
"""
RegexObject.sub() allows for the 'repl' argument to be a function.
However, it doesn't call bound methods correctly. This forces 'self'
to be passed.
"""
if isinstance(function, basestring):
return function
def wrapped(*args, **kwargs):
return function(obj, *args, **kwargs)
return wrapped
from collections import OrderedDict
_PARSE_CACHE = OrderedDict()
_PARSE_CACHE_SIZE = 10000
class ANSIParser(object):
@ -84,7 +75,14 @@ class ANSIParser(object):
an extra { for Merc-style codes
"""
def parse_rgb(self, rgbmatch):
def sub_ansi(self, ansimatch):
"""
Replacer used by re.sub to replace ansi
markers with correct ansi sequences
"""
return self.ansi_map.get(ansimatch.group(), "")
def sub_xterm256(self, rgbmatch):
"""
This is a replacer method called by re.sub with the matched
tag. It must return the correct ansi sequence.
@ -94,7 +92,9 @@ class ANSIParser(object):
"""
if not rgbmatch:
return ""
rgbtag = rgbmatch.groups()[0]
# get tag, stripping the initial marker
rgbtag = rgbmatch.group()[1:]
background = rgbtag[0] == '['
if background:
@ -179,23 +179,34 @@ class ANSIParser(object):
return string.clean()
else:
return string.raw()
if not string:
return ''
self.do_xterm256 = xterm256
string = utils.to_str(string)
# go through all available mappings and translate them
parts = self.ansi_escapes.split(string) + [" "]
string = ""
for part, sep in zip(parts[::2], parts[1::2]):
for sub in self.ansi_sub:
part = sub[0].sub(sub_meth(self, sub[1]), part)
string += "%s%s" % (part, sep[0].strip())
# check cached parsings
global _PARSE_CACHE
cachekey = "%s-%s-%s" % (string, strip_ansi, xterm256)
if cachekey in _PARSE_CACHE:
return _PARSE_CACHE[cachekey]
self.do_xterm256 = xterm256
in_string = utils.to_str(string)
# do string replacement
parsed_string = self.xterm256_sub.sub(self.sub_xterm256, in_string)
parsed_string = self.ansi_sub.sub(self.sub_ansi, parsed_string)
if strip_ansi:
# remove all ansi codes (including those manually
# inserted in string)
string = self.ansi_regex.sub("", string)
return string
parsed_string = self.ansi_regex.sub("", parsed_string)
# cache and crop old cache
_PARSE_CACHE[cachekey] = parsed_string
if len(_PARSE_CACHE) > _PARSE_CACHE_SIZE:
_PARSE_CACHE.popitem(last=False)
return parsed_string
# MUX-style mappings %cr %cn etc
@ -237,8 +248,8 @@ class ANSIParser(object):
(r'{/', ANSI_RETURN), # line break
(r'{-', ANSI_TAB), # tab
(r'{_', ANSI_SPACE), # space
(r'{\*', ANSI_INVERSE), # invert
(r'{\^', ANSI_BLINK), # blinking text (very annoying and not supported by all clients)
(r'{*', ANSI_INVERSE), # invert
(r'{^', ANSI_BLINK), # blinking text (very annoying and not supported by all clients)
(r'{r', hilite + ANSI_RED),
(r'{g', hilite + ANSI_GREEN),
@ -258,32 +269,35 @@ class ANSIParser(object):
(r'{W', normal + ANSI_WHITE), # light grey
(r'{X', normal + ANSI_BLACK), # pure black
(r'{\[r', ANSI_BACK_RED),
(r'{\[g', ANSI_BACK_GREEN),
(r'{\[y', ANSI_BACK_YELLOW),
(r'{\[b', ANSI_BACK_BLUE),
(r'{\[m', ANSI_BACK_MAGENTA),
(r'{\[c', ANSI_BACK_CYAN),
(r'{\[w', ANSI_BACK_WHITE), # light grey background
(r'{\[x', ANSI_BACK_BLACK) # pure black background
(r'{[r', ANSI_BACK_RED),
(r'{[g', ANSI_BACK_GREEN),
(r'{[y', ANSI_BACK_YELLOW),
(r'{[b', ANSI_BACK_BLUE),
(r'{[m', ANSI_BACK_MAGENTA),
(r'{[c', ANSI_BACK_CYAN),
(r'{[w', ANSI_BACK_WHITE), # light grey background
(r'{[x', ANSI_BACK_BLACK) # pure black background
]
# xterm256 {123, %c134,
ansi_map = mux_ansi_map + ext_ansi_map
# xterm256 {123, %c134. These are replaced directly by
# the sub_xterm256 method
xterm256_map = [
(r'%([0-5]{3})', parse_rgb), # %123 - foreground colour
(r'%(\[[0-5]{3})', parse_rgb), # %-123 - background colour
(r'{([0-5]{3})', parse_rgb), # {123 - foreground colour
(r'{(\[[0-5]{3})', parse_rgb) # {-123 - background colour
(r'%[0-5]{3}', ""), # %123 - foreground colour
(r'%\[[0-5]{3}', ""), # %[123 - background colour
(r'\{[0-5]{3}', ""), # {123 - foreground colour
(r'\{\[[0-5]{3}', "") # {[123 - background colour
]
# obs - order matters here, we want to do the xterms first since
# they collide with some of the other mappings otherwise.
ansi_map = xterm256_map + mux_ansi_map + ext_ansi_map
# prepare regex matching
ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
for sub in ansi_map]
#ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
# for sub in ansi_map]
xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in ansi_map]), re.DOTALL)
ansi_map = dict(ansi_map)
# prepare matching ansi codes overall
ansi_regex = re.compile("\033\[[0-9;]+m")
@ -477,7 +491,7 @@ class ANSIString(unicode):
"""
return "ANSIString(%s, decoded=True)" % repr(self._raw_string)
def __init__(self, *args, **kwargs):
def __init__(self, text="", parser=ANSI_PARSER, **kwargs):
"""
When the ANSIString is first initialized, a few internal variables
have to be set.
@ -503,8 +517,8 @@ class ANSIString(unicode):
tables for which characters in the raw string are related to ANSI
escapes, and which are for the readable text.
"""
self.parser = kwargs.pop('parser', ANSI_PARSER)
super(ANSIString, self).__init__(*args, **kwargs)
self.parser = parser
super(ANSIString, self).__init__(text)
self._code_indexes, self._char_indexes = self._get_indexes()
def __add__(self, other):
@ -657,14 +671,10 @@ class ANSIString(unicode):
It's possible that only one of these tables is actually needed, the
other assumed to be what isn't in the first.
"""
matches = [
(match.start(), match.end())
for match in self.parser.ansi_regex.finditer(self._raw_string)]
code_indexes = []
# These are all the indexes which hold code characters.
for start, end in matches:
code_indexes.extend(range(start, end))
code_indexes = []
for match in self.parser.ansi_regex.finditer(self._raw_string):
code_indexes.extend(range(match.start(), match.end()))
if not code_indexes:
# Plain string, no ANSI codes.
return code_indexes, range(0, len(self._raw_string))

View file

@ -721,7 +721,7 @@ class EvTable(object):
hchar = kwargs.pop("header_line_char", "~")
self.header_line_char = hchar[0] if hchar else "~"
border = kwargs.pop("border", "none")
border = kwargs.pop("border", "tablecols")
if border is None:
border = "none"
if not border in ("none", "table", "tablecols",