ANSIString in progress. Checking in what I have so far.

This commit is contained in:
Kelketek 2013-12-31 08:37:42 -06:00
parent 81c57da56b
commit 63761f66ec

View file

@ -70,87 +70,6 @@ class ANSIParser(object):
an extra { for Merc-style codes
"""
def __init__(self):
    "Sets the mappings"
    # One row per colour: (code letter, foreground code, background code).
    # Both code families below are generated from this table so that the
    # lower-case/upper-case pairs always stay in step.
    palette = [
        ('r', ANSI_RED, ANSI_BACK_RED),
        ('g', ANSI_GREEN, ANSI_BACK_GREEN),
        ('y', ANSI_YELLOW, ANSI_BACK_YELLOW),
        ('b', ANSI_BLUE, ANSI_BACK_BLUE),
        ('m', ANSI_MAGENTA, ANSI_BACK_MAGENTA),
        ('c', ANSI_CYAN, ANSI_BACK_CYAN),
        ('w', ANSI_WHITE, ANSI_BACK_WHITE),
        ('x', ANSI_BLACK, ANSI_BACK_BLACK),
    ]

    # MUX-style mappings (%cr, %cn, ...): whitespace codes first, then
    # lower-case letter = foreground, upper-case letter = background.
    # %cf (ANSI_BLINK) and %ci (ANSI_INVERSE) are left out on purpose;
    # they (especially blink) are potentially annoying.
    mux = [
        (r'%r', ANSI_RETURN),
        (r'%t', ANSI_TAB),
        (r'%b', ANSI_SPACE),
    ]
    for letter, foreground, background in palette:
        mux.append(('%c' + letter, foreground))
        mux.append(('%c' + letter.upper(), background))
    mux.append((r'%ch', ANSI_HILITE))
    mux.append((r'%cn', ANSI_NORMAL))
    self.mux_ansi_map = mux

    # Expanded mappings ({r, {n, ...): lower-case letter = highlighted
    # colour, upper-case letter = normal colour. So {w is pure white,
    # {W light grey, {x dark grey and {X pure black.
    ext = []
    for letter, foreground, _ in palette:
        ext.append(('{' + letter, ANSI_HILITE + foreground))
        ext.append(('{' + letter.upper(), ANSI_NORMAL + foreground))
    ext.append((r'{n', ANSI_NORMAL))  # reset
    self.ext_ansi_map = ext

    # xterm256 mappings; the captured group is handed to parse_rgb.
    self.xterm256_map = [
        (r'%c([0-5]{3})', self.parse_rgb),   # %c123 - foreground colour
        (r'%c(b[0-5]{3})', self.parse_rgb),  # %cb123 - background colour
        (r'{([0-5]{3})', self.parse_rgb),    # {123 - foreground colour
        (r'{(b[0-5]{3})', self.parse_rgb)    # {b123 - background colour
    ]

    # Order matters: the xterm256 patterns go first because they collide
    # with some of the shorter mappings otherwise.
    self.ansi_map = []
    for mapping in (self.xterm256_map, self.mux_ansi_map,
                    self.ext_ansi_map):
        self.ansi_map.extend(mapping)

    # Pre-compile every pattern, keeping its replacement alongside it.
    self.ansi_sub = []
    for pattern, replacement in self.ansi_map:
        self.ansi_sub.append((re.compile(pattern, re.DOTALL), replacement))

    # Matches any already-rendered ANSI colour sequence.
    self.ansi_regex = re.compile("\033\[[0-9;]+m")

    # Escapes - these double-chars will be replaced with a single
    # instance of each.
    escape_pattern = r"(%s)" % "|".join(ANSI_ESCAPES)
    self.ansi_escapes = re.compile(escape_pattern, re.DOTALL)
def parse_rgb(self, rgbmatch):
"""
This is a replacer method called by re.sub with the matched
@ -172,7 +91,7 @@ class ANSIParser(object):
if self.do_xterm256:
colval = 16 + (red * 36) + (green * 6) + blue
#print "RGB colours:", red, green, blue
return "\033[%s8;5;%s%s%sm" % (3 + int(background), colval/100, (colval%100)/10, colval%10)
return "\033[%s8;5;%s%s%sm" % (3 + int(background), colval/100, (colval % 100)/10, colval%10)
else:
#print "ANSI convert:", red, green, blue
# xterm256 not supported, convert the rgb value to ansi instead
@ -259,6 +178,84 @@ class ANSIParser(object):
string = self.ansi_regex.sub("", string)
return string
# Markup-to-ANSI mapping tables and the regexes built from them.
# NOTE(review): indentation is not visible in this view. These assignments
# presumably sit in the ANSIParser class body (they reference parse_rgb
# directly), which would make every name bound here - including the helper
# names hilite and normal - a class attribute. Confirm before renaming.

# MUX-style mappings %cr %cn etc
mux_ansi_map = [
# The %cf / %ci entries below are commented out by default; they
# (especially blink) are potentially annoying
(r'%r', ANSI_RETURN),
(r'%t', ANSI_TAB),
(r'%b', ANSI_SPACE),
#(r'%cf', ANSI_BLINK),
#(r'%ci', ANSI_INVERSE),
# lower-case letter maps to the plain colour code, upper-case letter to
# the matching ANSI_BACK_* code
(r'%cr', ANSI_RED),
(r'%cR', ANSI_BACK_RED),
(r'%cg', ANSI_GREEN),
(r'%cG', ANSI_BACK_GREEN),
(r'%cy', ANSI_YELLOW),
(r'%cY', ANSI_BACK_YELLOW),
(r'%cb', ANSI_BLUE),
(r'%cB', ANSI_BACK_BLUE),
(r'%cm', ANSI_MAGENTA),
(r'%cM', ANSI_BACK_MAGENTA),
(r'%cc', ANSI_CYAN),
(r'%cC', ANSI_BACK_CYAN),
(r'%cw', ANSI_WHITE),
(r'%cW', ANSI_BACK_WHITE),
(r'%cx', ANSI_BLACK),
(r'%cX', ANSI_BACK_BLACK),
(r'%ch', ANSI_HILITE),
(r'%cn', ANSI_NORMAL),
]
# Expanded mapping {r {n etc: lower-case letter prefixes the colour with
# ANSI_HILITE, upper-case letter prefixes it with ANSI_NORMAL
hilite = ANSI_HILITE
normal = ANSI_NORMAL
ext_ansi_map = [
(r'{r', hilite + ANSI_RED),
(r'{R', normal + ANSI_RED),
(r'{g', hilite + ANSI_GREEN),
(r'{G', normal + ANSI_GREEN),
(r'{y', hilite + ANSI_YELLOW),
(r'{Y', normal + ANSI_YELLOW),
(r'{b', hilite + ANSI_BLUE),
(r'{B', normal + ANSI_BLUE),
(r'{m', hilite + ANSI_MAGENTA),
(r'{M', normal + ANSI_MAGENTA),
(r'{c', hilite + ANSI_CYAN),
(r'{C', normal + ANSI_CYAN),
(r'{w', hilite + ANSI_WHITE), # pure white
(r'{W', normal + ANSI_WHITE), # light grey
(r'{x', hilite + ANSI_BLACK), # dark grey
(r'{X', normal + ANSI_BLACK), # pure black
(r'{n', normal) # reset
]
# xterm256 mappings: %123 / {123 for foreground, %-123 / {-123 for
# background. Each digit is limited to 0-5 by the patterns.
# NOTE(review): parse_rgb is referenced here as a bare name. If this runs in
# the class body it is the plain function rather than a bound method, so
# whatever applies these substitutions has to supply the parser instance
# itself - confirm against the code that consumes ansi_sub.
xterm256_map = [
(r'%([0-5]{3})', parse_rgb), # %123 - foreground colour
(r'%(-[0-5]{3})', parse_rgb), # %-123 - background colour
(r'{([0-5]{3})', parse_rgb), # {123 - foreground colour
(r'{(-[0-5]{3})', parse_rgb) # {-123 - background colour
]
# obs - order matters here, we want to do the xterms first since
# they collide with some of the other mappings otherwise.
ansi_map = xterm256_map + mux_ansi_map + ext_ansi_map
# prepare regex matching: pair each compiled pattern with its replacement
# (an ANSI string, or parse_rgb for the xterm256 patterns)
ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
for sub in ansi_map]
# prepare matching ansi codes overall: ESC, '[', one or more digits or
# semicolons, then 'm'
ansi_regex = re.compile("\033\[[0-9;]+m")
# escapes - these double-chars will be replaced with a single
# instance of each; the pattern is an alternation of the ANSI_ESCAPES entries
ansi_escapes = re.compile(r"(%s)" % "|".join(ANSI_ESCAPES), re.DOTALL)
# Shared parser instance; ANSIString falls back to it when no parser
# keyword is supplied.
ANSI_PARSER = ANSIParser()
@ -279,3 +276,119 @@ def raw(string):
Escapes a string into a form which won't be colorized by the ansi parser.
"""
return string.replace('{', '{{').replace('%', '%%')
def group(lst, n):
    """
    Yield consecutive, non-overlapping slices of ``lst`` as ``n``-tuples.

    ``lst`` must support ``len()`` and slicing. Items left over at the end
    that cannot fill a complete tuple are silently dropped.
    """
    chunks = (lst[start:start + n] for start in range(0, len(lst), n))
    for chunk in chunks:
        if len(chunk) != n:
            # Only the final chunk can come up short; it is discarded.
            continue
        yield tuple(chunk)
class ANSIString(str):
    """
    String-like object that is aware of ANSI codes.

    The underlying ``str`` value (also exposed as ``raw_string``) holds the
    text together with its ANSI escape sequences. ``len()``, indexing,
    slicing and the read-only query methods (``count``, ``find``,
    ``startswith``, ``isalpha`` ...) work on the visible characters only
    (``clean_string``), so positions never point into an escape sequence.

    Indexing and slicing return a new ANSIString holding the selected
    visible characters, preceded by every escape sequence that occurs
    before the first selected character and including the sequences found
    between selected characters. Sequences after the last selected
    character are not carried over.

    This isn't especially efficient, as it doesn't really have an
    understanding of what the codes mean in order to eliminate
    redundant characters, but a proper parser would have to be written for
    that.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create the string value, converting colour markup to ANSI codes.

        Args:
            string: first positional argument, the text to wrap. Defaults
                to the empty string when omitted.
            parser (keyword): object providing ``parse_ansi()`` and
                ``ansi_regex``. Defaults to the module-level ANSI_PARSER.
            decoded (keyword): when true, ``string`` is taken to contain
                final ANSI sequences already and is stored untouched.

        Any further positional arguments are handed to ``str.__new__``.
        """
        string = args[0] if args else ''
        args = args[1:]
        parser = kwargs.get('parser')
        if parser is None:
            parser = ANSI_PARSER
        if not kwargs.get('decoded', False):
            string = parser.parse_ansi(string)
        # Use cls (not ANSIString) so that subclasses get their own type.
        return super(ANSIString, cls).__new__(cls, string, *args)

    def __init__(self, *args, **kwargs):
        """
        Record the parser and index the visible characters.

        Accepts the same arguments as ``__new__``; only ``parser`` is used
        here.
        """
        parser = kwargs.get('parser')
        self.parser = ANSI_PARSER if parser is None else parser
        # object.__init__ takes no extra arguments (it raises TypeError on
        # Python 3), so nothing is forwarded.
        super(ANSIString, self).__init__()
        self.raw_string = super(ANSIString, self).__str__()
        self._code_indexes, self._char_indexes = self._get_indexes()
        # Derive the clean text from the index table instead of parsing
        # again: this keeps len() and indexing in agreement by construction.
        self.clean_string = ''.join(
            self.raw_string[index] for index in self._char_indexes)

    def _clean_query(func_name):
        """Build a method that runs ``str.<func_name>`` on clean_string."""
        def query(self, *args, **kwargs):
            return getattr(self.clean_string, func_name)(*args, **kwargs)
        query.__name__ = func_name
        return query

    # Read-only queries answer for the visible text, so the positions they
    # return line up with len() and indexing.
    count = _clean_query('count')
    startswith = _clean_query('startswith')
    endswith = _clean_query('endswith')
    find = _clean_query('find')
    index = _clean_query('index')
    isalnum = _clean_query('isalnum')
    isalpha = _clean_query('isalpha')
    isdigit = _clean_query('isdigit')
    islower = _clean_query('islower')
    isspace = _clean_query('isspace')
    istitle = _clean_query('istitle')
    isupper = _clean_query('isupper')
    rfind = _clean_query('rfind')
    rindex = _clean_query('rindex')
    del _clean_query

    def __len__(self):
        """Return the number of visible (non-ANSI) characters."""
        return len(self.clean_string)

    def __getslice__(self, i, j):
        """Python 2 hook for simple ``s[i:j]`` slices; see __getitem__."""
        return self.__getitem__(slice(i, j))

    def _from_raw(self, raw):
        """
        Wrap text that already contains final ANSI sequences.

        The text is marked as decoded so it is not parsed a second time,
        which would re-interpret characters that were unescaped earlier.
        """
        return ANSIString(raw, parser=self.parser, decoded=True)

    def _codes_before(self, position):
        """Return all escape-sequence characters located before position."""
        raw = self.raw_string
        return ''.join(
            raw[index] for index in self._code_indexes if index < position)

    def _slice(self, item):
        """
        Return the ANSIString selected by the slice object ``item``.

        Slice bounds and step refer to visible characters.
        """
        slice_indexes = self._char_indexes[item]
        if not slice_indexes:
            return self._from_raw('')
        raw = self.raw_string
        code_set = set(self._code_indexes)
        # Start from the first selected character, so open-ended slices
        # such as s[:3] work as well as s[0:3].
        last_mark = slice_indexes[0]
        parts = [self._codes_before(last_mark), raw[last_mark]]
        for position in slice_indexes[1:]:
            # Carry over the codes between two selected characters. For
            # negative steps the range is empty and nothing is added.
            parts.extend(
                raw[index] for index in range(last_mark, position)
                if index in code_set)
            parts.append(raw[position])
            last_mark = position
        return self._from_raw(''.join(parts))

    def __getitem__(self, item):
        """
        Return the visible character (int) or characters (slice) at item.

        Raises:
            IndexError: if an integer index is outside the visible text.
        """
        if isinstance(item, slice):
            return self._slice(item)
        position = self._char_indexes[item]
        return self._from_raw(
            self._codes_before(position) + self.raw_string[position])

    def _get_indexes(self):
        """
        Classify every position of raw_string.

        Returns:
            A ``(code_indexes, char_indexes)`` tuple of ascending lists:
            the positions inside ANSI escape sequences, and the positions
            of all remaining (visible) characters.
        """
        code_indexes = []
        for match in self.parser.ansi_regex.finditer(self.raw_string):
            # match.end() is exclusive, which is what range() expects.
            code_indexes.extend(range(match.start(), match.end()))
        code_set = set(code_indexes)
        char_indexes = [
            index for index in range(len(self.raw_string))
            if index not in code_set]
        return code_indexes, char_indexes
def _query_super(func_name):
    """
    Build a method that answers a read-only ``str`` query for an
    ANSI-aware string.

    Args:
        func_name: name of the ``str`` method to delegate to, e.g.
            ``'count'`` or ``'startswith'``.

    Returns:
        A function ``query_func(self, *args, **kwargs)`` that calls the
        named method on ``self.clean_string`` and returns its result.
    """
    def query_func(self, *args, **kwargs):
        # The query runs on the visible text rather than the raw text, so
        # positions returned by find/index agree with len() and indexing,
        # and escape sequences cannot affect isalpha() and friends.
        # getattr() already yields a bound method, so self is not passed.
        return getattr(self.clean_string, func_name)(*args, **kwargs)
    return query_func