Improve ANSIString performance.
This commit is contained in:
parent
ef6fffbca7
commit
07aba24f4d
1 changed file with 120 additions and 28 deletions
|
|
@ -317,11 +317,9 @@ class ANSIParser(object):
|
||||||
mxp_re = r'\{lc(.*?)\{lt(.*?)\{le'
|
mxp_re = r'\{lc(.*?)\{lt(.*?)\{le'
|
||||||
|
|
||||||
# prepare regex matching
|
# prepare regex matching
|
||||||
#ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
|
|
||||||
# for sub in ansi_map]
|
|
||||||
xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
|
xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
|
||||||
ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in mux_ansi_map + ext_ansi_map]), re.DOTALL)
|
ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in mux_ansi_map + ext_ansi_map]), re.DOTALL)
|
||||||
mxp_sub = re.compile(mxp_re, re.DOTALL)
|
mxp_sub = re.compile(mxp_re, re.DOTALL)
|
||||||
|
|
||||||
# used by regex replacer to correctly map ansi sequences
|
# used by regex replacer to correctly map ansi sequences
|
||||||
ansi_map = dict(mux_ansi_map + ext_ansi_map)
|
ansi_map = dict(mux_ansi_map + ext_ansi_map)
|
||||||
|
|
@ -436,7 +434,10 @@ def _transform(func_name):
|
||||||
elif index in self._char_indexes:
|
elif index in self._char_indexes:
|
||||||
to_string.append(replacement_string[char_counter])
|
to_string.append(replacement_string[char_counter])
|
||||||
char_counter += 1
|
char_counter += 1
|
||||||
return ANSIString(''.join(to_string), decoded=True)
|
return ANSIString(
|
||||||
|
''.join(to_string), decoded=True,
|
||||||
|
code_indexes=self._code_indexes, char_indexes=self._char_indexes,
|
||||||
|
clean_string=replacement_string)
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -452,8 +453,8 @@ class ANSIMeta(type):
|
||||||
'rfind', 'rindex', '__len__']:
|
'rfind', 'rindex', '__len__']:
|
||||||
setattr(cls, func_name, _query_super(func_name))
|
setattr(cls, func_name, _query_super(func_name))
|
||||||
for func_name in [
|
for func_name in [
|
||||||
'__mul__', '__mod__', 'expandtabs', '__rmul__',
|
'__mod__', 'expandtabs', 'decode', 'replace', 'format',
|
||||||
'decode', 'replace', 'format', 'encode']:
|
'encode']:
|
||||||
setattr(cls, func_name, _on_raw(func_name))
|
setattr(cls, func_name, _on_raw(func_name))
|
||||||
for func_name in [
|
for func_name in [
|
||||||
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
||||||
|
|
@ -485,19 +486,38 @@ class ANSIString(unicode):
|
||||||
the same attributes as the standard one, and you may declare the
|
the same attributes as the standard one, and you may declare the
|
||||||
string to be handled as already decoded. It is important not to double
|
string to be handled as already decoded. It is important not to double
|
||||||
decode strings, as escapes can only be respected once.
|
decode strings, as escapes can only be respected once.
|
||||||
|
|
||||||
|
Internally, ANSIString can also pass itself precached code/character
|
||||||
|
indexes and clean strings to avoid doing extra work when combining
|
||||||
|
ANSIStrings.
|
||||||
"""
|
"""
|
||||||
string = args[0]
|
string = args[0]
|
||||||
if not isinstance(string, basestring):
|
if not isinstance(string, basestring):
|
||||||
string = to_str(string, force_string=True)
|
string = to_str(string, force_string=True)
|
||||||
parser = kwargs.get('parser', ANSI_PARSER)
|
parser = kwargs.get('parser', ANSI_PARSER)
|
||||||
decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
|
decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
|
||||||
|
code_indexes = kwargs.pop('code_indexes', None)
|
||||||
|
char_indexes = kwargs.pop('char_indexes', None)
|
||||||
|
clean_string = kwargs.pop('clean_string', None)
|
||||||
|
# All True, or All False, not just one.
|
||||||
|
checks = map(lambda x: x is None, [code_indexes, char_indexes, clean_string])
|
||||||
|
if not len(set(checks)) == 1:
|
||||||
|
raise ValueError("You must specify code_indexes, char_indexes, "
|
||||||
|
"and clean_string together, or not at all.")
|
||||||
|
if not all(checks):
|
||||||
|
decoded = True
|
||||||
if not decoded:
|
if not decoded:
|
||||||
# Completely new ANSI String
|
# Completely new ANSI String
|
||||||
clean_string = to_unicode(parser.parse_ansi(string, strip_ansi=True))
|
clean_string = to_unicode(parser.parse_ansi(string, strip_ansi=True))
|
||||||
string = parser.parse_ansi(string)
|
string = parser.parse_ansi(string)
|
||||||
|
elif clean_string is not None:
|
||||||
|
# We have an explicit clean string.
|
||||||
|
pass
|
||||||
elif hasattr(string, '_clean_string'):
|
elif hasattr(string, '_clean_string'):
|
||||||
# It's already an ANSIString
|
# It's already an ANSIString
|
||||||
clean_string = string._clean_string
|
clean_string = string._clean_string
|
||||||
|
code_indexes = string._code_indexes
|
||||||
|
char_indexes = string._char_indexes
|
||||||
string = string._raw_string
|
string = string._raw_string
|
||||||
else:
|
else:
|
||||||
# It's a string that has been pre-ansi decoded.
|
# It's a string that has been pre-ansi decoded.
|
||||||
|
|
@ -505,12 +525,12 @@ class ANSIString(unicode):
|
||||||
|
|
||||||
if not isinstance(string, unicode):
|
if not isinstance(string, unicode):
|
||||||
string = string.decode('utf-8')
|
string = string.decode('utf-8')
|
||||||
else:
|
|
||||||
# Do this to prevent recursive ANSIStrings.
|
|
||||||
string = unicode(string)
|
|
||||||
ansi_string = super(ANSIString, cls).__new__(ANSIString, to_str(clean_string), "utf-8")
|
ansi_string = super(ANSIString, cls).__new__(ANSIString, to_str(clean_string), "utf-8")
|
||||||
ansi_string._raw_string = string
|
ansi_string._raw_string = string
|
||||||
ansi_string._clean_string = clean_string
|
ansi_string._clean_string = clean_string
|
||||||
|
ansi_string._code_indexes = code_indexes
|
||||||
|
ansi_string._char_indexes = char_indexes
|
||||||
return ansi_string
|
return ansi_string
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
|
@ -559,7 +579,34 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
||||||
super(ANSIString, self).__init__()
|
super(ANSIString, self).__init__()
|
||||||
self._code_indexes, self._char_indexes = self._get_indexes()
|
if self._code_indexes is None:
|
||||||
|
self._code_indexes, self._char_indexes = self._get_indexes()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _shifter(iterable, offset):
|
||||||
|
"""
|
||||||
|
Takes a list of integers, and produces a new one incrementing all
|
||||||
|
by a number.
|
||||||
|
"""
|
||||||
|
return [i + offset for i in iterable]
|
||||||
|
|
||||||
|
@classmethod
def _adder(cls, first, second):
    """
    Joins two ANSIStrings, preserving calculated info.

    `first` and `second` must both expose `_raw_string`, `_clean_string`,
    `_code_indexes` and `_char_indexes`. The raw and clean strings are
    concatenated, and both index tables of `second` are shifted by the raw
    length of `first` so they point at the right positions in the combined
    raw string. The result is built by handing these precomputed values
    to the ANSIString constructor.
    """
    # Every index belonging to `second` moves right by this many raw
    # characters once it is appended to `first`.
    offset = len(first._raw_string)
    raw_string = first._raw_string + second._raw_string
    clean_string = first._clean_string + second._clean_string
    # Copy first so neither operand's cached tables are mutated.
    code_indexes = list(first._code_indexes)
    char_indexes = list(first._char_indexes)
    code_indexes.extend(cls._shifter(second._code_indexes, offset))
    # The character table must come from second._char_indexes; shifting
    # the code indexes here would mark escape codes as visible characters.
    char_indexes.extend(cls._shifter(second._char_indexes, offset))
    return ANSIString(raw_string, code_indexes=code_indexes,
                      char_indexes=char_indexes,
                      clean_string=clean_string)
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
"""
|
"""
|
||||||
|
|
@ -569,8 +616,9 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
if not isinstance(other, basestring):
|
if not isinstance(other, basestring):
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
return ANSIString(self._raw_string + getattr(
|
if not isinstance(other, ANSIString):
|
||||||
other, '_raw_string', other), decoded=True)
|
other = ANSIString(other)
|
||||||
|
return self._adder(self, other)
|
||||||
|
|
||||||
def __radd__(self, other):
|
def __radd__(self, other):
|
||||||
"""
|
"""
|
||||||
|
|
@ -578,8 +626,9 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
if not isinstance(other, basestring):
|
if not isinstance(other, basestring):
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
return ANSIString(getattr(
|
if not isinstance(other, ANSIString):
|
||||||
other, '_raw_string', other) + self._raw_string, decoded=True)
|
other = ANSIString(other)
|
||||||
|
return self._adder(other, self)
|
||||||
|
|
||||||
def __getslice__(self, i, j):
|
def __getslice__(self, i, j):
|
||||||
"""
|
"""
|
||||||
|
|
@ -615,7 +664,7 @@ class ANSIString(unicode):
|
||||||
# Check between the slice intervals for escape sequences.
|
# Check between the slice intervals for escape sequences.
|
||||||
i = None
|
i = None
|
||||||
for i in slice_indexes[1:]:
|
for i in slice_indexes[1:]:
|
||||||
for index in range(last_mark, i):
|
for index in xrange(last_mark, i):
|
||||||
if index in self._code_indexes:
|
if index in self._code_indexes:
|
||||||
string += self._raw_string[index]
|
string += self._raw_string[index]
|
||||||
last_mark = i
|
last_mark = i
|
||||||
|
|
@ -654,7 +703,7 @@ class ANSIString(unicode):
|
||||||
result = ''
|
result = ''
|
||||||
# Get the character they're after, and replay all escape sequences
|
# Get the character they're after, and replay all escape sequences
|
||||||
# previous to it.
|
# previous to it.
|
||||||
for index in range(0, item + 1):
|
for index in xrange(0, item + 1):
|
||||||
if index in self._code_indexes:
|
if index in self._code_indexes:
|
||||||
result += self._raw_string[index]
|
result += self._raw_string[index]
|
||||||
return ANSIString(result + clean + append_tail, decoded=True)
|
return ANSIString(result + clean + append_tail, decoded=True)
|
||||||
|
|
@ -711,13 +760,6 @@ class ANSIString(unicode):
|
||||||
It's possible that only one of these tables is actually needed, the
|
It's possible that only one of these tables is actually needed, the
|
||||||
other assumed to be what isn't in the first.
|
other assumed to be what isn't in the first.
|
||||||
"""
|
"""
|
||||||
# These are all the indexes which hold code characters.
|
|
||||||
#matches = [(match.start(), match.end())
|
|
||||||
# for match in self.parser.ansi_regex.finditer(self._raw_string)]
|
|
||||||
#code_indexes = []
|
|
||||||
# # These are all the indexes which hold code characters.
|
|
||||||
#for start, end in matches:
|
|
||||||
# code_indexes.extend(range(start, end))
|
|
||||||
|
|
||||||
code_indexes = []
|
code_indexes = []
|
||||||
for match in self.parser.ansi_regex.finditer(self._raw_string):
|
for match in self.parser.ansi_regex.finditer(self._raw_string):
|
||||||
|
|
@ -775,6 +817,28 @@ class ANSIString(unicode):
|
||||||
res.append(self[start:len(self)])
|
res.append(self[start:len(self)])
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def __mul__(self, other):
    """
    Multiplication method. Implemented for performance reasons.

    Repeats the raw string, the clean string and both index tables
    `other` times and passes them to the ANSIString constructor as
    precomputed values.

    Returns NotImplemented when `other` is not an int. A zero or negative
    `other` yields an empty result with empty index tables, mirroring how
    plain string repetition behaves.
    """
    if not isinstance(other, int):
        return NotImplemented
    raw_string = self._raw_string * other
    clean_string = self._clean_string * other
    raw_length = len(self._raw_string)
    code_indexes = []
    char_indexes = []
    # One shifted copy of the tables per repetition: exactly `other` copies
    # in total, so the tables always match the repeated raw string.
    for i in range(other):
        offset = i * raw_length
        code_indexes.extend(self._shifter(self._code_indexes, offset))
        char_indexes.extend(self._shifter(self._char_indexes, offset))
    return ANSIString(
        raw_string, code_indexes=code_indexes, char_indexes=char_indexes,
        clean_string=clean_string)
|
||||||
|
|
||||||
|
def __rmul__(self, other):
|
||||||
|
return self.__mul__(other)
|
||||||
|
|
||||||
def rsplit(self, by, maxsplit=-1):
|
def rsplit(self, by, maxsplit=-1):
|
||||||
"""
|
"""
|
||||||
Stolen from PyPy's pure Python string implementation, tweaked for
|
Stolen from PyPy's pure Python string implementation, tweaked for
|
||||||
|
|
@ -810,11 +874,39 @@ class ANSIString(unicode):
|
||||||
last_item = None
|
last_item = None
|
||||||
for item in iterable:
|
for item in iterable:
|
||||||
if last_item is not None:
|
if last_item is not None:
|
||||||
result += self
|
result += self._raw_string
|
||||||
|
if not isinstance(item, ANSIString):
|
||||||
|
item = ANSIString(item)
|
||||||
result += item
|
result += item
|
||||||
last_item = item
|
last_item = item
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _filler(self, char, amount):
    """
    Generate a line of characters in a more efficient way than just adding
    ANSIStrings.

    `char` is the fill character, either a plain string or an ANSIString;
    `amount` is how many times it is repeated. The result is an ANSIString
    built from precomputed raw/clean strings and index tables.

    For an ANSIString `char`, the codes before its first visible character
    are emitted once as a prefix and everything after that character once
    as a postfix, with the repeated clean text in between.
    NOTE(review): this split assumes `char` holds exactly one visible
    character -- confirm against the callers.
    """
    if not isinstance(char, ANSIString):
        line = char * amount
        # The clean string must be the full repeated line so that it
        # agrees with the raw string and the character index table.
        return ANSIString(
            line, code_indexes=[], char_indexes=list(range(len(line))),
            clean_string=line)
    try:
        start = char._code_indexes[0]
    except IndexError:
        # No escape codes at all: slice from the start of the raw string.
        start = None
    end = char._char_indexes[0]
    prefix = char._raw_string[start:end]
    postfix = char._raw_string[end + 1:]
    line = char._clean_string * amount
    # Raw layout is prefix + line + postfix; codes occupy both ends and
    # visible characters the middle.
    visible_start = len(prefix)
    visible_end = visible_start + len(line)
    code_indexes = list(range(visible_start))
    code_indexes.extend(range(visible_end, visible_end + len(postfix)))
    char_indexes = list(range(visible_start, visible_end))
    raw_string = prefix + line + postfix
    return ANSIString(
        raw_string, clean_string=line, char_indexes=char_indexes,
        code_indexes=code_indexes)
|
||||||
|
|
||||||
@_spacing_preflight
|
@_spacing_preflight
|
||||||
def center(self, width, fillchar, difference):
|
def center(self, width, fillchar, difference):
|
||||||
|
|
@ -823,8 +915,8 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
remainder = difference % 2
|
remainder = difference % 2
|
||||||
difference /= 2
|
difference /= 2
|
||||||
spacing = difference * fillchar
|
spacing = self._filler(fillchar, difference)
|
||||||
result = spacing + self + spacing + (remainder * fillchar)
|
result = spacing + self + spacing + self._filler(fillchar, remainder)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@_spacing_preflight
|
@_spacing_preflight
|
||||||
|
|
@ -832,11 +924,11 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
Left justify some text.
|
Left justify some text.
|
||||||
"""
|
"""
|
||||||
return self + (difference * fillchar)
|
return self + self._filler(fillchar, difference)
|
||||||
|
|
||||||
@_spacing_preflight
|
@_spacing_preflight
|
||||||
def rjust(self, width, fillchar, difference):
|
def rjust(self, width, fillchar, difference):
|
||||||
"""
|
"""
|
||||||
Right justify some text.
|
Right justify some text.
|
||||||
"""
|
"""
|
||||||
return (difference * fillchar) + self
|
return self._filler(fillchar, difference) + self
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue