Made the in-memory representation of ANSIString the clean string, removing the need for the regexable flag.
This commit is contained in:
parent
259860ff25
commit
af0a149148
1 changed file with 20 additions and 67 deletions
|
|
@@ -389,6 +389,7 @@ def _transform(func_name):
|
||||||
return ANSIString(''.join(to_string), decoded=True)
|
return ANSIString(''.join(to_string), decoded=True)
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
class ANSIMeta(type):
|
class ANSIMeta(type):
|
||||||
"""
|
"""
|
||||||
Many functions on ANSIString are just light wrappers around the unicode
|
Many functions on ANSIString are just light wrappers around the unicode
|
||||||
|
|
@@ -402,7 +403,7 @@ class ANSIMeta(type):
|
||||||
setattr(cls, func_name, _query_super(func_name))
|
setattr(cls, func_name, _query_super(func_name))
|
||||||
for func_name in [
|
for func_name in [
|
||||||
'__mul__', '__mod__', 'expandtabs', '__rmul__', 'join',
|
'__mul__', '__mod__', 'expandtabs', '__rmul__', 'join',
|
||||||
'decode', 'replace', 'format']:
|
'decode', 'replace', 'format', 'encode']:
|
||||||
setattr(cls, func_name, _on_raw(func_name))
|
setattr(cls, func_name, _on_raw(func_name))
|
||||||
for func_name in [
|
for func_name in [
|
||||||
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
||||||
|
|
@@ -434,62 +435,38 @@ class ANSIString(unicode):
|
||||||
the same attributes as the standard one, and you may declare the
|
the same attributes as the standard one, and you may declare the
|
||||||
string to be handled as already decoded. It is important not to double
|
string to be handled as already decoded. It is important not to double
|
||||||
decode strings, as escapes can only be respected once.
|
decode strings, as escapes can only be respected once.
|
||||||
|
|
||||||
If the regexable flag is set, using __getitem__, such as when getting
|
|
||||||
an index or slicing, will return the result from the raw string. If
|
|
||||||
this flag is set False, it will intelligently skip ANSI escapes.
|
|
||||||
|
|
||||||
ANSIString('{rHello{g, W{yorld', regexable=True)[0] will return the
|
|
||||||
first byte of the escape sequence before 'Hello', while
|
|
||||||
ANSIString('{rHello{g, W{yorld')[0] will return a red 'H'.
|
|
||||||
|
|
||||||
When a regexable ANSIString is sliced, the result is returned as a
|
|
||||||
non-regexable ANSI String. This ensures that usage of regexable
|
|
||||||
ANSIStrings is an explicit choice.
|
|
||||||
|
|
||||||
Why all this complication with the regexable flag?
|
|
||||||
|
|
||||||
The reason is that while we are able to subclass the unicode object in
|
|
||||||
Python, the byte representation of the string in memory cannot be
|
|
||||||
changed and still exists under the hood. This doesn't matter for things
|
|
||||||
coded in pure Python, but since Regexes need to be mindful of
|
|
||||||
performance, the module that handles them operates directly on the
|
|
||||||
memory representation of the string in order to do matching. It is thus
|
|
||||||
completely unaware of our customizations to the class. Interestingly,
|
|
||||||
however, while the re module does its matching on the raw string, it
|
|
||||||
slices the string using the object's methods. This means that running
|
|
||||||
a regex on an ANSIString would return matches at bogus indexes, since
|
|
||||||
the __getitem__ method of ANSIString skips ANSI escape sequences, which
|
|
||||||
were part of the raw data regex was matching against.
|
|
||||||
|
|
||||||
So, if you need to use regex on an ANSIString, make sure you get it in
|
|
||||||
regexable mode first, and be ready to deal with a few edge cases.
|
|
||||||
"""
|
"""
|
||||||
string = to_str(args[0], force_string=True)
|
string = to_str(args[0], force_string=True)
|
||||||
if not isinstance(string, basestring):
|
if not isinstance(string, basestring):
|
||||||
string = str(string)
|
string = str(string)
|
||||||
parser = kwargs.get('parser', ANSI_PARSER)
|
parser = kwargs.get('parser', ANSI_PARSER)
|
||||||
regexable = kwargs.get('regexable', False)
|
|
||||||
decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
|
decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
|
||||||
if not decoded:
|
if not decoded:
|
||||||
string = parser.parse_ansi(string)
|
string = parser.parse_ansi(string)
|
||||||
if isinstance(string, unicode):
|
clean_string = unicode(parser.parse_ansi(
|
||||||
string = super(ANSIString, cls).__new__(ANSIString, string)
|
string, strip_ansi=True), 'utf-8')
|
||||||
else:
|
ansi_string = super(ANSIString, cls).__new__(ANSIString, clean_string)
|
||||||
string = super(ANSIString, cls).__new__(ANSIString, string, 'utf-8')
|
ansi_string._raw_string = string
|
||||||
string._regexable = regexable
|
ansi_string._clean_string = clean_string
|
||||||
return string
|
return ansi_string
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self._raw_string.encode('utf-8')
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
"""
|
||||||
|
Unfortunately, this is not called during print() statements due to a
|
||||||
|
bug in the Python interpreter. You can always do unicode() or str()
|
||||||
|
around the resulting ANSIString and print that.
|
||||||
|
"""
|
||||||
|
return self._raw_string
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
"""
|
"""
|
||||||
Let's make the repr the command that would actually be used to
|
Let's make the repr the command that would actually be used to
|
||||||
construct this object, for convenience and reference.
|
construct this object, for convenience and reference.
|
||||||
"""
|
"""
|
||||||
if self._regexable:
|
return "ANSIString(%s, decoded=True)" % repr(self._raw_string)
|
||||||
reg = ', regexable=True'
|
|
||||||
else:
|
|
||||||
reg = ''
|
|
||||||
return "ANSIString(%s, decoded=True%s)" % (repr(self._raw_string), reg)
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
@@ -519,9 +496,6 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
||||||
super(ANSIString, self).__init__(*args, **kwargs)
|
super(ANSIString, self).__init__(*args, **kwargs)
|
||||||
self._raw_string = unicode(self)
|
|
||||||
self._clean_string = unicode(self.parser.parse_ansi(
|
|
||||||
self._raw_string, strip_ansi=True), 'utf-8')
|
|
||||||
self._code_indexes, self._char_indexes = self._get_indexes()
|
self._code_indexes, self._char_indexes = self._get_indexes()
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
|
|
@@ -598,8 +572,6 @@ class ANSIString(unicode):
|
||||||
string instead, bypassing ANSIString's intelligent escape skipping,
|
string instead, bypassing ANSIString's intelligent escape skipping,
|
||||||
for reasons explained in the __new__ method's docstring.
|
for reasons explained in the __new__ method's docstring.
|
||||||
"""
|
"""
|
||||||
if self._regexable:
|
|
||||||
return ANSIString(self._raw_string[item], decoded=True)
|
|
||||||
if isinstance(item, slice):
|
if isinstance(item, slice):
|
||||||
# Slices must be handled specially.
|
# Slices must be handled specially.
|
||||||
return self._slice(item)
|
return self._slice(item)
|
||||||
|
|
@@ -635,25 +607,6 @@ class ANSIString(unicode):
|
||||||
"""
|
"""
|
||||||
return self._raw_string
|
return self._raw_string
|
||||||
|
|
||||||
def is_regexable(self):
|
|
||||||
"""
|
|
||||||
State whether or not this ANSIString is a 'regexable' ANSIString.
|
|
||||||
Regexable ANSIStrings return indexes from _raw_string when sliced.
|
|
||||||
"""
|
|
||||||
return self._regexable
|
|
||||||
|
|
||||||
def regexable(self):
|
|
||||||
"""
|
|
||||||
Return the regexable version of this ANSIString.
|
|
||||||
"""
|
|
||||||
return ANSIString(self, decoded=True, regexable=True)
|
|
||||||
|
|
||||||
def non_regexable(self):
|
|
||||||
"""
|
|
||||||
Return the non-regexable version of this ANSIString.
|
|
||||||
"""
|
|
||||||
return ANSIString(self, decoded=True)
|
|
||||||
|
|
||||||
def partition(self, sep, reverse=False):
|
def partition(self, sep, reverse=False):
|
||||||
"""
|
"""
|
||||||
Similar to split, but always creates a tuple with three items:
|
Similar to split, but always creates a tuple with three items:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue