Added a 'regexable' mode for ANSIString, fixed a few bugs with it.
Refactored with metaclass and added comments. Resolves #481, Resolves #480
This commit is contained in:
parent
638483fa66
commit
a9ad82d005
1 changed files with 395 additions and 200 deletions
|
|
@ -174,11 +174,11 @@ class ANSIParser(object):
|
||||||
strip_ansi flag instead removes all ansi markup.
|
strip_ansi flag instead removes all ansi markup.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if hasattr(string, 'raw_string'):
|
if hasattr(string, '_raw_string'):
|
||||||
if strip_ansi:
|
if strip_ansi:
|
||||||
return string.clean_string
|
return string.clean()
|
||||||
else:
|
else:
|
||||||
return string.raw_string
|
return string.raw()
|
||||||
if not string:
|
if not string:
|
||||||
return ''
|
return ''
|
||||||
self.do_xterm256 = xterm256
|
self.do_xterm256 = xterm256
|
||||||
|
|
@ -322,6 +322,10 @@ def group(lst, n):
|
||||||
|
|
||||||
|
|
||||||
def _spacing_preflight(func):
|
def _spacing_preflight(func):
|
||||||
|
"""
|
||||||
|
This wrapper function is used to do some preflight checks on functions used
|
||||||
|
for padding ANSIStrings.
|
||||||
|
"""
|
||||||
def wrapped(self, width, fillchar=None):
|
def wrapped(self, width, fillchar=None):
|
||||||
if fillchar is None:
|
if fillchar is None:
|
||||||
fillchar = " "
|
fillchar = " "
|
||||||
|
|
@ -336,195 +340,14 @@ def _spacing_preflight(func):
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
class ANSIString(unicode):
|
|
||||||
"""
|
|
||||||
String-like object that is aware of ANSI codes.
|
|
||||||
|
|
||||||
This isn't especially efficient, as it doesn't really have an
|
|
||||||
understanding of what the codes mean in order to eliminate
|
|
||||||
redundant characters, but a proper parser would have to be written for
|
|
||||||
that.
|
|
||||||
|
|
||||||
Take note of the instructions at the bottom of the module, which modify
|
|
||||||
this class.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
|
||||||
"""
|
|
||||||
When creating a new ANSIString, you may use a custom parser that has
|
|
||||||
the same attributes as the standard one, and you may declare the
|
|
||||||
string to be handled as already decoded. It is important not to double
|
|
||||||
decode strings, as escapes can only be respected once.
|
|
||||||
"""
|
|
||||||
string = to_str(args[0], force_string=True)
|
|
||||||
if not isinstance(string, basestring):
|
|
||||||
string = str(string)
|
|
||||||
parser = kwargs.get('parser', ANSI_PARSER)
|
|
||||||
decoded = kwargs.get('decoded', False) or hasattr(string, 'raw_string')
|
|
||||||
if not decoded:
|
|
||||||
string = parser.parse_ansi(string)
|
|
||||||
return super(ANSIString, cls).__new__(ANSIString, string, 'utf-8')
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return "ANSIString(%s, decoded=True)" % repr(self.raw_string)
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
|
||||||
super(ANSIString, self).__init__(*args, **kwargs)
|
|
||||||
self.raw_string = unicode(self)
|
|
||||||
self.clean_string = unicode(self.parser.parse_ansi(
|
|
||||||
self.raw_string, strip_ansi=True), 'utf-8')
|
|
||||||
self._code_indexes, self._char_indexes = self._get_indexes()
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.clean_string)
|
|
||||||
|
|
||||||
def __add__(self, other):
|
|
||||||
if not isinstance(other, basestring):
|
|
||||||
return NotImplemented
|
|
||||||
return ANSIString(self.raw_string + getattr(
|
|
||||||
other, 'raw_string', other), decoded=True)
|
|
||||||
|
|
||||||
def __radd__(self, other):
|
|
||||||
if not isinstance(other, basestring):
|
|
||||||
return NotImplemented
|
|
||||||
return ANSIString(getattr(
|
|
||||||
other, 'raw_string', other) + self.raw_string, decoded=True)
|
|
||||||
|
|
||||||
def __getslice__(self, i, j):
|
|
||||||
return self.__getitem__(slice(i, j))
|
|
||||||
|
|
||||||
def _slice(self, item):
|
|
||||||
slice_indexes = self._char_indexes[item]
|
|
||||||
if not slice_indexes:
|
|
||||||
return ANSIString('')
|
|
||||||
try:
|
|
||||||
string = self[item.start].raw_string
|
|
||||||
except IndexError:
|
|
||||||
return ANSIString('')
|
|
||||||
last_mark = slice_indexes[0]
|
|
||||||
for i in slice_indexes[1:]:
|
|
||||||
for index in range(last_mark, i):
|
|
||||||
if index in self._code_indexes:
|
|
||||||
string += self.raw_string[index]
|
|
||||||
last_mark = i
|
|
||||||
try:
|
|
||||||
string += self.raw_string[i]
|
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
return ANSIString(string, decoded=True)
|
|
||||||
|
|
||||||
def __getitem__(self, item):
|
|
||||||
if isinstance(item, slice):
|
|
||||||
return self._slice(item)
|
|
||||||
try:
|
|
||||||
item = self._char_indexes[item]
|
|
||||||
except IndexError:
|
|
||||||
raise IndexError("ANSIString index out of range.")
|
|
||||||
clean = self.raw_string[item]
|
|
||||||
|
|
||||||
result = ''
|
|
||||||
for index in range(0, item + 1):
|
|
||||||
if index in self._code_indexes:
|
|
||||||
result += self.raw_string[index]
|
|
||||||
return ANSIString(result + clean, decoded=True)
|
|
||||||
|
|
||||||
def rsplit(self, sep=None, maxsplit=None):
|
|
||||||
return self.split(sep, maxsplit, reverse=True)
|
|
||||||
|
|
||||||
def split(self, sep=None, maxsplit=None, reverse=False):
|
|
||||||
if hasattr(sep, 'clean_string'):
|
|
||||||
sep = sep.clean_string
|
|
||||||
args = [sep]
|
|
||||||
if maxsplit is not None:
|
|
||||||
args.append(maxsplit)
|
|
||||||
if reverse:
|
|
||||||
parent_result = self.clean_string.rsplit(*args)
|
|
||||||
else:
|
|
||||||
parent_result = self.clean_string.split(*args)
|
|
||||||
current_index = 0
|
|
||||||
result = []
|
|
||||||
for section in parent_result:
|
|
||||||
result.append(self[current_index:current_index + len(section)])
|
|
||||||
current_index += (len(section)) + len(sep)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def partition(self, sep, reverse=False):
|
|
||||||
if hasattr(sep, 'clean_string'):
|
|
||||||
sep = sep.clean_string
|
|
||||||
if reverse:
|
|
||||||
parent_result = self.clean_string.rpartition(sep)
|
|
||||||
else:
|
|
||||||
parent_result = self.clean_string.partition(sep)
|
|
||||||
current_index = 0
|
|
||||||
result = tuple()
|
|
||||||
for section in parent_result:
|
|
||||||
result += (self[current_index:current_index + len(section)],)
|
|
||||||
current_index += len(section)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _get_indexes(self):
|
|
||||||
matches = [
|
|
||||||
(match.start(), match.end())
|
|
||||||
for match in self.parser.ansi_regex.finditer(self.raw_string)]
|
|
||||||
code_indexes = []
|
|
||||||
# These are all the indexes which hold code characters.
|
|
||||||
for start, end in matches:
|
|
||||||
code_indexes.extend(range(start, end))
|
|
||||||
|
|
||||||
if not code_indexes:
|
|
||||||
# Plain string, no ANSI codes.
|
|
||||||
return code_indexes, range(0, len(self.raw_string))
|
|
||||||
flat_ranges = []
|
|
||||||
# We need to get the ones between them, but the code might start at
|
|
||||||
# the beginning, and there might be codes at the end.
|
|
||||||
for tup in matches:
|
|
||||||
flat_ranges.extend(tup)
|
|
||||||
# Is the beginning of the string a code character?
|
|
||||||
if flat_ranges[0] == 0:
|
|
||||||
flat_ranges.pop(0)
|
|
||||||
else:
|
|
||||||
flat_ranges.insert(0, 0)
|
|
||||||
# How about the end?
|
|
||||||
end_index = (len(self.raw_string) - 1)
|
|
||||||
if flat_ranges[-1] == end_index:
|
|
||||||
flat_ranges.pop()
|
|
||||||
else:
|
|
||||||
flat_ranges.append(end_index)
|
|
||||||
char_indexes = []
|
|
||||||
for start, end in list(group(flat_ranges, 2)):
|
|
||||||
char_indexes.extend(range(start, end))
|
|
||||||
# The end character will be left off if it's a normal character. Fix
|
|
||||||
# that here.
|
|
||||||
if end_index in flat_ranges:
|
|
||||||
char_indexes.append(end_index)
|
|
||||||
return code_indexes, char_indexes
|
|
||||||
|
|
||||||
@_spacing_preflight
|
|
||||||
def center(self, width, fillchar, difference):
|
|
||||||
remainder = difference % 2
|
|
||||||
difference /= 2
|
|
||||||
spacing = difference * fillchar
|
|
||||||
result = spacing + self + spacing + (remainder * fillchar)
|
|
||||||
return result
|
|
||||||
|
|
||||||
@_spacing_preflight
|
|
||||||
def ljust(self, width, fillchar, difference):
|
|
||||||
return self + (difference * fillchar)
|
|
||||||
|
|
||||||
@_spacing_preflight
|
|
||||||
def rjust(self, width, fillchar, difference):
|
|
||||||
return (difference * fillchar) + self
|
|
||||||
|
|
||||||
|
|
||||||
def _query_super(func_name):
|
def _query_super(func_name):
|
||||||
"""
|
"""
|
||||||
Have the string class handle this with the cleaned string instead of
|
Have the string class handle this with the cleaned string instead of
|
||||||
ANSIString.
|
ANSIString.
|
||||||
"""
|
"""
|
||||||
def query_func(self, *args, **kwargs):
|
def wrapped(self, *args, **kwargs):
|
||||||
return getattr(self.clean_string, func_name)(*args, **kwargs)
|
return getattr(self.clean(), func_name)(*args, **kwargs)
|
||||||
return query_func
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def _on_raw(func_name):
|
def _on_raw(func_name):
|
||||||
|
|
@ -536,7 +359,7 @@ def _on_raw(func_name):
|
||||||
try:
|
try:
|
||||||
string = args.pop(0)
|
string = args.pop(0)
|
||||||
if hasattr(string, 'raw_string'):
|
if hasattr(string, 'raw_string'):
|
||||||
args.insert(0, string.raw_string)
|
args.insert(0, string.raw())
|
||||||
else:
|
else:
|
||||||
args.insert(0, string)
|
args.insert(0, string)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
|
@ -566,16 +389,388 @@ def _transform(func_name):
|
||||||
return ANSIString(''.join(to_string), decoded=True)
|
return ANSIString(''.join(to_string), decoded=True)
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
class ANSIMeta(type):
|
||||||
|
"""
|
||||||
|
Many functions on ANSIString are just light wrappers around the unicode
|
||||||
|
base class. We apply them here, as part of the class's construction.
|
||||||
|
"""
|
||||||
|
def __init__(cls, *args, **kwargs):
|
||||||
|
for func_name in [
|
||||||
|
'count', 'startswith', 'endswith', 'find', 'index', 'isalnum',
|
||||||
|
'isalpha', 'isdigit', 'islower', 'isspace', 'istitle', 'isupper',
|
||||||
|
'rfind', 'rindex', '__len__']:
|
||||||
|
setattr(cls, func_name, _query_super(func_name))
|
||||||
|
for func_name in [
|
||||||
|
'__mul__', '__mod__', 'expandtabs', '__rmul__', 'join',
|
||||||
|
'decode', 'replace', 'format']:
|
||||||
|
setattr(cls, func_name, _on_raw(func_name))
|
||||||
|
for func_name in [
|
||||||
|
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
||||||
|
setattr(cls, func_name, _transform(func_name))
|
||||||
|
super(ANSIMeta, cls).__init__(*args, **kwargs)
|
||||||
|
|
||||||
for func_name in [
|
|
||||||
'count', 'startswith', 'endswith', 'find', 'index', 'isalnum',
|
class ANSIString(unicode):
|
||||||
'isalpha', 'isdigit', 'islower', 'isspace', 'istitle', 'isupper',
|
"""
|
||||||
'rfind', 'rindex']:
|
String-like object that is aware of ANSI codes.
|
||||||
setattr(ANSIString, func_name, _query_super(func_name))
|
|
||||||
for func_name in [
|
This isn't especially efficient, as it doesn't really have an
|
||||||
'__mul__', '__mod__', 'expandtabs', '__rmul__', 'join',
|
understanding of what the codes mean in order to eliminate
|
||||||
'decode', 'replace', 'format']:
|
redundant characters. This could be made as an enhancement to ANSI_PARSER.
|
||||||
setattr(ANSIString, func_name, _on_raw(func_name))
|
|
||||||
for func_name in [
|
If one is going to use ANSIString, one should generally avoid converting
|
||||||
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
away from it until one is about to send information on the wire. This is
|
||||||
setattr(ANSIString, func_name, _transform(func_name))
|
because escape sequences in the string may otherwise already be decoded,
|
||||||
|
and taken literally the second time around.
|
||||||
|
|
||||||
|
Please refer to the Metaclass, ANSIMeta, which is used to apply wrappers
|
||||||
|
for several of the methods that need not be defined directly here.
|
||||||
|
"""
|
||||||
|
__metaclass__ = ANSIMeta
|
||||||
|
|
||||||
|
def __new__(cls, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
When creating a new ANSIString, you may use a custom parser that has
|
||||||
|
the same attributes as the standard one, and you may declare the
|
||||||
|
string to be handled as already decoded. It is important not to double
|
||||||
|
decode strings, as escapes can only be respected once.
|
||||||
|
|
||||||
|
If the regexable flag is set, using __getitem__, such as when getting
|
||||||
|
an index or slicing, will return the result from the raw string. If
|
||||||
|
this flag is set False, it will intelligently skip ANSI escapes.
|
||||||
|
|
||||||
|
ANSIString('{rHello{g, W{yorld', regexable=True)[0] will return the
|
||||||
|
first byte of the escape sequence before 'Hello', while
|
||||||
|
ANSIString('{rHello{g, W{yorld')[0] will return a red 'H'.
|
||||||
|
|
||||||
|
When a regexable ANSIString is sliced, the result is returned as a
|
||||||
|
non-regexable ANSIString. This ensures that usage of regexable
|
||||||
|
ANSIStrings is an explicit choice.
|
||||||
|
|
||||||
|
Why all this complication with the regexable flag?
|
||||||
|
|
||||||
|
The reason is that while we are able to subclass the unicode object in
|
||||||
|
Python, the byte representation of the string in memory cannot be
|
||||||
|
changed and still exists under the hood. This doesn't matter for things
|
||||||
|
coded in pure Python, but since Regexes need to be mindful of
|
||||||
|
performance, the module that handles them operates directly on the
|
||||||
|
memory representation of the string in order to do matching. It is thus
|
||||||
|
completely unaware of our customizations to the class. Interestingly,
|
||||||
|
however, while the re module does its matching on the raw string, it
|
||||||
|
slices the string using the object's methods. This means that running
|
||||||
|
a regex on an ANSIString would return matches at bogus indexes, since
|
||||||
|
the __getitem__ method of ANSIString skips ANSI escape sequences, which
|
||||||
|
were part of the raw data regex was matching against.
|
||||||
|
|
||||||
|
So, if you need to use regex on an ANSIString, make sure you get it in
|
||||||
|
regexable mode first, and be ready to deal with a few edge cases.
|
||||||
|
"""
|
||||||
|
string = to_str(args[0], force_string=True)
|
||||||
|
if not isinstance(string, basestring):
|
||||||
|
string = str(string)
|
||||||
|
parser = kwargs.get('parser', ANSI_PARSER)
|
||||||
|
regexable = kwargs.get('regexable', False)
|
||||||
|
decoded = kwargs.get('decoded', False) or hasattr(string, 'raw_string')
|
||||||
|
if not decoded:
|
||||||
|
string = parser.parse_ansi(string)
|
||||||
|
if isinstance(string, unicode):
|
||||||
|
string = super(ANSIString, cls).__new__(ANSIString, string)
|
||||||
|
else:
|
||||||
|
string = super(ANSIString, cls).__new__(ANSIString, string, 'utf-8')
|
||||||
|
string._regexable = regexable
|
||||||
|
return string
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
"""
|
||||||
|
Let's make the repr the command that would actually be used to
|
||||||
|
construct this object, for convenience and reference.
|
||||||
|
"""
|
||||||
|
if self._regexable:
|
||||||
|
reg = ', regexable=True'
|
||||||
|
else:
|
||||||
|
reg = ''
|
||||||
|
return "ANSIString(%s, decoded=True%s)" % (repr(self._raw_string), reg)
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
When the ANSIString is first initialized, a few internal variables
|
||||||
|
have to be set.
|
||||||
|
|
||||||
|
The first is the parser. It is possible to replace Evennia's standard
|
||||||
|
ANSI parser with one of your own syntax if you wish, so long as it
|
||||||
|
implements the same interface.
|
||||||
|
|
||||||
|
The second is the _raw_string. It should be noted that the ANSIStrings
|
||||||
|
are unicode based. This seemed more reasonable than basing it off of
|
||||||
|
the string class, because if someone were to use a unicode character,
|
||||||
|
the benefits of knowing the indexes of the ANSI characters would be
|
||||||
|
negated by the fact that a character within the string might require
|
||||||
|
more than one byte to be represented. The raw string is, then, a
|
||||||
|
unicode object rather than a true encoded string. If you need the
|
||||||
|
encoded string for sending over the wire, try using the .encode()
|
||||||
|
method.
|
||||||
|
|
||||||
|
The third thing to set is the _clean_string. This is a unicode object
|
||||||
|
that is devoid of all ANSI Escapes.
|
||||||
|
|
||||||
|
Finally, _code_indexes and _char_indexes are defined. These are lookup
|
||||||
|
tables for which characters in the raw string are related to ANSI
|
||||||
|
escapes, and which are for the readable text.
|
||||||
|
"""
|
||||||
|
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
||||||
|
super(ANSIString, self).__init__(*args, **kwargs)
|
||||||
|
self._raw_string = unicode(self)
|
||||||
|
self._clean_string = unicode(self.parser.parse_ansi(
|
||||||
|
self._raw_string, strip_ansi=True), 'utf-8')
|
||||||
|
self._code_indexes, self._char_indexes = self._get_indexes()
|
||||||
|
|
||||||
|
def __add__(self, other):
|
||||||
|
"""
|
||||||
|
We have to be careful when adding two strings not to reprocess things
|
||||||
|
that don't need to be reprocessed, lest we end up with escapes being
|
||||||
|
interpreted literally.
|
||||||
|
"""
|
||||||
|
if not isinstance(other, basestring):
|
||||||
|
return NotImplemented
|
||||||
|
return ANSIString(self._raw_string + getattr(
|
||||||
|
other, 'raw_string', other), decoded=True)
|
||||||
|
|
||||||
|
def __radd__(self, other):
|
||||||
|
"""
|
||||||
|
Likewise, if we're on the other end.
|
||||||
|
"""
|
||||||
|
if not isinstance(other, basestring):
|
||||||
|
return NotImplemented
|
||||||
|
return ANSIString(getattr(
|
||||||
|
other, 'raw_string', other) + self._raw_string, decoded=True)
|
||||||
|
|
||||||
|
def __getslice__(self, i, j):
|
||||||
|
"""
|
||||||
|
This function is deprecated, so we just make it call the proper
|
||||||
|
function.
|
||||||
|
"""
|
||||||
|
return self.__getitem__(slice(i, j))
|
||||||
|
|
||||||
|
def _slice(self, slc):
|
||||||
|
"""
|
||||||
|
This function takes a slice() object.
|
||||||
|
|
||||||
|
Slices have to be handled specially. Not only are they able to specify
|
||||||
|
a start and end with [x:y], but many forget that they can also specify
|
||||||
|
an interval with [x:y:z]. As a result, not only do we have to track
|
||||||
|
the ANSI Escapes that have played before the start of the slice, we
|
||||||
|
must also replay any in these intervals, should they exist.
|
||||||
|
|
||||||
|
Thankfully, slicing the _char_indexes table gives us the actual
|
||||||
|
indexes that need slicing in the raw string. We can check between
|
||||||
|
those indexes to figure out what escape characters need to be
|
||||||
|
replayed.
|
||||||
|
"""
|
||||||
|
slice_indexes = self._char_indexes[slc]
|
||||||
|
if not slice_indexes:
|
||||||
|
return ANSIString('')
|
||||||
|
try:
|
||||||
|
string = self[slc.start]._raw_string
|
||||||
|
except IndexError:
|
||||||
|
return ANSIString('')
|
||||||
|
last_mark = slice_indexes[0]
|
||||||
|
# Check between the slice intervals for escape sequences.
|
||||||
|
for i in slice_indexes[1:]:
|
||||||
|
for index in range(last_mark, i):
|
||||||
|
if index in self._code_indexes:
|
||||||
|
string += self._raw_string[index]
|
||||||
|
last_mark = i
|
||||||
|
try:
|
||||||
|
string += self._raw_string[i]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
return ANSIString(string, decoded=True)
|
||||||
|
|
||||||
|
def __getitem__(self, item):
|
||||||
|
"""
|
||||||
|
Gateway for slices and getting specific indexes in the ANSIString. If
|
||||||
|
this is a regexable ANSIString, it will get the data from the raw
|
||||||
|
string instead, bypassing ANSIString's intelligent escape skipping,
|
||||||
|
for reasons explained in the __new__ method's docstring.
|
||||||
|
"""
|
||||||
|
if self._regexable:
|
||||||
|
return ANSIString(self._raw_string[item], decoded=True)
|
||||||
|
if isinstance(item, slice):
|
||||||
|
# Slices must be handled specially.
|
||||||
|
return self._slice(item)
|
||||||
|
try:
|
||||||
|
item = self._char_indexes[item]
|
||||||
|
except IndexError:
|
||||||
|
raise IndexError("ANSIString index out of range.")
|
||||||
|
|
||||||
|
clean = self._raw_string[item]
|
||||||
|
result = ''
|
||||||
|
# Get the character they're after, and replay all escape sequences
|
||||||
|
# previous to it.
|
||||||
|
for index in range(0, item + 1):
|
||||||
|
if index in self._code_indexes:
|
||||||
|
result += self._raw_string[index]
|
||||||
|
return ANSIString(result + clean, decoded=True)
|
||||||
|
|
||||||
|
def rsplit(self, sep=None, maxsplit=None):
|
||||||
|
"""
|
||||||
|
Like split, but from the end of the string, rather than the beginning.
|
||||||
|
"""
|
||||||
|
return self.split(sep, maxsplit, reverse=True)
|
||||||
|
|
||||||
|
def split(self, sep=None, maxsplit=None, reverse=False):
|
||||||
|
"""
|
||||||
|
Splits in a manner similar to the standard string split method. First,
|
||||||
|
we split the clean string. Then we measure each section of the result
|
||||||
|
to figure out where they start and end, and replay any escapes that
|
||||||
|
would have occurred before that.
|
||||||
|
"""
|
||||||
|
if hasattr(sep, 'clean_string'):
|
||||||
|
sep = sep.clean_string
|
||||||
|
args = [sep]
|
||||||
|
if maxsplit is not None:
|
||||||
|
args.append(maxsplit)
|
||||||
|
if reverse:
|
||||||
|
parent_result = self._clean_string.rsplit(*args)
|
||||||
|
else:
|
||||||
|
parent_result = self._clean_string.split(*args)
|
||||||
|
# Might be None.
|
||||||
|
sep = sep or ''
|
||||||
|
current_index = 0
|
||||||
|
result = []
|
||||||
|
for section in parent_result:
|
||||||
|
result.append(self[current_index:current_index + len(section)])
|
||||||
|
current_index += (len(section)) + len(sep)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def clean(self):
|
||||||
|
"""
|
||||||
|
Return a unicode object without the ANSI escapes.
|
||||||
|
"""
|
||||||
|
return self._clean_string
|
||||||
|
|
||||||
|
def raw(self):
|
||||||
|
"""
|
||||||
|
Return a unicode object with the ANSI escapes.
|
||||||
|
"""
|
||||||
|
return self._raw_string
|
||||||
|
|
||||||
|
def is_regexable(self):
|
||||||
|
"""
|
||||||
|
State whether or not this ANSIString is a 'regexable' ANSIString.
|
||||||
|
Regexable ANSIStrings return indexes from _raw_string when sliced.
|
||||||
|
"""
|
||||||
|
return self._regexable
|
||||||
|
|
||||||
|
def regexable(self):
|
||||||
|
"""
|
||||||
|
Return the regexable version of this ANSIString.
|
||||||
|
"""
|
||||||
|
return ANSIString(self, decoded=True, regexable=True)
|
||||||
|
|
||||||
|
def non_regexable(self):
|
||||||
|
"""
|
||||||
|
Return the non-regexable version of this ANSIString.
|
||||||
|
"""
|
||||||
|
return ANSIString(self, decoded=True)
|
||||||
|
|
||||||
|
def partition(self, sep, reverse=False):
|
||||||
|
"""
|
||||||
|
Similar to split, but always creates a tuple with three items:
|
||||||
|
1. The part before the separator
|
||||||
|
2. The separator itself.
|
||||||
|
3. The part after.
|
||||||
|
|
||||||
|
We use the same techniques we used in split() to make sure each are
|
||||||
|
colored.
|
||||||
|
"""
|
||||||
|
if hasattr(sep, '_clean_string'):
|
||||||
|
sep = sep.clean()
|
||||||
|
if reverse:
|
||||||
|
parent_result = self._clean_string.rpartition(sep)
|
||||||
|
else:
|
||||||
|
parent_result = self._clean_string.partition(sep)
|
||||||
|
current_index = 0
|
||||||
|
result = tuple()
|
||||||
|
for section in parent_result:
|
||||||
|
result += (self[current_index:current_index + len(section)],)
|
||||||
|
current_index += len(section)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _get_indexes(self):
|
||||||
|
"""
|
||||||
|
Two tables need to be made, one which contains the indexes of all
|
||||||
|
readable characters, and one which contains the indexes of all ANSI
|
||||||
|
escapes. It's important to remember that ANSI escapes require more
|
||||||
|
than one character at a time, though no readable character needs more
|
||||||
|
than one character, since the unicode base class abstracts that away
|
||||||
|
from us. However, several readable characters can be placed in a row.
|
||||||
|
|
||||||
|
We must use regexes here to figure out where all the escape sequences
|
||||||
|
are hiding in the string. Then we use the ranges of their starts and
|
||||||
|
ends to create a final, comprehensive list of all indexes which are
|
||||||
|
dedicated to code, and all dedicated to text.
|
||||||
|
|
||||||
|
It's possible that only one of these tables is actually needed, the
|
||||||
|
other assumed to be what isn't in the first.
|
||||||
|
"""
|
||||||
|
matches = [
|
||||||
|
(match.start(), match.end())
|
||||||
|
for match in self.parser.ansi_regex.finditer(self._raw_string)]
|
||||||
|
code_indexes = []
|
||||||
|
# These are all the indexes which hold code characters.
|
||||||
|
for start, end in matches:
|
||||||
|
code_indexes.extend(range(start, end))
|
||||||
|
|
||||||
|
if not code_indexes:
|
||||||
|
# Plain string, no ANSI codes.
|
||||||
|
return code_indexes, range(0, len(self._raw_string))
|
||||||
|
flat_ranges = []
|
||||||
|
# We need to get the ones between them, but the code might start at
|
||||||
|
# the beginning, and there might be codes at the end.
|
||||||
|
for tup in matches:
|
||||||
|
flat_ranges.extend(tup)
|
||||||
|
# Is the beginning of the string a code character?
|
||||||
|
if flat_ranges[0] == 0:
|
||||||
|
flat_ranges.pop(0)
|
||||||
|
else:
|
||||||
|
flat_ranges.insert(0, 0)
|
||||||
|
# How about the end?
|
||||||
|
end_index = (len(self._raw_string) - 1)
|
||||||
|
if flat_ranges[-1] == end_index:
|
||||||
|
flat_ranges.pop()
|
||||||
|
else:
|
||||||
|
flat_ranges.append(end_index)
|
||||||
|
char_indexes = []
|
||||||
|
for start, end in list(group(flat_ranges, 2)):
|
||||||
|
char_indexes.extend(range(start, end))
|
||||||
|
# The end character will be left off if it's a normal character. Fix
|
||||||
|
# that here.
|
||||||
|
if end_index in flat_ranges:
|
||||||
|
char_indexes.append(end_index)
|
||||||
|
return code_indexes, char_indexes
|
||||||
|
|
||||||
|
@_spacing_preflight
|
||||||
|
def center(self, width, fillchar, difference):
|
||||||
|
"""
|
||||||
|
Center some text with some spaces padding both sides.
|
||||||
|
"""
|
||||||
|
remainder = difference % 2
|
||||||
|
difference /= 2
|
||||||
|
spacing = difference * fillchar
|
||||||
|
result = spacing + self + spacing + (remainder * fillchar)
|
||||||
|
return result
|
||||||
|
|
||||||
|
@_spacing_preflight
|
||||||
|
def ljust(self, width, fillchar, difference):
|
||||||
|
"""
|
||||||
|
Left justify some text.
|
||||||
|
"""
|
||||||
|
return self + (difference * fillchar)
|
||||||
|
|
||||||
|
@_spacing_preflight
|
||||||
|
def rjust(self, width, fillchar, difference):
|
||||||
|
"""
|
||||||
|
Right justify some text.
|
||||||
|
"""
|
||||||
|
return (difference * fillchar) + self
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue