""" ANSI -> html converter Credit for original idea and implementation goes to Muhammad Alkarouri and his snippet #577349 on http://code.activestate.com. (extensively modified by Griatch 2010) """ from __future__ import absolute_import from builtins import object import re import cgi from .ansi import * # All xterm256 RGB equivalents XTERM256_FG = "\033[38;5;%sm" XTERM256_BG = "\033[48;5;%sm" class TextToHTMLparser(object): """ This class describes a parser for converting from ANSI to html. """ tabstop = 4 # mapping html color name <-> ansi code. hilite = ANSI_HILITE unhilite = ANSI_UNHILITE # this will be stripped - there is no css equivalent. normal = ANSI_NORMAL # " underline = ANSI_UNDERLINE blink = ANSI_BLINK inverse = ANSI_INVERSE # this will produce an outline; no obvious css equivalent? colorcodes = [ ('color-000', unhilite + ANSI_BLACK), # pure black ('color-001', unhilite + ANSI_RED), ('color-002', unhilite + ANSI_GREEN), ('color-003', unhilite + ANSI_YELLOW), ('color-004', unhilite + ANSI_BLUE), ('color-005', unhilite + ANSI_MAGENTA), ('color-006', unhilite + ANSI_CYAN), ('color-007', unhilite + ANSI_WHITE), # light grey ('color-008', hilite + ANSI_BLACK), # dark grey ('color-009', hilite + ANSI_RED), ('color-010', hilite + ANSI_GREEN), ('color-011', hilite + ANSI_YELLOW), ('color-012', hilite + ANSI_BLUE), ('color-013', hilite + ANSI_MAGENTA), ('color-014', hilite + ANSI_CYAN), ('color-015', hilite + ANSI_WHITE) # pure white ] + [("color-%03i" % (i+16), XTERM256_FG % ("%i" % (i+16))) for i in xrange(240)] colorback = [ ('bgcolor-000', ANSI_BACK_BLACK), # pure black ('bgcolor-001', ANSI_BACK_RED), ('bgcolor-002', ANSI_BACK_GREEN), ('bgcolor-003', ANSI_BACK_YELLOW), ('bgcolor-004', ANSI_BACK_BLUE), ('bgcolor-005', ANSI_BACK_MAGENTA), ('bgcolor-006', ANSI_BACK_CYAN), ('bgcolor-007', ANSI_BACK_WHITE), # light grey ('bgcolor-008', hilite + ANSI_BACK_BLACK), # dark grey ('bgcolor-009', hilite + ANSI_BACK_RED), ('bgcolor-010', hilite + ANSI_BACK_GREEN), ('bgcolor-011', hilite + ANSI_BACK_YELLOW), ('bgcolor-012', hilite + ANSI_BACK_BLUE), ('bgcolor-013', hilite + ANSI_BACK_MAGENTA), ('bgcolor-014', hilite + ANSI_BACK_CYAN), ('bgcolor-015', hilite + ANSI_BACK_WHITE), # pure white ] + [("bgcolor-%03i" % (i+16), XTERM256_BG % ("%i" % (i+16))) for i in range(240)] # make sure to escape [ #colorcodes = [(c, code.replace("[", r"\[")) for c, code in colorcodes] #colorback = [(c, code.replace("[", r"\[")) for c, code in colorback] fg_colormap = dict((code, clr) for clr, code in colorcodes) bg_colormap = dict((code, clr) for clr, code in colorback) # create stop markers fgstop = "(?:\033\[1m|\033\[22m)*\033\[3[0-8].*?m|\033\[0m|$" bgstop = "(?:\033\[1m|\033\[22m)*\033\[4[0-8].*?m|\033\[0m|$" # extract color markers, tagging the start marker and the text marked re_fgs = re.compile("((?:\033\[1m|\033\[22m)*\033\[3[0-8].*?m)(.*?)(?=" + fgstop + ")") re_bgs = re.compile("((?:\033\[1m|\033\[22m)*\033\[4[0-8].*?m)(.*?)(?=" + bgstop + ")") re_normal = re.compile(normal.replace("[", r"\[")) re_hilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (hilite.replace("[", r"\["), fgstop, bgstop)) re_unhilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (unhilite.replace("[", r"\["), fgstop, bgstop)) re_uline = re.compile("(?:%s)(.*?)(?=%s|%s)" % (underline.replace("[", r"\["), fgstop, bgstop)) re_blink = re.compile("(?:%s)(.*?)(?=%s|%s)" % (blink.replace("[", r"\["), fgstop, bgstop)) re_inverse = re.compile("(?:%s)(.*?)(?=%s|%s)" % (inverse.replace("[", r"\["), fgstop, bgstop)) re_string = re.compile(r'(?P[<&>])|(?P [ \t]+)|(?P\r\n|\r|\n)', re.S|re.M|re.I) re_url = re.compile(r'((?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)') re_mxplink = re.compile(r'\|lc(.*?)\|lt(.*?)\|le', re.DOTALL) def _sub_fg(self, colormatch): code, text = colormatch.groups() return r'''%s''' % (self.fg_colormap.get(code, "err"), text) def _sub_bg(self, colormatch): code, text = colormatch.groups() return r'''%s''' % (self.bg_colormap.get(code, "err"), text) def re_color(self, text): """ Replace ansi colors with html color class names. Let the client choose how it will display colors, if it wishes to. Args: text (str): the string with color to replace. Returns: text (str): Re-colored text. """ text = self.re_fgs.sub(self._sub_fg, text) text = self.re_bgs.sub(self._sub_bg, text) text = self.re_normal.sub("", text) return text def re_bold(self, text): """ Clean out superfluous hilights rather than set to make it match the look of telnet. Args: text (str): Text to process. Returns: text (str): Processed text. """ text = self.re_hilite.sub(r'\1', text) return self.re_unhilite.sub(r'\1', text) # strip unhilite - there is no equivalent in css. def re_underline(self, text): """ Replace ansi underline with html underline class name. Args: text (str): Text to process. Returns: text (str): Processed text. """ return self.re_uline.sub(r'\1', text) def re_blinking(self, text): """ Replace ansi blink with custom blink css class Args: text (str): Text to process. Returns: text (str): Processed text. """ return self.re_blink.sub(r'\1', text) def re_inversing(self, text): """ Replace ansi inverse with custom inverse css class Args: text (str): Text to process. Returns: text (str): Processed text. """ return self.re_inverse.sub(r'\1', text) def remove_bells(self, text): """ Remove ansi specials Args: text (str): Text to process. Returns: text (str): Processed text. """ return text.replace('\07', '') def remove_backspaces(self, text): """ Removes special escape sequences Args: text (str): Text to process. Returns: text (str): Processed text. """ backspace_or_eol = r'(.\010)|(\033\[K)' n = 1 while n > 0: text, n = re.subn(backspace_or_eol, '', text, 1) return text def convert_linebreaks(self, text): """ Extra method for cleaning linebreaks Args: text (str): Text to process. Returns: text (str): Processed text. """ return text.replace(r'\n', r'
') def convert_urls(self, text): """ Replace urls (http://...) by valid HTML. Args: text (str): Text to process. Returns: text (str): Processed text. """ # -> added target to output prevent the web browser from attempting to # change pages (and losing our webclient session). return self.re_url.sub(r'\1\2', text) def convert_links(self, text): """ Replaces links with HTML code. Args: text (str): Text to process. Returns: text (str): Processed text. """ return self.re_mxplink.sub(r"""\2""", text) def do_sub(self, match): """ Helper method to be passed to re.sub, for handling all substitutions. Args: match (re.Matchobject): Match for substitution. Returns: text (str): Processed text. """ cdict = match.groupdict() if cdict['htmlchars']: return cgi.escape(cdict['htmlchars']) if cdict['lineend']: return '
' elif cdict['space'] == '\t': return ' ' * self.tabstop elif cdict['space']: text = match.group().replace('\t', ' ' * self.tabstop) text = text.replace(' ', ' ') return text def parse(self, text, strip_ansi=False): """ Main access function, converts a text containing ANSI codes into html statements. Args: text (str): Text to process. strip_ansi (bool, optional): Returns: text (str): Parsed text. """ # parse everything to ansi first text = parse_ansi(text, strip_ansi=strip_ansi, xterm256=True, mxp=True) # convert all ansi to html result = re.sub(self.re_string, self.do_sub, text) result = self.re_color(result) result = self.re_bold(result) result = self.re_underline(result) result = self.re_blinking(result) result = self.re_inversing(result) result = self.remove_bells(result) result = self.convert_linebreaks(result) result = self.remove_backspaces(result) result = self.convert_urls(result) result = self.convert_links(result) # clean out eventual ansi that was missed #result = parse_ansi(result, strip_ansi=True) return result HTML_PARSER = TextToHTMLparser() # # Access function # def parse_html(string, strip_ansi=False, parser=HTML_PARSER): """ Parses a string, replace ANSI markup with html """ return parser.parse(string, strip_ansi=strip_ansi)