fix: improve convert_urls
This commit is contained in:
parent
f9ba117680
commit
80878fb124
2 changed files with 26 additions and 15 deletions
|
|
@ -250,13 +250,13 @@ class TestTextToHTMLparser(TestCase):
|
||||||
def test_url_scheme_ftp(self):
|
def test_url_scheme_ftp(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.convert_urls("ftp.example.com"),
|
self.parser.convert_urls("ftp.example.com"),
|
||||||
'<a href="ftp.example.com" target="_blank">ftp.example.com</a>',
|
'<a href="//ftp.example.com" target="_blank">ftp.example.com</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_scheme_www(self):
|
def test_url_scheme_www(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.convert_urls("www.example.com"),
|
self.parser.convert_urls("www.example.com"),
|
||||||
'<a href="www.example.com" target="_blank">www.example.com</a>',
|
'<a href="//www.example.com" target="_blank">www.example.com</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_scheme_ftpproto(self):
|
def test_url_scheme_ftpproto(self):
|
||||||
|
|
@ -280,7 +280,7 @@ class TestTextToHTMLparser(TestCase):
|
||||||
def test_url_chars_slash(self):
|
def test_url_chars_slash(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.convert_urls("www.example.com/homedir"),
|
self.parser.convert_urls("www.example.com/homedir"),
|
||||||
'<a href="www.example.com/homedir" target="_blank">www.example.com/homedir</a>',
|
'<a href="//www.example.com/homedir" target="_blank">www.example.com/homedir</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_chars_colon(self):
|
def test_url_chars_colon(self):
|
||||||
|
|
@ -313,22 +313,16 @@ class TestTextToHTMLparser(TestCase):
|
||||||
' target="_blank">https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve</a>',
|
' target="_blank">https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_edge_leadingw(self):
|
|
||||||
self.assertEqual(
|
|
||||||
self.parser.convert_urls("wwww.example.com"),
|
|
||||||
'w<a href="www.example.com" target="_blank">www.example.com</a>',
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_url_edge_following_period_eol(self):
|
def test_url_edge_following_period_eol(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.convert_urls("www.example.com."),
|
self.parser.convert_urls("www.example.com."),
|
||||||
'<a href="www.example.com" target="_blank">www.example.com</a>.',
|
'<a href="//www.example.com" target="_blank">www.example.com</a>.',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_edge_following_period(self):
|
def test_url_edge_following_period(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.convert_urls("see www.example.com. "),
|
self.parser.convert_urls("see www.example.com. "),
|
||||||
'see <a href="www.example.com" target="_blank">www.example.com</a>. ',
|
'see <a href="//www.example.com" target="_blank">www.example.com</a>. ',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_url_edge_brackets(self):
|
def test_url_edge_brackets(self):
|
||||||
|
|
@ -356,3 +350,9 @@ class TestTextToHTMLparser(TestCase):
|
||||||
'</span><a href="http://example.com/" target="_blank">'
|
'</span><a href="http://example.com/" target="_blank">'
|
||||||
'http://example.com/</a><span class="red">',
|
'http://example.com/</a><span class="red">',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_non_url_with_www(self):
|
||||||
|
self.assertEqual(
|
||||||
|
self.parser.convert_urls('Awwww.this should not be highlighted'),
|
||||||
|
'Awwww.this should not be highlighted'
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -88,8 +88,9 @@ class TextToHTMLparser(object):
|
||||||
re.S | re.M | re.I,
|
re.S | re.M | re.I,
|
||||||
)
|
)
|
||||||
re_url = re.compile(
|
re_url = re.compile(
|
||||||
r'(?<!=")((?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)'
|
r'(?<!=")(\b(?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)'
|
||||||
)
|
)
|
||||||
|
re_protocol = re.compile(r'^(?:ftp|https?)://')
|
||||||
re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL)
|
re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL)
|
||||||
re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL)
|
re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL)
|
||||||
|
|
||||||
|
|
@ -147,9 +148,19 @@ class TextToHTMLparser(object):
|
||||||
text (str): Processed text.
|
text (str): Processed text.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
m = self.re_url.search(text)
|
||||||
|
if m:
|
||||||
|
href = m.group(1)
|
||||||
|
label = href
|
||||||
|
# if there is no protocol (e.g. the URL starts with a bare www. or ftp. host), prefix it with // so the link isn't treated as relative
|
||||||
|
if not self.re_protocol.match(href):
|
||||||
|
href = "//" + href
|
||||||
|
rest = m.group(2)
|
||||||
# -> added target to output to prevent the web browser from attempting to
|
# -> added target to output to prevent the web browser from attempting to
|
||||||
# change pages (and losing our webclient session).
|
# change pages (and losing our webclient session).
|
||||||
return self.re_url.sub(r'<a href="\1" target="_blank">\1</a>\2', text)
|
return text[:m.start()] + f'<a href="{href}" target="_blank">{label}</a>{rest}' + text[m.end():]
|
||||||
|
else:
|
||||||
|
return text
|
||||||
|
|
||||||
def sub_mxp_links(self, match):
|
def sub_mxp_links(self, match):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue