Update utils.latinify to support Python 3
This commit is contained in:
parent
1115a941c1
commit
96d5734e51
2 changed files with 40 additions and 1 deletion
|
|
@ -223,3 +223,36 @@ class TestImportFunctions(TestCase):
|
||||||
test_path = self._t_dir_file("invalid_filename.py")
|
test_path = self._t_dir_file("invalid_filename.py")
|
||||||
loaded_mod = utils.mod_import_from_path(test_path)
|
loaded_mod = utils.mod_import_from_path(test_path)
|
||||||
self.assertIsNone(loaded_mod)
|
self.assertIsNone(loaded_mod)
|
||||||
|
|
||||||
|
|
||||||
|
class LatinifyTest(TestCase):
|
||||||
|
"""
|
||||||
|
utils._UNICODE_MAP may need some additional entries to resolve these tests--
|
||||||
|
|
||||||
|
LEFT DOUBLE QUOTATION MARK: "
|
||||||
|
RIGHT DOUBLE QUOTATION MARK: "
|
||||||
|
|
||||||
|
"""
|
||||||
|
def setUp(self):
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
self.example_str = 'It says, “plugh.”'
|
||||||
|
self.example_ustr = u'It says, “plugh.”'
|
||||||
|
|
||||||
|
self.expected_output = 'It says, "plugh."'
|
||||||
|
|
||||||
|
def test_plain_string(self):
|
||||||
|
result = utils.latinify(self.example_str)
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
||||||
|
def test_unicode_string(self):
|
||||||
|
result = utils.latinify(self.example_ustr)
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
||||||
|
def test_encoded_string(self):
|
||||||
|
result = utils.latinify(self.example_str.encode('utf8'))
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
||||||
|
def test_byte_string(self):
|
||||||
|
result = utils.latinify(utils.to_bytes(self.example_str))
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
|
||||||
|
|
@ -761,7 +761,10 @@ _UNICODE_MAP = {
|
||||||
"EN DASH": "-",
|
"EN DASH": "-",
|
||||||
"HORIZONTAL BAR": "-",
|
"HORIZONTAL BAR": "-",
|
||||||
"HORIZONTAL ELLIPSIS": "...",
|
"HORIZONTAL ELLIPSIS": "...",
|
||||||
|
"LEFT SINGLE QUOTATION MARK": "'",
|
||||||
"RIGHT SINGLE QUOTATION MARK": "'",
|
"RIGHT SINGLE QUOTATION MARK": "'",
|
||||||
|
"LEFT DOUBLE QUOTATION MARK": '"',
|
||||||
|
"RIGHT DOUBLE QUOTATION MARK": '"',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -788,10 +791,13 @@ def latinify(string, default="?", pure_ascii=False):
|
||||||
|
|
||||||
from unicodedata import name
|
from unicodedata import name
|
||||||
|
|
||||||
|
if isinstance(string, bytes):
|
||||||
|
string = string.decode("utf8")
|
||||||
|
|
||||||
converted = []
|
converted = []
|
||||||
for unich in iter(string):
|
for unich in iter(string):
|
||||||
try:
|
try:
|
||||||
ch = unich.decode("ascii")
|
ch = unich.encode("utf8").decode("ascii")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
# deduce a latin letter equivalent from the Unicode data
|
# deduce a latin letter equivalent from the Unicode data
|
||||||
# point name; e.g., since `name(u'á') == 'LATIN SMALL
|
# point name; e.g., since `name(u'á') == 'LATIN SMALL
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue