Merge pull request #1960 from dbarbuzzi/bugfix/1952-utils-latinify
Update utils.latinify to support Python 3
This commit is contained in:
commit
6d0e4aa9b1
2 changed files with 24 additions and 1 deletion
|
|
@@ -223,3 +223,20 @@ class TestImportFunctions(TestCase):
|
||||||
test_path = self._t_dir_file("invalid_filename.py")
|
test_path = self._t_dir_file("invalid_filename.py")
|
||||||
loaded_mod = utils.mod_import_from_path(test_path)
|
loaded_mod = utils.mod_import_from_path(test_path)
|
||||||
self.assertIsNone(loaded_mod)
|
self.assertIsNone(loaded_mod)
|
||||||
|
|
||||||
|
|
||||||
|
class LatinifyTest(TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super().setUp()
|
||||||
|
|
||||||
|
self.example_str = 'It naïvely says, “plugh.”'
|
||||||
|
self.expected_output = 'It naively says, "plugh."'
|
||||||
|
|
||||||
|
def test_plain_string(self):
|
||||||
|
result = utils.latinify(self.example_str)
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
||||||
|
def test_byte_string(self):
|
||||||
|
byte_str = utils.to_bytes(self.example_str)
|
||||||
|
result = utils.latinify(byte_str)
|
||||||
|
self.assertEqual(result, self.expected_output)
|
||||||
|
|
|
||||||
|
|
@@ -761,7 +761,10 @@ _UNICODE_MAP = {
|
||||||
"EN DASH": "-",
|
"EN DASH": "-",
|
||||||
"HORIZONTAL BAR": "-",
|
"HORIZONTAL BAR": "-",
|
||||||
"HORIZONTAL ELLIPSIS": "...",
|
"HORIZONTAL ELLIPSIS": "...",
|
||||||
|
"LEFT SINGLE QUOTATION MARK": "'",
|
||||||
"RIGHT SINGLE QUOTATION MARK": "'",
|
"RIGHT SINGLE QUOTATION MARK": "'",
|
||||||
|
"LEFT DOUBLE QUOTATION MARK": '"',
|
||||||
|
"RIGHT DOUBLE QUOTATION MARK": '"',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -788,10 +791,13 @@ def latinify(string, default="?", pure_ascii=False):
|
||||||
|
|
||||||
from unicodedata import name
|
from unicodedata import name
|
||||||
|
|
||||||
|
if isinstance(string, bytes):
|
||||||
|
string = string.decode("utf8")
|
||||||
|
|
||||||
converted = []
|
converted = []
|
||||||
for unich in iter(string):
|
for unich in iter(string):
|
||||||
try:
|
try:
|
||||||
ch = unich.decode("ascii")
|
ch = unich.encode("utf8").decode("ascii")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
# deduce a latin letter equivalent from the Unicode data
|
# deduce a latin letter equivalent from the Unicode data
|
||||||
# point name; e.g., since `name(u'á') == 'LATIN SMALL
|
# point name; e.g., since `name(u'á') == 'LATIN SMALL
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue