summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jean Abou Samra <jean@abou-samra.fr> 2023-02-12 02:46:30 +0100
committer: Jean Abou Samra <jean@abou-samra.fr> 2023-02-12 02:54:21 +0100
commit: 63bb71a5acd0d49a2ceee15098485bc34b0e8864 (patch)
tree: 4a4dbdc4f96d5b8a89a6cd0e45231e7f8f1473dc
parent: 08af5e2bab184c1b5d357ebde8c0efdbe6288e2c (diff)
download: babel-63bb71a5acd0d49a2ceee15098485bc34b0e8864.tar.gz
In fuzzy matching, also .lower().strip() fuzzy candidates
The .lower().strip() normalization seems intended to ease fuzzy matching when there are trivial edits in the msgstr (changing case or adding whitespace), but it was only applied to the new msgstr, not to the old msgstr candidates, so merging catalogs could miss messages.
-rw-r--r-- babel/messages/catalog.py 15
-rw-r--r-- tests/messages/test_catalog.py 10
2 files changed, 14 insertions, 11 deletions
diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py
index dead4aa..1902643 100644
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -803,10 +803,13 @@ class Catalog:
# Prepare for fuzzy matching
fuzzy_candidates = []
if not no_fuzzy_matching:
- fuzzy_candidates = {
- self._key_for(msgid): messages[msgid].context
- for msgid in messages if msgid and messages[msgid].string
- }
+ fuzzy_candidates = {}
+ for msgid in messages:
+ if msgid and messages[msgid].string:
+ key = self._key_for(msgid)
+ ctxt = messages[msgid].context
+ modified_key = key.lower().strip()
+ fuzzy_candidates[modified_key] = (key, ctxt)
fuzzy_matches = set()
def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
@@ -861,8 +864,8 @@ class Catalog:
matches = get_close_matches(matchkey.lower().strip(),
fuzzy_candidates.keys(), 1)
if matches:
- newkey = matches[0]
- newctxt = fuzzy_candidates[newkey]
+ modified_key = matches[0]
+ newkey, newctxt = fuzzy_candidates[modified_key]
if newctxt is not None:
newkey = newkey, newctxt
_merge(message, newkey, key)
diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py
index 273c83f..c2e7aed 100644
--- a/tests/messages/test_catalog.py
+++ b/tests/messages/test_catalog.py
@@ -121,16 +121,16 @@ class CatalogTestCase(unittest.TestCase):
def test_update_fuzzy_matching_with_case_change(self):
cat = catalog.Catalog()
- cat.add('foo', 'Voh')
+ cat.add('FOO', 'Voh')
cat.add('bar', 'Bahr')
tmpl = catalog.Catalog()
- tmpl.add('Foo')
+ tmpl.add('foo')
cat.update(tmpl)
assert len(cat.obsolete) == 1
- assert 'foo' not in cat
+ assert 'FOO' not in cat
- assert cat['Foo'].string == 'Voh'
- assert cat['Foo'].fuzzy is True
+ assert cat['foo'].string == 'Voh'
+ assert cat['foo'].fuzzy is True
def test_update_fuzzy_matching_with_char_change(self):
cat = catalog.Catalog()