summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomasz Miąsko <tomasz.miasko@gmail.com> 2018-10-30 00:00:00 +0000
committer Tomasz Miąsko <tomasz.miasko@gmail.com> 2018-11-13 14:43:34 +0100
commit ba18822f358c49f15435197dba7c11f6753396f1 (patch)
tree 580136e8ec62f95fa526b1721a8087c6e7803fe4
parent e0148985f30b7b37108259432b83e7dbbf7c0e6b (diff)
download glib-ba18822f358c49f15435197dba7c11f6753396f1.tar.gz
gdate: Use longest matching month name in g_date_set_parse
There are languages where the name of one month is a substring of another. Instead of stopping the search at the first match, use the month whose name constitutes the longest match. Fixes #1343.
-rw-r--r-- glib/gdate.c 75
-rw-r--r-- glib/tests/date.c 34
2 files changed, 65 insertions, 44 deletions
diff --git a/glib/gdate.c b/glib/gdate.c
index 4925818b3..5457a3b8c 100644
--- a/glib/gdate.c
+++ b/glib/gdate.c
@@ -931,6 +931,27 @@ struct _GDateParseTokens {
typedef struct _GDateParseTokens GDateParseTokens;
+static inline gboolean /* TRUE only when needle occurs in haystack and is strictly longer than *longest */
+update_month_match (gsize *longest, /* in/out: length of the longest name matched so far; raised on success */
+ const gchar *haystack, /* string that is searched with strstr() */
+ const gchar *needle) /* candidate month name; NULL is accepted and never matches */
+{
+ gsize length;
+
+ if (needle == NULL) /* no name available for this slot */
+ return FALSE;
+
+ length = strlen (needle);
+ if (*longest >= length) /* not strictly longer than the current best, so skip the search; ties keep the earlier match */
+ return FALSE;
+
+ if (strstr (haystack, needle) == NULL) /* plain byte-wise substring search */
+ return FALSE;
+
+ *longest = length; /* record the new best length for subsequent calls */
+ return TRUE;
+}
+
#define NUM_LEN 10
/* HOLDS: g_date_global_lock */
@@ -978,6 +999,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
if (pt->num_ints < 3)
{
+ gsize longest = 0;
gchar *casefold;
gchar *normalized;
@@ -985,8 +1007,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
g_free (casefold);
- i = 1;
- while (i < 13)
+ for (i = 1; i < 13; ++i)
{
/* Here month names may be in a genitive case if the language
* grammatical rules require it.
@@ -997,60 +1018,26 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
* genitive case here so they use nominative everywhere.
* For example, English always uses "January".
*/
- if (long_month_names[i] != NULL)
- {
- const gchar *found = strstr (normalized, long_month_names[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, long_month_names[i]))
+ pt->month = i;
/* Here month names will be in a nominative case.
* Examples of how January may look in some languages:
* Catalan: "gener", Croatian: "Siječanj", Polish: "styczeń",
* Upper Sorbian: "Januar".
*/
- if (long_month_names_alternative[i] != NULL)
- {
- const gchar *found = strstr (normalized, long_month_names_alternative[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, long_month_names_alternative[i]))
+ pt->month = i;
/* Differences between abbreviated nominative and abbreviated
* genitive month names are visible in very few languages but
* let's handle them.
*/
- if (short_month_names[i] != NULL)
- {
- const gchar *found = strstr (normalized, short_month_names[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, short_month_names[i]))
+ pt->month = i;
- if (short_month_names_alternative[i] != NULL)
- {
- const gchar *found = strstr (normalized, short_month_names_alternative[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
-
- ++i;
+ if (update_month_match (&longest, normalized, short_month_names_alternative[i]))
+ pt->month = i;
}
g_free (normalized);
diff --git a/glib/tests/date.c b/glib/tests/date.c
index 6cd91ab6c..8eb28712b 100644
--- a/glib/tests/date.c
+++ b/glib/tests/date.c
@@ -209,6 +209,39 @@ test_parse_locale_change (void)
}
static void
+test_month_substring (void) /* Parses Polish month names where one name contains another month's abbreviation */
+{
+ GDate date;
+
+ g_test_bug ("793550"); /* associate this test case with bug 793550 */
+
+ if (setlocale (LC_ALL, "pl_PL") == NULL) /* setlocale() returns NULL when the locale cannot be selected */
+ {
+ g_test_skip ("pl_PL locale not available");
+ return;
+ }
+
+ /* In Polish, September is "wrzesień" and August is "sierpień",
+ * abbreviated as "sie". The former used to be confused with the latter
+ * because "sie" is a substring of "wrzesień" and was matched first. */
+
+ g_date_set_parse (&date, "wrzesień 2018"); /* full name that contains the abbreviation "sie" */
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_SEPTEMBER);
+
+ g_date_set_parse (&date, "sie 2018"); /* the abbreviation on its own must still parse as August */
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+
+ g_date_set_parse (&date, "sierpień 2018"); /* full name of August */
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+
+ setlocale (LC_ALL, ""); /* "" selects the locale from the environment */
+}
+
+
+static void
test_month_names (void)
{
#if defined(HAVE_LANGINFO_ABALTMON) || defined(G_OS_WIN32)
@@ -736,6 +769,7 @@ main (int argc, char** argv)
g_test_add_func ("/date/dates", test_dates);
g_test_add_func ("/date/parse", test_parse);
g_test_add_func ("/date/parse_locale_change", test_parse_locale_change);
+ g_test_add_func ("/date/month_substring", test_month_substring);
g_test_add_func ("/date/month_names", test_month_names);
g_test_add_func ("/date/clamp", test_clamp);
g_test_add_func ("/date/order", test_order);