From dcc7f767b3a705728c30c81cb38789262cf62b8c Mon Sep 17 00:00:00 2001 From: Carlos Garnacho Date: Thu, 2 Mar 2023 12:14:49 +0100 Subject: core: Move collation handling with other unicode code Move these implementations together with the libicu/libunistring implementations. --- src/libtracker-common/tracker-parser-libicu.c | 73 ++++++++++++++++++++++ .../tracker-parser-libunistring.c | 43 +++++++++++++ src/libtracker-common/tracker-parser.h | 12 ++++ 3 files changed, 128 insertions(+) (limited to 'src/libtracker-common') diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c index a3271c003..06ba77da5 100644 --- a/src/libtracker-common/tracker-parser-libicu.c +++ b/src/libtracker-common/tracker-parser-libicu.c @@ -30,7 +30,10 @@ #include #include #include +#include +#include "tracker-locale.h" +#include "tracker-debug.h" #include "tracker-parser.h" #include "tracker-parser-utils.h" @@ -41,6 +44,8 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +typedef UCollator TrackerCollator; + /* Max possible length of a UChar encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -754,6 +759,74 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + UCollator *collator = NULL; + UErrorCode status = U_ZERO_ERROR; + gchar *locale; + + /* Get locale! */ + locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); + + TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale)); + collator = ucol_open (locale, &status); + if (!collator) { + g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s", + locale, u_errorName (status)); + /* Try to get UCA collator then... 
*/ + status = U_ZERO_ERROR; + collator = ucol_open ("root", &status); + if (!collator) { + g_critical ("[ICU collation] UCA Collator cannot be created: %s", + u_errorName (status)); + } + } + g_free (locale); + + return collator; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + if (collator) + ucol_close ((UCollator *)collator); +} + +gint +tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + UErrorCode status = U_ZERO_ERROR; + UCharIterator iter1; + UCharIterator iter2; + UCollationResult result; + + /* Collator must be created before trying to collate */ + g_return_val_if_fail (collator, -1); + + /* Setup iterators */ + uiter_setUTF8 (&iter1, str1, len1); + uiter_setUTF8 (&iter2, str2, len2); + + result = ucol_strcollIter ((UCollator *)collator, + &iter1, + &iter2, + &status); + if (status != U_ZERO_ERROR) + g_critical ("Error collating: %s", u_errorName (status)); + + if (result == UCOL_GREATER) + return 1; + if (result == UCOL_LESS) + return -1; + return 0; +} + gunichar2 * tracker_parser_tolower (const gunichar2 *input, gsize len, diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c index 04f08a2c6..1477c8526 100644 --- a/src/libtracker-common/tracker-parser-libunistring.c +++ b/src/libtracker-common/tracker-parser-libunistring.c @@ -40,6 +40,9 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +/* If the string length is less than this value, allocate from the stack */ +#define MAX_STACK_STR_SIZE 8192 + /* Max possible length of a UTF-8 encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -541,6 +544,46 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + /* Nothing to do */ + return NULL; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + /* Nothing to do */ +} + +gint +tracker_collation_utf8 (gpointer 
collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + gint result; + guchar *aux1; + guchar *aux2; + + /* Note: str1 and str2 are NOT NUL-terminated */ + aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); + aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); + + memcpy (aux1, str1, len1); aux1[len1] = '\0'; + memcpy (aux2, str2, len2); aux2[len2] = '\0'; + + result = u8_strcoll (aux1, aux2); + + if (len1 >= MAX_STACK_STR_SIZE) + g_free (aux1); + if (len2 >= MAX_STACK_STR_SIZE) + g_free (aux2); + return result; +} + gunichar2 * tracker_parser_tolower (const gunichar2 *input, gsize len, diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h index a6a51fd4e..cdf861989 100644 --- a/src/libtracker-common/tracker-parser.h +++ b/src/libtracker-common/tracker-parser.h @@ -34,6 +34,7 @@ G_BEGIN_DECLS +/* Parser */ typedef struct TrackerParser TrackerParser; TrackerParser *tracker_parser_new (TrackerLanguage *language); @@ -57,6 +58,17 @@ const gchar * tracker_parser_next (TrackerParser *parser, void tracker_parser_free (TrackerParser *parser); +/* Collation */ +gpointer tracker_collation_init (void); + +void tracker_collation_shutdown (gpointer collator); + +gint tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2); + /* Other helper methods */ gunichar2 * tracker_parser_tolower (const gunichar2 *input, -- cgit v1.2.1