diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2023-03-02 12:14:49 +0100 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2023-03-02 15:44:59 +0100 |
commit | dcc7f767b3a705728c30c81cb38789262cf62b8c (patch) | |
tree | a9e04c01b6b290eeb1ea1bb1aa4e54d89a6a76f4 /src | |
parent | c8faf02daf0ba23d3ed6046c828e6632b08634c2 (diff) | |
download | tracker-dcc7f767b3a705728c30c81cb38789262cf62b8c.tar.gz |
core: Move collation handling alongside the other Unicode code
Move the collation implementations out of libtracker-sparql and next to
the libicu/libunistring parser implementations in libtracker-common.
Diffstat (limited to 'src')
-rw-r--r-- | src/libtracker-common/tracker-parser-libicu.c | 73 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser-libunistring.c | 43 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser.h | 12 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-collation.c | 167 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-collation.h | 13 |
5 files changed, 130 insertions, 178 deletions
diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c index a3271c003..06ba77da5 100644 --- a/src/libtracker-common/tracker-parser-libicu.c +++ b/src/libtracker-common/tracker-parser-libicu.c @@ -30,7 +30,10 @@ #include <unicode/ustring.h> #include <unicode/uchar.h> #include <unicode/unorm.h> +#include <unicode/ucol.h> +#include "tracker-locale.h" +#include "tracker-debug.h" #include "tracker-parser.h" #include "tracker-parser-utils.h" @@ -41,6 +44,8 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +typedef UCollator TrackerCollator; + /* Max possible length of a UChar encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -754,6 +759,74 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + UCollator *collator = NULL; + UErrorCode status = U_ZERO_ERROR; + gchar *locale; + + /* Get locale! */ + locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); + + TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale)); + collator = ucol_open (locale, &status); + if (!collator) { + g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s", + locale, u_errorName (status)); + /* Try to get UCA collator then... 
*/ + status = U_ZERO_ERROR; + collator = ucol_open ("root", &status); + if (!collator) { + g_critical ("[ICU collation] UCA Collator cannot be created: %s", + u_errorName (status)); + } + } + g_free (locale); + + return collator; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + if (collator) + ucol_close ((UCollator *)collator); +} + +gint +tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + UErrorCode status = U_ZERO_ERROR; + UCharIterator iter1; + UCharIterator iter2; + UCollationResult result; + + /* Collator must be created before trying to collate */ + g_return_val_if_fail (collator, -1); + + /* Setup iterators */ + uiter_setUTF8 (&iter1, str1, len1); + uiter_setUTF8 (&iter2, str2, len2); + + result = ucol_strcollIter ((UCollator *)collator, + &iter1, + &iter2, + &status); + if (status != U_ZERO_ERROR) + g_critical ("Error collating: %s", u_errorName (status)); + + if (result == UCOL_GREATER) + return 1; + if (result == UCOL_LESS) + return -1; + return 0; +} + gunichar2 * tracker_parser_tolower (const gunichar2 *input, gsize len, diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c index 04f08a2c6..1477c8526 100644 --- a/src/libtracker-common/tracker-parser-libunistring.c +++ b/src/libtracker-common/tracker-parser-libunistring.c @@ -40,6 +40,9 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +/* If string lenth less than this value, allocating from the stack */ +#define MAX_STACK_STR_SIZE 8192 + /* Max possible length of a UTF-8 encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -541,6 +544,46 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + /* Nothing to do */ + return NULL; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + /* Nothing to do */ +} + +gint +tracker_collation_utf8 (gpointer 
collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + gint result; + guchar *aux1; + guchar *aux2; + + /* Note: str1 and str2 are NOT NUL-terminated */ + aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); + aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); + + memcpy (aux1, str1, len1); aux1[len1] = '\0'; + memcpy (aux2, str2, len2); aux2[len2] = '\0'; + + result = u8_strcoll (aux1, aux2); + + if (len1 >= MAX_STACK_STR_SIZE) + g_free (aux1); + if (len2 >= MAX_STACK_STR_SIZE) + g_free (aux2); + return result; +} + gunichar2 * tracker_parser_tolower (const gunichar2 *input, gsize len, diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h index a6a51fd4e..cdf861989 100644 --- a/src/libtracker-common/tracker-parser.h +++ b/src/libtracker-common/tracker-parser.h @@ -34,6 +34,7 @@ G_BEGIN_DECLS +/* Parser */ typedef struct TrackerParser TrackerParser; TrackerParser *tracker_parser_new (TrackerLanguage *language); @@ -57,6 +58,17 @@ const gchar * tracker_parser_next (TrackerParser *parser, void tracker_parser_free (TrackerParser *parser); +/* Collation */ +gpointer tracker_collation_init (void); + +void tracker_collation_shutdown (gpointer collator); + +gint tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2); + /* Other helper methods */ gunichar2 * tracker_parser_tolower (const gunichar2 *input, diff --git a/src/libtracker-sparql/core/tracker-collation.c b/src/libtracker-sparql/core/tracker-collation.c index 16ba56bcb..0e82d66dd 100644 --- a/src/libtracker-sparql/core/tracker-collation.c +++ b/src/libtracker-sparql/core/tracker-collation.c @@ -18,177 +18,12 @@ */ #include "config.h" + #include <glib.h> #include <glib/gi18n.h> -#include <string.h> -#include <locale.h> -#include <libtracker-common/tracker-debug.h> -#include <libtracker-common/tracker-locale.h> #include "tracker-collation.h" 
-/* If defined, will dump additional traces */ -#ifdef G_ENABLE_DEBUG -#define trace(message, ...) TRACKER_NOTE (COLLATION, g_message (message, ##__VA_ARGS__)) -#else -#define trace(...) -#endif - -#ifdef HAVE_LIBUNISTRING -/* libunistring versions prior to 9.1.2 need this hack */ -#define _UNUSED_PARAMETER_ -#include <unistr.h> -#elif defined(HAVE_LIBICU) -#include <unicode/ucol.h> -#include <unicode/utypes.h> -#endif - -/* If string lenth less than this value, allocating from the stack */ -#define MAX_STACK_STR_SIZE 8192 - -#ifdef HAVE_LIBUNISTRING /* ---- GNU libunistring based collation ---- */ - -gpointer -tracker_collation_init (void) -{ - gchar *locale; - - /* Get locale! */ - locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); - TRACKER_NOTE (COLLATION, g_message ("[libunistring collation] Initializing collator for locale '%s'", locale)); - g_free (locale); - /* Nothing to do */ - return NULL; -} - -void -tracker_collation_shutdown (gpointer collator) -{ - /* Nothing to do */ -} - -gint -tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2) -{ - gint result; - gchar *aux1; - gchar *aux2; - - /* Note: str1 and str2 are NOT NUL-terminated */ - aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); - aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); - - memcpy (aux1, str1, len1); aux1[len1] = '\0'; - memcpy (aux2, str2, len2); aux2[len2] = '\0'; - - result = u8_strcoll (aux1, aux2); - - trace ("(libunistring) Collating '%s' and '%s' (%d)", - aux1, aux2, result); - - if (len1 >= MAX_STACK_STR_SIZE) - g_free (aux1); - if (len2 >= MAX_STACK_STR_SIZE) - g_free (aux2); - return result; -} - -#elif defined(HAVE_LIBICU) /* ---- ICU based collation (UTF-16) ----*/ - -gpointer -tracker_collation_init (void) -{ - UCollator *collator = NULL; - UErrorCode status = U_ZERO_ERROR; - gchar *locale; - - /* Get locale! 
*/ - locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); - - TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale)); - collator = ucol_open (locale, &status); - if (!collator) { - g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s", - locale, u_errorName (status)); - /* Try to get UCA collator then... */ - status = U_ZERO_ERROR; - collator = ucol_open ("root", &status); - if (!collator) { - g_critical ("[ICU collation] UCA Collator cannot be created: %s", - u_errorName (status)); - } - } - g_free (locale); - return collator; -} - -void -tracker_collation_shutdown (gpointer collator) -{ - if (collator) - ucol_close ((UCollator *)collator); -} - -gint -tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2) -{ - UErrorCode status = U_ZERO_ERROR; - UCharIterator iter1; - UCharIterator iter2; - UCollationResult result; - - /* Collator must be created before trying to collate */ - g_return_val_if_fail (collator, -1); - - /* Setup iterators */ - uiter_setUTF8 (&iter1, str1, len1); - uiter_setUTF8 (&iter2, str2, len2); - - result = ucol_strcollIter ((UCollator *)collator, - &iter1, - &iter2, - &status); - if (status != U_ZERO_ERROR) - g_critical ("Error collating: %s", u_errorName (status)); - -#ifdef ENABLE_TRACE - { - gchar *aux1; - gchar *aux2; - - /* Note: str1 and str2 are NOT NUL-terminated */ - aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); - aux2 = (len2 < MAX_STACK_STR_SIZE) ? 
g_alloca (len2+1) : g_malloc (len2+1); - - memcpy (aux1, str1, len1); aux1[len1] = '\0'; - memcpy (aux2, str2, len2); aux2[len2] = '\0'; - - trace ("(ICU) Collating '%s' and '%s' (%d)", - aux1, aux2, result); - - if (len1 >= MAX_STACK_STR_SIZE) - g_free (aux1); - if (len2 >= MAX_STACK_STR_SIZE) - g_free (aux2); - } -#endif /* ENABLE_TRACE */ - - if (result == UCOL_GREATER) - return 1; - if (result == UCOL_LESS) - return -1; - return 0; -} -#endif - static gboolean skip_non_alphanumeric (const gchar **str, gint *len) diff --git a/src/libtracker-sparql/core/tracker-collation.h b/src/libtracker-sparql/core/tracker-collation.h index 95551a9f0..6369aefba 100644 --- a/src/libtracker-sparql/core/tracker-collation.h +++ b/src/libtracker-sparql/core/tracker-collation.h @@ -22,13 +22,7 @@ G_BEGIN_DECLS -gpointer tracker_collation_init (void); -void tracker_collation_shutdown (gpointer collator); -gint tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2); +#include <libtracker-common/tracker-parser.h> gint tracker_collation_utf8_title (gpointer collator, gint len1, @@ -36,12 +30,7 @@ gint tracker_collation_utf8_title (gpointer collator, gint len2, gconstpointer str2); -#ifdef HAVE_LIBICU #define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x10fffd) -#else -/* glibc-based collators do not properly sort private use characters */ -#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x9fa5) -#endif G_END_DECLS |