summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2023-03-02 12:14:49 +0100
committerCarlos Garnacho <carlosg@gnome.org>2023-03-02 15:44:59 +0100
commitdcc7f767b3a705728c30c81cb38789262cf62b8c (patch)
treea9e04c01b6b290eeb1ea1bb1aa4e54d89a6a76f4 /src
parentc8faf02daf0ba23d3ed6046c828e6632b08634c2 (diff)
downloadtracker-dcc7f767b3a705728c30c81cb38789262cf62b8c.tar.gz
core: Move collation handling with other unicode code
Move these implementations together with the libicu/libunistring implementations.
Diffstat (limited to 'src')
-rw-r--r--src/libtracker-common/tracker-parser-libicu.c73
-rw-r--r--src/libtracker-common/tracker-parser-libunistring.c43
-rw-r--r--src/libtracker-common/tracker-parser.h12
-rw-r--r--src/libtracker-sparql/core/tracker-collation.c167
-rw-r--r--src/libtracker-sparql/core/tracker-collation.h13
5 files changed, 130 insertions, 178 deletions
diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c
index a3271c003..06ba77da5 100644
--- a/src/libtracker-common/tracker-parser-libicu.c
+++ b/src/libtracker-common/tracker-parser-libicu.c
@@ -30,7 +30,10 @@
#include <unicode/ustring.h>
#include <unicode/uchar.h>
#include <unicode/unorm.h>
+#include <unicode/ucol.h>
+#include "tracker-locale.h"
+#include "tracker-debug.h"
#include "tracker-parser.h"
#include "tracker-parser-utils.h"
@@ -41,6 +44,8 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+typedef UCollator TrackerCollator;
+
/* Max possible length of a UChar encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -754,6 +759,74 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+/**
+ * tracker_collation_init:
+ *
+ * Opens an ICU collator for the locale reported by
+ * tracker_locale_get (TRACKER_LOCALE_COLLATE). If that collator cannot
+ * be opened, a warning is logged and the "root" collator is tried
+ * instead.
+ *
+ * Returns: the UCollator as an opaque pointer, or NULL if neither
+ * collator could be opened (a critical is logged in that case).
+ */
+gpointer
+tracker_collation_init (void)
+{
+	UErrorCode icu_status = U_ZERO_ERROR;
+	UCollator *coll;
+	gchar *collate_locale;
+
+	collate_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
+	TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", collate_locale));
+
+	coll = ucol_open (collate_locale, &icu_status);
+
+	if (coll == NULL) {
+		g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s",
+		           collate_locale, u_errorName (icu_status));
+
+		/* Fall back to the UCA ("root") collator. The status still
+		 * holds the previous failure, so reset it first.
+		 */
+		icu_status = U_ZERO_ERROR;
+		coll = ucol_open ("root", &icu_status);
+
+		if (coll == NULL)
+			g_critical ("[ICU collation] UCA Collator cannot be created: %s",
+			            u_errorName (icu_status));
+	}
+
+	g_free (collate_locale);
+
+	return coll;
+}
+
+/**
+ * tracker_collation_shutdown:
+ * @collator: the pointer returned by tracker_collation_init(), or NULL
+ *
+ * Closes the ICU collator. Does nothing when @collator is NULL.
+ */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+	UCollator *coll = collator;
+
+	if (coll == NULL)
+		return;
+
+	ucol_close (coll);
+}
+
+/**
+ * tracker_collation_utf8:
+ * @collator: the ICU collator returned by tracker_collation_init()
+ * @len1: length of @str1, as handed to uiter_setUTF8()
+ * @str1: first UTF-8 string
+ * @len2: length of @str2, as handed to uiter_setUTF8()
+ * @str2: second UTF-8 string
+ *
+ * Compares two UTF-8 strings with ucol_strcollIter(), reading both
+ * through UTF-8 character iterators.
+ *
+ * Returns: 1 if the collator reports UCOL_GREATER, -1 if it reports
+ * UCOL_LESS, 0 otherwise. Also returns -1 (through
+ * g_return_val_if_fail) when @collator is NULL.
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UCharIterator iter1;
+	UCharIterator iter2;
+	UCollationResult result;
+
+	/* Collator must be created before trying to collate */
+	g_return_val_if_fail (collator, -1);
+
+	/* Setup iterators */
+	uiter_setUTF8 (&iter1, str1, len1);
+	uiter_setUTF8 (&iter2, str2, len2);
+
+	result = ucol_strcollIter ((UCollator *)collator,
+	                           &iter1,
+	                           &iter2,
+	                           &status);
+
+	/* ICU warnings are non-zero status codes as well, but they do not
+	 * mean the comparison failed; only report genuine failures.
+	 */
+	if (U_FAILURE (status))
+		g_critical ("Error collating: %s", u_errorName (status));
+
+	if (result == UCOL_GREATER)
+		return 1;
+	if (result == UCOL_LESS)
+		return -1;
+	return 0;
+}
+
gunichar2 *
tracker_parser_tolower (const gunichar2 *input,
gsize len,
diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c
index 04f08a2c6..1477c8526 100644
--- a/src/libtracker-common/tracker-parser-libunistring.c
+++ b/src/libtracker-common/tracker-parser-libunistring.c
@@ -40,6 +40,9 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+/* If the string length is less than this value, allocate it from the stack */
+#define MAX_STACK_STR_SIZE 8192
+
/* Max possible length of a UTF-8 encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -541,6 +544,46 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+/**
+ * tracker_collation_init:
+ *
+ * The libunistring backend creates no collator object.
+ *
+ * Returns: always NULL.
+ */
+gpointer
+tracker_collation_init (void)
+{
+	return NULL;
+}
+
+/**
+ * tracker_collation_shutdown:
+ * @collator: ignored by the libunistring backend
+ *
+ * Counterpart of tracker_collation_init(). That function returns NULL
+ * in this backend, so there is nothing to release here.
+ */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+	/* Intentionally empty */
+}
+
+/**
+ * tracker_collation_utf8:
+ * @collator: unused by the libunistring backend
+ * @len1: number of bytes to read from @str1
+ * @str1: first UTF-8 string, not NUL-terminated
+ * @len2: number of bytes to read from @str2
+ * @str2: second UTF-8 string, not NUL-terminated
+ *
+ * Compares two UTF-8 strings with u8_strcoll(). Both inputs are first
+ * copied into NUL-terminated buffers: on the stack when shorter than
+ * MAX_STACK_STR_SIZE, on the heap otherwise.
+ *
+ * Returns: the value returned by u8_strcoll().
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+	gboolean on_heap1 = (len1 >= MAX_STACK_STR_SIZE);
+	gboolean on_heap2 = (len2 >= MAX_STACK_STR_SIZE);
+	guchar *copy1;
+	guchar *copy2;
+	gint cmp;
+
+	/* str1 and str2 are NOT NUL-terminated, so compare terminated copies */
+	if (on_heap1)
+		copy1 = g_malloc (len1 + 1);
+	else
+		copy1 = g_alloca (len1 + 1);
+
+	if (on_heap2)
+		copy2 = g_malloc (len2 + 1);
+	else
+		copy2 = g_alloca (len2 + 1);
+
+	memcpy (copy1, str1, len1);
+	copy1[len1] = '\0';
+
+	memcpy (copy2, str2, len2);
+	copy2[len2] = '\0';
+
+	cmp = u8_strcoll (copy1, copy2);
+
+	/* Only the heap copies need freeing; the stack ones go away on return */
+	if (on_heap1)
+		g_free (copy1);
+	if (on_heap2)
+		g_free (copy2);
+
+	return cmp;
+}
+
gunichar2 *
tracker_parser_tolower (const gunichar2 *input,
gsize len,
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h
index a6a51fd4e..cdf861989 100644
--- a/src/libtracker-common/tracker-parser.h
+++ b/src/libtracker-common/tracker-parser.h
@@ -34,6 +34,7 @@
G_BEGIN_DECLS
+/* Parser */
typedef struct TrackerParser TrackerParser;
TrackerParser *tracker_parser_new (TrackerLanguage *language);
@@ -57,6 +58,17 @@ const gchar * tracker_parser_next (TrackerParser *parser,
void tracker_parser_free (TrackerParser *parser);
+/* Collation */
+gpointer tracker_collation_init (void);
+
+void tracker_collation_shutdown (gpointer collator);
+
+gint tracker_collation_utf8 (gpointer collator,
+ gint len1,
+ gconstpointer str1,
+ gint len2,
+ gconstpointer str2);
+
/* Other helper methods */
gunichar2 * tracker_parser_tolower (const gunichar2 *input,
diff --git a/src/libtracker-sparql/core/tracker-collation.c b/src/libtracker-sparql/core/tracker-collation.c
index 16ba56bcb..0e82d66dd 100644
--- a/src/libtracker-sparql/core/tracker-collation.c
+++ b/src/libtracker-sparql/core/tracker-collation.c
@@ -18,177 +18,12 @@
*/
#include "config.h"
+
#include <glib.h>
#include <glib/gi18n.h>
-#include <string.h>
-#include <locale.h>
-#include <libtracker-common/tracker-debug.h>
-#include <libtracker-common/tracker-locale.h>
#include "tracker-collation.h"
-/* If defined, will dump additional traces */
-#ifdef G_ENABLE_DEBUG
-#define trace(message, ...) TRACKER_NOTE (COLLATION, g_message (message, ##__VA_ARGS__))
-#else
-#define trace(...)
-#endif
-
-#ifdef HAVE_LIBUNISTRING
-/* libunistring versions prior to 9.1.2 need this hack */
-#define _UNUSED_PARAMETER_
-#include <unistr.h>
-#elif defined(HAVE_LIBICU)
-#include <unicode/ucol.h>
-#include <unicode/utypes.h>
-#endif
-
-/* If string lenth less than this value, allocating from the stack */
-#define MAX_STACK_STR_SIZE 8192
-
-#ifdef HAVE_LIBUNISTRING /* ---- GNU libunistring based collation ---- */
-
-gpointer
-tracker_collation_init (void)
-{
- gchar *locale;
-
- /* Get locale! */
- locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
- TRACKER_NOTE (COLLATION, g_message ("[libunistring collation] Initializing collator for locale '%s'", locale));
- g_free (locale);
- /* Nothing to do */
- return NULL;
-}
-
-void
-tracker_collation_shutdown (gpointer collator)
-{
- /* Nothing to do */
-}
-
-gint
-tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2)
-{
- gint result;
- gchar *aux1;
- gchar *aux2;
-
- /* Note: str1 and str2 are NOT NUL-terminated */
- aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1);
- aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1);
-
- memcpy (aux1, str1, len1); aux1[len1] = '\0';
- memcpy (aux2, str2, len2); aux2[len2] = '\0';
-
- result = u8_strcoll (aux1, aux2);
-
- trace ("(libunistring) Collating '%s' and '%s' (%d)",
- aux1, aux2, result);
-
- if (len1 >= MAX_STACK_STR_SIZE)
- g_free (aux1);
- if (len2 >= MAX_STACK_STR_SIZE)
- g_free (aux2);
- return result;
-}
-
-#elif defined(HAVE_LIBICU) /* ---- ICU based collation (UTF-16) ----*/
-
-gpointer
-tracker_collation_init (void)
-{
- UCollator *collator = NULL;
- UErrorCode status = U_ZERO_ERROR;
- gchar *locale;
-
- /* Get locale! */
- locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
-
- TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale));
- collator = ucol_open (locale, &status);
- if (!collator) {
- g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s",
- locale, u_errorName (status));
- /* Try to get UCA collator then... */
- status = U_ZERO_ERROR;
- collator = ucol_open ("root", &status);
- if (!collator) {
- g_critical ("[ICU collation] UCA Collator cannot be created: %s",
- u_errorName (status));
- }
- }
- g_free (locale);
- return collator;
-}
-
-void
-tracker_collation_shutdown (gpointer collator)
-{
- if (collator)
- ucol_close ((UCollator *)collator);
-}
-
-gint
-tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2)
-{
- UErrorCode status = U_ZERO_ERROR;
- UCharIterator iter1;
- UCharIterator iter2;
- UCollationResult result;
-
- /* Collator must be created before trying to collate */
- g_return_val_if_fail (collator, -1);
-
- /* Setup iterators */
- uiter_setUTF8 (&iter1, str1, len1);
- uiter_setUTF8 (&iter2, str2, len2);
-
- result = ucol_strcollIter ((UCollator *)collator,
- &iter1,
- &iter2,
- &status);
- if (status != U_ZERO_ERROR)
- g_critical ("Error collating: %s", u_errorName (status));
-
-#ifdef ENABLE_TRACE
- {
- gchar *aux1;
- gchar *aux2;
-
- /* Note: str1 and str2 are NOT NUL-terminated */
- aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1);
- aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1);
-
- memcpy (aux1, str1, len1); aux1[len1] = '\0';
- memcpy (aux2, str2, len2); aux2[len2] = '\0';
-
- trace ("(ICU) Collating '%s' and '%s' (%d)",
- aux1, aux2, result);
-
- if (len1 >= MAX_STACK_STR_SIZE)
- g_free (aux1);
- if (len2 >= MAX_STACK_STR_SIZE)
- g_free (aux2);
- }
-#endif /* ENABLE_TRACE */
-
- if (result == UCOL_GREATER)
- return 1;
- if (result == UCOL_LESS)
- return -1;
- return 0;
-}
-#endif
-
static gboolean
skip_non_alphanumeric (const gchar **str,
gint *len)
diff --git a/src/libtracker-sparql/core/tracker-collation.h b/src/libtracker-sparql/core/tracker-collation.h
index 95551a9f0..6369aefba 100644
--- a/src/libtracker-sparql/core/tracker-collation.h
+++ b/src/libtracker-sparql/core/tracker-collation.h
@@ -22,13 +22,7 @@
G_BEGIN_DECLS
-gpointer tracker_collation_init (void);
-void tracker_collation_shutdown (gpointer collator);
-gint tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2);
+#include <libtracker-common/tracker-parser.h>
gint tracker_collation_utf8_title (gpointer collator,
gint len1,
@@ -36,12 +30,7 @@ gint tracker_collation_utf8_title (gpointer collator,
gint len2,
gconstpointer str2);
-#ifdef HAVE_LIBICU
#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x10fffd)
-#else
-/* glibc-based collators do not properly sort private use characters */
-#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x9fa5)
-#endif
G_END_DECLS