summaryrefslogtreecommitdiff
path: root/src/libtracker-common
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2023-03-02 12:14:49 +0100
committerCarlos Garnacho <carlosg@gnome.org>2023-03-02 15:44:59 +0100
commitdcc7f767b3a705728c30c81cb38789262cf62b8c (patch)
treea9e04c01b6b290eeb1ea1bb1aa4e54d89a6a76f4 /src/libtracker-common
parentc8faf02daf0ba23d3ed6046c828e6632b08634c2 (diff)
downloadtracker-dcc7f767b3a705728c30c81cb38789262cf62b8c.tar.gz
core: Move collation handling with other unicode code
Move these implementations together with the libicu/libunistring implementations.
Diffstat (limited to 'src/libtracker-common')
-rw-r--r--src/libtracker-common/tracker-parser-libicu.c73
-rw-r--r--src/libtracker-common/tracker-parser-libunistring.c43
-rw-r--r--src/libtracker-common/tracker-parser.h12
3 files changed, 128 insertions, 0 deletions
diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c
index a3271c003..06ba77da5 100644
--- a/src/libtracker-common/tracker-parser-libicu.c
+++ b/src/libtracker-common/tracker-parser-libicu.c
@@ -30,7 +30,10 @@
#include <unicode/ustring.h>
#include <unicode/uchar.h>
#include <unicode/unorm.h>
+#include <unicode/ucol.h>
+#include "tracker-locale.h"
+#include "tracker-debug.h"
#include "tracker-parser.h"
#include "tracker-parser-utils.h"
@@ -41,6 +44,8 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+typedef UCollator TrackerCollator;
+
/* Max possible length of a UChar encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -754,6 +759,74 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+/*
+ * Opens an ICU collator for the TRACKER_LOCALE_COLLATE locale.
+ *
+ * If no collator can be opened for that locale, a warning is logged and the
+ * "root" collator is tried instead; if that fails as well, a critical
+ * message is logged.
+ *
+ * Returns the UCollator as an opaque pointer, or NULL when both attempts
+ * failed. The handle is released with tracker_collation_shutdown().
+ */
+gpointer
+tracker_collation_init (void)
+{
+	UErrorCode err = U_ZERO_ERROR;
+	UCollator *coll;
+	gchar *collate_locale;
+
+	collate_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
+
+	TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", collate_locale));
+
+	coll = ucol_open (collate_locale, &err);
+
+	if (coll == NULL) {
+		g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s",
+		           collate_locale, u_errorName (err));
+
+		/* Second attempt: the "root" collator. The status must be
+		 * reset first, it still holds the previous failure. */
+		err = U_ZERO_ERROR;
+		coll = ucol_open ("root", &err);
+
+		if (coll == NULL) {
+			g_critical ("[ICU collation] UCA Collator cannot be created: %s",
+			            u_errorName (err));
+		}
+	}
+
+	/* The locale string is owned by this function. */
+	g_free (collate_locale);
+
+	return coll;
+}
+
+/*
+ * Closes a collator obtained from tracker_collation_init().
+ * A NULL collator is accepted and ignored.
+ */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+	UCollator *coll = collator;
+
+	if (coll == NULL)
+		return;
+
+	ucol_close (coll);
+}
+
+/*
+ * Compares two UTF-8 strings with the given ICU collator.
+ *
+ * collator: handle returned by tracker_collation_init(); must not be NULL.
+ * len1/str1: length and data of the first string, handed to uiter_setUTF8().
+ * len2/str2: length and data of the second string, handed to uiter_setUTF8().
+ *
+ * Returns 1 if str1 sorts after str2, -1 if it sorts before, 0 otherwise.
+ * A NULL collator fails the precondition check and returns -1. An ICU
+ * failure is logged as critical and the result ICU produced is still mapped
+ * and returned.
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UCharIterator iter1;
+	UCharIterator iter2;
+	UCollationResult result;
+
+	/* Collator must be created before trying to collate */
+	g_return_val_if_fail (collator, -1);
+
+	/* The iterators walk the UTF-8 input in place, no conversion buffer
+	 * is allocated here. */
+	uiter_setUTF8 (&iter1, str1, len1);
+	uiter_setUTF8 (&iter2, str2, len2);
+
+	result = ucol_strcollIter ((UCollator *) collator,
+	                           &iter1,
+	                           &iter2,
+	                           &status);
+
+	/* Use U_FAILURE() instead of comparing against U_ZERO_ERROR: ICU
+	 * warning codes are non-zero too, but they do not mean the comparison
+	 * failed and must not be reported as critical errors. */
+	if (U_FAILURE (status))
+		g_critical ("Error collating: %s", u_errorName (status));
+
+	switch (result) {
+	case UCOL_GREATER:
+		return 1;
+	case UCOL_LESS:
+		return -1;
+	default:
+		return 0;
+	}
+}
+
gunichar2 *
tracker_parser_tolower (const gunichar2 *input,
gsize len,
diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c
index 04f08a2c6..1477c8526 100644
--- a/src/libtracker-common/tracker-parser-libunistring.c
+++ b/src/libtracker-common/tracker-parser-libunistring.c
@@ -40,6 +40,9 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+/* Strings shorter than this many bytes are copied onto the stack; longer ones are heap-allocated */
+#define MAX_STACK_STR_SIZE 8192
+
/* Max possible length of a UTF-8 encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -541,6 +544,46 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+gpointer
+tracker_collation_init (void)
+{
+ /* Nothing to do: this backend has no collator object to create.
+  * tracker_collation_utf8() in this file never reads its collator
+  * argument, so NULL is returned as the handle. */
+ return NULL;
+}
+
+void
+tracker_collation_shutdown (gpointer collator)
+{
+ /* Nothing to do: tracker_collation_init() in this file always returns
+  * NULL, so there is no collator state to release. */
+}
+
+/*
+ * Compares two UTF-8 byte strings with u8_strcoll().
+ *
+ * str1 and str2 are NOT NUL-terminated, so each one is first copied into a
+ * scratch buffer with a terminator appended. A copy of a string shorter than
+ * MAX_STACK_STR_SIZE bytes is taken with g_alloca(); anything longer is
+ * taken with g_malloc() and freed again before returning.
+ *
+ * The collator argument is not used by this backend.
+ *
+ * Returns the value of u8_strcoll() unchanged.
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+	const gboolean on_heap1 = (len1 >= MAX_STACK_STR_SIZE);
+	const gboolean on_heap2 = (len2 >= MAX_STACK_STR_SIZE);
+	guchar *copy1;
+	guchar *copy2;
+	gint order;
+
+	/* g_alloca() has to be called from this function directly: the
+	 * memory it hands out is only valid until the caller's frame ends. */
+	if (on_heap1)
+		copy1 = g_malloc (len1 + 1);
+	else
+		copy1 = g_alloca (len1 + 1);
+
+	if (on_heap2)
+		copy2 = g_malloc (len2 + 1);
+	else
+		copy2 = g_alloca (len2 + 1);
+
+	memcpy (copy1, str1, len1);
+	copy1[len1] = '\0';
+
+	memcpy (copy2, str2, len2);
+	copy2[len2] = '\0';
+
+	order = u8_strcoll (copy1, copy2);
+
+	/* Only the heap copies need an explicit release. */
+	if (on_heap1)
+		g_free (copy1);
+	if (on_heap2)
+		g_free (copy2);
+
+	return order;
+}
+
gunichar2 *
tracker_parser_tolower (const gunichar2 *input,
gsize len,
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h
index a6a51fd4e..cdf861989 100644
--- a/src/libtracker-common/tracker-parser.h
+++ b/src/libtracker-common/tracker-parser.h
@@ -34,6 +34,7 @@
G_BEGIN_DECLS
+/* Parser */
typedef struct TrackerParser TrackerParser;
TrackerParser *tracker_parser_new (TrackerLanguage *language);
@@ -57,6 +58,17 @@ const gchar * tracker_parser_next (TrackerParser *parser,
void tracker_parser_free (TrackerParser *parser);
+/* Collation
+ *
+ * tracker_collation_init() returns an opaque collator handle, which may be
+ * NULL: the libunistring backend always returns NULL, the libicu backend
+ * returns NULL only when no ICU collator could be opened.
+ *
+ * tracker_collation_shutdown() releases a handle obtained from
+ * tracker_collation_init(); both backends accept NULL.
+ *
+ * tracker_collation_utf8() compares the UTF-8 string str1 (length len1)
+ * with str2 (length len2). The strings are not expected to be
+ * NUL-terminated. The libicu backend returns -1, 0 or 1 and requires a
+ * non-NULL collator (it returns -1 otherwise); the libunistring backend
+ * ignores the collator and returns the result of u8_strcoll() directly.
+ */
+gpointer tracker_collation_init (void);
+
+void tracker_collation_shutdown (gpointer collator);
+
+gint tracker_collation_utf8 (gpointer collator,
+ gint len1,
+ gconstpointer str1,
+ gint len2,
+ gconstpointer str2);
+
/* Other helper methods */
gunichar2 * tracker_parser_tolower (const gunichar2 *input,