summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSam Thursfield <sam@afuera.me.uk>2023-03-03 16:20:06 +0000
committerSam Thursfield <sam@afuera.me.uk>2023-03-03 16:20:06 +0000
commit5af526f8ba238075a7c6f8ca5628d27531b0537e (patch)
treed0b12977d5b878357309755a57b687a3224c435b /src
parenta9d83e783717e9b2d35b91a4f9955cce5762254c (diff)
parent65749008603efe9c7182f579d8e8d0e62c238670 (diff)
downloadtracker-5af526f8ba238075a7c6f8ca5628d27531b0537e.tar.gz
Merge branch 'wip/carlosg/modular-unicode-lib' into 'master'
Make unicode library a module Closes #396 See merge request https://gitlab.gnome.org/GNOME/tracker/-/merge_requests/581
Diffstat (limited to 'src')
-rw-r--r--src/libtracker-common/meson.build45
-rw-r--r--src/libtracker-common/tracker-common.h2
-rw-r--r--src/libtracker-common/tracker-locale.c105
-rw-r--r--src/libtracker-common/tracker-locale.h50
-rw-r--r--src/libtracker-common/tracker-parser-libicu.c264
-rw-r--r--src/libtracker-common/tracker-parser-libunistring.c138
-rw-r--r--src/libtracker-common/tracker-parser-utils.h4
-rw-r--r--src/libtracker-common/tracker-parser.c255
-rw-r--r--src/libtracker-common/tracker-parser.h36
-rw-r--r--src/libtracker-sparql/core/tracker-collation.c219
-rw-r--r--src/libtracker-sparql/core/tracker-collation.h13
-rw-r--r--src/libtracker-sparql/core/tracker-data-manager.c1
-rw-r--r--src/libtracker-sparql/core/tracker-db-interface-sqlite.c381
-rw-r--r--src/libtracker-sparql/core/tracker-db-manager.c11
-rw-r--r--src/libtracker-sparql/core/tracker-fts-tokenizer.c5
-rw-r--r--src/libtracker-sparql/direct/tracker-direct.c2
16 files changed, 763 insertions, 768 deletions
diff --git a/src/libtracker-common/meson.build b/src/libtracker-common/meson.build
index 17bdd533a..96124437b 100644
--- a/src/libtracker-common/meson.build
+++ b/src/libtracker-common/meson.build
@@ -6,18 +6,42 @@ tracker_common_sources = [
'tracker-file-utils.c',
'tracker-term-utils.c',
'tracker-utils.c',
- 'tracker-locale.c',
- 'tracker-parser-utils.c',
- 'tracker-language.c',
+ 'tracker-parser.c',
]
if unicode_library_name == 'icu'
- tracker_common_sources += 'tracker-parser-libicu.c'
+ libtracker_parser_libicu = shared_module('tracker-parser-libicu',
+ 'tracker-parser-libicu.c',
+ 'tracker-parser-utils.c',
+ 'tracker-language.c',
+ dependencies: [gobject,libstemmer, icu_uc, icu_i18n],
+ c_args: tracker_c_args + [
+ '-include', join_paths(build_root, 'config.h'),
+ '-DMODULE',
+ ],
+ include_directories: [configinc, srcinc],
+ install: true,
+ install_dir: tracker_internal_libs_dir,
+ name_suffix: 'so',
+ )
else
- tracker_common_sources += 'tracker-parser-libunistring.c'
+ libtracker_parser_libunistring = shared_module('tracker-parser-libunistring',
+ 'tracker-parser-libunistring.c',
+ 'tracker-parser-utils.c',
+ 'tracker-language.c',
+ dependencies: [gobject,libstemmer, libunistring],
+ c_args: tracker_c_args + [
+ '-include', join_paths(build_root, 'config.h'),
+ '-DMODULE',
+ ],
+ include_directories: [configinc, srcinc],
+ install: true,
+ install_dir: tracker_internal_libs_dir,
+ name_suffix: 'so',
+ )
endif
-tracker_common_dependencies = [glib, gio, gio_unix, libmath, libstemmer]
+tracker_common_dependencies = [glib, gio, gio_unix, libmath]
if build_machine.system() == 'openbsd'
libkvm = meson.get_compiler('c').find_library('kvm')
@@ -26,8 +50,11 @@ endif
libtracker_common = static_library('tracker-common',
tracker_common_sources,
- dependencies: tracker_common_dependencies + [unicode_library],
- c_args: tracker_c_args,
+ dependencies: [tracker_common_dependencies, gmodule],
+ c_args: [
+ '-DPRIVATE_LIBDIR="@0@"'.format(tracker_internal_libs_dir),
+ '-DBUILDROOT="@0@"'.format(meson.build_root()),
+ ] + tracker_c_args,
include_directories: [configinc, srcinc],
gnu_symbol_visibility: 'hidden',
)
@@ -36,6 +63,6 @@ commoninc = include_directories('.')
tracker_common_dep = declare_dependency(
link_with: libtracker_common,
- dependencies: tracker_common_dependencies + [unicode_library],
+ dependencies: [tracker_common_dependencies, gmodule],
include_directories: [configinc, srcinc, commoninc],
)
diff --git a/src/libtracker-common/tracker-common.h b/src/libtracker-common/tracker-common.h
index e572b6ebc..d04e2b083 100644
--- a/src/libtracker-common/tracker-common.h
+++ b/src/libtracker-common/tracker-common.h
@@ -31,11 +31,9 @@
#include "tracker-date-time.h"
#include "tracker-debug.h"
#include "tracker-file-utils.h"
-#include "tracker-language.h"
#include "tracker-parser.h"
#include "tracker-term-utils.h"
#include "tracker-utils.h"
-#include "tracker-locale.h"
#undef __LIBTRACKER_COMMON_INSIDE__
diff --git a/src/libtracker-common/tracker-locale.c b/src/libtracker-common/tracker-locale.c
deleted file mode 100644
index 816bb8d69..000000000
--- a/src/libtracker-common/tracker-locale.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2010 Nokia <ivan.frade@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-
-#include <locale.h>
-#include <string.h>
-
-#include <glib.h>
-
-#include "tracker-locale.h"
-
-static const gchar *locale_names[] = {
- [TRACKER_LOCALE_LANGUAGE] = "LANG",
- [TRACKER_LOCALE_TIME] = "LC_TIME",
- [TRACKER_LOCALE_COLLATE] = "LC_COLLATE",
- [TRACKER_LOCALE_NUMERIC] = "LC_NUMERIC",
- [TRACKER_LOCALE_MONETARY] = "LC_MONETARY"
-};
-
-static GRecMutex locales_mutex;
-
-static const gchar *
-tracker_locale_get_unlocked (TrackerLocaleID id)
-{
- const gchar *env_locale = NULL;
-
- switch (id) {
- case TRACKER_LOCALE_LANGUAGE:
- env_locale = g_getenv ("LANG");
- break;
- case TRACKER_LOCALE_TIME:
- env_locale = setlocale (LC_TIME, NULL);
- break;
- case TRACKER_LOCALE_COLLATE:
- env_locale = setlocale (LC_COLLATE, NULL);
- break;
- case TRACKER_LOCALE_NUMERIC:
- env_locale = setlocale (LC_NUMERIC, NULL);
- break;
- case TRACKER_LOCALE_MONETARY:
- env_locale = setlocale (LC_MONETARY, NULL);
- break;
- default:
- g_assert_not_reached ();
- break;
- }
-
- return env_locale;
-}
-
-void
-tracker_locale_sanity_check (void)
-{
- guint i;
-
- g_rec_mutex_lock (&locales_mutex);
-
- for (i = 0; i < TRACKER_LOCALE_LAST; i++) {
- const gchar *env_locale = NULL;
-
- env_locale = tracker_locale_get_unlocked (i);
-
- if (!env_locale) {
- g_warning ("Locale '%s' is not set, defaulting to C locale", locale_names[i]);
- }
- }
-
- g_rec_mutex_unlock (&locales_mutex);
-}
-
-gchar *
-tracker_locale_get (TrackerLocaleID id)
-{
- const gchar *env_locale = NULL;
- gchar *locale;
-
- g_rec_mutex_lock (&locales_mutex);
-
- env_locale = tracker_locale_get_unlocked (id);
-
- /* Always return a duplicated string, as the locale may change at any
- * moment */
- locale = g_strdup (env_locale);
-
- g_rec_mutex_unlock (&locales_mutex);
-
- return locale;
-}
diff --git a/src/libtracker-common/tracker-locale.h b/src/libtracker-common/tracker-locale.h
deleted file mode 100644
index 32547d13d..000000000
--- a/src/libtracker-common/tracker-locale.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2010 Nokia <ivan.frade@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef __LIBTRACKER_COMMON_LOCALE_H__
-#define __LIBTRACKER_COMMON_LOCALE_H__
-
-#include <glib.h>
-
-G_BEGIN_DECLS
-
-#if !defined (__LIBTRACKER_COMMON_INSIDE__) && !defined (TRACKER_COMPILATION)
-#error "only <libtracker-common/tracker-common.h> must be included directly."
-#endif
-
-/* Type of locales supported in tracker */
-typedef enum {
- TRACKER_LOCALE_LANGUAGE,
- TRACKER_LOCALE_TIME,
- TRACKER_LOCALE_COLLATE,
- TRACKER_LOCALE_NUMERIC,
- TRACKER_LOCALE_MONETARY,
- TRACKER_LOCALE_LAST
-} TrackerLocaleID;
-
-void tracker_locale_sanity_check (void);
-
-/* Get the current locale of the given type.
- * Note that it returns a newly-allocated string which should be g_free()-ed
- */
-gchar *tracker_locale_get (TrackerLocaleID id);
-
-G_END_DECLS
-
-#endif /* __LIBTRACKER_COMMON_LOCALE_H__ */
diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c
index 8c4803206..8795af7cf 100644
--- a/src/libtracker-common/tracker-parser-libicu.c
+++ b/src/libtracker-common/tracker-parser-libicu.c
@@ -30,7 +30,10 @@
#include <unicode/ustring.h>
#include <unicode/uchar.h>
#include <unicode/unorm.h>
+#include <unicode/ucol.h>
+#include "tracker-language.h"
+#include "tracker-debug.h"
#include "tracker-parser.h"
#include "tracker-parser-utils.h"
@@ -41,6 +44,8 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+typedef UCollator TrackerCollator;
+
/* Max possible length of a UChar encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -144,7 +149,7 @@ get_word_info (const UChar *word,
/* The input word in this method MUST be normalized in NFKD form,
* and given in UChars, where str_length is the number of UChars
* (not the number of bytes) */
-gboolean
+static gboolean
tracker_parser_unaccent_nfkd_string (gpointer str,
gsize *str_length)
{
@@ -571,15 +576,12 @@ parser_next (TrackerParser *parser,
}
TrackerParser *
-tracker_parser_new (TrackerLanguage *language)
+tracker_parser_new (void)
{
TrackerParser *parser;
- g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-
parser = g_new0 (TrackerParser, 1);
-
- parser->language = g_object_ref (language);
+ parser->language = tracker_language_new (NULL);
return parser;
}
@@ -754,3 +756,253 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+/*
+ * Create the ICU collator used for locale-aware string comparison.
+ *
+ * The locale name is read from the current LC_COLLATE setting. If ICU
+ * cannot open a collator for it, a warning is logged and the "root" (UCA)
+ * collator is tried instead.
+ *
+ * Returns: an opaque UCollator pointer, or NULL if both attempts failed.
+ */
+gpointer
+tracker_collation_init (void)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    const gchar *collate_locale = setlocale (LC_COLLATE, NULL);
+    UCollator *coll = ucol_open (collate_locale, &err);
+
+    if (coll != NULL)
+        return coll;
+
+    g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s",
+               collate_locale, u_errorName (err));
+
+    /* Second chance: the locale-independent UCA collator */
+    err = U_ZERO_ERROR;
+    coll = ucol_open ("root", &err);
+    if (coll == NULL) {
+        g_critical ("[ICU collation] UCA Collator cannot be created: %s",
+                    u_errorName (err));
+    }
+
+    return coll;
+}
+
+/* Close an ICU collator; a NULL @collator is silently ignored. */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+    UCollator *coll = collator;
+
+    if (coll == NULL)
+        return;
+
+    ucol_close (coll);
+}
+
+/*
+ * Compare two UTF-8 strings with an ICU collator.
+ *
+ * @collator: UCollator pointer; must not be NULL.
+ * @len1, @str1: length and data of the first string, handed to uiter_setUTF8().
+ * @len2, @str2: length and data of the second string, handed to uiter_setUTF8().
+ *
+ * Returns: 1 if @str1 sorts after @str2, -1 if it sorts before (also the
+ * value returned when @collator is NULL), 0 otherwise.
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCharIterator iter1;
+    UCharIterator iter2;
+    UCollationResult result;
+
+    /* Collator must be created before trying to collate */
+    g_return_val_if_fail (collator, -1);
+
+    /* Setup iterators */
+    uiter_setUTF8 (&iter1, str1, len1);
+    uiter_setUTF8 (&iter2, str2, len2);
+
+    result = ucol_strcollIter ((UCollator *) collator,
+                               &iter1,
+                               &iter2,
+                               &status);
+
+    /* ICU warning codes are not failures; only report real errors */
+    if (U_FAILURE (status))
+        g_critical ("Error collating: %s", u_errorName (status));
+
+    if (result == UCOL_GREATER)
+        return 1;
+    if (result == UCOL_LESS)
+        return -1;
+    return 0;
+}
+
+/*
+ * Lowercase a UTF-16 string with ICU.
+ *
+ * @input: UTF-16 text; must not be NULL.
+ * @len: size of @input in bytes (it is halved before being handed to ICU).
+ * @len_out: receives the size stored in nOutput, see the note below.
+ *
+ * Returns: a malloc()ed buffer, or NULL if @input is NULL or the allocation
+ * fails. If ICU reports an error, a NUL-terminated plain copy of @input is
+ * returned instead.
+ *
+ * NOTE(review): on success @len_out is the allocated capacity in bytes, not
+ * the converted length; left unchanged because callers are not visible here.
+ */
+gunichar2 *
+tracker_parser_tolower (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    UChar *zOutput;
+    int nOutput;
+    UErrorCode status = U_ZERO_ERROR;
+
+    g_return_val_if_fail (input, NULL);
+
+    nOutput = len * 2 + 2;
+    zOutput = malloc (nOutput);
+    if (!zOutput) {
+        *len_out = 0;
+        return NULL;
+    }
+
+    u_strToLower (zOutput, nOutput / 2,
+                  input, len / 2,
+                  NULL, &status);
+
+    if (!U_SUCCESS (status)) {
+        memcpy (zOutput, input, len);
+        /* zOutput is indexed in UChars, so the terminator belongs
+         * after len / 2 code units rather than at index len */
+        zOutput[len / 2] = '\0';
+        nOutput = len;
+    }
+
+    *len_out = nOutput;
+
+    return zOutput;
+}
+
+/*
+ * Uppercase a UTF-16 string with ICU.
+ *
+ * @input: UTF-16 text; must not be NULL.
+ * @len: size of @input in bytes (it is halved before being handed to ICU).
+ * @len_out: receives the size stored in nOutput, see the note below.
+ *
+ * Returns: a malloc()ed buffer, or NULL if @input is NULL or the allocation
+ * fails. If ICU reports an error, a NUL-terminated plain copy of @input is
+ * returned instead.
+ *
+ * NOTE(review): on success @len_out is the allocated capacity in bytes, not
+ * the converted length; left unchanged because callers are not visible here.
+ */
+gunichar2 *
+tracker_parser_toupper (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    UChar *zOutput;
+    int nOutput;
+    UErrorCode status = U_ZERO_ERROR;
+
+    g_return_val_if_fail (input, NULL);
+
+    nOutput = len * 2 + 2;
+    zOutput = malloc (nOutput);
+    if (!zOutput) {
+        *len_out = 0;
+        return NULL;
+    }
+
+    u_strToUpper (zOutput, nOutput / 2,
+                  input, len / 2,
+                  NULL, &status);
+
+    if (!U_SUCCESS (status)) {
+        memcpy (zOutput, input, len);
+        /* zOutput is indexed in UChars, so the terminator belongs
+         * after len / 2 code units rather than at index len */
+        zOutput[len / 2] = '\0';
+        nOutput = len;
+    }
+
+    *len_out = nOutput;
+
+    return zOutput;
+}
+
+/*
+ * Case-fold a UTF-16 string with ICU (U_FOLD_CASE_DEFAULT).
+ *
+ * @input: UTF-16 text; must not be NULL.
+ * @len: size of @input in bytes (it is halved before being handed to ICU).
+ * @len_out: receives the size stored in nOutput, see the note below.
+ *
+ * Returns: a malloc()ed buffer, or NULL if @input is NULL or the allocation
+ * fails. If ICU reports an error, a NUL-terminated plain copy of @input is
+ * returned instead.
+ *
+ * NOTE(review): on success @len_out is the allocated capacity in bytes, not
+ * the converted length; left unchanged because callers are not visible here.
+ */
+gunichar2 *
+tracker_parser_casefold (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    UChar *zOutput;
+    int nOutput;
+    UErrorCode status = U_ZERO_ERROR;
+
+    g_return_val_if_fail (input, NULL);
+
+    nOutput = len * 2 + 2;
+    zOutput = malloc (nOutput);
+    if (!zOutput) {
+        *len_out = 0;
+        return NULL;
+    }
+
+    u_strFoldCase (zOutput, nOutput / 2,
+                   input, len / 2,
+                   U_FOLD_CASE_DEFAULT, &status);
+
+    if (!U_SUCCESS (status)) {
+        memcpy (zOutput, input, len);
+        /* zOutput is indexed in UChars, so the terminator belongs
+         * after len / 2 code units rather than at index len */
+        zOutput[len / 2] = '\0';
+        nOutput = len;
+    }
+
+    *len_out = nOutput;
+
+    return zOutput;
+}
+
+/*
+ * Run @normalizer over a UTF-16 string, retrying once with a larger buffer
+ * when ICU reports U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @string, @string_len: input text and its length in gunichar2s.
+ * @len_out: optional; receives the value returned by unorm2_normalize(),
+ *   or 0 on failure.
+ * @status: ICU error code, updated in place.
+ *
+ * Returns: a buffer allocated with the GLib allocator, or NULL when
+ * @status ends up as a failure.
+ */
+static gunichar2 *
+normalize_string (const gunichar2    *string,
+                  gsize               string_len, /* In gunichar2s */
+                  const UNormalizer2 *normalizer,
+                  gsize              *len_out,    /* In gunichar2s */
+                  UErrorCode         *status)
+{
+    int capacity = (string_len * 2) + 1;
+    gunichar2 *buffer = g_new0 (gunichar2, capacity);
+    int produced;
+
+    produced = unorm2_normalize (normalizer, string, string_len, buffer, capacity, status);
+
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        /* ICU returned the size it needs; retry with exactly that much */
+        *status = U_ZERO_ERROR;
+        capacity = produced;
+        buffer = g_renew (gunichar2, buffer, capacity);
+        memset (buffer, 0, capacity * sizeof (gunichar2));
+        produced = unorm2_normalize (normalizer, string, string_len, buffer, capacity, status);
+    }
+
+    if (U_FAILURE (*status)) {
+        g_free (buffer);
+        buffer = NULL;
+        produced = 0;
+    }
+
+    if (len_out != NULL)
+        *len_out = produced;
+
+    return buffer;
+}
+
+/*
+ * Normalize a UTF-16 string to the Unicode form selected by @mode.
+ *
+ * @input: UTF-16 text.
+ * @mode: one of G_NORMALIZE_NFC, _NFD, _NFKC or _NFKD; anything else trips
+ *   g_assert_not_reached().
+ * @len: size of @input in bytes (halved before normalization).
+ * @len_out: receives the length of the result in gunichar2s.
+ *
+ * Returns: a newly allocated buffer. If ICU fails, a plain copy of @input
+ * is returned instead.
+ */
+gunichar2 *
+tracker_parser_normalize (const gunichar2 *input,
+                          GNormalizeMode   mode,
+                          gsize            len,
+                          gsize           *len_out)
+{
+    uint16_t *zOutput = NULL;
+    gsize nOutput;
+    const UNormalizer2 *normalizer;
+    UErrorCode status = U_ZERO_ERROR;
+
+    if (mode == G_NORMALIZE_NFC)
+        normalizer = unorm2_getNFCInstance (&status);
+    else if (mode == G_NORMALIZE_NFD)
+        normalizer = unorm2_getNFDInstance (&status);
+    else if (mode == G_NORMALIZE_NFKC)
+        normalizer = unorm2_getNFKCInstance (&status);
+    else if (mode == G_NORMALIZE_NFKD)
+        normalizer = unorm2_getNFKDInstance (&status);
+    else
+        g_assert_not_reached ();
+
+    if (U_SUCCESS (status)) {
+        zOutput = normalize_string (input, len / 2,
+                                    normalizer,
+                                    &nOutput, &status);
+    }
+
+    if (!U_SUCCESS (status)) {
+        zOutput = g_memdup2 (input, len);
+        /* Report the copy in gunichar2s, the same unit the
+         * success path uses; len itself is a byte count */
+        nOutput = len / 2;
+    }
+
+    *len_out = nOutput;
+
+    return zOutput;
+}
+
+/*
+ * NFKD-normalize a UTF-16 string and strip accents from the result.
+ *
+ * @input: UTF-16 text.
+ * @len: size of @input in bytes (halved before normalization).
+ * @len_out: receives the length left after unaccenting, in UChars.
+ *
+ * Returns: a newly allocated buffer. If normalization fails, unaccenting is
+ * applied to a plain copy of @input instead.
+ */
+gunichar2 *
+tracker_parser_unaccent (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    uint16_t *zOutput = NULL;
+    gsize nOutput = 0;
+    const UNormalizer2 *normalizer;
+    UErrorCode status = U_ZERO_ERROR;
+
+    normalizer = unorm2_getNFKDInstance (&status);
+
+    if (U_SUCCESS (status)) {
+        zOutput = normalize_string (input, len / 2,
+                                    normalizer,
+                                    &nOutput, &status);
+    }
+
+    if (!U_SUCCESS (status)) {
+        zOutput = g_memdup2 (input, len);
+        /* The unaccent helper takes a count of UChars; without this the
+         * length would be left unset (or 0) on the fallback path */
+        nOutput = len / 2;
+    }
+
+    /* Unaccenting is done in place */
+    tracker_parser_unaccent_nfkd_string (zOutput, &nOutput);
+
+    *len_out = nOutput;
+
+    return zOutput;
+}
diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c
index d24c5f1cb..b26b4bae5 100644
--- a/src/libtracker-common/tracker-parser-libunistring.c
+++ b/src/libtracker-common/tracker-parser-libunistring.c
@@ -30,6 +30,7 @@
#include <unictype.h>
#include <unicase.h>
+#include "tracker-language.h"
#include "tracker-parser.h"
#include "tracker-parser-utils.h"
@@ -40,6 +41,9 @@ typedef enum {
TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC,
} TrackerParserWordType;
+/* If the string length is less than this value, allocate from the stack */
+#define MAX_STACK_STR_SIZE 8192
+
/* Max possible length of a UTF-8 encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
@@ -84,7 +88,7 @@ get_word_info (TrackerParser *parser,
/* Get first character of the word as UCS4 */
first_unichar_len = u8_strmbtouc (&first_unichar,
- &(parser->txt[parser->cursor]));
+ (const guchar *) &(parser->txt[parser->cursor]));
if (first_unichar_len <= 0) {
/* This should only happen if NIL was passed to u8_strmbtouc,
* so better just force stop here */
@@ -106,7 +110,7 @@ get_word_info (TrackerParser *parser,
i = parser->cursor + first_unichar_len;
while (1) {
/* Text bounds reached? */
- if (i >= parser->txt_size)
+ if (i >= (gsize) parser->txt_size)
break;
/* Proper unicode word break detected? */
if (parser->word_break_flags[i])
@@ -159,7 +163,7 @@ get_word_info (TrackerParser *parser,
/* The input word in this method MUST be normalized in NFKD form,
* and given in UTF-8, where str_length is the byte-length
* (note: there is no trailing NUL character!) */
-gboolean
+static gboolean
tracker_parser_unaccent_nfkd_string (gpointer str,
gsize *str_length)
{
@@ -181,7 +185,7 @@ tracker_parser_unaccent_nfkd_string (gpointer str,
gint utf8_len;
/* Get next character of the word as UCS4 */
- utf8_len = u8_strmbtouc (&unichar, &word[i]);
+ utf8_len = u8_strmbtouc (&unichar, (const guchar *) &word[i]);
/* Invalid UTF-8 character or end of original string. */
if (utf8_len <= 0) {
@@ -249,12 +253,12 @@ process_word_utf8 (TrackerParser *parser,
/* Casefold and NFKD normalization in output.
* NOTE: if the output buffer is not big enough, u8_casefold will
* return a newly-allocated buffer. */
- normalized = u8_casefold ((const uint8_t *)word,
- length,
- uc_locale_language (),
- UNINORM_NFKD,
- word_buffer,
- &new_word_length);
+ normalized = (gchar*) u8_casefold ((const uint8_t *)word,
+ length,
+ uc_locale_language (),
+ UNINORM_NFKD,
+ (guchar *) word_buffer,
+ &new_word_length);
/* Case folding + Normalization failed, ignore this word */
g_return_val_if_fail (normalized != NULL, NULL);
@@ -275,7 +279,7 @@ process_word_utf8 (TrackerParser *parser,
normalized = length > WORD_BUFFER_LENGTH ? g_malloc (length + 1) : word_buffer;
- for (i = 0; i < length; i++) {
+ for (i = 0; i < (gsize) length; i++) {
normalized[i] = g_ascii_tolower (word[i]);
}
@@ -345,7 +349,7 @@ parser_next (TrackerParser *parser,
/* Loop to look for next valid word */
while (!processed_word &&
- parser->cursor < parser->txt_size) {
+ parser->cursor < (gsize) parser->txt_size) {
TrackerParserWordType type;
gsize truncated_length;
gboolean is_allowed;
@@ -424,15 +428,12 @@ parser_next (TrackerParser *parser,
}
TrackerParser *
-tracker_parser_new (TrackerLanguage *language)
+tracker_parser_new (void)
{
TrackerParser *parser;
- g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-
parser = g_new0 (TrackerParser, 1);
-
- parser->language = g_object_ref (language);
+ parser->language = tracker_language_new (NULL);
return parser;
}
@@ -541,3 +542,106 @@ tracker_parser_next (TrackerParser *parser,
return str;
}
+/* The libunistring backend creates no collator object; always returns NULL. */
+gpointer
+tracker_collation_init (void)
+{
+    gpointer no_collator = NULL;
+
+    return no_collator;
+}
+
+/* No-op: the libunistring backend has nothing to release. */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+    (void) collator;
+}
+
+/*
+ * Compare two UTF-8 strings with u8_strcoll().
+ *
+ * The inputs are not NUL-terminated, so each one is first copied into a
+ * terminated scratch buffer: on the stack when shorter than
+ * MAX_STACK_STR_SIZE, on the heap otherwise. @collator is not used.
+ *
+ * Returns: the value returned by u8_strcoll().
+ */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+    gboolean on_heap1 = (len1 >= MAX_STACK_STR_SIZE);
+    gboolean on_heap2 = (len2 >= MAX_STACK_STR_SIZE);
+    guchar *copy1;
+    guchar *copy2;
+    gint order;
+
+    if (on_heap1)
+        copy1 = g_malloc (len1 + 1);
+    else
+        copy1 = g_alloca (len1 + 1);
+
+    if (on_heap2)
+        copy2 = g_malloc (len2 + 1);
+    else
+        copy2 = g_alloca (len2 + 1);
+
+    memcpy (copy1, str1, len1);
+    copy1[len1] = '\0';
+    memcpy (copy2, str2, len2);
+    copy2[len2] = '\0';
+
+    order = u8_strcoll (copy1, copy2);
+
+    /* Only heap copies need releasing */
+    if (on_heap1)
+        g_free (copy1);
+    if (on_heap2)
+        g_free (copy2);
+
+    return order;
+}
+
+/*
+ * Lowercase a UTF-16 string with u16_tolower().
+ * @len is halved to obtain the unit count passed to libunistring;
+ * @len_out is filled in by u16_tolower().
+ */
+gunichar2 *
+tracker_parser_tolower (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    gsize n_units = len / 2;
+    gunichar2 *lowered;
+
+    lowered = u16_tolower (input, n_units, NULL, NULL, NULL, len_out);
+
+    return lowered;
+}
+
+/*
+ * Uppercase a UTF-16 string with u16_toupper().
+ * @len is halved to obtain the unit count passed to libunistring;
+ * @len_out is filled in by u16_toupper().
+ */
+gunichar2 *
+tracker_parser_toupper (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    gsize n_units = len / 2;
+    gunichar2 *uppered;
+
+    uppered = u16_toupper (input, n_units, NULL, NULL, NULL, len_out);
+
+    return uppered;
+}
+
+/*
+ * Case-fold a UTF-16 string with u16_casefold().
+ * @len is halved to obtain the unit count passed to libunistring;
+ * @len_out is filled in by u16_casefold().
+ */
+gunichar2 *
+tracker_parser_casefold (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    gsize n_units = len / 2;
+    gunichar2 *folded;
+
+    folded = u16_casefold (input, n_units, NULL, NULL, NULL, len_out);
+
+    return folded;
+}
+
+/*
+ * Normalize a UTF-16 string with u16_normalize().
+ *
+ * @mode is mapped from GNormalizeMode onto the matching UNINORM_* form;
+ * any other value trips g_assert_not_reached(). @len is halved to obtain
+ * the unit count, and @len_out is filled in by u16_normalize().
+ */
+gunichar2 *
+tracker_parser_normalize (const gunichar2 *input,
+                          GNormalizeMode   mode,
+                          gsize            len,
+                          gsize           *len_out)
+{
+    uninorm_t nf;
+
+    switch (mode) {
+    case G_NORMALIZE_NFC:
+        nf = UNINORM_NFC;
+        break;
+    case G_NORMALIZE_NFD:
+        nf = UNINORM_NFD;
+        break;
+    case G_NORMALIZE_NFKC:
+        nf = UNINORM_NFKC;
+        break;
+    case G_NORMALIZE_NFKD:
+        nf = UNINORM_NFKD;
+        break;
+    default:
+        g_assert_not_reached ();
+        break;
+    }
+
+    return u16_normalize (nf, input, len / 2, NULL, len_out);
+}
+
+/*
+ * NFKD-normalize a UTF-16 string and strip accents from the result.
+ *
+ * @input: UTF-16 text.
+ * @len: size of @input in bytes; halved to get the unit count handed to
+ *   u16_normalize(), as the other helpers in this file do.
+ * @len_out: receives the length left after unaccenting, or 0 on failure.
+ *
+ * Returns: the buffer allocated by u16_normalize(), or NULL if
+ * normalization failed.
+ *
+ * NOTE(review): tracker_parser_unaccent_nfkd_string() in this file is
+ * documented as taking UTF-8 with a byte length, while it is given UTF-16
+ * data and a unit count here -- confirm this is intended.
+ */
+gunichar2 *
+tracker_parser_unaccent (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    gunichar2 *zOutput;
+    gsize written = 0;
+
+    /* u16_normalize() takes a count of 16-bit units, not bytes */
+    zOutput = u16_normalize (UNINORM_NFKD, input, len / 2, NULL, &written);
+
+    if (!zOutput) {
+        *len_out = 0;
+        return NULL;
+    }
+
+    /* Unaccenting is done in place */
+    tracker_parser_unaccent_nfkd_string (zOutput, &written);
+
+    *len_out = written;
+
+    return zOutput;
+}
diff --git a/src/libtracker-common/tracker-parser-utils.h b/src/libtracker-common/tracker-parser-utils.h
index b2440213f..84a48c58d 100644
--- a/src/libtracker-common/tracker-parser-utils.h
+++ b/src/libtracker-common/tracker-parser-utils.h
@@ -24,10 +24,6 @@
#include <glib.h>
-#ifdef HAVE_LIBICU
-#include <unicode/utypes.h>
-#endif
-
G_BEGIN_DECLS
/* ASCII-7 is in range [0x00,0x7F] */
diff --git a/src/libtracker-common/tracker-parser.c b/src/libtracker-common/tracker-parser.c
new file mode 100644
index 000000000..aaaed58de
--- /dev/null
+++ b/src/libtracker-common/tracker-parser.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg@gnome.org>
+ */
+
+#include "config.h"
+
+#include <gio/gio.h>
+#include <gmodule.h>
+
+#include "tracker-parser.h"
+
+#include "tracker-debug.h"
+
+static TrackerParser * (*parser_new) (void); /* Entry points below are filled in by ensure_init_parser() via g_module_symbol() */
+static void (*parser_free) (TrackerParser *parser);
+static void (*parser_reset) (TrackerParser *parser,
+ const gchar *txt,
+ gint txt_size,
+ guint max_word_length,
+ gboolean enable_stemmer,
+ gboolean enable_unaccent,
+ gboolean ignore_stop_words,
+ gboolean ignore_reserved_words,
+ gboolean ignore_numbers);
+static const gchar * (*parser_next) (TrackerParser *parser,
+ gint *position,
+ gint *byte_offset_start,
+ gint *byte_offset_end,
+ gboolean *stop_word,
+ gint *word_length);
+static gpointer (*collation_init) (void); /* Resolved from the module's tracker_collation_* symbols */
+static void (*collation_shutdown) (gpointer collator);
+static gint (*collation_utf8) (gpointer collator,
+ gint len1,
+ gconstpointer str1,
+ gint len2,
+ gconstpointer str2);
+static gunichar2 * (*util_tolower) (const gunichar2 *input, /* util_* are resolved from the module's tracker_parser_tolower/toupper/casefold/normalize/unaccent symbols */
+ gsize len,
+ gsize *len_out);
+static gunichar2 * (*util_toupper) (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+static gunichar2 * (*util_casefold) (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+static gunichar2 * (*util_normalize) (const gunichar2 *input,
+ GNormalizeMode mode,
+ gsize len,
+ gsize *len_out);
+static gunichar2 * (*util_unaccent) (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+
+/*
+ * Load the unicode parser module on first use and resolve its entry points.
+ *
+ * Each known module name is tried in order and the first one that opens is
+ * kept. When the current working directory equals BUILDROOT the in-tree
+ * copies under BUILDROOT/src/libtracker-common are used; otherwise the
+ * modules are looked up in PRIVATE_LIBDIR.
+ *
+ * Aborts through g_assert() if GModule is unsupported or no module can be
+ * opened. A failed symbol lookup is only reported on stderr. The module is
+ * made resident before the handle is closed.
+ */
+static void
+ensure_init_parser (void)
+{
+    static GModule *module = NULL;
+
+    if (module == NULL) {
+        const gchar *modules[] = {
+            "libtracker-parser-libicu.so",
+            "libtracker-parser-libunistring.so"
+        };
+        gchar *current_dir;
+        gchar *module_path;
+        gboolean in_build_tree;
+        guint i;
+
+        g_assert (g_module_supported ());
+
+        /* Detect in-build runtime of this code, this may happen
+         * building introspection information or running tests.
+         * We want the in-tree modules to be loaded then.
+         *
+         * g_get_current_dir() returns an allocated string, so query
+         * it once and free it instead of leaking it per iteration.
+         */
+        current_dir = g_get_current_dir ();
+        in_build_tree = (g_strcmp0 (current_dir, BUILDROOT) == 0);
+        g_free (current_dir);
+
+        for (i = 0; i < G_N_ELEMENTS (modules); i++) {
+            if (in_build_tree) {
+                module_path = g_strdup_printf (BUILDROOT "/src/libtracker-common/%s", modules[i]);
+            } else {
+                module_path = g_strdup_printf (PRIVATE_LIBDIR "/%s", modules[i]);
+            }
+
+            module = g_module_open (module_path,
+                                    G_MODULE_BIND_LAZY |
+                                    G_MODULE_BIND_LOCAL);
+            g_free (module_path);
+
+            if (module)
+                break;
+        }
+
+        g_assert (module != NULL);
+
+        if (!g_module_symbol (module, "tracker_parser_new", (gpointer *) &parser_new) ||
+            !g_module_symbol (module, "tracker_parser_free", (gpointer *) &parser_free) ||
+            !g_module_symbol (module, "tracker_parser_reset", (gpointer *) &parser_reset) ||
+            !g_module_symbol (module, "tracker_parser_next", (gpointer *) &parser_next) ||
+            !g_module_symbol (module, "tracker_collation_init", (gpointer *) &collation_init) ||
+            !g_module_symbol (module, "tracker_collation_shutdown", (gpointer *) &collation_shutdown) ||
+            !g_module_symbol (module, "tracker_collation_utf8", (gpointer *) &collation_utf8) ||
+            !g_module_symbol (module, "tracker_parser_tolower", (gpointer *) &util_tolower) ||
+            !g_module_symbol (module, "tracker_parser_toupper", (gpointer *) &util_toupper) ||
+            !g_module_symbol (module, "tracker_parser_casefold", (gpointer *) &util_casefold) ||
+            !g_module_symbol (module, "tracker_parser_normalize", (gpointer *) &util_normalize) ||
+            !g_module_symbol (module, "tracker_parser_unaccent", (gpointer *) &util_unaccent)) {
+            g_printerr ("Could not initialize parser functions: %s\n",
+                        g_module_error ());
+        }
+
+        TRACKER_NOTE (COLLATION, g_message ("Initialized collator %s", g_module_name (module)));
+
+        g_module_make_resident (module);
+        g_module_close (module);
+    }
+}
+
+/* Create a parser through the loaded module, loading the module on first use. */
+TrackerParser *
+tracker_parser_new (void)
+{
+    TrackerParser *parser;
+
+    ensure_init_parser ();
+    parser = (*parser_new) ();
+
+    return parser;
+}
+
+/* Forward to the module's parser_free entry point. */
+void
+tracker_parser_free (TrackerParser *parser)
+{
+    (*parser_free) (parser);
+}
+
+/* Forward all arguments, unchanged, to the module's parser_reset entry point. */
+void
+tracker_parser_reset (TrackerParser *parser,
+                      const gchar   *txt,
+                      gint           txt_size,
+                      guint          max_word_length,
+                      gboolean       enable_stemmer,
+                      gboolean       enable_unaccent,
+                      gboolean       ignore_stop_words,
+                      gboolean       ignore_reserved_words,
+                      gboolean       ignore_numbers)
+{
+    (*parser_reset) (parser,
+                     txt,
+                     txt_size,
+                     max_word_length,
+                     enable_stemmer,
+                     enable_unaccent,
+                     ignore_stop_words,
+                     ignore_reserved_words,
+                     ignore_numbers);
+}
+
+/* Forward to the module's parser_next entry point and return its result. */
+const gchar *
+tracker_parser_next (TrackerParser *parser,
+                     gint          *position,
+                     gint          *byte_offset_start,
+                     gint          *byte_offset_end,
+                     gboolean      *stop_word,
+                     gint          *word_length)
+{
+    const gchar *word;
+
+    word = (*parser_next) (parser,
+                           position,
+                           byte_offset_start,
+                           byte_offset_end,
+                           stop_word,
+                           word_length);
+
+    return word;
+}
+
+/* Create a collator through the loaded module, loading the module on first use. */
+gpointer
+tracker_collation_init (void)
+{
+    gpointer collator;
+
+    ensure_init_parser ();
+    collator = (*collation_init) ();
+
+    return collator;
+}
+
+/* Forward to the module's collation_shutdown entry point. */
+void
+tracker_collation_shutdown (gpointer collator)
+{
+    (*collation_shutdown) (collator);
+}
+
+/* Forward to the module's collation_utf8 entry point and return its result. */
+gint
+tracker_collation_utf8 (gpointer      collator,
+                        gint          len1,
+                        gconstpointer str1,
+                        gint          len2,
+                        gconstpointer str2)
+{
+    gint order;
+
+    order = (*collation_utf8) (collator, len1, str1, len2, str2);
+
+    return order;
+}
+
+/* Lowercase through the loaded module, loading the module on first use. */
+gunichar2 *
+tracker_parser_tolower (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    gunichar2 *output;
+
+    ensure_init_parser ();
+    output = (*util_tolower) (input, len, len_out);
+
+    return output;
+}
+
+/* Uppercase through the loaded module, loading the module on first use. */
+gunichar2 *
+tracker_parser_toupper (const gunichar2 *input,
+                        gsize            len,
+                        gsize           *len_out)
+{
+    gunichar2 *output;
+
+    ensure_init_parser ();
+    output = (*util_toupper) (input, len, len_out);
+
+    return output;
+}
+
+/* Case-fold through the loaded module, loading the module on first use. */
+gunichar2 *
+tracker_parser_casefold (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    gunichar2 *output;
+
+    ensure_init_parser ();
+    output = (*util_casefold) (input, len, len_out);
+
+    return output;
+}
+
+/* Normalize through the loaded module, loading the module on first use. */
+gunichar2 *
+tracker_parser_normalize (const gunichar2 *input,
+                          GNormalizeMode   mode,
+                          gsize            len,
+                          gsize           *len_out)
+{
+    gunichar2 *output;
+
+    ensure_init_parser ();
+    output = (*util_normalize) (input, mode, len, len_out);
+
+    return output;
+}
+
+/* Unaccent through the loaded module, loading the module on first use. */
+gunichar2 *
+tracker_parser_unaccent (const gunichar2 *input,
+                         gsize            len,
+                         gsize           *len_out)
+{
+    gunichar2 *output;
+
+    ensure_init_parser ();
+    output = (*util_unaccent) (input, len, len_out);
+
+    return output;
+}
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h
index 3c8271503..78d67e21f 100644
--- a/src/libtracker-common/tracker-parser.h
+++ b/src/libtracker-common/tracker-parser.h
@@ -34,9 +34,10 @@
G_BEGIN_DECLS
+/* Parser */
typedef struct TrackerParser TrackerParser;
-TrackerParser *tracker_parser_new (TrackerLanguage *language);
+TrackerParser *tracker_parser_new (void);
void tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
@@ -57,10 +58,39 @@ const gchar * tracker_parser_next (TrackerParser *parser,
void tracker_parser_free (TrackerParser *parser);
+/* Collation */
+gpointer tracker_collation_init (void);
+
+void tracker_collation_shutdown (gpointer collator);
+
+gint tracker_collation_utf8 (gpointer collator,
+ gint len1,
+ gconstpointer str1,
+ gint len2,
+ gconstpointer str2);
+
/* Other helper methods */
-gboolean tracker_parser_unaccent_nfkd_string (gpointer str,
- gsize *str_length);
+gunichar2 * tracker_parser_tolower (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+
+gunichar2 * tracker_parser_toupper (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+
+gunichar2 * tracker_parser_casefold (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
+
+gunichar2 * tracker_parser_normalize (const gunichar2 *input,
+ GNormalizeMode mode,
+ gsize len,
+ gsize *len_out);
+
+gunichar2 * tracker_parser_unaccent (const gunichar2 *input,
+ gsize len,
+ gsize *len_out);
G_END_DECLS
diff --git a/src/libtracker-sparql/core/tracker-collation.c b/src/libtracker-sparql/core/tracker-collation.c
index beca29e3b..0e82d66dd 100644
--- a/src/libtracker-sparql/core/tracker-collation.c
+++ b/src/libtracker-sparql/core/tracker-collation.c
@@ -18,229 +18,12 @@
*/
#include "config.h"
+
#include <glib.h>
#include <glib/gi18n.h>
-#include <string.h>
-#include <locale.h>
-#include <libtracker-common/tracker-debug.h>
-#include <libtracker-common/tracker-locale.h>
#include "tracker-collation.h"
-/* If defined, will dump additional traces */
-#ifdef G_ENABLE_DEBUG
-#define trace(message, ...) TRACKER_NOTE (COLLATION, g_message (message, ##__VA_ARGS__))
-#else
-#define trace(...)
-#endif
-
-#ifdef HAVE_LIBUNISTRING
-/* libunistring versions prior to 9.1.2 need this hack */
-#define _UNUSED_PARAMETER_
-#include <unistr.h>
-#elif defined(HAVE_LIBICU)
-#include <unicode/ucol.h>
-#include <unicode/utypes.h>
-#endif
-
-/* If string lenth less than this value, allocating from the stack */
-#define MAX_STACK_STR_SIZE 8192
-
-#ifdef HAVE_LIBUNISTRING /* ---- GNU libunistring based collation ---- */
-
-gpointer
-tracker_collation_init (void)
-{
- gchar *locale;
-
- /* Get locale! */
- locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
- TRACKER_NOTE (COLLATION, g_message ("[libunistring collation] Initializing collator for locale '%s'", locale));
- g_free (locale);
- /* Nothing to do */
- return NULL;
-}
-
-void
-tracker_collation_shutdown (gpointer collator)
-{
- /* Nothing to do */
-}
-
-gint
-tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2)
-{
- gint result;
- gchar *aux1;
- gchar *aux2;
-
- /* Note: str1 and str2 are NOT NUL-terminated */
- aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1);
- aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1);
-
- memcpy (aux1, str1, len1); aux1[len1] = '\0';
- memcpy (aux2, str2, len2); aux2[len2] = '\0';
-
- result = u8_strcoll (aux1, aux2);
-
- trace ("(libunistring) Collating '%s' and '%s' (%d)",
- aux1, aux2, result);
-
- if (len1 >= MAX_STACK_STR_SIZE)
- g_free (aux1);
- if (len2 >= MAX_STACK_STR_SIZE)
- g_free (aux2);
- return result;
-}
-
-#elif defined(HAVE_LIBICU) /* ---- ICU based collation (UTF-16) ----*/
-
-gpointer
-tracker_collation_init (void)
-{
- UCollator *collator = NULL;
- UErrorCode status = U_ZERO_ERROR;
- gchar *locale;
-
- /* Get locale! */
- locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
-
- TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale));
- collator = ucol_open (locale, &status);
- if (!collator) {
- g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s",
- locale, u_errorName (status));
- /* Try to get UCA collator then... */
- status = U_ZERO_ERROR;
- collator = ucol_open ("root", &status);
- if (!collator) {
- g_critical ("[ICU collation] UCA Collator cannot be created: %s",
- u_errorName (status));
- }
- }
- g_free (locale);
- return collator;
-}
-
-void
-tracker_collation_shutdown (gpointer collator)
-{
- if (collator)
- ucol_close ((UCollator *)collator);
-}
-
-gint
-tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2)
-{
- UErrorCode status = U_ZERO_ERROR;
- UCharIterator iter1;
- UCharIterator iter2;
- UCollationResult result;
-
- /* Collator must be created before trying to collate */
- g_return_val_if_fail (collator, -1);
-
- /* Setup iterators */
- uiter_setUTF8 (&iter1, str1, len1);
- uiter_setUTF8 (&iter2, str2, len2);
-
- result = ucol_strcollIter ((UCollator *)collator,
- &iter1,
- &iter2,
- &status);
- if (status != U_ZERO_ERROR)
- g_critical ("Error collating: %s", u_errorName (status));
-
-#ifdef ENABLE_TRACE
- {
- gchar *aux1;
- gchar *aux2;
-
- /* Note: str1 and str2 are NOT NUL-terminated */
- aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1);
- aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1);
-
- memcpy (aux1, str1, len1); aux1[len1] = '\0';
- memcpy (aux2, str2, len2); aux2[len2] = '\0';
-
- trace ("(ICU) Collating '%s' and '%s' (%d)",
- aux1, aux2, result);
-
- if (len1 >= MAX_STACK_STR_SIZE)
- g_free (aux1);
- if (len2 >= MAX_STACK_STR_SIZE)
- g_free (aux2);
- }
-#endif /* ENABLE_TRACE */
-
- if (result == UCOL_GREATER)
- return 1;
- if (result == UCOL_LESS)
- return -1;
- return 0;
-}
-
-#else /* ---- GLib based collation ---- */
-
-gpointer
-tracker_collation_init (void)
-{
- gchar *locale;
-
- /* Get locale! */
- locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
- TRACKER_NOTE (COLLATION, g_message ("[GLib collation] Initializing collator for locale '%s'", locale));
- g_free (locale);
- /* Nothing to do */
- return NULL;
-}
-
-void
-tracker_collation_shutdown (gpointer collator)
-{
- /* Nothing to do */
-}
-
-gint
-tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2)
-{
- gint result;
- gchar *aux1;
- gchar *aux2;
-
- /* Note: str1 and str2 are NOT NUL-terminated */
- aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1);
- aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1);
-
- memcpy (aux1, str1, len1); aux1[len1] = '\0';
- memcpy (aux2, str2, len2); aux2[len2] = '\0';
-
- result = g_utf8_collate (aux1, aux2);
-
- trace ("(GLib) Collating '%s' and '%s' (%d)",
- aux1, aux2, result);
-
- if (len1 >= MAX_STACK_STR_SIZE)
- g_free (aux1);
- if (len2 >= MAX_STACK_STR_SIZE)
- g_free (aux2);
- return result;
-}
-
-#endif
-
static gboolean
skip_non_alphanumeric (const gchar **str,
gint *len)
diff --git a/src/libtracker-sparql/core/tracker-collation.h b/src/libtracker-sparql/core/tracker-collation.h
index 95551a9f0..6369aefba 100644
--- a/src/libtracker-sparql/core/tracker-collation.h
+++ b/src/libtracker-sparql/core/tracker-collation.h
@@ -22,13 +22,7 @@
G_BEGIN_DECLS
-gpointer tracker_collation_init (void);
-void tracker_collation_shutdown (gpointer collator);
-gint tracker_collation_utf8 (gpointer collator,
- gint len1,
- gconstpointer str1,
- gint len2,
- gconstpointer str2);
+#include <libtracker-common/tracker-parser.h>
gint tracker_collation_utf8_title (gpointer collator,
gint len1,
@@ -36,12 +30,7 @@ gint tracker_collation_utf8_title (gpointer collator,
gint len2,
gconstpointer str2);
-#ifdef HAVE_LIBICU
#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x10fffd)
-#else
-/* glibc-based collators do not properly sort private use characters */
-#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x9fa5)
-#endif
G_END_DECLS
diff --git a/src/libtracker-sparql/core/tracker-data-manager.c b/src/libtracker-sparql/core/tracker-data-manager.c
index 1481a1a02..80e53623e 100644
--- a/src/libtracker-sparql/core/tracker-data-manager.c
+++ b/src/libtracker-sparql/core/tracker-data-manager.c
@@ -25,7 +25,6 @@
#include <glib/gstdio.h>
#include <libtracker-common/tracker-debug.h>
-#include <libtracker-common/tracker-locale.h>
#include <libtracker-sparql/tracker-deserializer-rdf.h>
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 6ec1c1194..24c863616 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -28,30 +28,13 @@
#include <libtracker-common/tracker-date-time.h>
#include <libtracker-common/tracker-debug.h>
-#include <libtracker-common/tracker-locale.h>
#include <libtracker-common/tracker-parser.h>
#include <libtracker-sparql/tracker-cursor.h>
#include <libtracker-sparql/tracker-private.h>
#include "tracker-fts.h"
-
-
-#ifdef HAVE_LIBUNISTRING
-/* libunistring versions prior to 9.1.2 need this hack */
-#define _UNUSED_PARAMETER_
-#include <unistr.h>
-#include <unicase.h>
-#elif defined(HAVE_LIBICU)
-#include <unicode/utypes.h>
-#include <unicode/uregex.h>
-#include <unicode/ustring.h>
-#include <unicode/ucol.h>
-#include <unicode/unorm2.h>
-#endif
-
#include "tracker-collation.h"
-
#include "tracker-db-interface-sqlite.h"
#include "tracker-db-manager.h"
#include "tracker-data-enum-types.h"
@@ -971,19 +954,21 @@ function_sparql_replace (sqlite3_context *context,
g_free (unescaped);
}
-#ifdef HAVE_LIBUNISTRING
-
static void
function_sparql_lower_case (sqlite3_context *context,
int argc,
sqlite3_value *argv[])
{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
+ const gchar *fn = "fn:lower-case";
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
+ gsize nOutput;
- g_assert (argc == 1);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
+ return;
+ }
zInput = sqlite3_value_text16 (argv[0]);
@@ -993,9 +978,8 @@ function_sparql_lower_case (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = u16_tolower (zInput, nInput/2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
+ zOutput = tracker_parser_tolower (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1003,12 +987,16 @@ function_sparql_upper_case (sqlite3_context *context,
int argc,
sqlite3_value *argv[])
{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
+ const gchar *fn = "fn:upper-case";
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
+ gsize nOutput;
- g_assert (argc == 1);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
+ return;
+ }
zInput = sqlite3_value_text16 (argv[0]);
@@ -1018,9 +1006,8 @@ function_sparql_upper_case (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = u16_toupper (zInput, nInput / 2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
+ zOutput = tracker_parser_toupper (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1028,12 +1015,16 @@ function_sparql_case_fold (sqlite3_context *context,
int argc,
sqlite3_value *argv[])
{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
+ const gchar *fn = "tracker:case-fold";
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
+ gsize nOutput;
- g_assert (argc == 1);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
+ return;
+ }
zInput = sqlite3_value_text16 (argv[0]);
@@ -1043,9 +1034,8 @@ function_sparql_case_fold (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = u16_casefold (zInput, nInput/2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
+ zOutput = tracker_parser_casefold (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1055,11 +1045,11 @@ function_sparql_normalize (sqlite3_context *context,
{
const gchar *fn = "tracker:normalize";
const gchar *nfstr;
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
+ const gunichar2 *zInput;
+ gunichar2 *zOutput = NULL;
+ GNormalizeMode mode;
int nInput;
- uninorm_t nf;
+ gsize nOutput;
if (argc != 2) {
result_context_function_error (context, fn, "Invalid argument count");
@@ -1072,25 +1062,24 @@ function_sparql_normalize (sqlite3_context *context,
return;
}
- nfstr = sqlite3_value_text (argv[1]);
+ nInput = sqlite3_value_bytes16 (argv[0]);
+
+ nfstr = (gchar *)sqlite3_value_text (argv[1]);
if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
- nf = UNINORM_NFC;
+ mode = G_NORMALIZE_NFC;
else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
- nf = UNINORM_NFD;
+ mode = G_NORMALIZE_NFD;
else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
- nf = UNINORM_NFKC;
+ mode = G_NORMALIZE_NFKC;
else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
- nf = UNINORM_NFKD;
+ mode = G_NORMALIZE_NFKD;
else {
- result_context_function_error (context, fn, "Invalid normalization specified, options are 'nfc', 'nfd', 'nfkc' or 'nfkd'");
+ result_context_function_error (context, fn, "Invalid normalization specified");
return;
}
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- zOutput = u16_normalize (nf, zInput, nInput/2, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
+ zOutput = tracker_parser_normalize (zInput, mode, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
}
static void
@@ -1098,131 +1087,17 @@ function_sparql_unaccent (sqlite3_context *context,
int argc,
sqlite3_value *argv[])
{
- const gchar *zInput;
- gchar *zOutput;
- gsize written = 0;
- int nInput;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes (argv[0]);
-
- zOutput = u8_normalize (UNINORM_NFKD, zInput, nInput, NULL, &written);
-
- /* Unaccenting is done in place */
- tracker_parser_unaccent_nfkd_string (zOutput, &written);
-
- sqlite3_result_text (context, zOutput, written, free);
-}
-
-#elif defined(HAVE_LIBICU)
-
-static void
-function_sparql_lower_case (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "fn:lower-case";
- const UChar *zInput;
- UChar *zOutput;
- int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strToLower (zOutput, nOutput/2, zInput, nInput/2, NULL, &status);
-
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strToLower(): %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
-}
-
-static void
-function_sparql_upper_case (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "fn:upper-case";
- const UChar *zInput;
- UChar *zOutput;
+ const gchar *fn = "tracker:unaccent";
+ const gunichar2 *zInput;
+ gunichar2 *zOutput = NULL;
int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strToUpper (zOutput, nOutput / 2, zInput, nInput / 2, NULL, &status);
+ gsize nOutput;
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strToUpper(): %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
return;
}
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
-}
-
-static void
-function_sparql_case_fold (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "tracker:case-fold";
- const UChar *zInput;
- UChar *zOutput;
- int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
-
zInput = sqlite3_value_text16 (argv[0]);
if (!zInput) {
@@ -1231,25 +1106,8 @@ function_sparql_case_fold (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strFoldCase (zOutput, nOutput/2, zInput, nInput/2, U_FOLD_CASE_DEFAULT, &status);
-
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strFoldCase: %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+ zOutput = tracker_parser_unaccent (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
}
static void
@@ -1277,141 +1135,6 @@ function_sparql_strip_punctuation (sqlite3_context *context,
sqlite3_result_text (context, output, -1, g_free);
}
-static gunichar2 *
-normalize_string (const gunichar2 *string,
- gsize string_len, /* In gunichar2s */
- const UNormalizer2 *normalizer,
- gsize *len_out, /* In gunichar2s */
- UErrorCode *status)
-{
- int nOutput;
- gunichar2 *zOutput;
-
- nOutput = (string_len * 2) + 1;
- zOutput = g_new0 (gunichar2, nOutput);
-
- nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
-
- if (*status == U_BUFFER_OVERFLOW_ERROR) {
- /* Try again after allocating enough space for the normalization */
- *status = U_ZERO_ERROR;
- zOutput = g_renew (gunichar2, zOutput, nOutput);
- memset (zOutput, 0, nOutput * sizeof (gunichar2));
- nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
- }
-
- if (!U_SUCCESS (*status)) {
- g_clear_pointer (&zOutput, g_free);
- nOutput = 0;
- }
-
- if (len_out)
- *len_out = nOutput;
-
- return zOutput;
-}
-
-static void
-function_sparql_normalize (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "tracker:normalize";
- const gchar *nfstr;
- const uint16_t *zInput;
- uint16_t *zOutput = NULL;
- int nInput;
- gsize nOutput;
- const UNormalizer2 *normalizer;
- UErrorCode status = U_ZERO_ERROR;
-
- if (argc != 2) {
- result_context_function_error (context, fn, "Invalid argument count");
- return;
- }
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nfstr = (gchar *)sqlite3_value_text (argv[1]);
- if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
- normalizer = unorm2_getNFCInstance (&status);
- else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
- normalizer = unorm2_getNFDInstance (&status);
- else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
- normalizer = unorm2_getNFKCInstance (&status);
- else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
- normalizer = unorm2_getNFKDInstance (&status);
- else {
- result_context_function_error (context, fn, "Invalid normalization specified");
- return;
- }
-
- if (U_SUCCESS (status)) {
- nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
- }
-
- if (!U_SUCCESS (status)) {
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
- zBuf[127] = '\0';
- g_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
-}
-
-static void
-function_sparql_unaccent (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "tracker:unaccent";
- const uint16_t *zInput;
- uint16_t *zOutput = NULL;
- int nInput;
- gsize nOutput;
- const UNormalizer2 *normalizer;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- normalizer = unorm2_getNFKDInstance (&status);
-
- if (U_SUCCESS (status)) {
- nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
- }
-
- if (!U_SUCCESS (status)) {
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
- zBuf[127] = '\0';
- g_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- /* Unaccenting is done in place */
- tracker_parser_unaccent_nfkd_string (zOutput, &nOutput);
-
- sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
-}
-
-#endif
-
static void
function_sparql_encode_for_uri (sqlite3_context *context,
int argc,
diff --git a/src/libtracker-sparql/core/tracker-db-manager.c b/src/libtracker-sparql/core/tracker-db-manager.c
index cb4318727..cebf1fe5b 100644
--- a/src/libtracker-sparql/core/tracker-db-manager.c
+++ b/src/libtracker-sparql/core/tracker-db-manager.c
@@ -22,6 +22,7 @@
#include <fcntl.h>
#include <glib/gstdio.h>
+#include <locale.h>
#include <libtracker-common/tracker-common.h>
#include <libtracker-common/tracker-parser.h>
@@ -335,11 +336,11 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager,
GError **error)
{
gchar *db_locale;
- gchar *current_locale;
+ const gchar *current_locale;
gboolean changed;
/* Get current collation locale */
- current_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
+ current_locale = setlocale (LC_COLLATE, NULL);
/* Get db locale */
db_locale = db_get_locale (db_manager);
@@ -361,7 +362,6 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager,
}
g_free (db_locale);
- g_free (current_locale);
return changed;
}
@@ -369,13 +369,12 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager,
void
tracker_db_manager_set_current_locale (TrackerDBManager *db_manager)
{
- gchar *current_locale;
+ const gchar *current_locale;
/* Get current collation locale */
- current_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE);
+ current_locale = setlocale (LC_COLLATE, NULL);
g_debug ("Saving DB locale as: '%s'", current_locale);
db_set_locale (db_manager, current_locale);
- g_free (current_locale);
}
static void
diff --git a/src/libtracker-sparql/core/tracker-fts-tokenizer.c b/src/libtracker-sparql/core/tracker-fts-tokenizer.c
index 66f68a069..e9dac1efa 100644
--- a/src/libtracker-sparql/core/tracker-fts-tokenizer.c
+++ b/src/libtracker-sparql/core/tracker-fts-tokenizer.c
@@ -38,7 +38,6 @@ typedef struct TrackerTokenizer TrackerTokenizer;
typedef struct TrackerTokenizerFunctionData TrackerTokenizerFunctionData;
struct TrackerTokenizerData {
- TrackerLanguage *language;
TrackerDBManagerFlags flags;
};
@@ -65,7 +64,7 @@ tracker_tokenizer_create (void *data,
tokenizer = g_new0 (TrackerTokenizer, 1);
tokenizer->data = data;
- tokenizer->parser = tracker_parser_new (tokenizer->data->language);
+ tokenizer->parser = tracker_parser_new ();
*tokenizer_out = (Fts5Tokenizer *) tokenizer;
@@ -159,7 +158,6 @@ tracker_tokenizer_data_new (TrackerDBManagerFlags flags)
TrackerTokenizerData *p;
p = g_new0 (TrackerTokenizerData, 1);
- p->language = tracker_language_new (NULL);
p->flags = flags;
return p;
@@ -170,7 +168,6 @@ tracker_tokenizer_data_free (gpointer user_data)
{
TrackerTokenizerData *data = user_data;
- g_object_unref (data->language);
g_free (data);
}
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index 2e70a3513..c44254bcb 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -489,8 +489,6 @@ tracker_direct_connection_initable_init (GInitable *initable,
conn = TRACKER_DIRECT_CONNECTION (initable);
priv = tracker_direct_connection_get_instance_private (conn);
- tracker_locale_sanity_check ();
-
if (!set_up_thread_pools (conn, error))
return FALSE;