diff options
author | Sam Thursfield <sam@afuera.me.uk> | 2023-03-03 16:20:06 +0000 |
---|---|---|
committer | Sam Thursfield <sam@afuera.me.uk> | 2023-03-03 16:20:06 +0000 |
commit | 5af526f8ba238075a7c6f8ca5628d27531b0537e (patch) | |
tree | d0b12977d5b878357309755a57b687a3224c435b /src | |
parent | a9d83e783717e9b2d35b91a4f9955cce5762254c (diff) | |
parent | 65749008603efe9c7182f579d8e8d0e62c238670 (diff) | |
download | tracker-5af526f8ba238075a7c6f8ca5628d27531b0537e.tar.gz |
Merge branch 'wip/carlosg/modular-unicode-lib' into 'master'
Make unicode library a module
Closes #396
See merge request https://gitlab.gnome.org/GNOME/tracker/-/merge_requests/581
Diffstat (limited to 'src')
-rw-r--r-- | src/libtracker-common/meson.build | 45 | ||||
-rw-r--r-- | src/libtracker-common/tracker-common.h | 2 | ||||
-rw-r--r-- | src/libtracker-common/tracker-locale.c | 105 | ||||
-rw-r--r-- | src/libtracker-common/tracker-locale.h | 50 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser-libicu.c | 264 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser-libunistring.c | 138 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser-utils.h | 4 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser.c | 255 | ||||
-rw-r--r-- | src/libtracker-common/tracker-parser.h | 36 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-collation.c | 219 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-collation.h | 13 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-data-manager.c | 1 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-db-interface-sqlite.c | 381 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-db-manager.c | 11 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-fts-tokenizer.c | 5 | ||||
-rw-r--r-- | src/libtracker-sparql/direct/tracker-direct.c | 2 |
16 files changed, 763 insertions, 768 deletions
diff --git a/src/libtracker-common/meson.build b/src/libtracker-common/meson.build index 17bdd533a..96124437b 100644 --- a/src/libtracker-common/meson.build +++ b/src/libtracker-common/meson.build @@ -6,18 +6,42 @@ tracker_common_sources = [ 'tracker-file-utils.c', 'tracker-term-utils.c', 'tracker-utils.c', - 'tracker-locale.c', - 'tracker-parser-utils.c', - 'tracker-language.c', + 'tracker-parser.c', ] if unicode_library_name == 'icu' - tracker_common_sources += 'tracker-parser-libicu.c' + libtracker_parser_libicu = shared_module('tracker-parser-libicu', + 'tracker-parser-libicu.c', + 'tracker-parser-utils.c', + 'tracker-language.c', + dependencies: [gobject,libstemmer, icu_uc, icu_i18n], + c_args: tracker_c_args + [ + '-include', join_paths(build_root, 'config.h'), + '-DMODULE', + ], + include_directories: [configinc, srcinc], + install: true, + install_dir: tracker_internal_libs_dir, + name_suffix: 'so', + ) else - tracker_common_sources += 'tracker-parser-libunistring.c' + libtracker_parser_libunistring = shared_module('tracker-parser-libunistring', + 'tracker-parser-libunistring.c', + 'tracker-parser-utils.c', + 'tracker-language.c', + dependencies: [gobject,libstemmer, libunistring], + c_args: tracker_c_args + [ + '-include', join_paths(build_root, 'config.h'), + '-DMODULE', + ], + include_directories: [configinc, srcinc], + install: true, + install_dir: tracker_internal_libs_dir, + name_suffix: 'so', + ) endif -tracker_common_dependencies = [glib, gio, gio_unix, libmath, libstemmer] +tracker_common_dependencies = [glib, gio, gio_unix, libmath] if build_machine.system() == 'openbsd' libkvm = meson.get_compiler('c').find_library('kvm') @@ -26,8 +50,11 @@ endif libtracker_common = static_library('tracker-common', tracker_common_sources, - dependencies: tracker_common_dependencies + [unicode_library], - c_args: tracker_c_args, + dependencies: [tracker_common_dependencies, gmodule], + c_args: [ + '-DPRIVATE_LIBDIR="@0@"'.format(tracker_internal_libs_dir), + '-DBUILDROOT="@0@"'.format(meson.build_root()), + ] + tracker_c_args, include_directories: [configinc, srcinc], gnu_symbol_visibility: 'hidden', ) @@ -36,6 +63,6 @@ commoninc = include_directories('.') tracker_common_dep = declare_dependency( link_with: libtracker_common, - dependencies: tracker_common_dependencies + [unicode_library], + dependencies: [tracker_common_dependencies, gmodule], include_directories: [configinc, srcinc, commoninc], ) diff --git a/src/libtracker-common/tracker-common.h b/src/libtracker-common/tracker-common.h index e572b6ebc..d04e2b083 100644 --- a/src/libtracker-common/tracker-common.h +++ b/src/libtracker-common/tracker-common.h @@ -31,11 +31,9 @@ #include "tracker-date-time.h" #include "tracker-debug.h" #include "tracker-file-utils.h" -#include "tracker-language.h" #include "tracker-parser.h" #include "tracker-term-utils.h" #include "tracker-utils.h" -#include "tracker-locale.h" #undef __LIBTRACKER_COMMON_INSIDE__ diff --git a/src/libtracker-common/tracker-locale.c b/src/libtracker-common/tracker-locale.c deleted file mode 100644 index 816bb8d69..000000000 --- a/src/libtracker-common/tracker-locale.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (C) 2010 Nokia <ivan.frade@nokia.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" - -#include <locale.h> -#include <string.h> - -#include <glib.h> - -#include "tracker-locale.h" - -static const gchar *locale_names[] = { - [TRACKER_LOCALE_LANGUAGE] = "LANG", - [TRACKER_LOCALE_TIME] = "LC_TIME", - [TRACKER_LOCALE_COLLATE] = "LC_COLLATE", - [TRACKER_LOCALE_NUMERIC] = "LC_NUMERIC", - [TRACKER_LOCALE_MONETARY] = "LC_MONETARY" -}; - -static GRecMutex locales_mutex; - -static const gchar * -tracker_locale_get_unlocked (TrackerLocaleID id) -{ - const gchar *env_locale = NULL; - - switch (id) { - case TRACKER_LOCALE_LANGUAGE: - env_locale = g_getenv ("LANG"); - break; - case TRACKER_LOCALE_TIME: - env_locale = setlocale (LC_TIME, NULL); - break; - case TRACKER_LOCALE_COLLATE: - env_locale = setlocale (LC_COLLATE, NULL); - break; - case TRACKER_LOCALE_NUMERIC: - env_locale = setlocale (LC_NUMERIC, NULL); - break; - case TRACKER_LOCALE_MONETARY: - env_locale = setlocale (LC_MONETARY, NULL); - break; - default: - g_assert_not_reached (); - break; - } - - return env_locale; -} - -void -tracker_locale_sanity_check (void) -{ - guint i; - - g_rec_mutex_lock (&locales_mutex); - - for (i = 0; i < TRACKER_LOCALE_LAST; i++) { - const gchar *env_locale = NULL; - - env_locale = tracker_locale_get_unlocked (i); - - if (!env_locale) { - g_warning ("Locale '%s' is not set, defaulting to C locale", locale_names[i]); - } - } - - g_rec_mutex_unlock (&locales_mutex); -} - -gchar * -tracker_locale_get (TrackerLocaleID id) -{ - const gchar *env_locale = NULL; - gchar *locale; - - g_rec_mutex_lock (&locales_mutex); - - env_locale = tracker_locale_get_unlocked (id); - - /* Always return a duplicated string, as the locale may change at any - * moment */ - locale = g_strdup (env_locale); - - g_rec_mutex_unlock (&locales_mutex); - - return locale; -} diff --git a/src/libtracker-common/tracker-locale.h b/src/libtracker-common/tracker-locale.h deleted file mode 100644 index 32547d13d..000000000 --- a/src/libtracker-common/tracker-locale.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2010 Nokia <ivan.frade@nokia.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#ifndef __LIBTRACKER_COMMON_LOCALE_H__ -#define __LIBTRACKER_COMMON_LOCALE_H__ - -#include <glib.h> - -G_BEGIN_DECLS - -#if !defined (__LIBTRACKER_COMMON_INSIDE__) && !defined (TRACKER_COMPILATION) -#error "only <libtracker-common/tracker-common.h> must be included directly." -#endif - -/* Type of locales supported in tracker */ -typedef enum { - TRACKER_LOCALE_LANGUAGE, - TRACKER_LOCALE_TIME, - TRACKER_LOCALE_COLLATE, - TRACKER_LOCALE_NUMERIC, - TRACKER_LOCALE_MONETARY, - TRACKER_LOCALE_LAST -} TrackerLocaleID; - -void tracker_locale_sanity_check (void); - -/* Get the current locale of the given type. - * Note that it returns a newly-allocated string which should be g_free()-ed - */ -gchar *tracker_locale_get (TrackerLocaleID id); - -G_END_DECLS - -#endif /* __LIBTRACKER_COMMON_LOCALE_H__ */ diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c index 8c4803206..8795af7cf 100644 --- a/src/libtracker-common/tracker-parser-libicu.c +++ b/src/libtracker-common/tracker-parser-libicu.c @@ -30,7 +30,10 @@ #include <unicode/ustring.h> #include <unicode/uchar.h> #include <unicode/unorm.h> +#include <unicode/ucol.h> +#include "tracker-language.h" +#include "tracker-debug.h" #include "tracker-parser.h" #include "tracker-parser-utils.h" @@ -41,6 +44,8 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +typedef UCollator TrackerCollator; + /* Max possible length of a UChar encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -144,7 +149,7 @@ get_word_info (const UChar *word, /* The input word in this method MUST be normalized in NFKD form, * and given in UChars, where str_length is the number of UChars * (not the number of bytes) */ -gboolean +static gboolean tracker_parser_unaccent_nfkd_string (gpointer str, gsize *str_length) { @@ -571,15 +576,12 @@ parser_next (TrackerParser *parser, } TrackerParser * -tracker_parser_new (TrackerLanguage *language) +tracker_parser_new (void) { TrackerParser *parser; - g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL); - parser = g_new0 (TrackerParser, 1); - - parser->language = g_object_ref (language); + parser->language = tracker_language_new (NULL); return parser; } @@ -754,3 +756,253 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + UCollator *collator = NULL; + UErrorCode status = U_ZERO_ERROR; + const gchar *locale; + + /* Get locale! */ + locale = setlocale (LC_COLLATE, NULL); + + collator = ucol_open (locale, &status); + if (!collator) { + g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s", + locale, u_errorName (status)); + /* Try to get UCA collator then... */ + status = U_ZERO_ERROR; + collator = ucol_open ("root", &status); + if (!collator) { + g_critical ("[ICU collation] UCA Collator cannot be created: %s", + u_errorName (status)); + } + } + + return collator; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + if (collator) + ucol_close ((UCollator *)collator); +} + +gint +tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + UErrorCode status = U_ZERO_ERROR; + UCharIterator iter1; + UCharIterator iter2; + UCollationResult result; + + /* Collator must be created before trying to collate */ + g_return_val_if_fail (collator, -1); + + /* Setup iterators */ + uiter_setUTF8 (&iter1, str1, len1); + uiter_setUTF8 (&iter2, str2, len2); + + result = ucol_strcollIter ((UCollator *)collator, + &iter1, + &iter2, + &status); + if (status != U_ZERO_ERROR) + g_critical ("Error collating: %s", u_errorName (status)); + + if (result == UCOL_GREATER) + return 1; + if (result == UCOL_LESS) + return -1; + return 0; +} + +gunichar2 * +tracker_parser_tolower (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + UChar *zOutput; + int nOutput; + UErrorCode status = U_ZERO_ERROR; + + g_return_val_if_fail (input, NULL); + + nOutput = len * 2 + 2; + zOutput = malloc (nOutput); + + u_strToLower (zOutput, nOutput / 2, + input, len / 2, + NULL, &status); + + if (!U_SUCCESS (status)) { + memcpy (zOutput, input, len); + zOutput[len] = '\0'; + nOutput = len; + } + + *len_out = nOutput; + + return zOutput; +} + +gunichar2 * +tracker_parser_toupper (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + UChar *zOutput; + int nOutput; + UErrorCode status = U_ZERO_ERROR; + + nOutput = len * 2 + 2; + zOutput = malloc (nOutput); + + u_strToUpper (zOutput, nOutput / 2, + input, len / 2, + NULL, &status); + + if (!U_SUCCESS (status)) { + memcpy (zOutput, input, len); + zOutput[len] = '\0'; + nOutput = len; + } + + *len_out = nOutput; + + return zOutput; +} + +gunichar2 * +tracker_parser_casefold (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + UChar *zOutput; + int nOutput; + UErrorCode status = U_ZERO_ERROR; + + nOutput = len * 2 + 2; + zOutput = malloc (nOutput); + + u_strFoldCase (zOutput, nOutput / 2, + input, len / 2, + U_FOLD_CASE_DEFAULT, &status); + + if (!U_SUCCESS (status)){ + memcpy (zOutput, input, len); + zOutput[len] = '\0'; + nOutput = len; + } + + *len_out = nOutput; + + return zOutput; +} + +static gunichar2 * +normalize_string (const gunichar2 *string, + gsize string_len, /* In gunichar2s */ + const UNormalizer2 *normalizer, + gsize *len_out, /* In gunichar2s */ + UErrorCode *status) +{ + int nOutput; + gunichar2 *zOutput; + + nOutput = (string_len * 2) + 1; + zOutput = g_new0 (gunichar2, nOutput); + + nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status); + + if (*status == U_BUFFER_OVERFLOW_ERROR) { + /* Try again after allocating enough space for the normalization */ + *status = U_ZERO_ERROR; + zOutput = g_renew (gunichar2, zOutput, nOutput); + memset (zOutput, 0, nOutput * sizeof (gunichar2)); + nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status); + } + + if (!U_SUCCESS (*status)) { + g_clear_pointer (&zOutput, g_free); + nOutput = 0; + } + + if (len_out) + *len_out = nOutput; + + return zOutput; +} + +gunichar2 * +tracker_parser_normalize (const gunichar2 *input, + GNormalizeMode mode, + gsize len, + gsize *len_out) +{ + uint16_t *zOutput = NULL; + gsize nOutput; + const UNormalizer2 *normalizer; + UErrorCode status = U_ZERO_ERROR; + + if (mode == G_NORMALIZE_NFC) + normalizer = unorm2_getNFCInstance (&status); + else if (mode == G_NORMALIZE_NFD) + normalizer = unorm2_getNFDInstance (&status); + else if (mode == G_NORMALIZE_NFKC) + normalizer = unorm2_getNFKCInstance (&status); + else if (mode == G_NORMALIZE_NFKD) + normalizer = unorm2_getNFKDInstance (&status); + else + g_assert_not_reached (); + + if (U_SUCCESS (status)) { + zOutput = normalize_string (input, len / 2, + normalizer, + &nOutput, &status); + } + + if (!U_SUCCESS (status)) { + zOutput = g_memdup2 (input, len); + nOutput = len; + } + + *len_out = nOutput; + + return zOutput; +} + +gunichar2 * +tracker_parser_unaccent (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + uint16_t *zOutput = NULL; + gsize nOutput; + const UNormalizer2 *normalizer; + UErrorCode status = U_ZERO_ERROR; + + normalizer = unorm2_getNFKDInstance (&status); + + if (U_SUCCESS (status)) { + zOutput = normalize_string (input, len / 2, + normalizer, + &nOutput, &status); + } + + if (!U_SUCCESS (status)) { + zOutput = g_memdup2 (input, len); + } + + /* Unaccenting is done in place */ + tracker_parser_unaccent_nfkd_string (zOutput, &nOutput); + + *len_out = nOutput; + + return zOutput; +} diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c index d24c5f1cb..b26b4bae5 100644 --- a/src/libtracker-common/tracker-parser-libunistring.c +++ b/src/libtracker-common/tracker-parser-libunistring.c @@ -30,6 +30,7 @@ #include <unictype.h> #include <unicase.h> +#include "tracker-language.h" #include "tracker-parser.h" #include "tracker-parser-utils.h" @@ -40,6 +41,9 @@ typedef enum { TRACKER_PARSER_WORD_TYPE_OTHER_NO_UNAC, } TrackerParserWordType; +/* If string lenth less than this value, allocating from the stack */ +#define MAX_STACK_STR_SIZE 8192 + /* Max possible length of a UTF-8 encoded string (just a safety limit) */ #define WORD_BUFFER_LENGTH 512 @@ -84,7 +88,7 @@ get_word_info (TrackerParser *parser, /* Get first character of the word as UCS4 */ first_unichar_len = u8_strmbtouc (&first_unichar, - &(parser->txt[parser->cursor])); + (const guchar *) &(parser->txt[parser->cursor])); if (first_unichar_len <= 0) { /* This should only happen if NIL was passed to u8_strmbtouc, * so better just force stop here */ @@ -106,7 +110,7 @@ get_word_info (TrackerParser *parser, i = parser->cursor + first_unichar_len; while (1) { /* Text bounds reached? */ - if (i >= parser->txt_size) + if (i >= (gsize) parser->txt_size) break; /* Proper unicode word break detected? */ if (parser->word_break_flags[i]) @@ -159,7 +163,7 @@ get_word_info (TrackerParser *parser, /* The input word in this method MUST be normalized in NFKD form, * and given in UTF-8, where str_length is the byte-length * (note: there is no trailing NUL character!) */ -gboolean +static gboolean tracker_parser_unaccent_nfkd_string (gpointer str, gsize *str_length) { @@ -181,7 +185,7 @@ tracker_parser_unaccent_nfkd_string (gpointer str, gint utf8_len; /* Get next character of the word as UCS4 */ - utf8_len = u8_strmbtouc (&unichar, &word[i]); + utf8_len = u8_strmbtouc (&unichar, (const guchar *) &word[i]); /* Invalid UTF-8 character or end of original string. */ if (utf8_len <= 0) { @@ -249,12 +253,12 @@ process_word_utf8 (TrackerParser *parser, /* Casefold and NFKD normalization in output. * NOTE: if the output buffer is not big enough, u8_casefold will * return a newly-allocated buffer. */ - normalized = u8_casefold ((const uint8_t *)word, - length, - uc_locale_language (), - UNINORM_NFKD, - word_buffer, - &new_word_length); + normalized = (gchar*) u8_casefold ((const uint8_t *)word, + length, + uc_locale_language (), + UNINORM_NFKD, + (guchar *) word_buffer, + &new_word_length); /* Case folding + Normalization failed, ignore this word */ g_return_val_if_fail (normalized != NULL, NULL); @@ -275,7 +279,7 @@ process_word_utf8 (TrackerParser *parser, normalized = length > WORD_BUFFER_LENGTH ? g_malloc (length + 1) : word_buffer; - for (i = 0; i < length; i++) { + for (i = 0; i < (gsize) length; i++) { normalized[i] = g_ascii_tolower (word[i]); } @@ -345,7 +349,7 @@ parser_next (TrackerParser *parser, /* Loop to look for next valid word */ while (!processed_word && - parser->cursor < parser->txt_size) { + parser->cursor < (gsize) parser->txt_size) { TrackerParserWordType type; gsize truncated_length; gboolean is_allowed; @@ -424,15 +428,12 @@ parser_next (TrackerParser *parser, } TrackerParser * -tracker_parser_new (TrackerLanguage *language) +tracker_parser_new (void) { TrackerParser *parser; - g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL); - parser = g_new0 (TrackerParser, 1); - - parser->language = g_object_ref (language); + parser->language = tracker_language_new (NULL); return parser; } @@ -541,3 +542,106 @@ tracker_parser_next (TrackerParser *parser, return str; } +gpointer +tracker_collation_init (void) +{ + /* Nothing to do */ + return NULL; +} + +void +tracker_collation_shutdown (gpointer collator) +{ + /* Nothing to do */ +} + +gint +tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + gint result; + guchar *aux1; + guchar *aux2; + + /* Note: str1 and str2 are NOT NUL-terminated */ + aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); + aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); + + memcpy (aux1, str1, len1); aux1[len1] = '\0'; + memcpy (aux2, str2, len2); aux2[len2] = '\0'; + + result = u8_strcoll (aux1, aux2); + + if (len1 >= MAX_STACK_STR_SIZE) + g_free (aux1); + if (len2 >= MAX_STACK_STR_SIZE) + g_free (aux2); + return result; +} + +gunichar2 * +tracker_parser_tolower (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + return u16_tolower (input, len / 2, NULL, NULL, NULL, len_out); +} + +gunichar2 * +tracker_parser_toupper (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + return u16_toupper (input, len / 2, NULL, NULL, NULL, len_out); +} + +gunichar2 * +tracker_parser_casefold (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + return u16_casefold (input, len / 2, NULL, NULL, NULL, len_out); +} + +gunichar2 * +tracker_parser_normalize (const gunichar2 *input, + GNormalizeMode mode, + gsize len, + gsize *len_out) +{ + uninorm_t nf; + + if (mode == G_NORMALIZE_NFC) + nf = UNINORM_NFC; + else if (mode == G_NORMALIZE_NFD) + nf = UNINORM_NFD; + else if (mode == G_NORMALIZE_NFKC) + nf = UNINORM_NFKC; + else if (mode == G_NORMALIZE_NFKD) + nf = UNINORM_NFKD; + else + g_assert_not_reached (); + + return u16_normalize (nf, input, len / 2, NULL, len_out); +} + +gunichar2 * +tracker_parser_unaccent (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + gunichar2 *zOutput; + gsize written = 0; + + zOutput = u16_normalize (UNINORM_NFKD, input, len, NULL, &written); + + /* Unaccenting is done in place */ + tracker_parser_unaccent_nfkd_string (zOutput, &written); + + *len_out = written; + + return zOutput; +} diff --git a/src/libtracker-common/tracker-parser-utils.h b/src/libtracker-common/tracker-parser-utils.h index b2440213f..84a48c58d 100644 --- a/src/libtracker-common/tracker-parser-utils.h +++ b/src/libtracker-common/tracker-parser-utils.h @@ -24,10 +24,6 @@ #include <glib.h> -#ifdef HAVE_LIBICU -#include <unicode/utypes.h> -#endif - G_BEGIN_DECLS /* ASCII-7 is in range [0x00,0x7F] */ diff --git a/src/libtracker-common/tracker-parser.c b/src/libtracker-common/tracker-parser.c new file mode 100644 index 000000000..aaaed58de --- /dev/null +++ b/src/libtracker-common/tracker-parser.c @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2023, Red Hat Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * Author: Carlos Garnacho <carlosg@gnome.org> + */ + +#include "config.h" + +#include <gio/gio.h> +#include <gmodule.h> + +#include "tracker-parser.h" + +#include "tracker-debug.h" + +static TrackerParser * (*parser_new) (void); +static void (*parser_free) (TrackerParser *parser); +static void (*parser_reset) (TrackerParser *parser, + const gchar *txt, + gint txt_size, + guint max_word_length, + gboolean enable_stemmer, + gboolean enable_unaccent, + gboolean ignore_stop_words, + gboolean ignore_reserved_words, + gboolean ignore_numbers); +static const gchar * (*parser_next) (TrackerParser *parser, + gint *position, + gint *byte_offset_start, + gint *byte_offset_end, + gboolean *stop_word, + gint *word_length); +static gpointer (*collation_init) (void); +static void (*collation_shutdown) (gpointer collator); +static gint (*collation_utf8) (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2); +static gunichar2 * (*util_tolower) (const gunichar2 *input, + gsize len, + gsize *len_out); +static gunichar2 * (*util_toupper) (const gunichar2 *input, + gsize len, + gsize *len_out); +static gunichar2 * (*util_casefold) (const gunichar2 *input, + gsize len, + gsize *len_out); +static gunichar2 * (*util_normalize) (const gunichar2 *input, + GNormalizeMode mode, + gsize len, + gsize *len_out); +static gunichar2 * (*util_unaccent) (const gunichar2 *input, + gsize len, + gsize *len_out); + +static void +ensure_init_parser (void) +{ + static GModule *module = NULL; + + if (module == NULL) { + const gchar *modules[] = { + + "libtracker-parser-libicu.so", + "libtracker-parser-libunistring.so" + }; + gchar *module_path; + guint i; + + g_assert (g_module_supported ()); + + for (i = 0; i < G_N_ELEMENTS (modules); i++) { + if (g_strcmp0 (g_get_current_dir (), BUILDROOT) == 0) { + /* Detect in-build runtime of this code, this may happen + * building introspection information or running tests. + * We want the in-tree modules to be loaded then. + */ + module_path = g_strdup_printf (BUILDROOT "/src/libtracker-common/%s", modules[i]); + } else { + module_path = g_strdup_printf (PRIVATE_LIBDIR "/%s", modules[i]); + } + + module = g_module_open (module_path, + G_MODULE_BIND_LAZY | + G_MODULE_BIND_LOCAL); + g_free (module_path); + + if (module) + break; + } + + g_assert (module != NULL); + + if (!g_module_symbol (module, "tracker_parser_new", (gpointer *) &parser_new) || + !g_module_symbol (module, "tracker_parser_free", (gpointer *) &parser_free) || + !g_module_symbol (module, "tracker_parser_reset", (gpointer *) &parser_reset) || + !g_module_symbol (module, "tracker_parser_next", (gpointer *) &parser_next) || + !g_module_symbol (module, "tracker_collation_init", (gpointer *) &collation_init) || + !g_module_symbol (module, "tracker_collation_shutdown", (gpointer *) &collation_shutdown) || + !g_module_symbol (module, "tracker_collation_utf8", (gpointer *) &collation_utf8) || + !g_module_symbol (module, "tracker_parser_tolower", (gpointer *) &util_tolower) || + !g_module_symbol (module, "tracker_parser_toupper", (gpointer *) &util_toupper) || + !g_module_symbol (module, "tracker_parser_casefold", (gpointer *) &util_casefold) || + !g_module_symbol (module, "tracker_parser_normalize", (gpointer *) &util_normalize) || + !g_module_symbol (module, "tracker_parser_unaccent", (gpointer *) &util_unaccent)) { + g_printerr ("Could not initialize parser functions: %s\n", + g_module_error ()); + } + + TRACKER_NOTE (COLLATION, g_message ("Initialized collator %s", g_module_name (module))); + + g_module_make_resident (module); + g_module_close (module); + } +} + +TrackerParser * +tracker_parser_new (void) +{ + ensure_init_parser (); + + return parser_new (); +} + +void +tracker_parser_free (TrackerParser *parser) +{ + parser_free (parser); +} + +void +tracker_parser_reset (TrackerParser *parser, + const gchar *txt, + gint txt_size, + guint max_word_length, + gboolean enable_stemmer, + gboolean enable_unaccent, + gboolean ignore_stop_words, + gboolean ignore_reserved_words, + gboolean ignore_numbers) +{ + parser_reset (parser, txt, txt_size, + max_word_length, + enable_stemmer, + enable_unaccent, + ignore_stop_words, + ignore_reserved_words, + ignore_numbers); +} + +const gchar * +tracker_parser_next (TrackerParser *parser, + gint *position, + gint *byte_offset_start, + gint *byte_offset_end, + gboolean *stop_word, + gint *word_length) +{ + return parser_next (parser, position, + byte_offset_start, + byte_offset_end, + stop_word, + word_length); +} + +gpointer +tracker_collation_init (void) +{ + ensure_init_parser (); + + return collation_init (); +} + +void +tracker_collation_shutdown (gpointer collator) +{ + collation_shutdown (collator); +} + +gint +tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2) +{ + return collation_utf8 (collator, len1, str1, len2, str2); +} + +gunichar2 * +tracker_parser_tolower (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + ensure_init_parser (); + + return util_tolower (input, len, len_out); +} + +gunichar2 * +tracker_parser_toupper (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + ensure_init_parser (); + + return util_toupper (input, len, len_out); +} + +gunichar2 * +tracker_parser_casefold (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + ensure_init_parser (); + + return util_casefold (input, len, len_out); +} + +gunichar2 * +tracker_parser_normalize (const gunichar2 *input, + GNormalizeMode mode, + gsize len, + gsize *len_out) +{ + ensure_init_parser (); + + return util_normalize (input, mode, len, len_out); +} + +gunichar2 * +tracker_parser_unaccent (const gunichar2 *input, + gsize len, + gsize *len_out) +{ + ensure_init_parser (); + + return util_unaccent (input, len, len_out); +} diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h index 3c8271503..78d67e21f 100644 --- a/src/libtracker-common/tracker-parser.h +++ b/src/libtracker-common/tracker-parser.h @@ -34,9 +34,10 @@ G_BEGIN_DECLS +/* Parser */ typedef struct TrackerParser TrackerParser; -TrackerParser *tracker_parser_new (TrackerLanguage *language); +TrackerParser *tracker_parser_new (void); void tracker_parser_reset (TrackerParser *parser, const gchar *txt, @@ -57,10 +58,39 @@ const gchar * tracker_parser_next (TrackerParser *parser, void tracker_parser_free (TrackerParser *parser); +/* Collation */ +gpointer tracker_collation_init (void); + +void tracker_collation_shutdown (gpointer collator); + +gint tracker_collation_utf8 (gpointer collator, + gint len1, + gconstpointer str1, + gint len2, + gconstpointer str2); + /* Other helper methods */ -gboolean tracker_parser_unaccent_nfkd_string (gpointer str, - gsize *str_length); +gunichar2 * tracker_parser_tolower (const gunichar2 *input, + gsize len, + gsize *len_out); + +gunichar2 * tracker_parser_toupper (const gunichar2 *input, + gsize len, + gsize *len_out); + +gunichar2 * tracker_parser_casefold (const gunichar2 *input, + gsize len, + gsize *len_out); + +gunichar2 * tracker_parser_normalize (const gunichar2 *input, + GNormalizeMode mode, + gsize len, + gsize *len_out); + +gunichar2 * tracker_parser_unaccent (const gunichar2 *input, + gsize len, + gsize *len_out); G_END_DECLS diff --git a/src/libtracker-sparql/core/tracker-collation.c b/src/libtracker-sparql/core/tracker-collation.c index beca29e3b..0e82d66dd 100644 --- a/src/libtracker-sparql/core/tracker-collation.c +++ b/src/libtracker-sparql/core/tracker-collation.c @@ -18,229 +18,12 @@ */ #include "config.h" + #include <glib.h> #include <glib/gi18n.h> -#include <string.h> -#include <locale.h> -#include <libtracker-common/tracker-debug.h> -#include <libtracker-common/tracker-locale.h> #include "tracker-collation.h" -/* If defined, will dump additional traces */ -#ifdef G_ENABLE_DEBUG -#define trace(message, ...) TRACKER_NOTE (COLLATION, g_message (message, ##__VA_ARGS__)) -#else -#define trace(...) -#endif - -#ifdef HAVE_LIBUNISTRING -/* libunistring versions prior to 9.1.2 need this hack */ -#define _UNUSED_PARAMETER_ -#include <unistr.h> -#elif defined(HAVE_LIBICU) -#include <unicode/ucol.h> -#include <unicode/utypes.h> -#endif - -/* If string lenth less than this value, allocating from the stack */ -#define MAX_STACK_STR_SIZE 8192 - -#ifdef HAVE_LIBUNISTRING /* ---- GNU libunistring based collation ---- */ - -gpointer -tracker_collation_init (void) -{ - gchar *locale; - - /* Get locale! */ - locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); - TRACKER_NOTE (COLLATION, g_message ("[libunistring collation] Initializing collator for locale '%s'", locale)); - g_free (locale); - /* Nothing to do */ - return NULL; -} - -void -tracker_collation_shutdown (gpointer collator) -{ - /* Nothing to do */ -} - -gint -tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2) -{ - gint result; - gchar *aux1; - gchar *aux2; - - /* Note: str1 and str2 are NOT NUL-terminated */ - aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); - aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); - - memcpy (aux1, str1, len1); aux1[len1] = '\0'; - memcpy (aux2, str2, len2); aux2[len2] = '\0'; - - result = u8_strcoll (aux1, aux2); - - trace ("(libunistring) Collating '%s' and '%s' (%d)", - aux1, aux2, result); - - if (len1 >= MAX_STACK_STR_SIZE) - g_free (aux1); - if (len2 >= MAX_STACK_STR_SIZE) - g_free (aux2); - return result; -} - -#elif defined(HAVE_LIBICU) /* ---- ICU based collation (UTF-16) ----*/ - -gpointer -tracker_collation_init (void) -{ - UCollator *collator = NULL; - UErrorCode status = U_ZERO_ERROR; - gchar *locale; - - /* Get locale! */ - locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); - - TRACKER_NOTE (COLLATION, g_message ("[ICU collation] Initializing collator for locale '%s'", locale)); - collator = ucol_open (locale, &status); - if (!collator) { - g_warning ("[ICU collation] Collator for locale '%s' cannot be created: %s", - locale, u_errorName (status)); - /* Try to get UCA collator then... */ - status = U_ZERO_ERROR; - collator = ucol_open ("root", &status); - if (!collator) { - g_critical ("[ICU collation] UCA Collator cannot be created: %s", - u_errorName (status)); - } - } - g_free (locale); - return collator; -} - -void -tracker_collation_shutdown (gpointer collator) -{ - if (collator) - ucol_close ((UCollator *)collator); -} - -gint -tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2) -{ - UErrorCode status = U_ZERO_ERROR; - UCharIterator iter1; - UCharIterator iter2; - UCollationResult result; - - /* Collator must be created before trying to collate */ - g_return_val_if_fail (collator, -1); - - /* Setup iterators */ - uiter_setUTF8 (&iter1, str1, len1); - uiter_setUTF8 (&iter2, str2, len2); - - result = ucol_strcollIter ((UCollator *)collator, - &iter1, - &iter2, - &status); - if (status != U_ZERO_ERROR) - g_critical ("Error collating: %s", u_errorName (status)); - -#ifdef ENABLE_TRACE - { - gchar *aux1; - gchar *aux2; - - /* Note: str1 and str2 are NOT NUL-terminated */ - aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); - aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); - - memcpy (aux1, str1, len1); aux1[len1] = '\0'; - memcpy (aux2, str2, len2); aux2[len2] = '\0'; - - trace ("(ICU) Collating '%s' and '%s' (%d)", - aux1, aux2, result); - - if (len1 >= MAX_STACK_STR_SIZE) - g_free (aux1); - if (len2 >= MAX_STACK_STR_SIZE) - g_free (aux2); - } -#endif /* ENABLE_TRACE */ - - if (result == UCOL_GREATER) - return 1; - if (result == UCOL_LESS) - return -1; - return 0; -} - -#else /* ---- GLib based collation ---- */ - -gpointer -tracker_collation_init (void) -{ - gchar *locale; - - /* Get locale! */ - locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); - TRACKER_NOTE (COLLATION, g_message ("[GLib collation] Initializing collator for locale '%s'", locale)); - g_free (locale); - /* Nothing to do */ - return NULL; -} - -void -tracker_collation_shutdown (gpointer collator) -{ - /* Nothing to do */ -} - -gint -tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2) -{ - gint result; - gchar *aux1; - gchar *aux2; - - /* Note: str1 and str2 are NOT NUL-terminated */ - aux1 = (len1 < MAX_STACK_STR_SIZE) ? g_alloca (len1+1) : g_malloc (len1+1); - aux2 = (len2 < MAX_STACK_STR_SIZE) ? g_alloca (len2+1) : g_malloc (len2+1); - - memcpy (aux1, str1, len1); aux1[len1] = '\0'; - memcpy (aux2, str2, len2); aux2[len2] = '\0'; - - result = g_utf8_collate (aux1, aux2); - - trace ("(GLib) Collating '%s' and '%s' (%d)", - aux1, aux2, result); - - if (len1 >= MAX_STACK_STR_SIZE) - g_free (aux1); - if (len2 >= MAX_STACK_STR_SIZE) - g_free (aux2); - return result; -} - -#endif - static gboolean skip_non_alphanumeric (const gchar **str, gint *len) diff --git a/src/libtracker-sparql/core/tracker-collation.h b/src/libtracker-sparql/core/tracker-collation.h index 95551a9f0..6369aefba 100644 --- a/src/libtracker-sparql/core/tracker-collation.h +++ b/src/libtracker-sparql/core/tracker-collation.h @@ -22,13 +22,7 @@ G_BEGIN_DECLS -gpointer tracker_collation_init (void); -void tracker_collation_shutdown (gpointer collator); -gint tracker_collation_utf8 (gpointer collator, - gint len1, - gconstpointer str1, - gint len2, - gconstpointer str2); +#include <libtracker-common/tracker-parser.h> gint tracker_collation_utf8_title (gpointer collator, gint len1, @@ -36,12 +30,7 @@ gint tracker_collation_utf8_title (gpointer collator, gint len2, gconstpointer str2); -#ifdef HAVE_LIBICU #define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x10fffd) -#else -/* glibc-based collators do not properly sort private use characters */ -#define TRACKER_COLLATION_LAST_CHAR ((gunichar) 0x9fa5) -#endif G_END_DECLS diff --git a/src/libtracker-sparql/core/tracker-data-manager.c b/src/libtracker-sparql/core/tracker-data-manager.c index 1481a1a02..80e53623e 100644 --- a/src/libtracker-sparql/core/tracker-data-manager.c +++ b/src/libtracker-sparql/core/tracker-data-manager.c @@ -25,7 +25,6 @@ #include <glib/gstdio.h> #include <libtracker-common/tracker-debug.h> -#include <libtracker-common/tracker-locale.h> #include <libtracker-sparql/tracker-deserializer-rdf.h> diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c index 6ec1c1194..24c863616 100644 --- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c +++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c @@ -28,30 +28,13 @@ #include <libtracker-common/tracker-date-time.h> #include <libtracker-common/tracker-debug.h> -#include <libtracker-common/tracker-locale.h> #include <libtracker-common/tracker-parser.h> #include <libtracker-sparql/tracker-cursor.h> #include <libtracker-sparql/tracker-private.h> #include "tracker-fts.h" - - -#ifdef HAVE_LIBUNISTRING -/* libunistring versions prior to 9.1.2 need this hack */ -#define _UNUSED_PARAMETER_ -#include <unistr.h> -#include <unicase.h> -#elif defined(HAVE_LIBICU) -#include <unicode/utypes.h> -#include <unicode/uregex.h> -#include <unicode/ustring.h> -#include <unicode/ucol.h> -#include <unicode/unorm2.h> -#endif - #include "tracker-collation.h" - #include "tracker-db-interface-sqlite.h" #include "tracker-db-manager.h" #include "tracker-data-enum-types.h" @@ -971,19 +954,21 @@ function_sparql_replace (sqlite3_context *context, g_free (unescaped); } -#ifdef HAVE_LIBUNISTRING - static void function_sparql_lower_case (sqlite3_context *context, int argc, sqlite3_value *argv[]) { - const uint16_t *zInput; - uint16_t *zOutput; - size_t written = 0; + const gchar *fn = "fn:lower-case"; + const gunichar2 *zInput; + gunichar2 *zOutput; int nInput; + gsize nOutput; - g_assert (argc == 1); + if (argc != 1) { + result_context_function_error (context, fn, "Invalid argument count"); + return; + } zInput = sqlite3_value_text16 (argv[0]); @@ -993,9 +978,8 @@ function_sparql_lower_case (sqlite3_context *context, nInput = sqlite3_value_bytes16 (argv[0]); - zOutput = u16_tolower (zInput, nInput/2, NULL, NULL, NULL, &written); - - sqlite3_result_text16 (context, zOutput, written * 2, free); + zOutput = tracker_parser_tolower (zInput, nInput, &nOutput); + sqlite3_result_text16 (context, zOutput, -1, free); } static void @@ -1003,12 +987,16 @@ function_sparql_upper_case (sqlite3_context *context, int argc, sqlite3_value *argv[]) { - const uint16_t *zInput; - uint16_t *zOutput; - size_t written = 0; + const gchar *fn = "fn:upper-case"; + const gunichar2 *zInput; + gunichar2 *zOutput; int nInput; + gsize nOutput; - g_assert (argc == 1); + if (argc != 1) { + result_context_function_error (context, fn, "Invalid argument count"); + return; + } zInput = sqlite3_value_text16 (argv[0]); @@ -1018,9 +1006,8 @@ function_sparql_upper_case (sqlite3_context *context, nInput = sqlite3_value_bytes16 (argv[0]); - zOutput = u16_toupper (zInput, nInput / 2, NULL, NULL, NULL, &written); - - sqlite3_result_text16 (context, zOutput, written * 2, free); + zOutput = tracker_parser_toupper (zInput, nInput, &nOutput); + sqlite3_result_text16 (context, zOutput, -1, free); } static void @@ -1028,12 +1015,16 @@ function_sparql_case_fold (sqlite3_context *context, int argc, sqlite3_value *argv[]) { - const uint16_t *zInput; - uint16_t *zOutput; - size_t written = 0; + const gchar *fn = "tracker:case-fold"; + const gunichar2 *zInput; + gunichar2 *zOutput; int nInput; + gsize nOutput; - g_assert (argc == 1); + if (argc != 1) { + result_context_function_error (context, fn, "Invalid argument count"); + return; + } zInput = sqlite3_value_text16 (argv[0]); @@ -1043,9 +1034,8 @@ function_sparql_case_fold (sqlite3_context *context, nInput = sqlite3_value_bytes16 (argv[0]); - zOutput = u16_casefold (zInput, nInput/2, NULL, NULL, NULL, &written); - - sqlite3_result_text16 (context, zOutput, written * 2, free); + zOutput = tracker_parser_casefold (zInput, nInput, &nOutput); + sqlite3_result_text16 (context, zOutput, -1, free); } static void @@ -1055,11 +1045,11 @@ function_sparql_normalize (sqlite3_context *context, { const gchar *fn = "tracker:normalize"; const gchar *nfstr; - const uint16_t *zInput; - uint16_t *zOutput; - size_t written = 0; + const gunichar2 *zInput; + gunichar2 *zOutput = NULL; + GNormalizeMode mode; int nInput; - uninorm_t nf; + gsize nOutput; if (argc != 2) { result_context_function_error (context, fn, "Invalid argument count"); @@ -1072,25 +1062,24 @@ function_sparql_normalize (sqlite3_context *context, return; } - nfstr = sqlite3_value_text (argv[1]); + nInput = sqlite3_value_bytes16 (argv[0]); + + nfstr = (gchar *)sqlite3_value_text (argv[1]); if (g_ascii_strcasecmp (nfstr, "nfc") == 0) - nf = UNINORM_NFC; + mode = G_NORMALIZE_NFC; else if (g_ascii_strcasecmp (nfstr, "nfd") == 0) - nf = UNINORM_NFD; + mode = G_NORMALIZE_NFD; else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0) - nf = UNINORM_NFKC; + mode = G_NORMALIZE_NFKC; else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0) - nf = UNINORM_NFKD; + mode = G_NORMALIZE_NFKD; else { - result_context_function_error (context, fn, "Invalid normalization specified, options are 'nfc', 'nfd', 'nfkc' or 'nfkd'"); + result_context_function_error (context, fn, "Invalid normalization specified"); return; } - nInput = sqlite3_value_bytes16 (argv[0]); - - zOutput = u16_normalize (nf, zInput, nInput/2, NULL, &written); - - sqlite3_result_text16 (context, zOutput, written * 2, free); + zOutput = tracker_parser_normalize (zInput, mode, nInput, &nOutput); + sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free); } static void @@ -1098,131 +1087,17 @@ function_sparql_unaccent (sqlite3_context *context, int argc, sqlite3_value *argv[]) { - const gchar *zInput; - gchar *zOutput; - gsize written = 0; - int nInput; - - g_assert (argc == 1); - - zInput = sqlite3_value_text (argv[0]); - - if (!zInput) { - return; - } - - nInput = sqlite3_value_bytes (argv[0]); - - zOutput = u8_normalize (UNINORM_NFKD, zInput, nInput, NULL, &written); - - /* Unaccenting is done in place */ - tracker_parser_unaccent_nfkd_string (zOutput, &written); - - sqlite3_result_text (context, zOutput, written, free); -} - -#elif defined(HAVE_LIBICU) - -static void -function_sparql_lower_case (sqlite3_context *context, - int argc, - sqlite3_value *argv[]) -{ - const gchar *fn = "fn:lower-case"; - const UChar *zInput; - UChar *zOutput; - int nInput; - int nOutput; - UErrorCode status = U_ZERO_ERROR; - - g_assert (argc == 1); - - zInput = sqlite3_value_text16 (argv[0]); - - if (!zInput) { - return; - } - - nInput = sqlite3_value_bytes16 (argv[0]); - - nOutput = nInput * 2 + 2; - zOutput = sqlite3_malloc (nOutput); - - if (!zOutput) { - return; - } - - u_strToLower (zOutput, nOutput/2, zInput, nInput/2, NULL, &status); - - if (!U_SUCCESS (status)){ - char zBuf[128]; - sqlite3_snprintf (128, zBuf, "ICU error: u_strToLower(): %s", u_errorName (status)); - zBuf[127] = '\0'; - sqlite3_free (zOutput); - result_context_function_error (context, fn, zBuf); - return; - } - - sqlite3_result_text16 (context, zOutput, -1, sqlite3_free); -} - -static void -function_sparql_upper_case (sqlite3_context *context, - int argc, - sqlite3_value *argv[]) -{ - const gchar *fn = "fn:upper-case"; - const UChar *zInput; - UChar *zOutput; + const gchar *fn = "tracker:unaccent"; + const gunichar2 *zInput; + gunichar2 *zOutput = NULL; int nInput; - int nOutput; - UErrorCode status = U_ZERO_ERROR; - - g_assert (argc == 1); - - zInput = sqlite3_value_text16 (argv[0]); - - if (!zInput) { - return; - } - - nInput = sqlite3_value_bytes16 (argv[0]); - - nOutput = nInput * 2 + 2; - zOutput = sqlite3_malloc (nOutput); - - if (!zOutput) { - return; - } - - u_strToUpper (zOutput, nOutput / 2, zInput, nInput / 2, NULL, &status); + gsize nOutput; - if (!U_SUCCESS (status)){ - char zBuf[128]; - sqlite3_snprintf (128, zBuf, "ICU error: u_strToUpper(): %s", u_errorName (status)); - zBuf[127] = '\0'; - sqlite3_free (zOutput); - result_context_function_error (context, fn, zBuf); + if (argc != 1) { + result_context_function_error (context, fn, "Invalid argument count"); return; } - sqlite3_result_text16 (context, zOutput, -1, sqlite3_free); -} - -static void -function_sparql_case_fold (sqlite3_context *context, - int argc, - sqlite3_value *argv[]) -{ - const gchar *fn = "tracker:case-fold"; - const UChar *zInput; - UChar *zOutput; - int nInput; - int nOutput; - UErrorCode status = U_ZERO_ERROR; - - g_assert (argc == 1); - zInput = sqlite3_value_text16 (argv[0]); if (!zInput) { @@ -1231,25 +1106,8 @@ function_sparql_case_fold (sqlite3_context *context, nInput = sqlite3_value_bytes16 (argv[0]); - nOutput = nInput * 2 + 2; - zOutput = sqlite3_malloc (nOutput); - - if (!zOutput) { - return; - } - - u_strFoldCase (zOutput, nOutput/2, zInput, nInput/2, U_FOLD_CASE_DEFAULT, &status); - - if (!U_SUCCESS (status)){ - char zBuf[128]; - sqlite3_snprintf (128, zBuf, "ICU error: u_strFoldCase: %s", u_errorName (status)); - zBuf[127] = '\0'; - sqlite3_free (zOutput); - result_context_function_error (context, fn, zBuf); - return; - } - - sqlite3_result_text16 (context, zOutput, -1, sqlite3_free); + zOutput = tracker_parser_unaccent (zInput, nInput, &nOutput); + sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free); } static void @@ -1277,141 +1135,6 @@ function_sparql_strip_punctuation (sqlite3_context *context, sqlite3_result_text (context, output, -1, g_free); } -static gunichar2 * -normalize_string (const gunichar2 *string, - gsize string_len, /* In gunichar2s */ - const UNormalizer2 *normalizer, - gsize *len_out, /* In gunichar2s */ - UErrorCode *status) -{ - int nOutput; - gunichar2 *zOutput; - - nOutput = (string_len * 2) + 1; - zOutput = g_new0 (gunichar2, nOutput); - - nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status); - - if (*status == U_BUFFER_OVERFLOW_ERROR) { - /* Try again after allocating enough space for the normalization */ - *status = U_ZERO_ERROR; - zOutput = g_renew (gunichar2, zOutput, nOutput); - memset (zOutput, 0, nOutput * sizeof (gunichar2)); - nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status); - } - - if (!U_SUCCESS (*status)) { - g_clear_pointer (&zOutput, g_free); - nOutput = 0; - } - - if (len_out) - *len_out = nOutput; - - return zOutput; -} - -static void -function_sparql_normalize (sqlite3_context *context, - int argc, - sqlite3_value *argv[]) -{ - const gchar *fn = "tracker:normalize"; - const gchar *nfstr; - const uint16_t *zInput; - uint16_t *zOutput = NULL; - int nInput; - gsize nOutput; - const UNormalizer2 *normalizer; - UErrorCode status = U_ZERO_ERROR; - - if (argc != 2) { - result_context_function_error (context, fn, "Invalid argument count"); - return; - } - - zInput = sqlite3_value_text16 (argv[0]); - - if (!zInput) { - return; - } - - nfstr = (gchar *)sqlite3_value_text (argv[1]); - if (g_ascii_strcasecmp (nfstr, "nfc") == 0) - normalizer = unorm2_getNFCInstance (&status); - else if (g_ascii_strcasecmp (nfstr, "nfd") == 0) - normalizer = unorm2_getNFDInstance (&status); - else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0) - normalizer = unorm2_getNFKCInstance (&status); - else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0) - normalizer = unorm2_getNFKDInstance (&status); - else { - result_context_function_error (context, fn, "Invalid normalization specified"); - return; - } - - if (U_SUCCESS (status)) { - nInput = sqlite3_value_bytes16 (argv[0]); - zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status); - } - - if (!U_SUCCESS (status)) { - char zBuf[128]; - sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status)); - zBuf[127] = '\0'; - g_free (zOutput); - result_context_function_error (context, fn, zBuf); - return; - } - - sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free); -} - -static void -function_sparql_unaccent (sqlite3_context *context, - int argc, - sqlite3_value *argv[]) -{ - const gchar *fn = "tracker:unaccent"; - const uint16_t *zInput; - uint16_t *zOutput = NULL; - int nInput; - gsize nOutput; - const UNormalizer2 *normalizer; - UErrorCode status = U_ZERO_ERROR; - - g_assert (argc == 1); - - zInput = sqlite3_value_text16 (argv[0]); - - if (!zInput) { - return; - } - - normalizer = unorm2_getNFKDInstance (&status); - - if (U_SUCCESS (status)) { - nInput = sqlite3_value_bytes16 (argv[0]); - zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status); - } - - if (!U_SUCCESS (status)) { - char zBuf[128]; - sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status)); - zBuf[127] = '\0'; - g_free (zOutput); - result_context_function_error (context, fn, zBuf); - return; - } - - /* Unaccenting is done in place */ - tracker_parser_unaccent_nfkd_string (zOutput, &nOutput); - - sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free); -} - -#endif - static void function_sparql_encode_for_uri (sqlite3_context *context, int argc, diff --git a/src/libtracker-sparql/core/tracker-db-manager.c b/src/libtracker-sparql/core/tracker-db-manager.c index cb4318727..cebf1fe5b 100644 --- a/src/libtracker-sparql/core/tracker-db-manager.c +++ b/src/libtracker-sparql/core/tracker-db-manager.c @@ -22,6 +22,7 @@ #include <fcntl.h> #include <glib/gstdio.h> +#include <locale.h> #include <libtracker-common/tracker-common.h> #include <libtracker-common/tracker-parser.h> @@ -335,11 +336,11 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager, GError **error) { gchar *db_locale; - gchar *current_locale; + const gchar *current_locale; gboolean changed; /* Get current collation locale */ - current_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); + current_locale = setlocale (LC_COLLATE, NULL); /* Get db locale */ db_locale = db_get_locale (db_manager); @@ -361,7 +362,6 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager, } g_free (db_locale); - g_free (current_locale); return changed; } @@ -369,13 +369,12 @@ tracker_db_manager_locale_changed (TrackerDBManager *db_manager, void tracker_db_manager_set_current_locale (TrackerDBManager *db_manager) { - gchar *current_locale; + const gchar *current_locale; /* Get current collation locale */ - current_locale = tracker_locale_get (TRACKER_LOCALE_COLLATE); + current_locale = setlocale (LC_COLLATE, NULL); g_debug ("Saving DB locale as: '%s'", current_locale); db_set_locale (db_manager, current_locale); - g_free (current_locale); } static void diff --git a/src/libtracker-sparql/core/tracker-fts-tokenizer.c b/src/libtracker-sparql/core/tracker-fts-tokenizer.c index 66f68a069..e9dac1efa 100644 --- a/src/libtracker-sparql/core/tracker-fts-tokenizer.c +++ b/src/libtracker-sparql/core/tracker-fts-tokenizer.c @@ -38,7 +38,6 @@ typedef struct TrackerTokenizer TrackerTokenizer; typedef struct TrackerTokenizerFunctionData TrackerTokenizerFunctionData; struct TrackerTokenizerData { - TrackerLanguage *language; TrackerDBManagerFlags flags; }; @@ -65,7 +64,7 @@ tracker_tokenizer_create (void *data, tokenizer = g_new0 (TrackerTokenizer, 1); tokenizer->data = data; - tokenizer->parser = tracker_parser_new (tokenizer->data->language); + tokenizer->parser = tracker_parser_new (); *tokenizer_out = (Fts5Tokenizer *) tokenizer; @@ -159,7 +158,6 @@ tracker_tokenizer_data_new (TrackerDBManagerFlags flags) TrackerTokenizerData *p; p = g_new0 (TrackerTokenizerData, 1); - p->language = tracker_language_new (NULL); p->flags = flags; return p; @@ -170,7 +168,6 @@ tracker_tokenizer_data_free (gpointer user_data) { TrackerTokenizerData *data = user_data; - g_object_unref (data->language); g_free (data); } diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c index 2e70a3513..c44254bcb 100644 --- a/src/libtracker-sparql/direct/tracker-direct.c +++ b/src/libtracker-sparql/direct/tracker-direct.c @@ -489,8 +489,6 @@ tracker_direct_connection_initable_init (GInitable *initable, conn = TRACKER_DIRECT_CONNECTION (initable); priv = tracker_direct_connection_get_instance_private (conn); - tracker_locale_sanity_check (); - if (!set_up_thread_pools (conn, error)) return FALSE; |