summaryrefslogtreecommitdiff
path: root/src/libtracker-sparql/core
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2023-03-02 11:01:39 +0100
committerCarlos Garnacho <carlosg@gnome.org>2023-03-02 15:44:59 +0100
commit819e2510fbb1fd54dd41d02c99583be94a47e4c3 (patch)
tree0ee95a12b37dd3854a485db84c9298e619b02a69 /src/libtracker-sparql/core
parent6d250002586146de5ff1d34eaa29440bc18ff3ae (diff)
downloadtracker-819e2510fbb1fd54dd41d02c99583be94a47e4c3.tar.gz
core: Move unicode treatment SPARQL function features to helpers
Have the helpers defined together with the TrackerParser, so we don't drag details about the unicode library to other parts of the code.
Diffstat (limited to 'src/libtracker-sparql/core')
-rw-r--r--src/libtracker-sparql/core/tracker-db-interface-sqlite.c366
1 files changed, 45 insertions, 321 deletions
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 36f4fdcc9..9e372e408 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -35,23 +35,7 @@
#include <libtracker-sparql/tracker-private.h>
#include "tracker-fts.h"
-
-
-#ifdef HAVE_LIBUNISTRING
-/* libunistring versions prior to 9.1.2 need this hack */
-#define _UNUSED_PARAMETER_
-#include <unistr.h>
-#include <unicase.h>
-#elif defined(HAVE_LIBICU)
-#include <unicode/utypes.h>
-#include <unicode/uregex.h>
-#include <unicode/ustring.h>
-#include <unicode/ucol.h>
-#include <unicode/unorm2.h>
-#endif
-
#include "tracker-collation.h"
-
#include "tracker-db-interface-sqlite.h"
#include "tracker-db-manager.h"
#include "tracker-data-enum-types.h"
@@ -971,97 +955,18 @@ function_sparql_replace (sqlite3_context *context,
g_free (unescaped);
}
-#ifdef HAVE_LIBUNISTRING
-
static void
function_sparql_lower_case (sqlite3_context *context,
int argc,
sqlite3_value *argv[])
{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
- int nInput;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- zOutput = u16_tolower (zInput, nInput/2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_upper_case (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
- int nInput;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- zOutput = u16_toupper (zInput, nInput / 2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_case_fold (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
- int nInput;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- zOutput = u16_casefold (zInput, nInput/2, NULL, NULL, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_normalize (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "tracker:normalize";
- const gchar *nfstr;
- const uint16_t *zInput;
- uint16_t *zOutput;
- size_t written = 0;
+ const gchar *fn = "fn:lower-case";
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
- uninorm_t nf;
+ gsize nOutput;
- if (argc != 2) {
+ if (argc != 1) {
result_context_function_error (context, fn, "Invalid argument count");
return;
}
@@ -1072,98 +977,10 @@ function_sparql_normalize (sqlite3_context *context,
return;
}
- nfstr = sqlite3_value_text (argv[1]);
- if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
- nf = UNINORM_NFC;
- else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
- nf = UNINORM_NFD;
- else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
- nf = UNINORM_NFKC;
- else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
- nf = UNINORM_NFKD;
- else {
- result_context_function_error (context, fn, "Invalid normalization specified, options are 'nfc', 'nfd', 'nfkc' or 'nfkd'");
- return;
- }
-
nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = u16_normalize (nf, zInput, nInput/2, NULL, &written);
-
- sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_unaccent (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *zInput;
- gchar *zOutput;
- gsize written = 0;
- int nInput;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes (argv[0]);
-
- zOutput = u8_normalize (UNINORM_NFKD, zInput, nInput, NULL, &written);
-
- /* Unaccenting is done in place */
- tracker_parser_unaccent_nfkd_string (zOutput, &written);
-
- sqlite3_result_text (context, zOutput, written, free);
-}
-
-#elif defined(HAVE_LIBICU)
-
-static void
-function_sparql_lower_case (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- const gchar *fn = "fn:lower-case";
- const UChar *zInput;
- UChar *zOutput;
- int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
-
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
- return;
- }
-
- nInput = sqlite3_value_bytes16 (argv[0]);
-
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strToLower (zOutput, nOutput/2, zInput, nInput/2, NULL, &status);
-
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strToLower(): %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+ zOutput = tracker_parser_tolower (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1172,13 +989,15 @@ function_sparql_upper_case (sqlite3_context *context,
sqlite3_value *argv[])
{
const gchar *fn = "fn:upper-case";
- const UChar *zInput;
- UChar *zOutput;
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
+ gsize nOutput;
- g_assert (argc == 1);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
+ return;
+ }
zInput = sqlite3_value_text16 (argv[0]);
@@ -1188,25 +1007,8 @@ function_sparql_upper_case (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strToUpper (zOutput, nOutput / 2, zInput, nInput / 2, NULL, &status);
-
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strToUpper(): %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+ zOutput = tracker_parser_toupper (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1215,13 +1017,15 @@ function_sparql_case_fold (sqlite3_context *context,
sqlite3_value *argv[])
{
const gchar *fn = "tracker:case-fold";
- const UChar *zInput;
- UChar *zOutput;
+ const gunichar2 *zInput;
+ gunichar2 *zOutput;
int nInput;
- int nOutput;
- UErrorCode status = U_ZERO_ERROR;
+ gsize nOutput;
- g_assert (argc == 1);
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
+ return;
+ }
zInput = sqlite3_value_text16 (argv[0]);
@@ -1231,59 +1035,8 @@ function_sparql_case_fold (sqlite3_context *context,
nInput = sqlite3_value_bytes16 (argv[0]);
- nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc (nOutput);
-
- if (!zOutput) {
- return;
- }
-
- u_strFoldCase (zOutput, nOutput/2, zInput, nInput/2, U_FOLD_CASE_DEFAULT, &status);
-
- if (!U_SUCCESS (status)){
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: u_strFoldCase: %s", u_errorName (status));
- zBuf[127] = '\0';
- sqlite3_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
-}
-
-static gunichar2 *
-normalize_string (const gunichar2 *string,
- gsize string_len, /* In gunichar2s */
- const UNormalizer2 *normalizer,
- gsize *len_out, /* In gunichar2s */
- UErrorCode *status)
-{
- int nOutput;
- gunichar2 *zOutput;
-
- nOutput = (string_len * 2) + 1;
- zOutput = g_new0 (gunichar2, nOutput);
-
- nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
-
- if (*status == U_BUFFER_OVERFLOW_ERROR) {
- /* Try again after allocating enough space for the normalization */
- *status = U_ZERO_ERROR;
- zOutput = g_renew (gunichar2, zOutput, nOutput);
- memset (zOutput, 0, nOutput * sizeof (gunichar2));
- nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
- }
-
- if (!U_SUCCESS (*status)) {
- g_clear_pointer (&zOutput, g_free);
- nOutput = 0;
- }
-
- if (len_out)
- *len_out = nOutput;
-
- return zOutput;
+ zOutput = tracker_parser_casefold (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, -1, free);
}
static void
@@ -1293,12 +1046,11 @@ function_sparql_normalize (sqlite3_context *context,
{
const gchar *fn = "tracker:normalize";
const gchar *nfstr;
- const uint16_t *zInput;
- uint16_t *zOutput = NULL;
+ const gunichar2 *zInput;
+ gunichar2 *zOutput = NULL;
+ GNormalizeMode mode;
int nInput;
gsize nOutput;
- const UNormalizer2 *normalizer;
- UErrorCode status = U_ZERO_ERROR;
if (argc != 2) {
result_context_function_error (context, fn, "Invalid argument count");
@@ -1311,35 +1063,24 @@ function_sparql_normalize (sqlite3_context *context,
return;
}
+ nInput = sqlite3_value_bytes16 (argv[0]);
+
nfstr = (gchar *)sqlite3_value_text (argv[1]);
if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
- normalizer = unorm2_getNFCInstance (&status);
+ mode = G_NORMALIZE_NFC;
else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
- normalizer = unorm2_getNFDInstance (&status);
+ mode = G_NORMALIZE_NFD;
else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
- normalizer = unorm2_getNFKCInstance (&status);
+ mode = G_NORMALIZE_NFKC;
else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
- normalizer = unorm2_getNFKDInstance (&status);
+ mode = G_NORMALIZE_NFKD;
else {
result_context_function_error (context, fn, "Invalid normalization specified");
return;
}
- if (U_SUCCESS (status)) {
- nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
- }
-
- if (!U_SUCCESS (status)) {
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
- zBuf[127] = '\0';
- g_free (zOutput);
- result_context_function_error (context, fn, zBuf);
- return;
- }
-
- sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
+ zOutput = tracker_parser_normalize (zInput, mode, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
}
static void
@@ -1348,45 +1089,28 @@ function_sparql_unaccent (sqlite3_context *context,
sqlite3_value *argv[])
{
const gchar *fn = "tracker:unaccent";
- const uint16_t *zInput;
- uint16_t *zOutput = NULL;
+ const gunichar2 *zInput;
+ gunichar2 *zOutput = NULL;
int nInput;
gsize nOutput;
- const UNormalizer2 *normalizer;
- UErrorCode status = U_ZERO_ERROR;
-
- g_assert (argc == 1);
- zInput = sqlite3_value_text16 (argv[0]);
-
- if (!zInput) {
+ if (argc != 1) {
+ result_context_function_error (context, fn, "Invalid argument count");
return;
}
- normalizer = unorm2_getNFKDInstance (&status);
-
- if (U_SUCCESS (status)) {
- nInput = sqlite3_value_bytes16 (argv[0]);
- zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
- }
+ zInput = sqlite3_value_text16 (argv[0]);
- if (!U_SUCCESS (status)) {
- char zBuf[128];
- sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
- zBuf[127] = '\0';
- g_free (zOutput);
- result_context_function_error (context, fn, zBuf);
+ if (!zInput) {
return;
}
- /* Unaccenting is done in place */
- tracker_parser_unaccent_nfkd_string (zOutput, &nOutput);
+ nInput = sqlite3_value_bytes16 (argv[0]);
- sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
+ zOutput = tracker_parser_unaccent (zInput, nInput, &nOutput);
+ sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
}
-#endif
-
static void
function_sparql_strip_punctuation (sqlite3_context *context,
int argc,