core: Move unicode treatment SPARQL function features to helpers

Have the helpers defined together with the TrackerParser, so we don't drag details about the unicode library to other parts of the code.
author: Carlos Garnacho <carlosg@gnome.org> 2023-03-02 11:01:39 +0100
committer: Carlos Garnacho <carlosg@gnome.org> 2023-03-02 15:44:59 +0100
commit: 819e2510fbb1fd54dd41d02c99583be94a47e4c3 (patch)
tree: 0ee95a12b37dd3854a485db84c9298e619b02a69 /src
parent: 6d250002586146de5ff1d34eaa29440bc18ff3ae (diff)
download: tracker-819e2510fbb1fd54dd41d02c99583be94a47e4c3.tar.gz
4 files changed, 314 insertions, 325 deletions
diff --git a/src/libtracker-common/tracker-parser-libicu.c b/src/libtracker-common/tracker-parser-libicu.c
index 8c4803206..a3271c003 100644
--- a/src/libtracker-common/tracker-parser-libicu.c
+++ b/src/libtracker-common/tracker-parser-libicu.c
@@ -144,7 +144,7 @@ get_word_info (const UChar           *word,
 /* The input word in this method MUST be normalized in NFKD form,
  * and given in UChars, where str_length is the number of UChars
  * (not the number of bytes) */
-gboolean
+static gboolean
 tracker_parser_unaccent_nfkd_string (gpointer  str,
                                      gsize    *str_length)
 {
@@ -754,3 +754,187 @@ tracker_parser_next (TrackerParser *parser,
 	return str;
 }
 
+/* Lower-cases UTF-16 text; len is in bytes. Returns a malloc()ed, NUL
+ * terminated copy (NULL on OOM) and stores its gunichar2 count in len_out. */
+gunichar2 *
+tracker_parser_tolower (const gunichar2 *input,
+			gsize            len,
+			gsize           *len_out)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UChar *zOutput;
+	int nOutput;
+
+	g_return_val_if_fail (input, NULL);
+
+	/* Room for twice the input UChars, plus one kept back for the NUL */
+	zOutput = malloc ((len + 1) * sizeof (UChar));
+	if (!zOutput)
+		return NULL;
+
+	nOutput = u_strToLower (zOutput, len, input, len / 2, NULL, &status);
+	if (!U_SUCCESS (status)) {
+		/* Fall back to a verbatim copy of the input */
+		memcpy (zOutput, input, len);
+		nOutput = len / 2;
+	}
+	zOutput[nOutput] = 0;
+	*len_out = nOutput;
+	return zOutput;
+}
+
+/* Upper-cases UTF-16 text; len is in bytes. Returns a malloc()ed, NUL
+ * terminated copy (NULL on OOM) and stores its gunichar2 count in len_out. */
+gunichar2 *
+tracker_parser_toupper (const gunichar2 *input,
+			gsize            len,
+			gsize           *len_out)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UChar *zOutput;
+	int nOutput;
+
+	/* Room for twice the input UChars, plus one kept back for the NUL */
+	zOutput = malloc ((len + 1) * sizeof (UChar));
+	if (!zOutput)
+		return NULL;
+
+	nOutput = u_strToUpper (zOutput, len, input, len / 2, NULL, &status);
+	if (!U_SUCCESS (status)) {
+		/* Fall back to a verbatim copy of the input */
+		memcpy (zOutput, input, len);
+		nOutput = len / 2;
+	}
+	zOutput[nOutput] = 0;
+	*len_out = nOutput;
+	return zOutput;
+}
+
+/* Case-folds UTF-16 text; len is in bytes. Returns a malloc()ed, NUL
+ * terminated copy (NULL on OOM) and stores its gunichar2 count in len_out. */
+gunichar2 *
+tracker_parser_casefold (const gunichar2 *input,
+			 gsize            len,
+			 gsize           *len_out)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UChar *zOutput;
+	int nOutput;
+
+	/* Room for twice the input UChars, plus one kept back for the NUL */
+	zOutput = malloc ((len + 1) * sizeof (UChar));
+	if (!zOutput)
+		return NULL;
+
+	nOutput = u_strFoldCase (zOutput, len, input, len / 2, U_FOLD_CASE_DEFAULT, &status);
+	if (!U_SUCCESS (status)) {
+		/* Fall back to a verbatim copy of the input */
+		memcpy (zOutput, input, len);
+		nOutput = len / 2;
+	}
+	zOutput[nOutput] = 0;
+	*len_out = nOutput;
+	return zOutput;
+}
+
+/* Normalizes string with normalizer into a newly g_malloc()ed buffer.
+ * Returns NULL (and a zero length) if ICU reports a failure in status. */
+static gunichar2 *
+normalize_string (const gunichar2    *string,
+                  gsize               string_len, /* In gunichar2s */
+                  const UNormalizer2 *normalizer,
+                  gsize              *len_out,    /* In gunichar2s */
+                  UErrorCode         *status)
+{
+	int n_units = (string_len * 2) + 1;
+	gunichar2 *buffer = g_new0 (gunichar2, n_units);
+
+	n_units = unorm2_normalize (normalizer, string, string_len,
+	                            buffer, n_units, status);
+	if (*status == U_BUFFER_OVERFLOW_ERROR) {
+		/* Resize to the length ICU returned, wipe the buffer and retry once */
+		*status = U_ZERO_ERROR;
+		buffer = g_renew (gunichar2, buffer, n_units);
+		memset (buffer, 0, n_units * sizeof (gunichar2));
+		n_units = unorm2_normalize (normalizer, string, string_len,
+		                            buffer, n_units, status);
+	}
+
+	if (!U_SUCCESS (*status)) {
+		g_free (buffer);
+		buffer = NULL;
+		n_units = 0;
+	}
+
+	if (len_out != NULL)
+		*len_out = n_units;
+	return buffer;
+}
+
+/* Normalizes UTF-16 input (len in bytes) to the form selected by mode.
+ * Returns a new buffer, an unmodified copy of input if ICU fails, and
+ * stores the result length, in gunichar2s, in len_out. */
+gunichar2 *
+tracker_parser_normalize (const gunichar2 *input,
+                          GNormalizeMode   mode,
+			  gsize            len,
+			  gsize           *len_out)
+{
+	gunichar2 *zOutput = NULL;
+	gsize nOutput = 0;
+	const UNormalizer2 *normalizer;
+	UErrorCode status = U_ZERO_ERROR;
+
+	if (mode == G_NORMALIZE_NFC)
+		normalizer = unorm2_getNFCInstance (&status);
+	else if (mode == G_NORMALIZE_NFD)
+		normalizer = unorm2_getNFDInstance (&status);
+	else if (mode == G_NORMALIZE_NFKC)
+		normalizer = unorm2_getNFKCInstance (&status);
+	else if (mode == G_NORMALIZE_NFKD)
+		normalizer = unorm2_getNFKDInstance (&status);
+	else
+		g_assert_not_reached ();
+
+	if (U_SUCCESS (status))
+		zOutput = normalize_string (input, len / 2, normalizer, &nOutput, &status);
+
+	if (!U_SUCCESS (status)) {
+		/* Report the fallback copy in gunichar2s, like the normalized case */
+		zOutput = g_memdup2 (input, len);
+		nOutput = len / 2;
+	}
+
+	*len_out = nOutput;
+	return zOutput;
+}
+
+/* Returns a new NFKD-normalized copy of UTF-16 input (len in bytes) with
+ * accents stripped; its length, in gunichar2s, is stored in len_out. */
+gunichar2 *
+tracker_parser_unaccent (const gunichar2 *input,
+			 gsize            len,
+			 gsize           *len_out)
+{
+	gunichar2 *zOutput = NULL;
+	gsize nOutput = 0;
+	const UNormalizer2 *normalizer;
+	UErrorCode status = U_ZERO_ERROR;
+
+	normalizer = unorm2_getNFKDInstance (&status);
+	if (U_SUCCESS (status))
+		zOutput = normalize_string (input, len / 2, normalizer, &nOutput, &status);
+
+	if (!U_SUCCESS (status)) {
+		/* Unaccent a verbatim copy; the helper needs its length in UChars */
+		zOutput = g_memdup2 (input, len);
+		nOutput = len / 2;
+	}
+
+	/* Unaccenting is done in place */
+	tracker_parser_unaccent_nfkd_string (zOutput, &nOutput);
+
+	*len_out = nOutput;
+
+	return zOutput;
+}
diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c
index d24c5f1cb..04f08a2c6 100644
--- a/src/libtracker-common/tracker-parser-libunistring.c
+++ b/src/libtracker-common/tracker-parser-libunistring.c
@@ -159,7 +159,7 @@ get_word_info (TrackerParser         *parser,
 /* The input word in this method MUST be normalized in NFKD form,
  * and given in UTF-8, where str_length is the byte-length
  * (note: there is no trailing NUL character!) */
-gboolean
+static gboolean
 tracker_parser_unaccent_nfkd_string (gpointer  str,
                                      gsize    *str_length)
 {
@@ -541,3 +541,66 @@ tracker_parser_next (TrackerParser *parser,
 	return str;
 }
 
+gunichar2 * /* Lower-cased copy of input, as returned by u16_tolower() */
+tracker_parser_tolower (const gunichar2 *input,
+			gsize            len,     /* In bytes */
+			gsize           *len_out) /* In gunichar2s */
+{ /* NOTE(review): result presumably lacks a trailing NUL; callers should honor len_out — confirm */
+	return u16_tolower (input, len / 2, NULL, NULL, NULL, len_out);
+}
+
+gunichar2 * /* Upper-cased copy of input, as returned by u16_toupper() */
+tracker_parser_toupper (const gunichar2 *input,
+                        gsize            len,     /* In bytes */
+                        gsize           *len_out) /* In gunichar2s */
+{ /* NOTE(review): result presumably lacks a trailing NUL; callers should honor len_out — confirm */
+	return u16_toupper (input, len / 2, NULL, NULL, NULL, len_out);
+}
+
+gunichar2 * /* Case-folded copy of input, as returned by u16_casefold() */
+tracker_parser_casefold (const gunichar2 *input,
+			 gsize            len,     /* In bytes */
+			 gsize           *len_out) /* In gunichar2s */
+{ /* NOTE(review): result presumably lacks a trailing NUL; callers should honor len_out — confirm */
+	return u16_casefold (input, len / 2, NULL, NULL, NULL, len_out);
+}
+
+/* Normalizes UTF-16 input (len in bytes) to the form selected by mode and
+ * returns u16_normalize()'s buffer; len_out gets its length in gunichar2s. */
+gunichar2 *
+tracker_parser_normalize (const gunichar2 *input,
+			  GNormalizeMode   mode,
+			  gsize            len,
+			  gsize           *len_out)
+{
+	uninorm_t nf;
+
+	switch (mode) {
+	case G_NORMALIZE_NFC:  nf = UNINORM_NFC;  break;
+	case G_NORMALIZE_NFD:  nf = UNINORM_NFD;  break;
+	case G_NORMALIZE_NFKC: nf = UNINORM_NFKC; break;
+	case G_NORMALIZE_NFKD: nf = UNINORM_NFKD; break;
+	default:
+		g_assert_not_reached ();
+	}
+
+	return u16_normalize (nf, input, len / 2, NULL, len_out);
+}
+
+/* NFKD-normalizes UTF-16 input (len in bytes) into a new buffer, unaccents
+ * it in place and stores the length reported by the helper in len_out. */
+gunichar2 *
+tracker_parser_unaccent (const gunichar2 *input,
+			 gsize            len,
+			 gsize           *len_out)
+{
+	gunichar2 *zOutput;
+	gsize written = 0;
+
+	/* u16_normalize() counts UTF-16 units, not bytes */
+	zOutput = u16_normalize (UNINORM_NFKD, input, len / 2, NULL, &written);
+	/* NOTE(review): the helper is documented as taking UTF-8; confirm */
+	tracker_parser_unaccent_nfkd_string (zOutput, &written);
+	*len_out = written;
+	return zOutput;
+}
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h
index 3c8271503..a6a51fd4e 100644
--- a/src/libtracker-common/tracker-parser.h
+++ b/src/libtracker-common/tracker-parser.h
@@ -59,8 +59,26 @@ void           tracker_parser_free            (TrackerParser   *parser);
 
 /* Other helper methods */
 
-gboolean       tracker_parser_unaccent_nfkd_string (gpointer  str,
-                                                    gsize    *str_length);
+gunichar2 * tracker_parser_tolower (const gunichar2 *input,
+                                    gsize            len, /* In bytes */
+                                    gsize           *len_out);
+
+gunichar2 * tracker_parser_toupper (const gunichar2 *input,
+                                    gsize            len, /* In bytes */
+                                    gsize           *len_out);
+
+gunichar2 * tracker_parser_casefold (const gunichar2 *input,
+                                     gsize            len, /* In bytes */
+                                     gsize           *len_out);
+
+gunichar2 * tracker_parser_normalize (const gunichar2 *input,
+                                      GNormalizeMode   mode,
+                                      gsize            len, /* In bytes */
+                                      gsize           *len_out);
+
+gunichar2 * tracker_parser_unaccent (const gunichar2 *input,
+                                     gsize            len, /* In bytes */
+                                     gsize           *len_out);
 
 G_END_DECLS
 
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 36f4fdcc9..9e372e408 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -35,23 +35,7 @@
 #include <libtracker-sparql/tracker-private.h>
 
 #include "tracker-fts.h"
-
-
-#ifdef HAVE_LIBUNISTRING
-/* libunistring versions prior to 9.1.2 need this hack */
-#define _UNUSED_PARAMETER_
-#include <unistr.h>
-#include <unicase.h>
-#elif defined(HAVE_LIBICU)
-#include <unicode/utypes.h>
-#include <unicode/uregex.h>
-#include <unicode/ustring.h>
-#include <unicode/ucol.h>
-#include <unicode/unorm2.h>
-#endif
-
 #include "tracker-collation.h"
-
 #include "tracker-db-interface-sqlite.h"
 #include "tracker-db-manager.h"
 #include "tracker-data-enum-types.h"
@@ -971,97 +955,18 @@ function_sparql_replace (sqlite3_context *context,
 	g_free (unescaped);
 }
 
-#ifdef HAVE_LIBUNISTRING
-
 static void
 function_sparql_lower_case (sqlite3_context *context,
                             int              argc,
                             sqlite3_value   *argv[])
 {
-	const uint16_t *zInput;
-	uint16_t *zOutput;
-	size_t written = 0;
-	int nInput;
-
-	g_assert (argc == 1);
-
-	zInput = sqlite3_value_text16 (argv[0]);
-
-	if (!zInput) {
-		return;
-	}
-
-	nInput = sqlite3_value_bytes16 (argv[0]);
-
-	zOutput = u16_tolower (zInput, nInput/2, NULL, NULL, NULL, &written);
-
-	sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_upper_case (sqlite3_context *context,
-                            int              argc,
-                            sqlite3_value   *argv[])
-{
-	const uint16_t *zInput;
-	uint16_t *zOutput;
-	size_t written = 0;
-	int nInput;
-
-	g_assert (argc == 1);
-
-	zInput = sqlite3_value_text16 (argv[0]);
-
-	if (!zInput) {
-		return;
-	}
-
-	nInput = sqlite3_value_bytes16 (argv[0]);
-
-	zOutput = u16_toupper (zInput, nInput / 2, NULL, NULL, NULL, &written);
-
-	sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_case_fold (sqlite3_context *context,
-                           int              argc,
-                           sqlite3_value   *argv[])
-{
-	const uint16_t *zInput;
-	uint16_t *zOutput;
-	size_t written = 0;
-	int nInput;
-
-	g_assert (argc == 1);
-
-	zInput = sqlite3_value_text16 (argv[0]);
-
-	if (!zInput) {
-		return;
-	}
-
-	nInput = sqlite3_value_bytes16 (argv[0]);
-
-	zOutput = u16_casefold (zInput, nInput/2, NULL, NULL, NULL, &written);
-
-	sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_normalize (sqlite3_context *context,
-                           int              argc,
-                           sqlite3_value   *argv[])
-{
-	const gchar *fn = "tracker:normalize";
-	const gchar *nfstr;
-	const uint16_t *zInput;
-	uint16_t *zOutput;
-	size_t written = 0;
+	const gchar *fn = "fn:lower-case";
+	const gunichar2 *zInput;
+	gunichar2 *zOutput;
 	int nInput;
-	uninorm_t nf;
+	gsize nOutput;
 
-	if (argc != 2) {
+	if (argc != 1) {
 		result_context_function_error (context, fn, "Invalid argument count");
 		return;
 	}
@@ -1072,98 +977,10 @@ function_sparql_normalize (sqlite3_context *context,
 		return;
 	}
 
-	nfstr = sqlite3_value_text (argv[1]);
-	if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
-		nf = UNINORM_NFC;
-	else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
-		nf = UNINORM_NFD;
-	else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
-		nf = UNINORM_NFKC;
-	else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
-		nf = UNINORM_NFKD;
-	else {
-		result_context_function_error (context, fn, "Invalid normalization specified, options are 'nfc', 'nfd', 'nfkc' or 'nfkd'");
-		return;
-	}
-
 	nInput = sqlite3_value_bytes16 (argv[0]);
 
-	zOutput = u16_normalize (nf, zInput, nInput/2, NULL, &written);
-
-	sqlite3_result_text16 (context, zOutput, written * 2, free);
-}
-
-static void
-function_sparql_unaccent (sqlite3_context *context,
-                          int              argc,
-                          sqlite3_value   *argv[])
-{
-	const gchar *zInput;
-	gchar *zOutput;
-	gsize written = 0;
-	int nInput;
-
-	g_assert (argc == 1);
-
-	zInput = sqlite3_value_text (argv[0]);
-
-	if (!zInput) {
-		return;
-	}
-
-	nInput = sqlite3_value_bytes (argv[0]);
-
-	zOutput = u8_normalize (UNINORM_NFKD, zInput, nInput, NULL, &written);
-
-	/* Unaccenting is done in place */
-	tracker_parser_unaccent_nfkd_string (zOutput, &written);
-
-	sqlite3_result_text (context, zOutput, written, free);
-}
-
-#elif defined(HAVE_LIBICU)
-
-static void
-function_sparql_lower_case (sqlite3_context *context,
-                            int              argc,
-                            sqlite3_value   *argv[])
-{
-	const gchar *fn = "fn:lower-case";
-	const UChar *zInput;
-	UChar *zOutput;
-	int nInput;
-	int nOutput;
-	UErrorCode status = U_ZERO_ERROR;
-
-	g_assert (argc == 1);
-
-	zInput = sqlite3_value_text16 (argv[0]);
-
-	if (!zInput) {
-		return;
-	}
-
-	nInput = sqlite3_value_bytes16 (argv[0]);
-
-	nOutput = nInput * 2 + 2;
-	zOutput = sqlite3_malloc (nOutput);
-
-	if (!zOutput) {
-		return;
-	}
-
-	u_strToLower (zOutput, nOutput/2, zInput, nInput/2, NULL, &status);
-
-	if (!U_SUCCESS (status)){
-		char zBuf[128];
-		sqlite3_snprintf (128, zBuf, "ICU error: u_strToLower(): %s", u_errorName (status));
-		zBuf[127] = '\0';
-		sqlite3_free (zOutput);
-		result_context_function_error (context, fn, zBuf);
-		return;
-	}
-
-	sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+	zOutput = tracker_parser_tolower (zInput, nInput, &nOutput);
+	sqlite3_result_text16 (context, zOutput, -1, free);
 }
 
 static void
@@ -1172,13 +989,15 @@ function_sparql_upper_case (sqlite3_context *context,
                             sqlite3_value   *argv[])
 {
 	const gchar *fn = "fn:upper-case";
-	const UChar *zInput;
-	UChar *zOutput;
+	const gunichar2 *zInput;
+	gunichar2 *zOutput;
 	int nInput;
-	int nOutput;
-	UErrorCode status = U_ZERO_ERROR;
+	gsize nOutput;
 
-	g_assert (argc == 1);
+	if (argc != 1) {
+		result_context_function_error (context, fn, "Invalid argument count");
+		return;
+	}
 
 	zInput = sqlite3_value_text16 (argv[0]);
 
@@ -1188,25 +1007,8 @@ function_sparql_upper_case (sqlite3_context *context,
 
 	nInput = sqlite3_value_bytes16 (argv[0]);
 
-	nOutput = nInput * 2 + 2;
-	zOutput = sqlite3_malloc (nOutput);
-
-	if (!zOutput) {
-		return;
-	}
-
-	u_strToUpper (zOutput, nOutput / 2, zInput, nInput / 2, NULL, &status);
-
-	if (!U_SUCCESS (status)){
-		char zBuf[128];
-		sqlite3_snprintf (128, zBuf, "ICU error: u_strToUpper(): %s", u_errorName (status));
-		zBuf[127] = '\0';
-		sqlite3_free (zOutput);
-		result_context_function_error (context, fn, zBuf);
-		return;
-	}
-
-	sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+	zOutput = tracker_parser_toupper (zInput, nInput, &nOutput);
+	sqlite3_result_text16 (context, zOutput, -1, free);
 }
 
 static void
@@ -1215,13 +1017,15 @@ function_sparql_case_fold (sqlite3_context *context,
                            sqlite3_value   *argv[])
 {
 	const gchar *fn = "tracker:case-fold";
-	const UChar *zInput;
-	UChar *zOutput;
+	const gunichar2 *zInput;
+	gunichar2 *zOutput;
 	int nInput;
-	int nOutput;
-	UErrorCode status = U_ZERO_ERROR;
+	gsize nOutput;
 
-	g_assert (argc == 1);
+	if (argc != 1) {
+		result_context_function_error (context, fn, "Invalid argument count");
+		return;
+	}
 
 	zInput = sqlite3_value_text16 (argv[0]);
 
@@ -1231,59 +1035,8 @@ function_sparql_case_fold (sqlite3_context *context,
 
 	nInput = sqlite3_value_bytes16 (argv[0]);
 
-	nOutput = nInput * 2 + 2;
-	zOutput = sqlite3_malloc (nOutput);
-
-	if (!zOutput) {
-		return;
-	}
-
-	u_strFoldCase (zOutput, nOutput/2, zInput, nInput/2, U_FOLD_CASE_DEFAULT, &status);
-
-	if (!U_SUCCESS (status)){
-		char zBuf[128];
-		sqlite3_snprintf (128, zBuf, "ICU error: u_strFoldCase: %s", u_errorName (status));
-		zBuf[127] = '\0';
-		sqlite3_free (zOutput);
-		result_context_function_error (context, fn, zBuf);
-		return;
-	}
-
-	sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
-}
-
-static gunichar2 *
-normalize_string (const gunichar2    *string,
-                  gsize               string_len, /* In gunichar2s */
-                  const UNormalizer2 *normalizer,
-                  gsize              *len_out,    /* In gunichar2s */
-                  UErrorCode         *status)
-{
-	int nOutput;
-	gunichar2 *zOutput;
-
-	nOutput = (string_len * 2) + 1;
-	zOutput = g_new0 (gunichar2, nOutput);
-
-	nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
-
-	if (*status == U_BUFFER_OVERFLOW_ERROR) {
-		/* Try again after allocating enough space for the normalization */
-		*status = U_ZERO_ERROR;
-		zOutput = g_renew (gunichar2, zOutput, nOutput);
-		memset (zOutput, 0, nOutput * sizeof (gunichar2));
-		nOutput = unorm2_normalize (normalizer, string, string_len, zOutput, nOutput, status);
-	}
-
-	if (!U_SUCCESS (*status)) {
-		g_clear_pointer (&zOutput, g_free);
-		nOutput = 0;
-	}
-
-	if (len_out)
-		*len_out = nOutput;
-
-	return zOutput;
+	zOutput = tracker_parser_casefold (zInput, nInput, &nOutput);
+	sqlite3_result_text16 (context, zOutput, -1, free);
 }
 
 static void
@@ -1293,12 +1046,11 @@ function_sparql_normalize (sqlite3_context *context,
 {
 	const gchar *fn = "tracker:normalize";
 	const gchar *nfstr;
-	const uint16_t *zInput;
-	uint16_t *zOutput = NULL;
+	const gunichar2 *zInput;
+	gunichar2 *zOutput = NULL;
+	GNormalizeMode mode;
 	int nInput;
 	gsize nOutput;
-	const UNormalizer2 *normalizer;
-	UErrorCode status = U_ZERO_ERROR;
 
 	if (argc != 2) {
 		result_context_function_error (context, fn, "Invalid argument count");
@@ -1311,35 +1063,24 @@ function_sparql_normalize (sqlite3_context *context,
 		return;
 	}
 
+	nInput = sqlite3_value_bytes16 (argv[0]);
+
 	nfstr = (gchar *)sqlite3_value_text (argv[1]);
 	if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
-		normalizer = unorm2_getNFCInstance (&status);
+		mode = G_NORMALIZE_NFC;
 	else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
-		normalizer = unorm2_getNFDInstance (&status);
+		mode = G_NORMALIZE_NFD;
 	else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
-		normalizer = unorm2_getNFKCInstance (&status);
+		mode = G_NORMALIZE_NFKC;
 	else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
-		normalizer = unorm2_getNFKDInstance (&status);
+		mode = G_NORMALIZE_NFKD;
 	else {
 		result_context_function_error (context, fn, "Invalid normalization specified");
 		return;
 	}
 
-	if (U_SUCCESS (status)) {
-		nInput = sqlite3_value_bytes16 (argv[0]);
-		zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
-	}
-
-	if (!U_SUCCESS (status)) {
-		char zBuf[128];
-		sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
-		zBuf[127] = '\0';
-		g_free (zOutput);
-		result_context_function_error (context, fn, zBuf);
-		return;
-	}
-
-	sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
+	zOutput = tracker_parser_normalize (zInput, mode, nInput, &nOutput);
+	sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
 }
 
 static void
@@ -1348,45 +1089,28 @@ function_sparql_unaccent (sqlite3_context *context,
                           sqlite3_value   *argv[])
 {
 	const gchar *fn = "tracker:unaccent";
-	const uint16_t *zInput;
-	uint16_t *zOutput = NULL;
+	const gunichar2 *zInput;
+	gunichar2 *zOutput = NULL;
 	int nInput;
 	gsize nOutput;
-	const UNormalizer2 *normalizer;
-	UErrorCode status = U_ZERO_ERROR;
-
-	g_assert (argc == 1);
 
-	zInput = sqlite3_value_text16 (argv[0]);
-
-	if (!zInput) {
+	if (argc != 1) {
+		result_context_function_error (context, fn, "Invalid argument count");
 		return;
 	}
 
-	normalizer = unorm2_getNFKDInstance (&status);
-
-	if (U_SUCCESS (status)) {
-		nInput = sqlite3_value_bytes16 (argv[0]);
-		zOutput = normalize_string (zInput, nInput / 2, normalizer, &nOutput, &status);
-	}
+	zInput = sqlite3_value_text16 (argv[0]);
 
-	if (!U_SUCCESS (status)) {
-		char zBuf[128];
-		sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
-		zBuf[127] = '\0';
-		g_free (zOutput);
-		result_context_function_error (context, fn, zBuf);
+	if (!zInput) {
 		return;
 	}
 
-	/* Unaccenting is done in place */
-	tracker_parser_unaccent_nfkd_string (zOutput, &nOutput);
+	nInput = sqlite3_value_bytes16 (argv[0]);
 
-	sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), g_free);
+	zOutput = tracker_parser_unaccent (zInput, nInput, &nOutput);
+	sqlite3_result_text16 (context, zOutput, nOutput * sizeof (gunichar2), free);
 }
 
-#endif
-
 static void
 function_sparql_strip_punctuation (sqlite3_context *context,
                                    int              argc,
author	Carlos Garnacho <carlosg@gnome.org>	2023-03-02 11:01:39 +0100
committer	Carlos Garnacho <carlosg@gnome.org>	2023-03-02 15:44:59 +0100
commit	819e2510fbb1fd54dd41d02c99583be94a47e4c3 (patch)
tree	0ee95a12b37dd3854a485db84c9298e619b02a69 /src
parent	6d250002586146de5ff1d34eaa29440bc18ff3ae (diff)
download	tracker-819e2510fbb1fd54dd41d02c99583be94a47e4c3.tar.gz