summary | refs | log | tree | commit | diff
path: root/tests/libtracker-fts
diff options
context:
space:
mode:
author: Martyn Russell <martyn@lanedo.com> 2014-12-03 10:31:18 +0000
committer: Martyn Russell <martyn@lanedo.com> 2014-12-03 10:31:18 +0000
commit: 8d14867631b59ecdfadcd77ac407f19fc15ba4d8 (patch)
tree: 7502236577c41d633c8cd13e27dd70fb6f9e9580 /tests/libtracker-fts
parent: 8833933e45e77a67d06f21f47e1c70a1525350eb (diff)
download: tracker-8d14867631b59ecdfadcd77ac407f19fc15ba4d8.tar.gz
libtracker-common: Move and fix tracker-parser unit tests from libtracker-fts
Diffstat (limited to 'tests/libtracker-fts')
-rw-r--r-- tests/libtracker-fts/Makefile.am | 10
-rw-r--r-- tests/libtracker-fts/tracker-parser-test.c | 450
-rw-r--r-- tests/libtracker-fts/tracker-parser.c | 260
3 files changed, 1 insertion, 719 deletions
diff --git a/tests/libtracker-fts/Makefile.am b/tests/libtracker-fts/Makefile.am
index aa2be7f2e..7e86b2de7 100644
--- a/tests/libtracker-fts/Makefile.am
+++ b/tests/libtracker-fts/Makefile.am
@@ -4,14 +4,10 @@ SUBDIRS = \
limits \
prefix
-check_PROGRAMS += \
- tracker-parser
-
noinst_PROGRAMS += $(test_programs)
test_programs = \
- tracker-fts-test \
- tracker-parser-test
+ tracker-fts-test
AM_CPPFLAGS = \
$(BUILD_CFLAGS) \
@@ -32,10 +28,6 @@ LDADD = \
tracker_fts_test_SOURCES = tracker-fts-test.c
-tracker_parser_test_SOURCES = tracker-parser-test.c
-
-tracker_parser_SOURCES = tracker-parser.c
-
EXTRA_DIST += \
data.ontology \
fts3aa-data.rq \
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
deleted file mode 100644
index 954c212bd..000000000
--- a/tests/libtracker-fts/tracker-parser-test.c
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-
-#include <string.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-
-#include <libtracker-common/tracker-parser.h>
-
-/* -------------- COMMON FOR ALL TESTS ----------------- */
-
-/* Fixture object type */
-typedef struct {
- /* The parser object */
- TrackerParser *parser;
-
- /* Default parser configuration to use */
- gint max_word_length;
- gboolean enable_stemmer;
- gboolean enable_unaccent;
- gboolean ignore_stop_words;
- gboolean ignore_reserved_words;
- gboolean ignore_numbers;
-} TrackerParserTestFixture;
-
-/* Common setup for all tests */
-static void
-test_common_setup (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
- TrackerLanguage *language;
-
- /* Setup language for parser. We make sure that always English is used
- * in the unit tests, because we want the English stemming method to
- * be used. */
- language = tracker_language_new ("en");
- if (!language) {
- g_critical ("Language setup failed!");
- return;
- }
-
- /* Default conf parameters */
- fixture->max_word_length = 50;
- fixture->enable_stemmer = TRUE;
- fixture->enable_unaccent = TRUE;
- fixture->ignore_stop_words = TRUE;
- fixture->ignore_reserved_words = TRUE;
- fixture->ignore_numbers = TRUE;
-
- /* Create the parser */
- fixture->parser = tracker_parser_new (language);
- if (!fixture->parser) {
- g_critical ("Parser creation failed!");
- return;
- }
-
- g_object_unref (language);
-}
-
-/* Common teardown for all tests */
-static void
-test_common_teardown (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
- if (fixture->parser) {
- tracker_parser_free (fixture->parser);
- }
-}
-
-/* -------------- EXPECTED NUMBER OF WORDS TESTS ----------------- */
-
-/* Test struct for the expected-nwords tests */
-typedef struct TestDataExpectedNWords TestDataExpectedNWords;
-struct TestDataExpectedNWords {
- const gchar *str;
- gboolean ignore_numbers;
- guint expected_nwords;
- gint alternate_expected_nwords;
-};
-
-/* Common expected_word test method */
-static void
-expected_nwords_check (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
- const TestDataExpectedNWords *testdata = data;
- gint position;
- gint byte_offset_start;
- gint byte_offset_end;
- gboolean stop_word;
- gint word_length;
- guint nwords = 0;
-
- /* Reset the parser with the test string */
- tracker_parser_reset (fixture->parser,
- testdata->str,
- strlen (testdata->str),
- fixture->max_word_length,
- fixture->enable_stemmer,
- fixture->enable_unaccent,
- fixture->ignore_stop_words,
- fixture->ignore_reserved_words,
- testdata->ignore_numbers);
-
- /* Count number of output words */
- while (tracker_parser_next (fixture->parser,
- &position,
- &byte_offset_start,
- &byte_offset_end,
- &stop_word,
- &word_length)) {
- nwords++;
- }
-
- /* Some tests will yield different results when using different versions of
- * libicu (e.g. chinese ones). Handle this by allowing an alternate number
- * of words expected in the test. Note that our whole purpose is to test
- * that we can split different words, not much about the number of words
- * itself (althogh we should check that as well) */
-
- if (testdata->alternate_expected_nwords < 0)
- /* Check if input is same as expected */
- g_assert_cmpuint (nwords, == , testdata->expected_nwords);
- else
- /* We'll assert if both expected number of words fail */
- g_assert ((nwords == testdata->expected_nwords) ||
- (nwords == testdata->alternate_expected_nwords));
-}
-
-/* -------------- EXPECTED WORD TESTS ----------------- */
-
-/* Test struct for the expected-word tests */
-typedef struct TestDataExpectedWord TestDataExpectedWord;
-struct TestDataExpectedWord {
- const gchar *str;
- const gchar *expected;
- gboolean enable_stemmer;
- gboolean enable_unaccent;
-};
-
-/* Common expected_word test method */
-static void
-expected_word_check (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
- const TestDataExpectedWord *testdata = data;
- const gchar *word;
- gchar *expected_nfkd;
- gint position;
- gint byte_offset_start;
- gint byte_offset_end;
- gboolean stop_word;
- gint word_length;
-
- /* Reset the parser with our string */
- tracker_parser_reset (fixture->parser,
- testdata->str,
- strlen (testdata->str),
- fixture->max_word_length,
- testdata->enable_stemmer,
- testdata->enable_unaccent,
- fixture->ignore_stop_words,
- fixture->ignore_reserved_words,
- fixture->ignore_numbers);
-
- /* Process next word */
- word = tracker_parser_next (fixture->parser,
- &position,
- &byte_offset_start,
- &byte_offset_end,
- &stop_word,
- &word_length);
-
- /* Expected word MUST always be in NFKD normalization */
- expected_nfkd = g_utf8_normalize (testdata->expected,
- -1,
- G_NORMALIZE_NFKD);
-
- /* Check if input is same as expected */
- g_assert_cmpstr (word, == , expected_nfkd);
-
- g_free (expected_nfkd);
-}
-
-static void
-test_stemmer (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
-#ifdef HAVE_LIBSTEMMER
- expected_word_check (fixture, data);
-#else
- g_test_skip ("Built without libstemmer");
-#endif
-}
-
-static void
-test_unac (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
-#ifdef HAVE_UNAC
- expected_word_check (fixture, data);
-#else
- g_test_skip ("Built without UNAC");
-#endif
-}
-
-/* -------------- STOP WORD TESTS ----------------- */
-
-/* Test struct for the stop-word tests */
-typedef struct TestDataStopWord TestDataStopWord;
-struct TestDataStopWord {
- const gchar *str;
- gboolean ignore_stop_words;
- gboolean is_expected_stop_word;
-};
-
-/* Common stop__word test method */
-static void
-stop_word_check (TrackerParserTestFixture *fixture,
- gconstpointer data)
-{
- const TestDataStopWord *testdata = data;
- gint position;
- gint byte_offset_start;
- gint byte_offset_end;
- gboolean stop_word;
- gint word_length;
-
- /* Reset the parser with our string */
- tracker_parser_reset (fixture->parser,
- testdata->str,
- strlen (testdata->str),
- fixture->max_word_length,
- fixture->enable_stemmer,
- fixture->enable_unaccent,
- testdata->ignore_stop_words,
- fixture->ignore_reserved_words,
- fixture->ignore_numbers);
-
- /* Process next word */
- tracker_parser_next (fixture->parser,
- &position,
- &byte_offset_start,
- &byte_offset_end,
- &stop_word,
- &word_length);
-
- /* Check if input is same as stop_word */
- g_assert_cmpuint (stop_word, == , testdata->is_expected_stop_word);
-}
-
-/* -------------- LIST OF TESTS ----------------- */
-
-/* Normalization-related tests (unaccenting) */
-static const TestDataExpectedWord test_data_normalization[] = {
- { "école", "ecole", FALSE, TRUE },
- { "ÉCOLE", "ecole", FALSE, TRUE },
- { "École", "ecole", FALSE, TRUE },
- { "e" "\xCC\x81" "cole", "ecole", FALSE, TRUE },
- { "E" "\xCC\x81" "COLE", "ecole", FALSE, TRUE },
- { "E" "\xCC\x81" "cole", "ecole", FALSE, TRUE },
- { NULL, NULL, FALSE, FALSE }
-};
-
-/* Unaccenting-related tests */
-static const TestDataExpectedWord test_data_unaccent[] = {
- { "Murciélago", "murcielago", FALSE, TRUE },
- { "camión", "camion", FALSE, TRUE },
- { "desagüe", "desague", FALSE, TRUE },
- { "Ὰ", "α", FALSE, TRUE }, /* greek capital alpha with U+0300, composed */
- { "ὰ", "α", FALSE, TRUE }, /* greek small alpha with U+0300, composed */
- { "Ὶ", "ι", FALSE, TRUE }, /* greek capital iotta with U+0300, composed */
- { "ὶ", "ι", FALSE, TRUE }, /* greek small iotta with U+0300, composed */
- { "Ὼ", "ω", FALSE, TRUE }, /* greek capital omega with U+0300, composed */
- { "ὼ", "ω", FALSE, TRUE }, /* greek small omega with U+0300, composed */
- { "Ὰ", "α", FALSE, TRUE }, /* capital alpha with U+0300, decomposed */
- { "ὰ", "α", FALSE, TRUE }, /* small alpha with U+0300, decomposed */
- { "Ὶ", "ι", FALSE, TRUE }, /* capital iotta with U+0300, decomposed */
- { "ὶ", "ι", FALSE, TRUE }, /* small iotta with U+0300, decomposed */
- { "Ὼ", "ω", FALSE, TRUE }, /* capital omega with U+0300, decomposed */
- { "ὼ", "ω", FALSE, TRUE }, /* small omega with U+0300, decomposed */
- { "aN͡Ga", "anga", FALSE, TRUE }, /* 0x0361 affects to two characters */
- { "aNG͡a", "anga", FALSE, TRUE }, /* 0x0361 affects to two characters */
- { "Murciélago", "murciélago", FALSE, FALSE },
- { "camión", "camión", FALSE, FALSE },
- { "desagüe", "desagüe", FALSE, FALSE },
- { NULL, NULL, FALSE, FALSE }
-};
-
-/* Stemming-related tests */
-static const TestDataExpectedWord test_data_stemming[] = {
- { "ecole", "ecol", TRUE, TRUE },
- { "ecole", "ecole", FALSE, TRUE },
- { NULL, NULL, FALSE, FALSE }
-};
-
-/* Casefolding-related tests */
-static const TestDataExpectedWord test_data_casefolding[] = {
- { "gross", "gross", FALSE, TRUE },
- { "GROSS", "gross", FALSE, TRUE },
- { "GrOsS", "gross", FALSE, TRUE },
- { "groß", "gross", FALSE, TRUE },
- { NULL, NULL, FALSE, FALSE }
-};
-
-/* Number of expected words tests */
-static const TestDataExpectedNWords test_data_nwords[] = {
- { "The quick (\"brown\") fox can’t jump 32.3 feet, right?", TRUE, 8, -1 },
- { "The quick (\"brown\") fox can’t jump 32.3 feet, right?", FALSE, 10, -1 },
- /* Note: as of 0.9.15, the dot is always a word breaker, even between
- * numbers. */
- { "filename.txt", TRUE, 2, -1 },
- { ".hidden.txt", TRUE, 2, -1 },
- { "noextension.", TRUE, 1, -1 },
- { "ホモ・サピエンス", TRUE, 2, -1 }, /* katakana */
- { "喂人类", TRUE, 2, 3 }, /* chinese */
- { "Американские суда находятся в международных водах.", TRUE, 6, -1 }, /* russian */
- { "Bần chỉ là một anh nghèo xác", TRUE, 7, -1 }, /* vietnamese */
- { "ホモ・サピエンス 喂人类 katakana, chinese, english", TRUE, 7, 8 }, /* mixed */
- { NULL, FALSE, 0, 0 }
-};
-
-/* Stop-word tests (for english only) */
-static const TestDataStopWord test_data_stop_words[] = {
- { "hello", TRUE, TRUE }, /* hello is stop word */
- { "hello", FALSE, FALSE },
- { "world", TRUE, FALSE }, /* world is not stop word */
- { "world", FALSE, FALSE },
- { NULL, FALSE, FALSE }
-};
-
-int
-main (int argc, char **argv)
-{
- gint i;
-
- g_test_init (&argc, &argv, NULL);
-
- /* We want the tests to properly find the stopwords dictionaries, so we
- * need to set the following envvar with the path where the
- * dictionaries are. */
- g_setenv ("TRACKER_LANGUAGE_STOP_WORDS_DIR",
- TOP_SRCDIR "/src/libtracker-common/stop-words",
- TRUE);
-
- /* Add normalization checks */
- for (i = 0; test_data_normalization[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/normalization_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_normalization[i],
- test_common_setup,
- expected_word_check,
- test_common_teardown);
- g_free (testpath);
- }
-
- /* Add unaccent checks */
- for (i = 0; test_data_unaccent[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/unaccent_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_unaccent[i],
- test_common_setup,
- test_unac,
- test_common_teardown);
- g_free (testpath);
- }
-
- /* Add casefolding checks */
- for (i = 0; test_data_casefolding[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/casefolding_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_casefolding[i],
- test_common_setup,
- expected_word_check,
- test_common_teardown);
- g_free (testpath);
- }
-
- /* Add stemming checks */
- for (i = 0; test_data_stemming[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/stemming_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_stemming[i],
- test_common_setup,
- test_stemmer,
- test_common_teardown);
- g_free (testpath);
- }
-
- /* Add expected number of words checks */
- for (i = 0; test_data_nwords[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/nwords_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_nwords[i],
- test_common_setup,
- expected_nwords_check,
- test_common_teardown);
- g_free (testpath);
- }
-
- /* Add stop word checks */
- for (i = 0; test_data_stop_words[i].str != NULL; i++) {
- gchar *testpath;
-
- testpath = g_strdup_printf ("/libtracker-fts/parser/stop_words_%d", i);
- g_test_add (testpath,
- TrackerParserTestFixture,
- &test_data_stop_words[i],
- test_common_setup,
- stop_word_check,
- test_common_teardown);
- g_free (testpath);
- }
-
- return g_test_run ();
-}
diff --git a/tests/libtracker-fts/tracker-parser.c b/tests/libtracker-fts/tracker-parser.c
deleted file mode 100644
index 2c725d81a..000000000
--- a/tests/libtracker-fts/tracker-parser.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-
-#include <string.h>
-#include <locale.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-
-#include <libtracker-fts/tracker-parser.h>
-#include <libtracker-fts/tracker-fts-config.h>
-#include <libtracker-common/tracker-common.h>
-
-static gchar *text;
-static gchar *filename;
-static gboolean verbose;
-
-/* Command Line options */
-static const GOptionEntry options [] = {
- {
- "verbose", 'v', G_OPTION_FLAG_NO_ARG,
- G_OPTION_ARG_NONE, &verbose,
- "Enable verbose output",
- NULL
- },
- {
- "text", 't', 0,
- G_OPTION_ARG_STRING, &text,
- "Specific text to parse",
- NULL
- },
- {
- "file", 'f', 0,
- G_OPTION_ARG_STRING, &filename,
- "Specific file to parse its contents",
- NULL
- },
- { NULL }
-};
-
-static gboolean
-setup_context (gint argc,
- gchar **argv)
-{
- GOptionContext *context = NULL;
- GError *error = NULL;
-
- /* Setup command line options */
- context = g_option_context_new ("- Test the Tracker FTS parser");
- g_option_context_add_main_entries (context,
- options,
- argv[0]);
-
- /* Parse input arguments */
- if (!g_option_context_parse (context,
- &argc,
- &argv,
- &error))
- {
- g_printerr ("%s\nRun '%s --help' to see a full list of available "
- "command line options.\n",
- error->message,
- argv[0]);
- g_error_free (error);
- return FALSE;
- }
-
- g_option_context_free (context);
- return TRUE;
-}
-
-static gboolean
-load_file_contents (void)
-{
- GError *error = NULL;
- GFile *file;
-
- file = g_file_new_for_commandline_arg (filename);
- if (!g_file_load_contents (file, NULL, &text, NULL, NULL, &error)) {
- g_printerr ("Error loading file '%s' contents: '%s'\n",
- filename,
- error->message);
- g_error_free (error);
- g_object_unref (file);
- return FALSE;
- }
- g_object_unref (file);
- return TRUE;
-}
-
-static gboolean
-run_parsing (void)
-{
- TrackerFTSConfig *config;
- TrackerLanguage *language;
- TrackerParser *parser;
- GTimer *timer;
-
- /* Initialize timing */
- timer = g_timer_new ();
-
- /* Read config file */
- config = tracker_fts_config_new ();
-
- /* Setup language for parser */
- language = tracker_language_new (NULL);
- if (!language) {
- g_printerr ("Language setup failed!\n");
- return FALSE;
- }
-
- /* Create the parser */
- parser = tracker_parser_new (language);
- if (!parser) {
- g_printerr ("Parser creation failed!\n");
- g_object_unref (language);
- return FALSE;
- }
-
- /* Reset the parser with our string, reading the current FTS config */
- tracker_parser_reset (parser,
- text,
- strlen (text),
- tracker_fts_config_get_max_word_length (config),
- tracker_fts_config_get_enable_stemmer (config),
- tracker_fts_config_get_enable_unaccent (config),
- tracker_fts_config_get_ignore_stop_words (config),
- TRUE,
- tracker_fts_config_get_ignore_numbers (config));
-
- /* Loop through all words! */
- while (1) {
- const gchar *word;
- gint position;
- gint byte_offset_start;
- gint byte_offset_end;
- gboolean stop_word;
- gint word_length;
-
-
- /* Process next word */
- word = tracker_parser_next (parser,
- &position,
- &byte_offset_start,
- &byte_offset_end,
- &stop_word,
- &word_length);
-
- /* Stop loop if no more words */
- if (!word) {
- break;
- }
-
- if (verbose) {
- gchar *word_hex;
- gchar *original_word;
- gchar *original_word_hex;
- gint original_word_length;
-
- /* Get original word */
- original_word_length = byte_offset_end - byte_offset_start;
- original_word = g_malloc (original_word_length + 1);
- memcpy (original_word,
- &text[byte_offset_start],
- original_word_length);
- original_word[original_word_length] = '\0';
-
- /* Get hex strings */
- word_hex = tracker_strhex (word, word_length, ':');
- original_word_hex = tracker_strhex (original_word,
- original_word_length,
- ':');
-
- g_print ("WORD at %d [%d,%d] Original: '%s' (%s), "
- "Processed: '%s' (%s) (stop? %s)\n",
- position,
- byte_offset_start,
- byte_offset_end,
- original_word,
- original_word_hex,
- word,
- word_hex,
- stop_word ? "yes" : "no");
-
- g_free (word_hex);
- g_free (original_word_hex);
- g_free (original_word);
- }
- }
-
- g_print ("\n----> Parsing finished after '%lf' seconds\n",
- g_timer_elapsed (timer, NULL));
-
- g_timer_destroy (timer);
-
- tracker_parser_free (parser);
- g_object_unref (language);
- return TRUE;
-}
-
-
-int
-main (int argc, char **argv)
-{
- /* Setup locale */
- setlocale (LC_ALL, "");
-
- /* Setup context */
- if (!setup_context (argc, argv)) {
- g_printerr ("Context setup failed... exiting\n");
- return -1;
- }
-
- /* Either text or file must be given */
- if (filename == NULL &&
- text == NULL) {
- g_printerr ("Either 'file' or 'text' options should be used\n"
- "Run '%s --help' to see a full list of available "
- "command line options.\n",
- argv[0]);
- return -2;
- }
-
- /* If required, load file contents */
- if (filename != NULL &&
- !load_file_contents ()) {
- g_printerr ("Loading file '%s' contents failed... exiting\n",
- filename);
- return -3;
- }
-
- /* Run the parsing! */
- if (!run_parsing ()) {
- g_printerr ("Parsing operation failed... exiting\n");
- return -4;
- }
-
- /* Clean exit */
- if (filename)
- g_free (text);
- return 0;
-}