libtracker-common: Move and fix tracker-parser unit tests from libtracker-fts

author: Martyn Russell <martyn@lanedo.com> 2014-12-03 10:31:18 +0000
committer: Martyn Russell <martyn@lanedo.com> 2014-12-03 10:31:18 +0000
commit: 8d14867631b59ecdfadcd77ac407f19fc15ba4d8 (patch)
tree: 7502236577c41d633c8cd13e27dd70fb6f9e9580 /tests/libtracker-common
parent: 8833933e45e77a67d06f21f47e1c70a1525350eb (diff)
download: tracker-8d14867631b59ecdfadcd77ac407f19fc15ba4d8.tar.gz
3 files changed, 720 insertions, 1 deletions
diff --git a/tests/libtracker-common/Makefile.am b/tests/libtracker-common/Makefile.am
index 68d6cbef2..d82ca6c99 100644
--- a/tests/libtracker-common/Makefile.am
+++ b/tests/libtracker-common/Makefile.am
@@ -2,13 +2,16 @@ include $(top_srcdir)/Makefile.decl
 
 noinst_PROGRAMS += $(test_programs)
 
+check_PROGRAMS += tracker-parser
+
 test_programs = \
 	tracker-type-utils                             \
 	tracker-dbus                                   \
 	tracker-file-utils                             \
 	tracker-utils				       \
 	tracker-sched-test			       \
-	tracker-date-time-test
+	tracker-date-time-test \
+        tracker-parser-test
 
 AM_CPPFLAGS =                                      \
 	-DTOP_SRCDIR=\"$(abs_top_srcdir)\"             \
@@ -37,4 +40,8 @@ tracker_sched_test_SOURCES = tracker-sched-test.c
 
 tracker_date_time_test_SOURCES = tracker-date-time-test.c
 
+tracker_parser_test_SOURCES = tracker-parser-test.c
+
+tracker_parser_SOURCES = tracker-parser.c
+
 EXTRA_DIST += non-utf8.txt
diff --git a/tests/libtracker-common/tracker-parser-test.c b/tests/libtracker-common/tracker-parser-test.c
new file mode 100644
index 000000000..954c212bd
--- /dev/null
+++ b/tests/libtracker-common/tracker-parser-test.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <libtracker-common/tracker-parser.h>
+
+/* -------------- COMMON FOR ALL TESTS ----------------- */
+
+/* Fixture shared by every test: the parser plus default reset options */
+typedef struct {
+	/* The parser under test; created in test_common_setup() */
+	TrackerParser *parser;
+
+	/* Defaults handed to tracker_parser_reset() unless a test overrides them */
+	gint max_word_length;
+	gboolean enable_stemmer;
+	gboolean enable_unaccent;
+	gboolean ignore_stop_words;
+	gboolean ignore_reserved_words;
+	gboolean ignore_numbers;
+} TrackerParserTestFixture;
+
+/* Common setup for all tests.
+ *
+ * Fills the fixture with the default parser options and creates a parser
+ * bound to the English language. On failure a critical is logged and
+ * fixture->parser is left NULL, which test_common_teardown() tolerates. */
+static void
+test_common_setup (TrackerParserTestFixture *fixture,
+                   gconstpointer             data)
+{
+	TrackerLanguage *language;
+
+	/* Make sure teardown never sees a stale pointer if we bail out early */
+	fixture->parser = NULL;
+
+	/* Default conf parameters */
+	fixture->max_word_length = 50;
+	fixture->enable_stemmer = TRUE;
+	fixture->enable_unaccent = TRUE;
+	fixture->ignore_stop_words = TRUE;
+	fixture->ignore_reserved_words = TRUE;
+	fixture->ignore_numbers = TRUE;
+
+	/* Setup language for parser. We make sure that always English is used
+	 *  in the unit tests, because we want the English stemming method to
+	 *  be used. */
+	language = tracker_language_new ("en");
+	if (!language) {
+		g_critical ("Language setup failed!");
+		return;
+	}
+
+	/* Create the parser, then drop our own language reference on every
+	 * path, including the one where parser creation fails. */
+	fixture->parser = tracker_parser_new (language);
+	g_object_unref (language);
+
+	if (!fixture->parser) {
+		g_critical ("Parser creation failed!");
+	}
+}
+
+/* Common teardown for all tests: releases the parser, if one was created */
+static void
+test_common_teardown (TrackerParserTestFixture *fixture,
+                      gconstpointer             data)
+{
+	/* Setup may have failed before a parser existed */
+	if (fixture->parser == NULL) {
+		return;
+	}
+
+	tracker_parser_free (fixture->parser);
+}
+
+/* -------------- EXPECTED NUMBER OF WORDS TESTS ----------------- */
+
+/* One input/expectation record for the expected-nwords tests */
+typedef struct TestDataExpectedNWords {
+	const gchar *str;               /* text handed to the parser */
+	gboolean ignore_numbers;        /* forwarded to tracker_parser_reset() */
+	guint expected_nwords;          /* number of words the parser should yield */
+	gint alternate_expected_nwords; /* second accepted count, negative if none */
+} TestDataExpectedNWords;
+
+/* Common expected_nwords test method.
+ *
+ * Parses testdata->str and counts the words the parser returns. The count
+ * must equal expected_nwords or, when alternate_expected_nwords is not
+ * negative, either of the two values. */
+static void
+expected_nwords_check (TrackerParserTestFixture *fixture,
+                       gconstpointer             data)
+{
+	const TestDataExpectedNWords *testdata = data;
+	gint position;
+	gint byte_offset_start;
+	gint byte_offset_end;
+	gboolean stop_word;
+	gint word_length;
+	guint nwords = 0;
+
+	/* Reset the parser with the test string */
+	tracker_parser_reset (fixture->parser,
+	                      testdata->str,
+	                      strlen (testdata->str),
+	                      fixture->max_word_length,
+	                      fixture->enable_stemmer,
+	                      fixture->enable_unaccent,
+	                      fixture->ignore_stop_words,
+	                      fixture->ignore_reserved_words,
+	                      testdata->ignore_numbers);
+
+	/* Count number of output words */
+	while (tracker_parser_next (fixture->parser,
+	                            &position,
+	                            &byte_offset_start,
+	                            &byte_offset_end,
+	                            &stop_word,
+	                            &word_length)) {
+		nwords++;
+	}
+
+	/* Some tests will yield different results when using different versions of
+	 * libicu (e.g. chinese ones). Handle this by allowing an alternate number
+	 * of words expected in the test. Note that our whole purpose is to test
+	 * that we can split different words, not much about the number of words
+	 * itself (although we should check that as well) */
+
+	if (testdata->alternate_expected_nwords < 0) {
+		/* Check if input is same as expected */
+		g_assert_cmpuint (nwords, == , testdata->expected_nwords);
+	} else {
+		/* Non-negative here, so the conversion to unsigned is lossless */
+		guint alternate_nwords = (guint) testdata->alternate_expected_nwords;
+
+		/* We'll assert if both expected number of words fail.
+		 * g_assert_true() is used because, unlike g_assert(), it is
+		 * not compiled out when G_DISABLE_ASSERT is defined. */
+		g_assert_true ((nwords == testdata->expected_nwords) ||
+		               (nwords == alternate_nwords));
+	}
+}
+
+/* -------------- EXPECTED WORD TESTS ----------------- */
+
+/* One input/expectation record for the expected-word tests */
+typedef struct TestDataExpectedWord {
+	const gchar *str;         /* text handed to the parser */
+	const gchar *expected;    /* first word expected back (NFKD-normalized before comparing) */
+	gboolean enable_stemmer;  /* forwarded to tracker_parser_reset() */
+	gboolean enable_unaccent; /* forwarded to tracker_parser_reset() */
+} TestDataExpectedWord;
+
+/* Common expected_word test method.
+ *
+ * Feeds testdata->str to the parser and compares the first word it returns
+ * with testdata->expected, after normalizing the expectation to NFKD. */
+static void
+expected_word_check (TrackerParserTestFixture *fixture,
+                     gconstpointer             data)
+{
+	const TestDataExpectedWord *testdata = data;
+	gint position, byte_offset_start, byte_offset_end, word_length;
+	gboolean stop_word;
+	const gchar *word;
+	gchar *expected_nfkd;
+
+	/* The expectation MUST always be compared in NFKD normalization */
+	expected_nfkd = g_utf8_normalize (testdata->expected, -1, G_NORMALIZE_NFKD);
+
+	/* Hand our string to the parser; stemming and unaccenting come from
+	 * the test data, everything else from the fixture defaults */
+	tracker_parser_reset (fixture->parser,
+	                      testdata->str,
+	                      strlen (testdata->str),
+	                      fixture->max_word_length,
+	                      testdata->enable_stemmer,
+	                      testdata->enable_unaccent,
+	                      fixture->ignore_stop_words,
+	                      fixture->ignore_reserved_words,
+	                      fixture->ignore_numbers);
+
+	/* Only the first word is of interest */
+	word = tracker_parser_next (fixture->parser,
+	                            &position,
+	                            &byte_offset_start,
+	                            &byte_offset_end,
+	                            &stop_word,
+	                            &word_length);
+
+	/* Parsed word has to match the normalized expectation */
+	g_assert_cmpstr (word, == , expected_nfkd);
+
+	g_free (expected_nfkd);
+}
+
+/* Runs the expected-word check only when libstemmer support is compiled in */
+static void
+test_stemmer (TrackerParserTestFixture *fixture,
+              gconstpointer             data)
+{
+#ifndef HAVE_LIBSTEMMER
+	/* No stemmer available: report the test as skipped */
+	g_test_skip ("Built without libstemmer");
+#else
+	expected_word_check (fixture, data);
+#endif
+}
+
+/* Runs the expected-word check only when UNAC support is compiled in */
+static void
+test_unac (TrackerParserTestFixture *fixture,
+           gconstpointer             data)
+{
+#ifndef HAVE_UNAC
+	/* No unaccenting available: report the test as skipped */
+	g_test_skip ("Built without UNAC");
+#else
+	expected_word_check (fixture, data);
+#endif
+}
+
+/* -------------- STOP WORD TESTS ----------------- */
+
+/* One input/expectation record for the stop-word tests */
+typedef struct TestDataStopWord {
+	const gchar *str;               /* text handed to the parser */
+	gboolean ignore_stop_words;     /* forwarded to tracker_parser_reset() */
+	gboolean is_expected_stop_word; /* stop-word flag expected for the first word */
+} TestDataStopWord;
+
+/* Common stop_word test method.
+ *
+ * Feeds testdata->str to the parser and checks that the stop-word flag
+ * reported for the first word equals testdata->is_expected_stop_word. */
+static void
+stop_word_check (TrackerParserTestFixture *fixture,
+                 gconstpointer             data)
+{
+	const TestDataStopWord *testdata = data;
+	gint position;
+	gint byte_offset_start;
+	gint byte_offset_end;
+	/* Start from a known value so the assertion below never reads an
+	 * indeterminate flag should the parser not write to it */
+	gboolean stop_word = FALSE;
+	gint word_length;
+
+	/* Reset the parser with our string */
+	tracker_parser_reset (fixture->parser,
+	                      testdata->str,
+	                      strlen (testdata->str),
+	                      fixture->max_word_length,
+	                      fixture->enable_stemmer,
+	                      fixture->enable_unaccent,
+	                      testdata->ignore_stop_words,
+	                      fixture->ignore_reserved_words,
+	                      fixture->ignore_numbers);
+
+	/* Process next word; only the stop-word flag is checked */
+	tracker_parser_next (fixture->parser,
+	                     &position,
+	                     &byte_offset_start,
+	                     &byte_offset_end,
+	                     &stop_word,
+	                     &word_length);
+
+	/* Check if the reported flag is the expected one */
+	g_assert_cmpuint (stop_word, == , testdata->is_expected_stop_word);
+}
+
+/* -------------- LIST OF TESTS ----------------- */
+
+/* Normalization-related tests (unaccenting) */
+static const TestDataExpectedWord test_data_normalization[] = {
+	{ "école",                "ecole", FALSE, TRUE  },
+	{ "ÉCOLE",                "ecole", FALSE, TRUE  },
+	{ "École",                "ecole", FALSE, TRUE  },
+	{ "e" "\xCC\x81" "cole",  "ecole", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "COLE",  "ecole", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "cole",  "ecole", FALSE, TRUE  },
+	{ NULL,                   NULL,    FALSE, FALSE }
+};
+
+/* Unaccenting-related tests */
+static const TestDataExpectedWord test_data_unaccent[] = {
+	{ "Murciélago",   "murcielago", FALSE, TRUE  },
+	{ "camión",       "camion",     FALSE, TRUE  },
+	{ "desagüe",      "desague",    FALSE, TRUE  },
+	{ "Ὰ",            "α",          FALSE, TRUE  }, /* greek capital alpha with U+0300, composed */
+	{ "ὰ",            "α",          FALSE, TRUE  }, /* greek small alpha with U+0300, composed */
+	{ "Ὶ",            "ι",          FALSE, TRUE  }, /* greek capital iotta with U+0300, composed */
+	{ "ὶ",            "ι",          FALSE, TRUE  }, /* greek small iotta with U+0300, composed */
+	{ "Ὼ",            "ω",          FALSE, TRUE  }, /* greek capital omega with U+0300, composed */
+	{ "ὼ",            "ω",          FALSE, TRUE  }, /* greek small omega with U+0300, composed */
+	{ "Ὰ",          "α",          FALSE, TRUE  }, /* capital alpha with U+0300, decomposed */
+	{ "ὰ",          "α",          FALSE, TRUE  }, /* small alpha with U+0300, decomposed */
+	{ "Ὶ",          "ι",          FALSE, TRUE  }, /* capital iotta with U+0300, decomposed */
+	{ "ὶ",          "ι",          FALSE, TRUE  }, /* small iotta with U+0300, decomposed */
+	{ "Ὼ",          "ω",          FALSE, TRUE  }, /* capital omega with U+0300, decomposed */
+	{ "ὼ",          "ω",          FALSE, TRUE  }, /* small omega with U+0300, decomposed */
+	{ "aN͡Ga",       "anga",       FALSE, TRUE  }, /* 0x0361 affects to two characters */
+	{ "aNG͡a",       "anga",       FALSE, TRUE  }, /* 0x0361 affects to two characters */
+	{ "Murciélago", "murciélago", FALSE, FALSE },
+	{ "camión",     "camión",     FALSE, FALSE },
+	{ "desagüe",    "desagüe",    FALSE, FALSE },
+	{ NULL,         NULL,         FALSE, FALSE }
+};
+
+/* Stemming-related tests: { str, expected, enable_stemmer, enable_unaccent } */
+static const TestDataExpectedWord test_data_stemming[] = {
+	{ "ecole", "ecol",  TRUE,  TRUE  },
+	{ "ecole", "ecole", FALSE, TRUE  },
+	{ NULL,    NULL,    FALSE, FALSE }
+};
+
+/* Casefolding-related tests */
+static const TestDataExpectedWord test_data_casefolding[] = {
+	{ "gross", "gross", FALSE, TRUE  },
+	{ "GROSS", "gross", FALSE, TRUE  },
+	{ "GrOsS", "gross", FALSE, TRUE  },
+	{ "groß",  "gross", FALSE, TRUE  },
+	{ NULL,    NULL,    FALSE, FALSE }
+};
+
+/* Number of expected words tests */
+static const TestDataExpectedNWords test_data_nwords[] = {
+	{ "The quick (\"brown\") fox can’t jump 32.3 feet, right?", TRUE,   8, -1 },
+	{ "The quick (\"brown\") fox can’t jump 32.3 feet, right?", FALSE, 10, -1 },
+	/* Note: as of 0.9.15, the dot is always a word breaker, even between
+	 *  numbers. */
+	{ "filename.txt",                                           TRUE,   2, -1 },
+	{ ".hidden.txt",                                            TRUE,   2, -1 },
+	{ "noextension.",                                           TRUE,   1, -1 },
+	{ "ホモ・サピエンス",                                          TRUE,   2, -1 }, /* katakana */
+	{ "喂人类",                                                   TRUE,   2, 3 }, /* chinese */
+	{ "Американские суда находятся в международных водах.",     TRUE,   6, -1 }, /* russian */
+	{ "Bần chỉ là một anh nghèo xác",                            TRUE,   7, -1 }, /* vietnamese */
+	{ "ホモ・サピエンス 喂人类 katakana, chinese, english",          TRUE,   7, 8 }, /* mixed */
+	{ NULL,                                                     FALSE,  0, 0 }
+};
+
+/* Stop-word tests (for english only): { str, ignore_stop_words, is_expected_stop_word } */
+static const TestDataStopWord test_data_stop_words[] = {
+	{ "hello", TRUE,  TRUE  }, /* hello is stop word */
+	{ "hello", FALSE, FALSE },
+	{ "world", TRUE,  FALSE }, /* world is not stop word */
+	{ "world", FALSE, FALSE },
+	{ NULL,    FALSE, FALSE }  /* NULL str terminates the table */
+};
+
+/* Registers one GTest case per row of every test table, then runs them */
+int
+main (int argc, char **argv)
+{
+	gchar *path;
+	gint idx;
+
+	g_test_init (&argc, &argv, NULL);
+
+	/* The stop-word dictionaries are looked up through this envvar, so
+	 *  point it at the directory where the dictionaries are. */
+	g_setenv ("TRACKER_LANGUAGE_STOP_WORDS_DIR",
+	          TOP_SRCDIR "/src/libtracker-common/stop-words",
+	          TRUE);
+
+	/* Normalization checks */
+	for (idx = 0; test_data_normalization[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/normalization_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_normalization[idx],
+		            test_common_setup,
+		            expected_word_check,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	/* Unaccent checks */
+	for (idx = 0; test_data_unaccent[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/unaccent_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_unaccent[idx],
+		            test_common_setup,
+		            test_unac,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	/* Casefolding checks */
+	for (idx = 0; test_data_casefolding[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/casefolding_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_casefolding[idx],
+		            test_common_setup,
+		            expected_word_check,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	/* Stemming checks */
+	for (idx = 0; test_data_stemming[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/stemming_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_stemming[idx],
+		            test_common_setup,
+		            test_stemmer,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	/* Expected number of words checks */
+	for (idx = 0; test_data_nwords[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/nwords_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_nwords[idx],
+		            test_common_setup,
+		            expected_nwords_check,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	/* Stop word checks */
+	for (idx = 0; test_data_stop_words[idx].str; idx++) {
+		path = g_strdup_printf ("/libtracker-fts/parser/stop_words_%d", idx);
+		g_test_add (path,
+		            TrackerParserTestFixture,
+		            &test_data_stop_words[idx],
+		            test_common_setup,
+		            stop_word_check,
+		            test_common_teardown);
+		g_free (path);
+	}
+
+	return g_test_run ();
+}
diff --git a/tests/libtracker-common/tracker-parser.c b/tests/libtracker-common/tracker-parser.c
new file mode 100644
index 000000000..932cc2e69
--- /dev/null
+++ b/tests/libtracker-common/tracker-parser.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <libtracker-common/tracker-common.h>
+
+/* Normally this would be in the libtracker-fts config */
+#define DEFAULT_MAX_WORD_LENGTH   30
+#define DEFAULT_ENABLE_STEMMER    FALSE
+#define DEFAULT_ENABLE_UNACCENT   TRUE
+#define DEFAULT_IGNORE_STOP_WORDS TRUE
+#define DEFAULT_IGNORE_NUMBERS    TRUE
+
+static gchar    *text;     /* text to parse: --text value or loaded file contents */
+static gchar    *filename; /* --file value, NULL when not given */
+static gboolean  verbose;  /* --verbose: print details for every parsed word */
+
+/* Command line options; parsed values land in the file-scope variables */
+static const GOptionEntry options [] = {
+	{
+		"verbose", 'v', G_OPTION_FLAG_NO_ARG,
+		G_OPTION_ARG_NONE, &verbose,
+		"Enable verbose output",
+		NULL
+	},
+	{
+		"text", 't', 0,
+		G_OPTION_ARG_STRING, &text,
+		"Specific text to parse",
+		NULL
+	},
+	{
+		"file", 'f', 0, /* a path, so keep it in the GLib filename encoding */
+		G_OPTION_ARG_FILENAME, &filename,
+		"Specific file to parse its contents",
+		NULL
+	},
+	{ NULL }
+};
+
+/* Parses the command line into the file-scope option variables.
+ *
+ * Returns TRUE on success. On a parse error the problem is reported on
+ * stderr and FALSE is returned. The option context is released on both
+ * paths. */
+static gboolean
+setup_context (gint argc,
+               gchar **argv)
+{
+	GOptionContext *context;
+	GError *error = NULL;
+	gboolean parsed;
+
+	/* Setup command line options. The last argument of
+	 * g_option_context_add_main_entries() is a gettext translation
+	 * domain; this tool has none, so pass NULL. */
+	context = g_option_context_new ("- Test the Tracker FTS parser");
+	g_option_context_add_main_entries (context,
+	                                   options,
+	                                   NULL);
+
+	/* Parse input arguments */
+	parsed = g_option_context_parse (context,
+	                                 &argc,
+	                                 &argv,
+	                                 &error);
+	if (!parsed) {
+		g_printerr ("%s\nRun '%s --help' to see a full list of available "
+		            "command line options.\n",
+		            error->message,
+		            argv[0]);
+		g_error_free (error);
+	}
+
+	g_option_context_free (context);
+	return parsed;
+}
+
+/* Loads the contents of the file named by 'filename' into 'text'.
+ *
+ * Returns TRUE on success; on failure the error is printed to stderr and
+ * FALSE is returned. */
+static gboolean
+load_file_contents (void)
+{
+	GFile *source = g_file_new_for_commandline_arg (filename);
+	GError *load_error = NULL;
+	gboolean loaded;
+
+	loaded = g_file_load_contents (source, NULL, &text, NULL, NULL, &load_error);
+	if (!loaded) {
+		g_printerr ("Error loading file '%s' contents: '%s'\n",
+		            filename,
+		            load_error->message);
+		g_error_free (load_error);
+	}
+
+	/* The GFile is only needed for the load itself */
+	g_object_unref (source);
+	return loaded;
+}
+
+/* Prints one parsed word next to the slice of 'text' it was produced from,
+ * both as strings and as hex dumps. */
+static void
+print_word_details (const gchar *word,
+                    gint         word_length,
+                    gint         position,
+                    gint         byte_offset_start,
+                    gint         byte_offset_end,
+                    gboolean     stop_word)
+{
+	gchar *word_hex;
+	gchar *original_word;
+	gchar *original_word_hex;
+	gint original_word_length;
+
+	/* Get original word: the bytes of the input between the two offsets */
+	original_word_length = byte_offset_end - byte_offset_start;
+	original_word = g_strndup (&text[byte_offset_start],
+	                           original_word_length);
+
+	/* Get hex strings */
+	word_hex = tracker_strhex (word, word_length, ':');
+	original_word_hex = tracker_strhex (original_word,
+	                                    original_word_length,
+	                                    ':');
+
+	g_print ("WORD at %d [%d,%d] Original: '%s' (%s), "
+	         "Processed: '%s' (%s) (stop? %s)\n",
+	         position,
+	         byte_offset_start,
+	         byte_offset_end,
+	         original_word,
+	         original_word_hex,
+	         word,
+	         word_hex,
+	         stop_word ? "yes" : "no");
+
+	g_free (word_hex);
+	g_free (original_word_hex);
+	g_free (original_word);
+}
+
+/* Runs the parser over 'text' and reports how long it took.
+ *
+ * Every word is printed when 'verbose' is set. Returns TRUE on success and
+ * FALSE if the language or the parser could not be created; the timer,
+ * parser and language are released on every path. */
+static gboolean
+run_parsing (void)
+{
+	TrackerLanguage *language = NULL;
+	TrackerParser *parser = NULL;
+	GTimer *timer;
+	gboolean success = FALSE;
+	const gchar *word;
+	gint position;
+	gint byte_offset_start;
+	gint byte_offset_end;
+	gboolean stop_word;
+	gint word_length;
+
+	/* Initialize timing */
+	timer = g_timer_new ();
+
+	/* Setup language for parser */
+	language = tracker_language_new (NULL);
+	if (!language) {
+		g_printerr ("Language setup failed!\n");
+		goto out;
+	}
+
+	/* Create the parser */
+	parser = tracker_parser_new (language);
+	if (!parser) {
+		g_printerr ("Parser creation failed!\n");
+		goto out;
+	}
+
+	/* Reset the parser with our string, using the built-in defaults
+	 * defined at the top of this file */
+	tracker_parser_reset (parser,
+	                      text,
+	                      strlen (text),
+	                      DEFAULT_MAX_WORD_LENGTH,
+	                      DEFAULT_ENABLE_STEMMER,
+	                      DEFAULT_ENABLE_UNACCENT,
+	                      DEFAULT_IGNORE_STOP_WORDS,
+	                      TRUE,
+	                      DEFAULT_IGNORE_NUMBERS);
+
+	/* Loop through all words, until the parser has no more to give */
+	while ((word = tracker_parser_next (parser,
+	                                    &position,
+	                                    &byte_offset_start,
+	                                    &byte_offset_end,
+	                                    &stop_word,
+	                                    &word_length)) != NULL) {
+		if (verbose) {
+			print_word_details (word,
+			                    word_length,
+			                    position,
+			                    byte_offset_start,
+			                    byte_offset_end,
+			                    stop_word);
+		}
+	}
+
+	g_print ("\n----> Parsing finished after '%lf' seconds\n",
+	         g_timer_elapsed (timer, NULL));
+
+	success = TRUE;
+
+out:
+	/* Shared cleanup for the success and failure paths */
+	if (parser) {
+		tracker_parser_free (parser);
+	}
+	if (language) {
+		g_object_unref (language);
+	}
+	g_timer_destroy (timer);
+
+	return success;
+}
+
+
+/* Entry point: parses the options, optionally loads the input file and
+ * runs the parser over the text.
+ *
+ * Returns 0 on success, or -1 (bad options), -2 (no input given),
+ * -3 (file could not be loaded) or -4 (parsing failed). */
+int
+main (int argc, char **argv)
+{
+	/* Setup locale */
+	setlocale (LC_ALL, "");
+
+	/* Setup context */
+	if (!setup_context (argc, argv)) {
+		g_printerr ("Context setup failed... exiting\n");
+		return -1;
+	}
+
+	/* Either text or file must be given */
+	if (filename == NULL &&
+	    text == NULL) {
+		g_printerr ("Either 'file' or 'text' options should be used\n"
+		            "Run '%s --help' to see a full list of available "
+		            "command line options.\n",
+		            argv[0]);
+		return -2;
+	}
+
+	/* If required, load file contents */
+	if (filename != NULL &&
+	    !load_file_contents ()) {
+		g_printerr ("Loading file '%s' contents failed... exiting\n",
+		            filename);
+		return -3;
+	}
+
+	/* Run the parsing! */
+	if (!run_parsing ()) {
+		g_printerr ("Parsing operation failed... exiting\n");
+		return -4;
+	}
+
+	/* Clean exit: both strings are heap-allocated, either by the option
+	 * parser or by load_file_contents(), and g_free() accepts NULL */
+	g_free (text);
+	g_free (filename);
+	return 0;
+}
author	Martyn Russell <martyn@lanedo.com>	2014-12-03 10:31:18 +0000
committer	Martyn Russell <martyn@lanedo.com>	2014-12-03 10:31:18 +0000
commit	8d14867631b59ecdfadcd77ac407f19fc15ba4d8 (patch)
tree	7502236577c41d633c8cd13e27dd70fb6f9e9580 /tests/libtracker-common
parent	8833933e45e77a67d06f21f47e1c70a1525350eb (diff)
download	tracker-8d14867631b59ecdfadcd77ac407f19fc15ba4d8.tar.gz