summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/libtracker-extract/01-mockup.rule.in4
-rw-r--r--examples/libtracker-extract/Makefile.am11
-rw-r--r--examples/libtracker-extract/tracker-extract-mockup.c41
-rw-r--r--src/libtracker-extract/tracker-data.h73
-rw-r--r--src/tracker-extract/tracker-extract.c346
5 files changed, 95 insertions, 380 deletions
diff --git a/examples/libtracker-extract/01-mockup.rule.in b/examples/libtracker-extract/01-mockup.rule.in
new file mode 100644
index 000000000..31d56b561
--- /dev/null
+++ b/examples/libtracker-extract/01-mockup.rule.in
@@ -0,0 +1,4 @@
+[ExtractorRule]
+ModulePath=@modulesdir@/libextract-mp3.so
+MimeTypes=audio/mpeg;audio/x-mp3;
+
diff --git a/examples/libtracker-extract/Makefile.am b/examples/libtracker-extract/Makefile.am
index 798c73c2c..74cdc435f 100644
--- a/examples/libtracker-extract/Makefile.am
+++ b/examples/libtracker-extract/Makefile.am
@@ -19,3 +19,14 @@ libextract_mockup_la_LIBADD = \
$(BUILD_LIBS) \
$(LIBTRACKER_EXTRACT_LIBS)
+rules_in_files = 01-mockup.rule.in
+
+%.rule: %.rule.in
+ @sed -e "s|@modulesdir[@]|${modulesdir}|" $< > $@
+
+rulesdir = $(datadir)/tracker/extract-rules
+rules_DATA = 01-mockup.rule
+
+CLEANFILES = $(rules_DATA)
+
+EXTRA_DIST = $(rules_in_files) \ No newline at end of file
diff --git a/examples/libtracker-extract/tracker-extract-mockup.c b/examples/libtracker-extract/tracker-extract-mockup.c
index afb60df99..3ddb94b86 100644
--- a/examples/libtracker-extract/tracker-extract-mockup.c
+++ b/examples/libtracker-extract/tracker-extract-mockup.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010, Your name <Your email address>
+ * Copyright (C) 2011, Your name <Your email address>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -26,22 +26,17 @@
#include <libtracker-extract/tracker-extract.h>
#include <libtracker-sparql/tracker-sparql.h>
-static void extract_mockup (const gchar *uri,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata);
-
-static TrackerExtractData data[] = {
- /* TODO: Insert mime types and functions here. */
- { "audio/mpeg", extract_mockup },
- { "audio/x-mp3", extract_mockup },
- { NULL, NULL }
-};
-
-static void
-extract_mockup (const gchar *uri,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata)
+G_MODULE_EXPORT gboolean
+tracker_extract_get_metadata (const gchar *uri,
+ const gchar *mimetype,
+ TrackerSparqlBuilder *preupdate,
+ TrackerSparqlBuilder *metadata)
{
+ /* NOTE: This function has to exist, tracker-extract checks
+ * the symbole table for this function and if it doesn't
+ * exist, the module is not loaded to be used as an extractor.
+ */
+
/* File information */
FILE *f;
GFileInfo *info;
@@ -105,7 +100,7 @@ extract_mockup (const gchar *uri,
*/
if (size < 64) {
g_free (filename);
- return;
+ return FALSE;
}
/* TODO: Open file */
@@ -113,7 +108,7 @@ extract_mockup (const gchar *uri,
if (!f) {
g_free (filename);
- return;
+ return FALSE;
}
/* TODO: Get data from file.
@@ -335,14 +330,6 @@ extract_mockup (const gchar *uri,
g_free (composer);
g_free (performer);
g_free (filename);
-}
-TrackerExtractData *
-tracker_extract_get_data (void)
-{
- /* NOTE: This function has to exist, tracker-extract checks
- * the symbole table for this function and if it doesn't
- * exist, the module is not loaded to be used as an extractor.
- */
- return data;
+ return TRUE;
}
diff --git a/src/libtracker-extract/tracker-data.h b/src/libtracker-extract/tracker-data.h
index 7b0cabf14..5809b2e6b 100644
--- a/src/libtracker-extract/tracker-data.h
+++ b/src/libtracker-extract/tracker-data.h
@@ -95,16 +95,21 @@ G_BEGIN_DECLS
*
*/
-
/**
- * TrackerExtractMimeFunc:
+ * tracker_extract_get_metadata:
* @uri: a string representing a URI.
+ * @mimetype: mimetype for the element contained in URI
* @preupdate: used to populate with data updates that
* are a prerequisite for the actual file
* metadata insertion.
* @metadata: used to populate with file metadata predicate/object(s).
*
- * Extracts metadata from a file, and inserts it into @metadata.
+ * This function must be provided by ALL extractors. This is merely
+ * the declaration of the function which must be written by each
+ * extractor.
+ *
+ * This is checked by tracker-extract by looking up the symbols for
+ * each started plugin and making sure this function exists.
*
* The @metadata parameter is a #TrackerSparqlBuilder constructed
* through tracker_sparql_builder_new_embedded_insert(), the subject
@@ -119,65 +124,15 @@ G_BEGIN_DECLS
* added to @preupdate, which is a #TrackerSparqlBuilder constructed.
* through tracker_sparql_builder_new_update().
*
- * Since: 0.8
- **/
-typedef void (*TrackerExtractMimeFunc) (const gchar *uri,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata);
-
-/**
- * TrackerExtractData:
- * @mime: a string pointer representing a mime type.
- * @func: a function to extract extract the data in.
- *
- * The @mime is usually in the format of "image/png" for example.
-
- * The @func is called by tracker-extract if an extractor plugin
- * matches the @mime.
- *
- * Since: 0.8
- **/
-typedef struct {
- const gchar *mime;
- TrackerExtractMimeFunc func;
-} TrackerExtractData;
-
-/**
- * TrackerExtractDataFunc:
- *
- * This function is used by by tracker-extract to call into each
- * extractor to get a list of mime type and TrackerExtractMimeFunc
- * combinations.
- *
- * Returns: an array of #TrackerExtractData which must be NULL
- * terminated and must NOT be freed.
- *
- * Since: 0.6
- **/
-typedef TrackerExtractData * (*TrackerExtractDataFunc) (void);
-
-/**
- * tracker_extract_get_data:
- *
- *
- * This function must be provided by ALL extractors. This is merely
- * the declaration of the function which must be written by each
- * extractor.
- *
- * This is checked by tracker-extract by looking up the symbols for
- * each plugin and making sure this function exists. This is only
- * called by tracker-extract if a mime type in any of the
- * #TrackerExtractData structures returned matches the mime type of
- * the file being handled.
- *
- * Returns: a #TrackerExtractData pointer which should not be freed.
- * This pointer can be an array of #TrackerExtractData structures
- * where multiple mime types are supported.
+ * Returns: %TRUE if the extraction succeeded, %FALSE otherwise.
*
* Since: 0.8
*/
-TrackerExtractData *tracker_extract_get_data (void);
-
+gboolean tracker_extract_get_metadata (const gchar *uri,
+ const gchar *mimetype,
+ TrackerSparqlBuilder *preupdate,
+ TrackerSparqlBuilder *metadata);
+
G_END_DECLS
#endif /* __LIBTRACKER_EXTRACT_DATA_H__ */
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index dd0cee3d2..328d47132 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -32,6 +32,7 @@
#include <libtracker-common/tracker-common.h>
#include <libtracker-extract/tracker-extract.h>
+#include <libtracker-extract/tracker-extract-module-manager.h>
#include "tracker-extract.h"
#include "tracker-main.h"
@@ -41,8 +42,6 @@
#include "tracker-topanalyzer.h"
#endif /* HAVE_STREAMANALYZER */
-#define EXTRACT_FUNCTION "tracker_extract_get_data"
-
#define MAX_EXTRACT_TIME 10
#define UNKNOWN_METHOD_MESSAGE "Method \"%s\" with signature \"%s\" on " \
@@ -73,23 +72,22 @@ static const gchar introspection_xml[] =
extern gboolean debug;
typedef struct {
- GArray *specific_extractors;
- GArray *generic_extractors;
+ gint extracted_count;
+ gint failed_count;
+} StatisticsData;
+
+typedef struct {
+ GHashTable *statistics_data;
+
gboolean disable_shutdown;
gboolean force_internal_extractors;
gboolean disable_summary_on_finalize;
GDBusConnection *d_connection;
GDBusNodeInfo *introspection_data;
guint registration_id;
-} TrackerExtractPrivate;
-typedef struct {
- const GModule *module;
- const TrackerExtractData *edata;
- GPatternSpec *pattern; /* For a fast g_pattern_match() */
- gint extracted_count;
- gint failed_count;
-} ModuleData;
+ gint unhandled_count;
+} TrackerExtractPrivate;
static void tracker_extract_finalize (GObject *object);
static void report_statistics (GObject *object);
@@ -111,16 +109,20 @@ tracker_extract_class_init (TrackerExtractClass *klass)
static void
tracker_extract_init (TrackerExtract *object)
{
+ TrackerExtractPrivate *priv;
+
#ifdef HAVE_LIBSTREAMANALYZER
tracker_topanalyzer_init ();
#endif /* HAVE_STREAMANALYZER */
+
+ priv = TRACKER_EXTRACT_GET_PRIVATE (object);
+ priv->statistics_data = g_hash_table_new (NULL, NULL);
}
static void
tracker_extract_finalize (GObject *object)
{
TrackerExtractPrivate *priv;
- gint i;
priv = TRACKER_EXTRACT_GET_PRIVATE (object);
@@ -132,21 +134,7 @@ tracker_extract_finalize (GObject *object)
tracker_topanalyzer_shutdown ();
#endif /* HAVE_STREAMANALYZER */
- for (i = 0; i < priv->specific_extractors->len; i++) {
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- g_pattern_spec_free (mdata->pattern);
- }
- g_array_free (priv->specific_extractors, TRUE);
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- g_pattern_spec_free (mdata->pattern);
- }
- g_array_free (priv->generic_extractors, TRUE);
+ g_hash_table_destroy (priv->statistics_data);
G_OBJECT_CLASS (tracker_extract_parent_class)->finalize (object);
}
@@ -155,205 +143,41 @@ static void
report_statistics (GObject *object)
{
TrackerExtractPrivate *priv;
- GHashTable *reported = NULL;
- gint i;
+ GHashTableIter iter;
+ gpointer key, value;
priv = TRACKER_EXTRACT_GET_PRIVATE (object);
g_message ("--------------------------------------------------");
g_message ("Statistics:");
- g_message (" Specific Extractors:");
-
- reported = g_hash_table_new (g_direct_hash, g_direct_equal);
- for (i = 0; i < priv->specific_extractors->len; i++) {
- ModuleData *mdata;
- const gchar *name;
+ g_hash_table_iter_init (&iter, priv->statistics_data);
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- name = g_module_name ((GModule*) mdata->module);
+ while (g_hash_table_iter_next (&iter, &key, &value)) {
+ GModule *module = key;
+ StatisticsData *data = value;
- if ((mdata->extracted_count > 0 || mdata->failed_count > 0) &&
- !g_hash_table_lookup (reported, name)) {
- const gchar *name_without_path;
+ if (data->extracted_count > 0 || data->failed_count > 0) {
+ const gchar *name, *name_without_path;
+ name = g_module_name (module);
name_without_path = strrchr (name, G_DIR_SEPARATOR) + 1;
g_message (" Module:'%s', extracted:%d, failures:%d",
name_without_path,
- mdata->extracted_count,
- mdata->failed_count);
- g_hash_table_insert (reported, (gpointer) name, GINT_TO_POINTER(1));
+ data->extracted_count,
+ data->failed_count);
}
}
- if (g_hash_table_size (reported) < 1) {
- g_message (" No files handled");
- }
-
- g_hash_table_remove_all (reported);
-
- g_message (" Generic Extractors:");
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- ModuleData *mdata;
- const gchar *name;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- name = g_module_name ((GModule*) mdata->module);
-
- if ((mdata->extracted_count > 0 || mdata->failed_count > 0) &&
- !g_hash_table_lookup (reported, name)) {
- const gchar *name_without_path;
+ g_message ("Unhandled files: %d", priv->unhandled_count);
- name_without_path = strrchr (name, G_DIR_SEPARATOR) + 1;
-
- g_message (" Module:'%s', extracted:%d, failed:%d",
- name_without_path,
- mdata->extracted_count,
- mdata->failed_count);
- g_hash_table_insert (reported, (gpointer) name, GINT_TO_POINTER(1));
- }
- }
-
- if (g_hash_table_size (reported) < 1) {
+ if (priv->unhandled_count == 0 &&
+ g_hash_table_size (priv->statistics_data) < 1) {
g_message (" No files handled");
}
g_message ("--------------------------------------------------");
-
- g_hash_table_unref (reported);
-}
-
-static gboolean
-load_modules (const gchar *force_module,
- GArray **specific_extractors,
- GArray **generic_extractors)
-{
- GDir *dir;
- GError *error = NULL;
- const gchar *name;
- gchar *force_module_checked;
- gboolean success;
- const gchar *extractors_dir;
-
- extractors_dir = g_getenv ("TRACKER_EXTRACTORS_DIR");
- if (G_LIKELY (extractors_dir == NULL)) {
- extractors_dir = TRACKER_EXTRACTORS_DIR;
- } else {
- g_message ("Extractor modules directory is '%s' (set in env)", extractors_dir);
- }
-
- dir = g_dir_open (extractors_dir, 0, &error);
-
- if (!dir) {
- g_error ("Error opening modules directory: %s", error->message);
- g_error_free (error);
- return FALSE;
- }
-
- if (G_UNLIKELY (force_module)) {
- if (!g_str_has_suffix (force_module, "." G_MODULE_SUFFIX)) {
- force_module_checked = g_strdup_printf ("%s.%s",
- force_module,
- G_MODULE_SUFFIX);
- } else {
- force_module_checked = g_strdup (force_module);
- }
- } else {
- force_module_checked = NULL;
- }
-
- *specific_extractors = g_array_new (FALSE,
- TRUE,
- sizeof (ModuleData));
-
- *generic_extractors = g_array_new (FALSE,
- TRUE,
- sizeof (ModuleData));
-
-#ifdef HAVE_LIBSTREAMANALYZER
- if (!force_internal_extractors) {
- g_message ("Adding extractor for libstreamanalyzer");
- g_message (" Generic match for ALL (tried first before our module)");
- g_message (" Specific match for NONE (fallback to our modules)");
- } else {
- g_message ("Not using libstreamanalyzer");
- g_message (" It is available but disabled by command line");
- }
-#endif /* HAVE_STREAMANALYZER */
-
- while ((name = g_dir_read_name (dir)) != NULL) {
- TrackerExtractDataFunc func;
- GModule *module;
- gchar *module_path;
-
- if (!g_str_has_suffix (name, "." G_MODULE_SUFFIX)) {
- continue;
- }
-
- if (force_module_checked && strcmp (name, force_module_checked) != 0) {
- continue;
- }
-
- module_path = g_build_filename (extractors_dir, name, NULL);
-
- module = g_module_open (module_path, G_MODULE_BIND_LOCAL);
-
- if (!module) {
- g_warning ("Could not load module '%s': %s",
- name,
- g_module_error ());
- g_free (module_path);
- continue;
- }
-
- g_module_make_resident (module);
-
- if (g_module_symbol (module, EXTRACT_FUNCTION, (gpointer *) &func)) {
- ModuleData mdata = { 0 };
-
- mdata.module = module;
- mdata.edata = (func) ();
-
- g_message ("Adding extractor:'%s' with:",
- g_module_name ((GModule*) mdata.module));
-
- for (; mdata.edata->mime; mdata.edata++) {
- /* Compile pattern from mime */
- mdata.pattern = g_pattern_spec_new (mdata.edata->mime);
-
- if (G_UNLIKELY (strchr (mdata.edata->mime, '*') != NULL)) {
- g_message (" Generic match for mime:'%s'",
- mdata.edata->mime);
- g_array_append_val (*generic_extractors, mdata);
- } else {
- g_message (" Specific match for mime:'%s'",
- mdata.edata->mime);
- g_array_append_val (*specific_extractors, mdata);
- }
- }
- } else {
- g_warning ("Could not load module '%s': Function %s() was not found, is it exported?",
- name, EXTRACT_FUNCTION);
- }
-
- g_free (module_path);
- }
-
- if (G_UNLIKELY (force_module) &&
- (!*specific_extractors || (*specific_extractors)->len < 1) &&
- (!*generic_extractors || (*generic_extractors)->len < 1)) {
- g_warning ("Could not force module '%s', it was not found", force_module_checked);
- success = FALSE;
- } else {
- success = TRUE;
- }
-
- g_free (force_module_checked);
- g_dir_close (dir);
-
- return success;
}
TrackerExtract *
@@ -363,15 +187,8 @@ tracker_extract_new (gboolean disable_shutdown,
{
TrackerExtract *object;
TrackerExtractPrivate *priv;
- GArray *specific_extractors;
- GArray *generic_extractors;
- if (!g_module_supported ()) {
- g_error ("Modules are not supported for this platform");
- return NULL;
- }
-
- if (!load_modules (force_module, &specific_extractors, &generic_extractors)) {
+ if (!tracker_extract_module_manager_init ()) {
return NULL;
}
@@ -383,9 +200,6 @@ tracker_extract_new (gboolean disable_shutdown,
priv->disable_shutdown = disable_shutdown;
priv->force_internal_extractors = force_internal_extractors;
- priv->specific_extractors = specific_extractors;
- priv->generic_extractors = generic_extractors;
-
return object;
}
@@ -499,106 +313,50 @@ get_file_metadata (TrackerExtract *extract,
* data we need from the extractors.
*/
if (mime_used) {
- guint i;
- glong length;
- gchar *reversed;
-
- /* Using a reversed string while pattern matching is faster
- * if we have lots of patterns with wildcards.
- * We are assuming here that mime_used is ASCII always, so
- * we avoid g_utf8_strreverse() */
- reversed = g_strdup (mime_used);
- g_strreverse (reversed);
- length = strlen (mime_used);
-
- for (i = 0; i < priv->specific_extractors->len; i++) {
- const TrackerExtractData *edata;
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->specific_extractors, ModuleData, i);
- edata = mdata->edata;
-
- if (g_pattern_match (mdata->pattern, length, mime_used, reversed)) {
- gint items;
-
- tracker_dbus_request_comment (request,
- " Extracting with module:'%s'",
- g_module_name ((GModule*) mdata->module));
-
- (*edata->func) (uri, preupdate, statements);
+ TrackerExtractMetadataFunc func;
+ GModule *module;
- items = tracker_sparql_builder_get_length (statements);
+ module = tracker_extract_module_manager_get_for_mimetype (mime_used, &func);
- tracker_dbus_request_comment (request,
- " Found %d metadata items",
- items);
+ if (module) {
+ StatisticsData *data;
+ gint items;
- mdata->extracted_count++;
+ (func) (uri, mime_used, preupdate, statements);
- if (items == 0) {
- mdata->failed_count++;
- continue;
- }
+ items = tracker_sparql_builder_get_length (statements);
- tracker_sparql_builder_insert_close (statements);
+ tracker_dbus_request_comment (request,
+ " Found %d metadata items",
+ items);
- g_free (mime_used);
- g_free (reversed);
+ data = g_hash_table_lookup (priv->statistics_data, module);
- *preupdate_out = preupdate;
- *statements_out = statements;
- return TRUE;
+ if (!data) {
+ data = g_slice_new0 (StatisticsData);
+ g_hash_table_insert (priv->statistics_data, module, data);
}
- }
-
- for (i = 0; i < priv->generic_extractors->len; i++) {
- const TrackerExtractData *edata;
- ModuleData *mdata;
-
- mdata = &g_array_index (priv->generic_extractors, ModuleData, i);
- edata = mdata->edata;
-
- if (g_pattern_match (mdata->pattern, length, mime_used, reversed)) {
- gint items;
-
- tracker_dbus_request_comment (request,
- " Extracting with module:'%s'",
- g_module_name ((GModule*) mdata->module));
- (*edata->func) (uri, preupdate, statements);
-
- items = tracker_sparql_builder_get_length (statements);
-
- tracker_dbus_request_comment (request,
- " Found %d metadata items",
- items);
-
- mdata->extracted_count++;
-
- if (items == 0) {
- mdata->failed_count++;
- continue;
- }
+ data->extracted_count++;
+ if (items > 0) {
tracker_sparql_builder_insert_close (statements);
- g_free (mime_used);
- g_free (reversed);
-
*preupdate_out = preupdate;
*statements_out = statements;
return TRUE;
+ } else {
+ data->failed_count++;
}
+ } else {
+ priv->unhandled_count++;
}
tracker_dbus_request_comment (request,
" Could not find any extractors to handle metadata type "
"(mime: %s)",
mime_used);
-
- g_free (mime_used);
- g_free (reversed);
} else {
tracker_dbus_request_comment (request,
" No mime available, not extracting data");