summary | refs | log | tree | commit | diff
path: root/src/tracker-extract/tracker-extract-pdf.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tracker-extract/tracker-extract-pdf.c')
-rw-r--r-- src/tracker-extract/tracker-extract-pdf.c 301
1 files changed, 74 insertions, 227 deletions
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index 6402ddd85..b6b4274f5 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -169,8 +169,8 @@ read_toc (PopplerIndexIter *index,
}
static void
-read_outline (PopplerDocument *document,
- TrackerSparqlBuilder *metadata)
+read_outline (PopplerDocument *document,
+ TrackerResource *metadata)
{
PopplerIndexIter *index;
GString *toc = NULL;
@@ -185,8 +185,7 @@ read_outline (PopplerDocument *document,
if (toc) {
if (toc->len > 0) {
- tracker_sparql_builder_predicate (metadata, "nfo:tableOfContents");
- tracker_sparql_builder_object_unvalidated (metadata, toc->str);
+ tracker_resource_set_string (metadata, "nfo:tableOfContents", toc->str);
}
g_string_free (toc, TRUE);
@@ -256,33 +255,26 @@ extract_content_text (PopplerDocument *document,
}
static void
-write_pdf_data (PDFData data,
- TrackerSparqlBuilder *metadata,
- GPtrArray *keywords)
+write_pdf_data (PDFData data,
+ TrackerResource *metadata,
+ GPtrArray *keywords)
{
if (!tracker_is_empty_string (data.title)) {
- tracker_sparql_builder_predicate (metadata, "nie:title");
- tracker_sparql_builder_object_unvalidated (metadata, data.title);
+ tracker_resource_set_string (metadata, "nie:title", data.title);
}
if (!tracker_is_empty_string (data.subject)) {
- tracker_sparql_builder_predicate (metadata, "nie:subject");
- tracker_sparql_builder_object_unvalidated (metadata, data.subject);
+ tracker_resource_set_string (metadata, "nie:subject", data.subject);
}
if (!tracker_is_empty_string (data.author)) {
- tracker_sparql_builder_predicate (metadata, "nco:creator");
- tracker_sparql_builder_object_blank_open (metadata);
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nco:Contact");
- tracker_sparql_builder_predicate (metadata, "nco:fullname");
- tracker_sparql_builder_object_unvalidated (metadata, data.author);
- tracker_sparql_builder_object_blank_close (metadata);
+ TrackerResource *author = tracker_extract_new_contact (data.author);
+ tracker_resource_set_relation (metadata, "nco:creator", author);
+ g_object_unref (author);
}
if (!tracker_is_empty_string (data.date)) {
- tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
- tracker_sparql_builder_object_unvalidated (metadata, data.date);
+ tracker_resource_set_string (metadata, "nie:contentCreated", data.date);
}
if (!tracker_is_empty_string (data.keywords)) {
@@ -296,9 +288,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
TrackerConfig *config;
GTime creation_date;
GError *error = NULL;
- TrackerSparqlBuilder *metadata, *preupdate;
- const gchar *graph;
- const gchar *urn;
+ TrackerResource *metadata;
TrackerXmpData *xd = NULL;
PDFData pd = { 0 }; /* actual data */
PDFData md = { 0 }; /* for merging */
@@ -315,11 +305,6 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
gsize len;
struct stat st;
- metadata = tracker_extract_info_get_metadata_builder (info);
- preupdate = tracker_extract_info_get_preupdate_builder (info);
- graph = tracker_extract_info_get_graph (info);
- urn = tracker_extract_info_get_urn (info);
-
file = tracker_extract_info_get_file (info);
filename = g_file_get_path (file);
@@ -362,14 +347,16 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
uri = g_file_get_uri (file);
document = poppler_document_new_from_data (contents, len, NULL, &error);
-
+
if (error) {
if (error->code == POPPLER_ERROR_ENCRYPTED) {
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
+ metadata = tracker_resource_new (NULL);
+
+ tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
+ tracker_resource_set_boolean (metadata, "nfo:isContentEncrypted", TRUE);
- tracker_sparql_builder_predicate (metadata, "nfo:isContentEncrypted");
- tracker_sparql_builder_object_boolean (metadata, TRUE);
+ tracker_extract_info_set_resource (info, metadata);
+ g_object_unref (metadata);
g_error_free (error);
g_free (uri);
@@ -398,8 +385,8 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
return FALSE;
}
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
+ metadata = tracker_resource_new (NULL);
+ tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument");
g_object_get (document,
"title", &pd.title,
@@ -435,243 +422,131 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
}
if (xd->publisher) {
- tracker_sparql_builder_predicate (metadata, "nco:publisher");
- tracker_sparql_builder_object_blank_open (metadata);
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nco:Contact");
- tracker_sparql_builder_predicate (metadata, "nco:fullname");
- tracker_sparql_builder_object_unvalidated (metadata, xd->publisher);
- tracker_sparql_builder_object_blank_close (metadata);
+ TrackerResource *publisher = tracker_extract_new_contact (xd->publisher);
+ tracker_resource_set_relation (metadata, "nco:publisher", publisher);
+ g_object_unref (publisher);
}
if (xd->type) {
- tracker_sparql_builder_predicate (metadata, "dc:type");
- tracker_sparql_builder_object_unvalidated (metadata, xd->type);
+ tracker_resource_set_string (metadata, "dc:type", xd->type);
}
if (xd->format) {
- tracker_sparql_builder_predicate (metadata, "dc:format");
- tracker_sparql_builder_object_unvalidated (metadata, xd->format);
+ tracker_resource_set_string (metadata, "dc:format", xd->format);
}
if (xd->identifier) {
- tracker_sparql_builder_predicate (metadata, "dc:identifier");
- tracker_sparql_builder_object_unvalidated (metadata, xd->identifier);
+ tracker_resource_set_string (metadata, "dc:identifier", xd->identifier);
}
if (xd->source) {
- tracker_sparql_builder_predicate (metadata, "dc:source");
- tracker_sparql_builder_object_unvalidated (metadata, xd->source);
+ tracker_resource_set_string (metadata, "dc:source", xd->source);
}
if (xd->language) {
- tracker_sparql_builder_predicate (metadata, "dc:language");
- tracker_sparql_builder_object_unvalidated (metadata, xd->language);
+ tracker_resource_set_string (metadata, "dc:language", xd->language);
}
if (xd->relation) {
- tracker_sparql_builder_predicate (metadata, "dc:relation");
- tracker_sparql_builder_object_unvalidated (metadata, xd->relation);
+ tracker_resource_set_string (metadata, "dc:relation", xd->relation);
}
if (xd->coverage) {
- tracker_sparql_builder_predicate (metadata, "dc:coverage");
- tracker_sparql_builder_object_unvalidated (metadata, xd->coverage);
+ tracker_resource_set_string (metadata, "dc:coverage", xd->coverage);
}
if (xd->license) {
- tracker_sparql_builder_predicate (metadata, "nie:license");
- tracker_sparql_builder_object_unvalidated (metadata, xd->license);
+ tracker_resource_set_string (metadata, "nie:license", xd->license);
}
if (xd->make || xd->model) {
- gchar *equip_uri;
-
- equip_uri = tracker_sparql_escape_uri_printf ("urn:equipment:%s:%s:",
- xd->make ? xd->make : "",
- xd->model ? xd->model : "");
-
- tracker_sparql_builder_insert_open (preupdate, NULL);
- if (graph) {
- tracker_sparql_builder_graph_open (preupdate, graph);
- }
-
- tracker_sparql_builder_subject_iri (preupdate, equip_uri);
- tracker_sparql_builder_predicate (preupdate, "a");
- tracker_sparql_builder_object (preupdate, "nfo:Equipment");
-
- if (xd->make) {
- tracker_sparql_builder_predicate (preupdate, "nfo:manufacturer");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->make);
- }
-
- if (xd->model) {
- tracker_sparql_builder_predicate (preupdate, "nfo:model");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->model);
- }
-
- if (graph) {
- tracker_sparql_builder_graph_close (preupdate);
- }
- tracker_sparql_builder_insert_close (preupdate);
-
- tracker_sparql_builder_predicate (metadata, "nfo:equipment");
- tracker_sparql_builder_object_iri (metadata, equip_uri);
- g_free (equip_uri);
+ TrackerResource *equipment = tracker_extract_new_equipment (xd->make, xd->model);
+ tracker_resource_set_relation (metadata, "nfo:equipment", equipment);
+ g_object_unref (equipment);
}
if (xd->orientation) {
- tracker_sparql_builder_predicate (metadata, "nfo:orientation");
- tracker_sparql_builder_object (metadata, xd->orientation);
+ tracker_resource_set_string (metadata, "nfo:orientation", xd->orientation);
}
if (xd->rights) {
- tracker_sparql_builder_predicate (metadata, "nie:copyright");
- tracker_sparql_builder_object_unvalidated (metadata, xd->rights);
+ tracker_resource_set_string (metadata, "nie:copyright", xd->rights);
}
if (xd->white_balance) {
- tracker_sparql_builder_predicate (metadata, "nmm:whiteBalance");
- tracker_sparql_builder_object (metadata, xd->white_balance);
+ tracker_resource_set_string (metadata, "nmm:whiteBalance", xd->white_balance);
}
if (xd->fnumber) {
gdouble value;
value = g_strtod (xd->fnumber, NULL);
- tracker_sparql_builder_predicate (metadata, "nmm:fnumber");
- tracker_sparql_builder_object_double (metadata, value);
+ tracker_resource_set_double (metadata, "nmm:fnumber", value);
}
if (xd->flash) {
- tracker_sparql_builder_predicate (metadata, "nmm:flash");
- tracker_sparql_builder_object (metadata, xd->flash);
+ tracker_resource_set_string (metadata, "nmm:flash", xd->flash);
}
if (xd->focal_length) {
gdouble value;
value = g_strtod (xd->focal_length, NULL);
- tracker_sparql_builder_predicate (metadata, "nmm:focalLength");
- tracker_sparql_builder_object_double (metadata, value);
+ tracker_resource_set_double (metadata, "nmm:focalLength", value);
}
/* Question: Shouldn't xd->Artist be merged with md.author instead? */
if (xd->artist || xd->contributor) {
- const gchar *artist;
+ TrackerResource *artist;
+ const gchar *artist_name;
+
+ artist_name = tracker_coalesce_strip (2, xd->artist, xd->contributor);
+
+ artist = tracker_extract_new_contact (artist_name);
+
+ tracker_resource_set_relation (metadata, "nco:contributor", artist);
- artist = tracker_coalesce_strip (2, xd->artist, xd->contributor);
- tracker_sparql_builder_predicate (metadata, "nco:contributor");
- tracker_sparql_builder_object_blank_open (metadata);
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nco:Contact");
- tracker_sparql_builder_predicate (metadata, "nco:fullname");
- tracker_sparql_builder_object_unvalidated (metadata, artist);
- tracker_sparql_builder_object_blank_close (metadata);
+ g_object_unref (artist);
}
if (xd->exposure_time) {
gdouble value;
value = g_strtod (xd->exposure_time, NULL);
- tracker_sparql_builder_predicate (metadata, "nmm:exposureTime");
- tracker_sparql_builder_object_double (metadata, value);
+ tracker_resource_set_double (metadata, "nmm:exposureTime", value);
}
if (xd->iso_speed_ratings) {
gdouble value;
value = g_strtod (xd->iso_speed_ratings, NULL);
- tracker_sparql_builder_predicate (metadata, "nmm:isoSpeed");
- tracker_sparql_builder_object_double (metadata, value);
+ tracker_resource_set_double (metadata, "nmm:isoSpeed", value);
}
if (xd->description) {
- tracker_sparql_builder_predicate (metadata, "nie:description");
- tracker_sparql_builder_object_unvalidated (metadata, xd->description);
+ tracker_resource_set_string (metadata, "nie:description", xd->description);
}
if (xd->metering_mode) {
- tracker_sparql_builder_predicate (metadata, "nmm:meteringMode");
- tracker_sparql_builder_object (metadata, xd->metering_mode);
+ tracker_resource_set_string (metadata, "nmm:meteringMode", xd->metering_mode);
}
if (xd->address || xd->state || xd->country || xd->city ||
xd->gps_altitude || xd->gps_latitude || xd-> gps_longitude) {
- tracker_sparql_builder_predicate (metadata, "slo:location");
-
- tracker_sparql_builder_object_blank_open (metadata); /* GeoLocation */
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "slo:GeoLocation");
-
- if (xd->address || xd->state || xd->country || xd->city) {
- gchar *addruri;
- addruri = tracker_sparql_get_uuid_urn ();
-
- tracker_sparql_builder_predicate (metadata, "slo:postalAddress");
- tracker_sparql_builder_object_iri (metadata, addruri);
-
- tracker_sparql_builder_insert_open (preupdate, NULL);
- if (graph) {
- tracker_sparql_builder_graph_open (preupdate, graph);
- }
-
- tracker_sparql_builder_subject_iri (preupdate, addruri);
+ TrackerResource *location = tracker_extract_new_location (xd->address,
+ xd->state, xd->city, xd->country, xd->gps_altitude,
+ xd->gps_latitude, xd->gps_longitude);
- g_free (addruri);
+ tracker_resource_set_relation (metadata, "slo:location", location);
- tracker_sparql_builder_predicate (preupdate, "a");
- tracker_sparql_builder_object (preupdate, "nco:PostalAddress");
-
- if (xd->address) {
- tracker_sparql_builder_predicate (preupdate, "nco:streetAddress");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->address);
- }
-
- if (xd->state) {
- tracker_sparql_builder_predicate (preupdate, "nco:region");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->state);
- }
-
- if (xd->city) {
- tracker_sparql_builder_predicate (preupdate, "nco:locality");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->city);
- }
-
- if (xd->country) {
- tracker_sparql_builder_predicate (preupdate, "nco:country");
- tracker_sparql_builder_object_unvalidated (preupdate, xd->country);
- }
-
- if (graph) {
- tracker_sparql_builder_graph_close (preupdate);
- }
- tracker_sparql_builder_insert_close (preupdate);
- }
-
- if (xd->gps_altitude) {
- tracker_sparql_builder_predicate (metadata, "slo:altitude");
- tracker_sparql_builder_object_unvalidated (metadata, xd->gps_altitude);
- }
-
- if (xd->gps_latitude) {
- tracker_sparql_builder_predicate (metadata, "slo:latitude");
- tracker_sparql_builder_object_unvalidated (metadata, xd->gps_latitude);
- }
-
- if (xd->gps_longitude) {
- tracker_sparql_builder_predicate (metadata, "slo:longitude");
- tracker_sparql_builder_object_unvalidated (metadata, xd->gps_longitude);
- }
-
- tracker_sparql_builder_object_blank_close (metadata); /* GeoLocation */
+ g_object_unref (location);
}
- if (xd->regions) {
- tracker_xmp_apply_regions (preupdate, metadata, graph, xd);
- }
+ if (xd->regions) {
+ tracker_xmp_apply_regions_to_resource (metadata, xd);
+ }
tracker_xmp_free (xd);
} else {
@@ -682,57 +557,26 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
}
for (i = 0; i < keywords->len; i++) {
- gchar *escaped, *subject;
+ TrackerResource *tag;
const gchar *p;
p = g_ptr_array_index (keywords, i);
- escaped = tracker_sparql_escape_string (p);
- subject = g_strdup_printf ("_:tag%d", i + 1);
-
- /* ensure tag with specified label exists */
- tracker_sparql_builder_insert_open (preupdate, graph);
- tracker_sparql_builder_subject (preupdate, subject);
- tracker_sparql_builder_predicate (preupdate, "a");
- tracker_sparql_builder_object (preupdate, "nao:Tag");
- tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
- tracker_sparql_builder_object_unvalidated (preupdate, escaped);
- tracker_sparql_builder_insert_close (preupdate);
- tracker_sparql_builder_append (preupdate,
- "WHERE { FILTER (NOT EXISTS { "
- "?tag a nao:Tag ; nao:prefLabel \"");
- tracker_sparql_builder_append (preupdate, escaped);
- tracker_sparql_builder_append (preupdate,
- "\" }) }\n");
-
- /* associate file with tag */
- tracker_sparql_builder_insert_open (preupdate, graph);
- tracker_sparql_builder_subject_iri (preupdate, urn);
- tracker_sparql_builder_predicate (preupdate, "nao:hasTag");
- tracker_sparql_builder_object (preupdate, "?tag");
- tracker_sparql_builder_insert_close (preupdate);
- tracker_sparql_builder_where_open (preupdate);
- tracker_sparql_builder_subject (preupdate, "?tag");
- tracker_sparql_builder_predicate (preupdate, "a");
- tracker_sparql_builder_object (preupdate, "nao:Tag");
- tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
- tracker_sparql_builder_object_unvalidated (preupdate, escaped);
- tracker_sparql_builder_where_close (preupdate);
-
- g_free (subject);
- g_free (escaped);
+ tag = tracker_extract_new_tag (p);
+
+ tracker_resource_add_relation (metadata, "nao:hasTag", tag);
+
+ g_object_unref (tag);
}
g_ptr_array_free (keywords, TRUE);
- tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
- tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document));
+ tracker_resource_set_int64 (metadata, "nfo:pageCount", poppler_document_get_n_pages(document));
config = tracker_main_get_config ();
n_bytes = tracker_config_get_max_bytes (config);
content = extract_content_text (document, n_bytes);
if (content) {
- tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
- tracker_sparql_builder_object_unvalidated (metadata, content);
+ tracker_resource_set_string (metadata, "nie:plainTextContent", content);
g_free (content);
}
@@ -755,5 +599,8 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
close (fd);
+ tracker_extract_info_set_resource (info, metadata);
+ g_object_unref (metadata);
+
return TRUE;
}