summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philip Van Hoof <philip@codeminded.be> 2011-09-22 11:05:22 +0200
committer: Philip Van Hoof <philip@codeminded.be> 2011-09-23 12:48:44 +0200
commit: d1d1f8a4e1d933bb0aa8d1017046a872761b0f55 (patch)
tree: 95332af4436ad3bb428325eb5d733e8e54a95f9c
parent: 43e643c8495fdcfca41434bcfc1486d8eb523d20 (diff)
download: tracker-d1d1f8a4e1d933bb0aa8d1017046a872761b0f55.tar.gz
tracker-extract, pdf: Use mmap with an fd that has O_NOATIME for pdfs
Conflicts: src/tracker-extract/tracker-extract-pdf.c
-rw-r--r-- src/tracker-extract/tracker-extract-pdf.c 64
1 files changed, 63 insertions, 1 deletions
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index 787ba7218..8748c106a 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -21,13 +21,25 @@
#include "config.h"
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
#include <glib.h>
+#include <glib/gstdio.h>
#include <glib/poppler.h>
#include <libtracker-common/tracker-date-time.h>
#include <libtracker-common/tracker-utils.h>
+#include <libtracker-common/tracker-file-utils.h>
#include <libtracker-extract/tracker-extract.h>
@@ -291,11 +303,55 @@ extract_pdf (const gchar *uri,
GPtrArray *keywords;
guint i;
GString *where = NULL;
+ gchar *filename;
+ int fd;
+ gchar *contents = NULL;
+ gsize len;
+ struct stat st;
g_type_init ();
- document = poppler_document_new_from_file (uri, NULL, &error);
+ filename = g_filename_from_uri (uri, NULL, NULL);
+
+ fd = g_open (filename, O_RDONLY | O_NOATIME, 0);
+
+ if (fd == -1) {
+ g_warning ("Could not open pdf file '%s': %s\n",
+ filename,
+ g_strerror (errno));
+ g_free (filename);
+ return;
+ }
+ if (fstat (fd, &st) == -1) {
+ g_warning ("Could not fstat pdf file '%s': %s\n",
+ filename,
+ g_strerror (errno));
+ close (fd);
+ g_free (filename);
+ return;
+ }
+
+ if (st.st_size == 0) {
+ contents = NULL;
+ len = 0;
+ } else {
+ contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (contents == NULL) {
+ g_warning ("Could not mmap pdf file '%s': %s\n",
+ filename,
+ g_strerror (errno));
+ close (fd);
+ g_free (filename);
+ return;
+ }
+ len = st.st_size;
+ }
+
+ g_free (filename);
+
+ document = poppler_document_new_from_data (contents, len, NULL, &error);
+
if (error) {
if (error->code == POPPLER_ERROR_ENCRYPTED) {
tracker_sparql_builder_predicate (metadata, "a");
@@ -651,6 +707,12 @@ extract_pdf (const gchar *uri,
g_free (pd.date);
g_object_unref (document);
+
+ if (contents) {
+ munmap (contents, len);
+ }
+
+ close (fd);
}
TrackerExtractData *