diff options
author | Philip Van Hoof <philip@codeminded.be> | 2011-09-22 11:05:22 +0200 |
---|---|---|
committer | Philip Van Hoof <philip@codeminded.be> | 2011-09-23 12:48:44 +0200 |
commit | d1d1f8a4e1d933bb0aa8d1017046a872761b0f55 (patch) | |
tree | 95332af4436ad3bb428325eb5d733e8e54a95f9c | |
parent | 43e643c8495fdcfca41434bcfc1486d8eb523d20 (diff) | |
download | tracker-d1d1f8a4e1d933bb0aa8d1017046a872761b0f55.tar.gz |
tracker-extract, pdf: Use mmap with an fd that has O_NOATIME for pdfs
Conflicts:
src/tracker-extract/tracker-extract-pdf.c
-rw-r--r-- | src/tracker-extract/tracker-extract-pdf.c | 64 |
1 file changed, 63 insertions, 1 deletion
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c index 787ba7218..8748c106a 100644 --- a/src/tracker-extract/tracker-extract-pdf.c +++ b/src/tracker-extract/tracker-extract-pdf.c @@ -21,13 +21,25 @@ #include "config.h" +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <errno.h> +#include <fcntl.h> #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/mman.h> #include <glib.h> +#include <glib/gstdio.h> #include <glib/poppler.h> #include <libtracker-common/tracker-date-time.h> #include <libtracker-common/tracker-utils.h> +#include <libtracker-common/tracker-file-utils.h> #include <libtracker-extract/tracker-extract.h> @@ -291,11 +303,55 @@ extract_pdf (const gchar *uri, GPtrArray *keywords; guint i; GString *where = NULL; + gchar *filename; + int fd; + gchar *contents = NULL; + gsize len; + struct stat st; g_type_init (); - document = poppler_document_new_from_file (uri, NULL, &error); + filename = g_filename_from_uri (uri, NULL, NULL); + + fd = g_open (filename, O_RDONLY | O_NOATIME, 0); + + if (fd == -1) { + g_warning ("Could not open pdf file '%s': %s\n", + filename, + g_strerror (errno)); + g_free (filename); + return; + } + if (fstat (fd, &st) == -1) { + g_warning ("Could not fstat pdf file '%s': %s\n", + filename, + g_strerror (errno)); + close (fd); + g_free (filename); + return; + } + + if (st.st_size == 0) { + contents = NULL; + len = 0; + } else { + contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (contents == NULL) { + g_warning ("Could not mmap pdf file '%s': %s\n", + filename, + g_strerror (errno)); + close (fd); + g_free (filename); + return; + } + len = st.st_size; + } + + g_free (filename); + + document = poppler_document_new_from_data (contents, len, NULL, &error); + if (error) { if (error->code == POPPLER_ERROR_ENCRYPTED) { tracker_sparql_builder_predicate (metadata, "a"); @@ -651,6 +707,12 @@ extract_pdf 
(const gchar *uri, g_free (pd.date); g_object_unref (document); + + if (contents) { + munmap (contents, len); + } + + close (fd); } TrackerExtractData * |