summaryrefslogtreecommitdiff
path: root/pdf_ext.py
diff options
context:
space:
mode:
authorNicolas Chauvat <nicolas.chauvat@logilab.fr>2007-09-28 02:08:48 +0200
committerNicolas Chauvat <nicolas.chauvat@logilab.fr>2007-09-28 02:08:48 +0200
commita7bbed8e220645e7713f770c1fd9d3da51f14376 (patch)
tree9455c007bdb848d857edd9bd070af243e6471ec4 /pdf_ext.py
parentf3b59c81250f59adea07839aad09d3aa03d7e6ef (diff)
downloadlogilab-common-a7bbed8e220645e7713f770c1fd9d3da51f14376.tar.gz
pdf_ext manipulates pdf and fills pdf forms via fdf files. pdftk recommended.
Diffstat (limited to 'pdf_ext.py')
-rw-r--r--pdf_ext.py97
1 files changed, 97 insertions, 0 deletions
diff --git a/pdf_ext.py b/pdf_ext.py
new file mode 100644
index 0000000..c742262
--- /dev/null
+++ b/pdf_ext.py
@@ -0,0 +1,97 @@
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+""" Copyright (c) 2003-2007 LOGILAB S.A. (Paris, FRANCE).
+ http://www.logilab.fr/ -- mailto:contact@logilab.fr
+
+manipulate pdf and fdf files. pdftk recommended.
+
+Notes regarding pdftk, PDF forms and FDF (Forms Data Format) files.
+Field names can be extracted with:
+ pdftk orig.pdf generate_fdf output truc.fdf
+to merge fdf and pdf:
+ pdftk orig.pdf fill_form test.fdf output result.pdf [flatten]
+without flatten, one could further edit the resulting form.
+with flatten, everything is turned into text.
+"""
+
+import os
+
# Text emitted before the field entries of an FDF document: the "%FDF-1.2"
# version line, a comment line made of four non-ASCII characters, and the
# opening of object "1 0" with its /FDF dictionary and /Fields array.
HEAD = (
    "%FDF-1.2\n"
    "%\xE2\xE3\xCF\xD3\n"
    "1 0 obj\n"
    "<<\n"
    "/FDF\n"
    "<<\n"
    "/Fields [\n"
)

# Text emitted after the field entries: closes the /Fields array, both
# dictionaries and the object, then writes a trailer whose /Root points at
# object "1 0", followed by the end-of-file marker.
TAIL = (
    "]\n"
    ">>\n"
    ">>\n"
    "endobj\n"
    "trailer\n"
    "\n"
    "<<\n"
    "/Root 1 0 R\n"
    ">>\n"
    "%%EOF\n"
)
+
def output_field(f):
    """Return *f* encoded as big-endian UTF-16 with a leading byte-order mark.

    The result is a ``str`` in which every character stands for one byte
    (code points 0-255), so it can be written to a Latin-1 encoded file
    unchanged.  For characters up to U+00FF this is exactly the original
    ``"\\x00" + c`` layout; higher code points (including those needing a
    surrogate pair) are now encoded correctly instead of producing a
    malformed string.

    :param f: field name as text
    :return: ``"\\xfe\\xff"`` followed by the UTF-16BE bytes of *f*
    """
    # Round-tripping through Latin-1 maps each encoded byte to the character
    # with the same code point, keeping the "one char == one byte" contract.
    return "\xfe\xff" + f.encode("utf-16-be").decode("latin-1")
+
def extract_keys(lines):
    """Return the field names found in the ``/T`` lines of an FDF file.

    Each ``/T`` line is expected to look like the ones ``write_field``
    emits: ``/T (`` + two-character byte-order mark + the name with a NUL
    character in front of every letter + ``)``.  Lines that do not start
    with ``/T`` (including ``/V`` value lines) are ignored.

    :param lines: iterable of text lines from an FDF file
    :return: list of field names, in file order
    """
    keys = []
    for line in lines:
        if not line.startswith('/T'):
            continue
        # Drop the line ending first so that a last line without a newline,
        # or a CRLF-terminated line, is handled like a plain "\n" line.
        content = line.rstrip('\r\n')
        if content.endswith(')'):
            content = content[:-1]
        # Skip the 7-character prefix: "/T (", the 2-character BOM and the
        # NUL that precedes the first letter; then drop the remaining NULs.
        keys.append(content[7:].replace('\x00', ''))
    return keys
+
def _escape_pdf_literal(text):
    """Backslash-escape the characters that are special in a PDF literal string."""
    # The backslash must be handled first so the escapes added for the
    # parentheses are not escaped a second time.
    return text.replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")


def write_field(out, key, value):
    """Write one field dictionary (value ``/V`` and name ``/T``) to *out*.

    :param out: object with a ``write`` method accepting text
    :param key: field name; encoded with ``output_field`` before writing
    :param value: field value; when falsy, ``/V /`` is written instead of a
        string value
    """
    out.write("<<\n")
    if value:
        # An unescaped ")" or "\" inside the parentheses would end or
        # corrupt the literal string, so escape before writing.
        out.write("/V (%s)\n" % _escape_pdf_literal("%s" % (value,)))
    else:
        out.write("/V /\n")
    # The encoded name can itself contain characters equal to "(", ")" or
    # "\", so it goes through the same escaping.
    out.write("/T (%s)\n" % _escape_pdf_literal(output_field(key)))
    out.write(">> \n")
+
def write_fields(out, fields, copy_suffix="a"):
    """Write a complete FDF document for *fields* to *out*.

    Emits ``HEAD``, one field dictionary per entry, then ``TAIL``.  Each
    field is written a second time under ``key + copy_suffix`` so that a
    carbon-copy field on other pages receives the same value.

    :param out: object with a ``write`` method accepting text
    :param fields: iterable of ``(key, value, comment)`` tuples; the comment
        is not written
    :param copy_suffix: suffix appended to the key for the carbon-copy
        field; defaults to ``"a"``.  Pass ``None`` or ``""`` to write each
        field only once.
    """
    out.write(HEAD)
    for key, value, _comment in fields:
        write_field(out, key, value)
        if copy_suffix:
            # carbon copy of the value for the matching field on other pages
            write_field(out, key + copy_suffix, value)
    out.write(TAIL)
+
def extract_keys_from_pdf(filename):
    """Return the form field names of the PDF *filename*, using pdftk.

    Runs ``pdftk <filename> generate_fdf output <tmp>.fdf`` and parses the
    generated file with ``extract_keys``.

    :param filename: path of the PDF form to inspect
    :return: list of field names
    :raises subprocess.CalledProcessError: if pdftk exits with an error
    :raises OSError: if pdftk cannot be started
    """
    # what about using 'pdftk filename dump_data_fields' and parsing the output ?
    import subprocess
    import tempfile

    # A private temporary directory replaces the fixed, shared /tmp path, so
    # concurrent runs cannot clash and a stale file is never read.
    with tempfile.TemporaryDirectory() as workdir:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        # Argument list (no shell): names containing spaces or shell
        # metacharacters are passed to pdftk verbatim.
        subprocess.check_call(
            ['pdftk', filename, 'generate_fdf', 'output', fdf_path])
        # Latin-1 maps every byte to one character, so the binary parts of
        # the FDF survive; newline='\n' prevents newline translation.
        with open(fdf_path, encoding='latin-1', newline='\n') as fdf:
            lines = fdf.readlines()
    return extract_keys(lines)
+
+
def fill_pdf(infile, outfile, fields, flatten=True):
    """Fill the PDF form *infile* with *fields* and write it to *outfile*.

    The fields are written to a temporary FDF file with ``write_fields``
    and merged using ``pdftk <infile> fill_form <fdf> output <outfile>``.

    :param infile: path of the PDF form to fill
    :param outfile: path of the PDF to create
    :param fields: iterable of ``(key, value, comment)`` tuples
    :param flatten: when true (the default, as before), pass ``flatten`` to
        pdftk; when false the keyword is omitted
    :raises subprocess.CalledProcessError: if pdftk exits with an error
    :raises OSError: if pdftk cannot be started
    :raises UnicodeEncodeError: if the FDF text holds a character above
        U+00FF, which cannot be written as a single byte
    """
    import subprocess
    import tempfile

    with tempfile.TemporaryDirectory() as workdir:
        fdf_path = os.path.join(workdir, 'form.fdf')
        # The with-block guarantees the FDF is flushed and closed before
        # pdftk reads it, instead of relying on garbage collection.
        with open(fdf_path, 'w', encoding='latin-1', newline='\n') as fdf:
            write_fields(fdf, fields)
        command = ['pdftk', infile, 'fill_form', fdf_path, 'output', outfile]
        if flatten:
            command.append('flatten')
        # Argument list (no shell): paths are passed to pdftk verbatim.
        subprocess.check_call(command)
+
def testfill_pdf(filename):
    """Fill every field of the PDF *filename* with its own name.

    The result is written next to the input, under the same file name
    prefixed with an underscore.

    :param filename: path of the PDF form to fill
    """
    keys = extract_keys_from_pdf(filename)
    fields = [(key, key, '') for key in keys]
    # Prefix only the file name: "dir/form.pdf" becomes "dir/_form.pdf",
    # not "_dir/form.pdf".
    directory, basename = os.path.split(filename)
    fill_pdf(filename, os.path.join(directory, '_' + basename), fields)
+