pdf_ext manipulates pdf and fills pdf forms via fdf files. pdftk recommended.

author: Nicolas Chauvat <nicolas.chauvat@logilab.fr> 2007-09-28 02:08:48 +0200
committer: Nicolas Chauvat <nicolas.chauvat@logilab.fr> 2007-09-28 02:08:48 +0200
commit: a7bbed8e220645e7713f770c1fd9d3da51f14376 (patch)
tree: 9455c007bdb848d857edd9bd070af243e6471ec4 /pdf_ext.py
parent: f3b59c81250f59adea07839aad09d3aa03d7e6ef (diff)
download: logilab-common-a7bbed8e220645e7713f770c1fd9d3da51f14376.tar.gz
1 files changed, 97 insertions, 0 deletions
diff --git a/pdf_ext.py b/pdf_ext.py
new file mode 100644
index 0000000..c742262
--- /dev/null
+++ b/pdf_ext.py
@@ -0,0 +1,97 @@
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+""" Copyright (c) 2003-2007 LOGILAB S.A. (Paris, FRANCE).
+ http://www.logilab.fr/ -- mailto:contact@logilab.fr
+
+manipulate pdf and fdf files. pdftk recommended.
+
+Notes regarding pdftk, PDF forms and FDF files (Forms Data Format):
+field names can be extracted with:
+    pdftk orig.pdf generate_fdf output truc.fdf
+to merge an FDF file into a PDF:
+    pdftk orig.pdf fill_form test.fdf output result.pdf [flatten]
+without flatten, the resulting form can still be edited.
+with flatten, everything is turned into plain text.
+"""
+
+import os
+
+# Opening part of the FDF document written by write_fields(): the
+# "%FDF-1.2" header line, a '%' line holding four non-ASCII characters,
+# then object 1 opening the /FDF dictionary and its /Fields array.
+# The individual field dictionaries are written right after this text.
+HEAD="""%FDF-1.2
+%\xE2\xE3\xCF\xD3
+1 0 obj 
+<<
+/FDF 
+<<
+/Fields [
+"""
+
+# Closing part of the FDF document: ends the /Fields array, the two open
+# dictionaries and object 1, then adds a trailer whose /Root entry points
+# back at object 1, followed by the %%EOF marker.
+TAIL="""]
+>>
+>>
+endobj 
+trailer
+
+<<
+/Root 1 0 R
+>>
+%%EOF
+"""
+
+def output_field(f):
+    """Encode *f* the way write_field() stores field names.
+
+    The result is the two-character marker 0xFE 0xFF followed by every
+    character of *f*, each one preceded by a NUL character.
+    """
+    pieces = ["\xfe\xff"]
+    for char in f:
+        pieces.append("\x00" + char)
+    return "".join(pieces)
+
+def extract_keys(lines):
+    """Collect the field names found on the '/T' lines of an FDF dump.
+
+    Only lines starting with '/T' are used; everything else, including
+    the '/V' value lines, is ignored. For each such line the first seven
+    characters and the last two are dropped, and every NUL character is
+    removed from what remains.
+
+    Returns the names as a list, in the order the lines were given.
+    """
+    return [line[7:-2].replace('\x00', '')
+            for line in lines
+            if line.startswith('/T')]
+
+def write_field(out, key, value):
+    """Write one field dictionary for *key* / *value* to *out*.
+
+    The dictionary is opened with "<<", followed by a "/V" line holding
+    *value* in parentheses (or "/V /" when *value* is false), a "/T" line
+    holding *key* encoded by output_field(), and the closing ">> ".
+    """
+    out.write("<<\n")
+    value_entry = ("/V (%s)\n" % value) if value else "/V /\n"
+    out.write(value_entry)
+    name_entry = "/T (%s)\n" % output_field(key)
+    out.write(name_entry)
+    out.write(">> \n")
+
+def write_fields(out, fields):
+    """Write a complete FDF document describing *fields* to *out*.
+
+    *fields* yields (key, value, comment) triples; the comment is not
+    written. HEAD is emitted first, then two field dictionaries per
+    triple (one named *key*, one named *key* + "a"), then TAIL.
+    """
+    out.write(HEAD)
+    for name, content, _unused_comment in fields:
+        write_field(out, name, content)
+        # same value under the "a"-suffixed name: carbon copy on other pages
+        write_field(out, name + "a", content)
+    out.write(TAIL)
+
+def extract_keys_from_pdf(filename):
+    """Return the form field names of the PDF file *filename*.
+
+    Runs ``pdftk <filename> generate_fdf output <fdf file>`` and passes
+    the lines of the generated FDF file to extract_keys().
+
+    The FDF file lives in a private temporary directory that is removed
+    afterwards, and pdftk is started from an argument list rather than a
+    shell command line. File names containing spaces or shell
+    metacharacters are therefore passed through untouched, concurrent
+    calls cannot overwrite each other's output, and a stale file from an
+    earlier run is never read.
+
+    Raises IOError when pdftk cannot be started or produced no FDF file.
+    """
+    # local imports: the module itself only imports os
+    import shutil
+    import subprocess
+    import tempfile
+    # what about using 'pdftk filename dump_data_fields' and parsing the output ?
+    workdir = tempfile.mkdtemp()
+    try:
+        fdf_path = os.path.join(workdir, 'fields.fdf')
+        try:
+            subprocess.call(['pdftk', filename, 'generate_fdf',
+                             'output', fdf_path])
+        except OSError:
+            # pdftk itself could not be launched; report it with the same
+            # exception type as a missing output file
+            raise IOError('cannot run pdftk on %r' % (filename,))
+        fdf = open(fdf_path)
+        try:
+            lines = fdf.readlines()
+        finally:
+            fdf.close()
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+    return extract_keys(lines)
+
+
+def fill_pdf(infile, outfile, fields):
+    """Fill the form of the PDF *infile* with *fields*, writing *outfile*.
+
+    *fields* is a sequence of (key, value, comment) triples as accepted
+    by write_fields(). They are written to an FDF file, then
+    ``pdftk <infile> fill_form <fdf file> output <outfile> flatten`` is run.
+
+    The FDF file lives in a private temporary directory that is removed
+    afterwards, and it is closed before pdftk reads it. pdftk is started
+    from an argument list rather than a shell command line, so file names
+    containing spaces or shell metacharacters are passed through untouched.
+
+    The exit status of pdftk is ignored; OSError is raised when pdftk
+    cannot be started at all.
+    """
+    # local imports: the module itself only imports os
+    import shutil
+    import subprocess
+    import tempfile
+    workdir = tempfile.mkdtemp()
+    try:
+        fdf_path = os.path.join(workdir, 'fields.fdf')
+        fdf = open(fdf_path, 'w')
+        try:
+            write_fields(fdf, fields)
+        finally:
+            # closing flushes the data to disk before pdftk opens the file
+            fdf.close()
+        subprocess.call(['pdftk', infile, 'fill_form', fdf_path,
+                         'output', outfile, 'flatten'])
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+
+def testfill_pdf(filename):
+    """Fill every form field of the PDF *filename* with its own name.
+
+    The field names come from extract_keys_from_pdf(); each one is used
+    both as key and as value, with an empty comment. The filled PDF is
+    written by fill_pdf() next to the original, under the same file name
+    prefixed with '_'.
+    """
+    fields = [(key, key, '') for key in extract_keys_from_pdf(filename)]
+    # prefix the base name only, so that 'docs/form.pdf' gives
+    # 'docs/_form.pdf' instead of the non-existent '_docs/form.pdf'
+    directory, basename = os.path.split(filename)
+    fill_pdf(filename, os.path.join(directory, '_' + basename), fields)
+
+
author	Nicolas Chauvat <nicolas.chauvat@logilab.fr>	2007-09-28 02:08:48 +0200
committer	Nicolas Chauvat <nicolas.chauvat@logilab.fr>	2007-09-28 02:08:48 +0200
commit	a7bbed8e220645e7713f770c1fd9d3da51f14376 (patch)
tree	9455c007bdb848d857edd9bd070af243e6471ec4 /pdf_ext.py
parent	f3b59c81250f59adea07839aad09d3aa03d7e6ef (diff)
download	logilab-common-a7bbed8e220645e7713f770c1fd9d3da51f14376.tar.gz