summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Ray Johnston <ray.johnston@artifex.com> 2020-08-27 13:54:33 -0700
committer Ray Johnston <ray.johnston@artifex.com> 2020-08-27 13:54:33 -0700
commit 94c344c43b274c86f529916085e02d7dc50d5edb (patch)
tree 6522ebdf1975d2a4de86e161fd8e305a1c7791ad /lib
parent d7a70f69851a071eb9dfe7a968e2cb2f6302f1ba (diff)
download ghostpdl-94c344c43b274c86f529916085e02d7dc50d5edb.tar.gz
Bug 702785: Add UTF-8 encoding for strings in Info dict to pdf_info.ps
Thanks to Peter Cherepanov for this change.
Diffstat (limited to 'lib')
-rw-r--r-- lib/pdf_info.ps 93
1 files changed, 84 insertions, 9 deletions
diff --git a/lib/pdf_info.ps b/lib/pdf_info.ps
index 0b4654ab0..9c940ed07 100644
--- a/lib/pdf_info.ps
+++ b/lib/pdf_info.ps
@@ -69,6 +69,81 @@ cleartomark % discard the dict from --where--
% ---- No more executable code on the top level after this line -----
% ---- except 2 lines at the very end -----
+% Write one byte to the standard output; built from an array so the file is opened once, at definition time.
+/putchar [ % int -> -
+ (%stdout) (w) file % executed now, while the array is being built: embeds the file object
+ /exch cvx /write cvx % executable names, so the finished body is { <file> exch write }
+] cvx bind def % cvx turns the array into a procedure
+
+% Emit one Unicode code point on the standard output as UTF-8.
+% The lead byte is chosen by range; each later byte carries six payload bits.
+/put-ucode { % codepoint -> -
+  dup 16#80 lt { putchar } {                  % below U+0080: the byte itself
+    dup 16#800 lt {
+      dup -6 bitshift 16#C0 or putchar        % 2-byte form, lead 110xxxxx
+    } {
+      dup 16#10000 lt {
+        dup -12 bitshift 16#e0 or putchar     % 3-byte form, lead 1110xxxx
+      } {
+        dup -18 bitshift 16#f0 or putchar     % 4-byte form, lead 11110xxx
+        dup -12 bitshift 16#3f and 16#80 or putchar
+      } ifelse
+      dup -6 bitshift 16#3f and 16#80 or putchar
+    } ifelse
+    16#3f and 16#80 or putchar                % last continuation byte, 10xxxxxx
+  } ifelse
+} bind def
+
+% PDFDocEncoding decoding table: entry n is the Unicode code point (U+xxxx) for byte value n.
+/doc-to-ucode [
+  0 1 23 {} for                                                    %   0- 23: identity
+  16#02D8 16#02C7 16#02C6 16#02D9 16#02DD 16#02DB 16#02DA 16#02DC  %  24- 31
+  32 1 127 {} for                                                  %  32-127: identity
+  16#2022 16#2020 16#2021 16#2026 16#2014 16#2013 16#0192 16#2044  % 128-135
+  16#2039 16#203A 16#2212 16#2030 16#201E 16#201C 16#201D 16#2018  % 136-143
+  16#2019 16#201A 16#2122 16#FB01 16#FB02 16#0141 16#0152 16#0160  % 144-151
+  16#0178 16#017D 16#0131 16#0142 16#0153 16#0161 16#017E 0        % 152-159 (159 maps to 0)
+  16#20AC                                                          % 160
+  161 1 255 {} for                                                 % 161-255: identity
+] readonly def
+
+% Write a document-info value to the standard output as UTF-8.  A string that starts
+% with the bytes FE FF is decoded as UTF-16BE, any other string through doc-to-ucode;
+% a non-string value (e.g. a name) is first converted to its text form with cvs.
+/write-doc-string { % string|any -> -
+  dup type /stringtype ne {                   % anchorsearch needs a string operand
+    dup type /nametype eq { dup length } { 256 } ifelse string cvs
+  } if
+  <feff> anchorsearch {
+    pop 0 exch                                % drop the matched BOM: hi16 (str)
+    0 2 2 index length 2 sub {                % i = 0, 2, ... over complete byte pairs
+      2 copy 2 copy                           % hi16 (str) i (str) i (str) i
+      get 256 mul 3 1 roll                    % hi16 (str) i hi*256 (str) i
+      1 add get add                           % hi16 (str) i 256*hi+lo
+      dup 16#fc00 and dup                     % hi16 (str) i 256*hi+lo tag tag
+      16#d800 eq {                            % High surrogate: remember it, emit nothing
+        pop
+        16#3ff and 10 bitshift 16#10000 add   % hi16 (str) i hi16'
+        4 1 roll                              % hi16' hi16 (str) i
+        pop exch pop                          % hi16' (str)
+      } {
+        16#dc00 eq {                          % Low surrogate: combine with saved hi16
+          16#3ff and                          % hi16 (str) i (256*hi+lo)&0x3ff
+          4 -1 roll add                       % (str) i (256*hi+lo)&0x3ff+hi16
+          put-ucode                           % (str) i
+          pop 0 exch                          % 0 (str)
+        } {                                   % BMP plane
+          put-ucode                           % hi16 (str) i
+          pop                                 % hi16 (str)
+        } ifelse
+      } ifelse
+    } for
+    pop pop                                   % -
+  } {
+    { //doc-to-ucode exch get put-ucode } forall   % no BOM: one table lookup per byte
+  } ifelse
+} bind def
+
%% When checking Resources for Font objects, we must ensure that we don't end
%% up following a circular reference. Circular references in Resources should
%% not, of course, be present but we've seen a number of cases. If we do detect
@@ -129,15 +204,15 @@ cleartomark % discard the dict from --where--
% Print out the "Info" dictionary if present
Trailer /Info knownoget {
- dup /Title knownoget { (Title: ) print = flush } if
- dup /Author knownoget { (Author: ) print = flush } if
- dup /Subject knownoget { (Subject: ) print = flush } if
- dup /Keywords knownoget { (Keywords: ) print = flush } if
- dup /Creator knownoget { (Creator: ) print = flush } if
- dup /Producer knownoget { (Producer: ) print = flush } if
- dup /CreationDate knownoget { (CreationDate: ) print = flush } if
- dup /ModDate knownoget { (ModDate: ) print = flush } if
- dup /Trapped knownoget { (Trapped: ) print = flush } if
+ dup /Title knownoget { (Title: ) print write-doc-string () = flush } if % value goes out via write-doc-string; "() =" then ends the line
+ dup /Author knownoget { (Author: ) print write-doc-string () = flush } if
+ dup /Subject knownoget { (Subject: ) print write-doc-string () = flush } if
+ dup /Keywords knownoget { (Keywords: ) print write-doc-string () = flush } if
+ dup /Creator knownoget { (Creator: ) print write-doc-string () = flush } if
+ dup /Producer knownoget { (Producer: ) print write-doc-string () = flush } if
+ dup /CreationDate knownoget { (CreationDate: ) print write-doc-string () = flush } if
+ dup /ModDate knownoget { (ModDate: ) print write-doc-string () = flush } if
+ dup /Trapped knownoget { (Trapped: ) print write-doc-string () = flush } if % NOTE(review): per the PDF spec Trapped is a name, not a string - confirm write-doc-string accepts non-strings
pop
} if