diff options
author | Ray Johnston <ray.johnston@artifex.com> | 2020-08-27 13:54:33 -0700 |
---|---|---|
committer | Ray Johnston <ray.johnston@artifex.com> | 2020-08-27 13:54:33 -0700 |
commit | 94c344c43b274c86f529916085e02d7dc50d5edb (patch) | |
tree | 6522ebdf1975d2a4de86e161fd8e305a1c7791ad /lib | |
parent | d7a70f69851a071eb9dfe7a968e2cb2f6302f1ba (diff) | |
download | ghostpdl-94c344c43b274c86f529916085e02d7dc50d5edb.tar.gz |
Bug 702785: Add UTF-8 encoding for strings in Info dict to pdf_info.ps
Thanks to Peter Cherepanov for this change.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pdf_info.ps | 93 |
1 files changed, 84 insertions, 9 deletions
diff --git a/lib/pdf_info.ps b/lib/pdf_info.ps
index 0b4654ab0..9c940ed07 100644
--- a/lib/pdf_info.ps
+++ b/lib/pdf_info.ps
@@ -69,6 +69,81 @@ cleartomark % discard the dict from --where--
 % ---- No more executable code on the top level after this line -----
 % ---- except 2 lines at the very end -----
 
+% Write a character to the standard output.
+/putchar [ % int -> -
+  (%stdout) (w) file      % executed once while the array is built; the file object is stored in the procedure
+  /exch cvx /write cvx    % executable names, so the finished body reads: <file> exch write
+] cvx bind def
+
+% Write U+xxxx to the standard output as UTF-8.
+/put-ucode { % int -> -
+  dup 16#80 ge {                                    % U+0080 and above need more than one byte
+    dup 16#800 ge {                                 % U+0800 and above need three or four bytes
+      dup 16#10000 ge {                             % U+10000 and above need four bytes
+        dup -18 bitshift 16#f0 or putchar           % lead byte 11110xxx (bits 18 and up)
+        dup -12 bitshift 16#3f and 16#80 or putchar % continuation byte 10xxxxxx (bits 12-17)
+      } {
+        dup -12 bitshift 16#e0 or putchar           % lead byte 1110xxxx (bits 12-15)
+      } ifelse
+      dup -6 bitshift 16#3f and 16#80 or putchar    % continuation byte 10xxxxxx (bits 6-11)
+    } {
+      dup -6 bitshift 16#C0 or putchar              % lead byte 110xxxxx (bits 6-10)
+    } ifelse
+    16#3f and 16#80 or                              % last continuation byte (bits 0-5), left for the putchar below
+  } if
+  putchar                                           % single-byte value, or the last continuation byte
+} bind def
+
+% PDFDocEncoding to U+xxxx decoding table.
+/doc-to-ucode [
+  0 1 23 {} for           % codes 0-23 map to themselves
+  16#2d8 16#2c7 16#2c6 16#2d9 16#2dd 16#2db 16#2da 16#2dc   % codes 24-31
+  32 1 127 {} for         % codes 32-127 map to themselves
+  16#2022 16#2020 16#2021 16#2026 16#2014 16#2013 16#192    % codes 128-134
+  16#2044 16#2039 16#203a 16#2212 16#2030 16#201e 16#201c   % codes 135-141
+  16#201d 16#2018 16#2019 16#201a 16#2122 16#fb01 16#fb02   % codes 142-148
+  16#141 16#152 16#160 16#178 16#17d 16#131 16#142 16#153   % codes 149-156
+  16#161 16#17e 0 16#20ac % codes 157-160; code 159 maps to 0 (presumably unassigned in PDFDocEncoding - confirm)
+  161 1 255 {} for        % codes 161-255 map to themselves; the table has 256 entries in total
+] readonly def
+
+% Convert a doc string from PDFDocEncoding or UTF-16BE to UTF-8
+% and write it to standard output.
+/write-doc-string { % (string) | name -> -   operand is coerced to a string if needed, then written as UTF-8
+  dup type /stringtype ne { 1024 string cvs } if <feff> anchorsearch { % anchorsearch needs a string (Info /Trapped is a name); FE FF marks UTF-16BE
+    pop                   % discard the matched byte-order mark
+    0 exch % hi16 (str)   hi16 = pending high-surrogate contribution, 0 when none
+    0 2 2 index length 2 sub {           % i = 0, 2, 4 ...; a trailing odd byte is never read
+      2 copy 2 copy % hi16 (str) i (str) i (str) i
+      get 256 mul 3 1 roll % hi16 (str) i hi*256 (str) i
+      1 add get add % hi16 (str) i 256*hi+lo
+      dup 16#fc00 and dup % hi16 (str) i 256*hi+lo tag tag
+      16#d800 eq { % High surrogate
+        pop               % drop the spare tag
+        16#3ff and        % keep the 10 payload bits
+        10 bitshift
+        16#10000 add % hi16 (str) i hi16'
+        4 1 roll % hi16' hi16 (str) i
+        pop exch pop % hi16' (str)   nothing is written until the low half arrives
+      } {
+        16#dc00 eq { % Low surrogate
+          16#3ff and % hi16 (str) i (256*hi+lo)&0x3ff
+          4 -1 roll add % (str) i (256*hi+lo)&0x3ff+hi16
+          put-ucode % (str) i
+          pop 0 exch % 0 (str)   pair consumed: reset hi16
+        } { % BMP plane
+          put-ucode % hi16 (str) i   hi16 is left untouched here: unpaired surrogates are not validated
+          pop % hi16 (str)
+        } ifelse
+      } ifelse
+    } for
+    pop pop % -
+  } {
+    { //doc-to-ucode exch get put-ucode  % no BOM: each byte is a PDFDocEncoding code looked up in the table
+    } forall
+  } ifelse
+} bind def
+
 %% When checking Resources for Font objects, we must ensure that we don't end
 %% up following a circular reference. Circular references in Resources should
 %% not, of course, be present but we've seen a number of cases. 
If we do detect
@@ -129,15 +204,15 @@ cleartomark % discard the dict from --where--
 
 % Print out the "Info" dictionary if present
 Trailer /Info knownoget {
-  dup /Title knownoget { (Title: ) print = flush } if
-  dup /Author knownoget { (Author: ) print = flush } if
-  dup /Subject knownoget { (Subject: ) print = flush } if
-  dup /Keywords knownoget { (Keywords: ) print = flush } if
-  dup /Creator knownoget { (Creator: ) print = flush } if
-  dup /Producer knownoget { (Producer: ) print = flush } if
-  dup /CreationDate knownoget { (CreationDate: ) print = flush } if
-  dup /ModDate knownoget { (ModDate: ) print = flush } if
-  dup /Trapped knownoget { (Trapped: ) print = flush } if
+  dup /Title knownoget { (Title: ) print write-doc-string () = flush } if   % value goes through write-doc-string; "() =" then ends the line
+  dup /Author knownoget { (Author: ) print write-doc-string () = flush } if
+  dup /Subject knownoget { (Subject: ) print write-doc-string () = flush } if
+  dup /Keywords knownoget { (Keywords: ) print write-doc-string () = flush } if
+  dup /Creator knownoget { (Creator: ) print write-doc-string () = flush } if
+  dup /Producer knownoget { (Producer: ) print write-doc-string () = flush } if
+  dup /CreationDate knownoget { (CreationDate: ) print write-doc-string () = flush } if
+  dup /ModDate knownoget { (ModDate: ) print write-doc-string () = flush } if
+  dup /Trapped knownoget { (Trapped: ) print write-doc-string () = flush } if   % NOTE(review): PDF defines /Trapped as a name, not a string - confirm write-doc-string accepts a name
   pop
 } if
 
|