summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwlemb <wlemb>2003-12-27 05:46:57 +0000
committerwlemb <wlemb>2003-12-27 05:46:57 +0000
commit2529d328ba5a6b7d45644de8f3883b054e39ce76 (patch)
tree4ee7fb39e6c28dc5ce6d53ddb829ea37de0b6c53
parent01e707a2c6a63a77ae127275da490499444d77a7 (diff)
downloadgroff-2529d328ba5a6b7d45644de8f3883b054e39ce76.tar.gz
hpftodit has been extended to handle TrueType metric files and
more glyphs. See hpftodit.man for more details. * src/utils/hpftodit/hpftodit.cpp: Include stdio.h, string.h, ctype.h, and unicode.h. s/msl/charcode/ everywhere since we now handle Unicode values also. (equal, NO, YES, MSL, SYMSET, UNICODE, UNNAMED): New macros. Use them where appropriate. (MULTIPLIER): Replaced with... (multiplier): New global static variable. (scale): Updated. (tag_type): Add more TFM tags. (tag_name): New array. (ENUM_TYPE, FLOAT_TYPE): Removed. (BYTE_TYPE): New value assigned. (ASCII_TYPE, RATIONAL_TYPE): New enumeration values. (text_symbol_sets, special_symbol_sets): Extended to cover more sets. (check_type): Add return value. (check_units): Add parameters to get ppi and upem values. Handle TrueType TFM data. (output_font_name): New function. (output_charset): Add parameter to handle TFM type. Handle TrueType TFMs also. (em_fract): New macro. (dump_tags): Be much more verbose and handle more tags. (dump_ascii, dump_symbol_sets, dump_symbols): New functions. (hp_msl_to_ucode_name, unicode_to_ucode_name, is_uname): New functions. (read_map): Add parameter to handle TFM type. Handle both MSL and Unicode mappings. (main): Add two new command line options `-a' and `-q'. Updated to make use of new functions. (usage): Updated. * src/utils/hpftodit/hpuni.cpp: New file. * src/utils/hpftodit/Makefile.sub, src/utils/hpftodit/hpftodit.man: Updated. * src/include/nonposix.h (read) [_MSC_VER]: Define.
-rw-r--r--ChangeLog46
-rw-r--r--src/include/nonposix.h1
-rw-r--r--src/utils/hpftodit/Makefile.sub6
-rw-r--r--src/utils/hpftodit/hpftodit.cpp837
-rw-r--r--src/utils/hpftodit/hpftodit.man202
-rw-r--r--src/utils/hpftodit/hpuni.cpp698
6 files changed, 1619 insertions, 171 deletions
diff --git a/ChangeLog b/ChangeLog
index 6da28ad2..77cd31e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,49 @@
+2003-12-26 Jeff Conrad <jeff_conrad@msn.com>
+
+ hpftodit has been extended to handle TrueType metric files and
+ more glyphs. See hpftodit.man for more details.
+
+ * src/utils/hpftodit/hpftodit.cpp: Include stdio.h, string.h,
+ ctype.h, and unicode.h.
+ s/msl/charcode/ everywhere since we now handle Unicode values also.
+ (equal, NO, YES, MSL, SYMSET, UNICODE, UNNAMED): New macros.
+ Use them where appropriate.
+ (MULTIPLIER): Replaced with...
+ (multiplier): New global static variable.
+ (scale): Updated.
+ (tag_type): Add more TFM tags.
+ (tag_name): New array.
+ (ENUM_TYPE, FLOAT_TYPE): Removed.
+ (BYTE_TYPE): New value assigned.
+ (ASCII_TYPE, RATIONAL_TYPE): New enumeration values.
+ (text_symbol_sets, special_symbol_sets): Extended to cover more
+ sets.
+ (check_type): Add return value.
+ (check_units): Add parameters to get ppi and upem values.
+ Handle TrueType TFM data.
+ (output_font_name): New function.
+ (output_charset): Add parameter to handle TFM type.
+ Handle TrueType TFMs also.
+ (em_fract): New macro.
+ (dump_tags): Be much more verbose and handle more tags.
+ (dump_ascii, dump_symbol_sets, dump_symbols): New functions.
+ (hp_msl_to_ucode_name, unicode_to_ucode_name, is_uname): New
+ functions.
+ (read_map): Add parameter to handle TFM type.
+ Handle both MSL and Unicode mappings.
+ (main): Add two new command line options `-a' and `-q'.
+ Updated to make use of new functions.
+ (usage): Updated.
+
+ * src/utils/hpftodit/hpuni.cpp: New file.
+
+ * src/utils/hpftodit/Makefile.sub, src/utils/hpftodit/hpftodit.man:
+ Updated.
+
+2003-12-25 Werner LEMBERG <wl@gnu.org>
+
+ * src/include/nonposix.h (read) [_MSC_VER]: Define.
+
2003-12-24 Werner LEMBERG <wl@gnu.org>
* src/utils/afmtodit/afmtodit.man: Some reformulations as suggested
diff --git a/src/include/nonposix.h b/src/include/nonposix.h
index 1c1e92c5..86167357 100644
--- a/src/include/nonposix.h
+++ b/src/include/nonposix.h
@@ -47,6 +47,7 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
# define setmode(f,m) _setmode(f,m)
# define WAIT(s,p,m) _cwait(s,p,m)
# define creat(p,m) _creat(p,m)
+# define read(f,b,s) _read(f,b,s)
# endif
# define SET_BINARY(f) do {if (!isatty(f)) setmode(f,O_BINARY);} while(0)
# define FOPEN_RB "rb"
diff --git a/src/utils/hpftodit/Makefile.sub b/src/utils/hpftodit/Makefile.sub
index d83188c5..6e80b474 100644
--- a/src/utils/hpftodit/Makefile.sub
+++ b/src/utils/hpftodit/Makefile.sub
@@ -2,5 +2,7 @@ PROG=hpftodit$(EXEEXT)
MAN1=hpftodit.n
XLIBS=$(LIBGROFF)
MLIB=$(LIBM)
-OBJS=hpftodit.$(OBJEXT)
-CCSRCS=$(srcdir)/hpftodit.cpp
+OBJS=hpftodit.$(OBJEXT) \
+ hpuni.$(OBJEXT)
+CCSRCS=$(srcdir)/hpftodit.cpp \
+ $(srcdir)/hpuni.cpp
diff --git a/src/utils/hpftodit/hpftodit.cpp b/src/utils/hpftodit/hpftodit.cpp
index fe512b61..f5ed8b36 100644
--- a/src/utils/hpftodit/hpftodit.cpp
+++ b/src/utils/hpftodit/hpftodit.cpp
@@ -20,17 +20,17 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/*
TODO
-put human readable font name in device file
devise new names for useful characters
-use --- for unnamed characters
option to specify symbol sets to look in
-make it work with TrueType fonts
put filename in error messages (or fix lib)
*/
#include "lib.h"
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
#include <math.h>
#include <errno.h>
#include "assert.h"
@@ -39,17 +39,29 @@ put filename in error messages (or fix lib)
#include "error.h"
#include "cset.h"
#include "nonposix.h"
+#include "unicode.h"
extern "C" const char *Version_string;
+extern const char *hp_msl_to_unicode_code(const char *);
#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
+#define equal(a, b) (strcmp(a, b) == 0)
-const int MULTIPLIER = 3;
+#define NO 0
+#define YES 1
+
+#define MSL 0
+#define SYMSET 1
+#define UNICODE 2
+
+#define UNNAMED "---"
+
+static double multiplier = 3.0; // make Agfa-based unitwidth an integer
inline
int scale(int n)
{
- return n * MULTIPLIER;
+ return int(n * multiplier + 0.5);
}
// tags in TFM file
@@ -57,39 +69,67 @@ int scale(int n)
enum tag_type {
min_tag = 400,
type_tag = 400,
+ copyright_tag = 401,
+ comment_tag = 402,
+ charcode_tag = 403, // MSL for Intellifont, Unicode for TrueType
symbol_set_tag = 404,
- msl_tag = 403,
+ unique_identifier_tag = 405,
inches_per_point_tag = 406,
+ nominal_point_size_tag = 407,
design_units_per_em_tag = 408,
posture_tag = 409,
+ type_structure_tag = 410,
stroke_weight_tag = 411,
spacing_tag = 412,
slant_tag = 413,
appearance_width_tag = 414,
+ serif_style_tag = 415,
+ font_name_tag = 417,
+ typeface_source_tag = 418,
+ average_width_tag = 419,
+ max_width_tag = 420,
word_spacing_tag = 421,
+ recommended_line_spacing_tag = 422,
+ cap_height_tag = 423,
x_height_tag = 424,
+ max_ascent_tag = 425,
+ max_descent_tag = 426,
lower_ascent_tag = 427,
lower_descent_tag = 428,
+ underscore_depth_tag = 429,
+ underscore_thickness_tag = 430,
+ uppercase_accent_height_tag = 431,
+ lowercase_accent_height_tag = 432,
width_tag = 433,
+ vertical_escapement_tag = 434,
left_extent_tag = 435,
right_extent_tag = 436,
ascent_tag = 437,
descent_tag = 438,
pair_kern_tag = 439,
+ sector_kern_tag = 440,
+ track_kern_tag = 441,
typeface_tag = 442,
+ panose_tag = 443,
max_tag = 443
- };
+};
-// types in TFM file
+const char *tag_name[] = {
+ "Symbol Set",
+ "Font Type" // MSL for Intellifont, Unicode for TrueType
+};
+// types in TFM file
enum {
- ENUM_TYPE = 1,
- BYTE_TYPE = 2,
+ BYTE_TYPE = 1,
+ ASCII_TYPE = 2, // NUL-terminated string
USHORT_TYPE = 3,
- FLOAT_TYPE = 5,
- SIGNED_SHORT_TYPE = 17
- };
-
+ LONG_TYPE = 4, // unused
+ RATIONAL_TYPE = 5, // 8-byte numerator + 8-byte denominator
+ SIGNED_BYTE_TYPE = 16, // unused
+ SIGNED_SHORT_TYPE = 17,
+ SIGNED_LONG_TYPE = 18 // unused
+};
typedef unsigned char byte;
typedef unsigned short uint16;
@@ -119,7 +159,7 @@ struct entry {
};
struct char_info {
- uint16 msl;
+ uint16 charcode;
uint16 width;
int16 ascent;
int16 descent;
@@ -145,44 +185,102 @@ struct symbol_set {
#define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64))
+// change this to '1' to compare results with original version
+#if 0
uint16 text_symbol_sets[] = {
SYMBOL_SET(0, 'N'), // Latin 1
SYMBOL_SET(6, 'J'), // Microsoft Publishing
SYMBOL_SET(2, 'N'), // Latin 2
0
- };
+};
uint16 special_symbol_sets[] = {
SYMBOL_SET(8, 'M'),
SYMBOL_SET(5, 'M'),
SYMBOL_SET(15, 'U'),
0
- };
+};
+#else
+uint16 text_symbol_sets[] = {
+ SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252)
+ SYMBOL_SET(7, 'J'), // Desktop
+ SYMBOL_SET(6, 'J'), // Microsoft Publishing
+ SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250
+ SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9M,
+ // so we should never get here)
+ SYMBOL_SET(0, 'N'), // Latin 1 (subset of 19U,
+ // so we should never get here)
+ SYMBOL_SET(8, 'U'), // HP Roman 8
+ SYMBOL_SET(10, 'J'), // PS Standard
+ SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI"
+
+#if 0
+ SYMBOL_SET(13, 'J'), // Ventura International
+ SYMBOL_SET(6, 'M'), // Ventura Math
+ SYMBOL_SET(14, 'J'), // Ventura US
+ SYMBOL_SET(5, 'T'), // Code Page 1254
+ SYMBOL_SET(0, 'D'), // ISO 60, 7-bit Norwegian version 1
+ SYMBOL_SET(5, 'N'), // ISO 8859-9, Latin 5
+ SYMBOL_SET(1, 'F'), // ISO 69, 7-bit French
+ SYMBOL_SET(1, 'G'), // ISO 21, 7-bit German
+ SYMBOL_SET(0, 'I'), // ISO 15, 7-bit Italian
+ SYMBOL_SET(1, 'U'), // Legal
+ SYMBOL_SET(12, 'J'), // MC Text
+ SYMBOL_SET(10, 'U'), // PC Code Page 437
+ SYMBOL_SET(11, 'U'), // PC Code Page 437N
+ SYMBOL_SET(17, 'U'), // PC Code Page 852
+ SYMBOL_SET(12, 'U'), // PC Code Page 850
+ SYMBOL_SET(9, 'T'), // PC Code Page 437T
+ SYMBOL_SET(2, 'S'), // ISO 17, 7-bit Spanish
+ SYMBOL_SET(0, 'S'), // ISO 11, 7-bit Swedish
+ SYMBOL_SET(1, 'E'), // ISO 4, 7-bit UK English
+ SYMBOL_SET(0, 'U'), // ISO 6, 7-bit ASCII English
+#endif
+ 0
+};
+
+uint16 special_symbol_sets[] = {
+ SYMBOL_SET(8, 'M'), // Math 8
+ SYMBOL_SET(5, 'M'), // PS Math
+ SYMBOL_SET(15, 'U'), // Pi font
+ SYMBOL_SET(19, 'M'), // Symbol font
+ 0
+};
+#endif
entry tags[max_tag + 1 - min_tag];
char_info *char_table;
-uint32 nchars;
+uint32 nchars = 0;
-unsigned int msl_name_table_size = 0;
-name_list **msl_name_table = 0;
+unsigned int charcode_name_table_size = 0;
+name_list **charcode_name_table = NULL;
unsigned int n_symbol_sets;
symbol_set *symbol_set_table;
-static int special_flag = 0;
-static int italic_flag = 0;
+static int special_flag = NO; // not a special font
+static int italic_flag = NO; // don't add italic correction
static int italic_sep;
+static int all_flag = NO; // don't include glyphs not in mapfile
+static int quiet_flag = NO; // don't suppress warnings about symbols not found
-static void usage(FILE *stream);
+static char *hp_msl_to_ucode_name(int);
+static char *unicode_to_ucode_name(int);
+static int is_uname(char *);
+static void usage(FILE *);
static void usage();
static const char *xbasename(const char *);
static void read_tags(File &);
-static void check_type();
-static void check_units(File &);
-static int read_map(const char *);
+static int check_type();
+static void check_units(File &, const int, double *, double *);
+static int read_map(const char *, const int);
static void require_tag(tag_type);
-static void dump_tags(File &f);
+static void dump_ascii(File &, tag_type);
+static void dump_tags(File &);
+static void dump_symbol_sets(File &);
+static void dump_symbols(int);
+static void output_font_name(File &);
static void output_spacewidth();
static void output_pclweight();
static void output_pclproportional();
@@ -192,8 +290,8 @@ static void output_slant();
static void output_ligatures();
static void read_symbol_sets(File &);
static void read_and_output_kernpairs(File &);
-static void output_charset();
-static void read_char_table(File &f);
+static void output_charset(const int);
+static void read_char_table(File &);
inline
entry &tag_info(tag_type t)
@@ -206,25 +304,36 @@ int main(int argc, char **argv)
program_name = argv[0];
int opt;
- int debug_flag = 0;
+ int debug_flag = NO;
+ int res = 1200; // PCL unit of measure for cursor moves
+ int scalesize = 4; // LaserJet 4 only allows 1/4 point increments
+ int unitwidth = 6350;
+ double ppi; // points per inch
+ double upem; // design units per em
static const struct option long_options[] = {
{ "help", no_argument, 0, CHAR_MAX + 1 },
{ "version", no_argument, 0, 'v' },
{ NULL, 0, 0, 0 }
};
- while ((opt = getopt_long(argc, argv, "dsvi:", long_options, NULL)) != EOF) {
+ while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
switch (opt) {
+ case 'a':
+ all_flag = YES;
+ break;
case 'd':
- debug_flag = 1;
+ debug_flag = YES;
break;
case 's':
- special_flag = 1;
+ special_flag = YES;
break;
case 'i':
- italic_flag = 1;
- italic_sep = atoi(optarg);
+ italic_flag = YES;
+ italic_sep = atoi(optarg); // design units
break;
+ case 'q':
+ quiet_flag = YES; // suppress warnings about symbols not found
+ break;
case 'v':
{
printf("GNU hpftodit (groff) version %s\n", Version_string);
@@ -242,44 +351,69 @@ int main(int argc, char **argv)
assert(0);
}
}
- if (argc - optind != 3)
+
+ if (debug_flag && argc - optind < 1)
+ usage();
+ else if (!debug_flag && argc - optind != 3)
usage();
File f(argv[optind]);
- if (!read_map(argv[optind + 1]))
- exit(1);
- current_filename = 0;
- current_lineno = -1; // no line numbers
- if (freopen(argv[optind + 2], "w", stdout) == 0)
- fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
- current_filename = argv[optind];
- printf("name %s\n", xbasename(argv[optind + 2]));
- if (special_flag)
- printf("special\n");
read_tags(f);
- check_type();
- check_units(f);
+ int tfm_type = check_type();
if (debug_flag)
dump_tags(f);
+ else if (!read_map(argv[optind + 1], tfm_type))
+ exit(1);
+ current_filename = NULL;
+ current_lineno = -1; // no line numbers
+ if (!debug_flag && !equal(argv[optind + 2], "-"))
+ if (freopen(argv[optind + 2], "w", stdout) == NULL)
+ fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
+ current_filename = argv[optind];
+
+ check_units(f, tfm_type, &ppi, &upem);
+ if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs
+ multiplier = double(res) / upem / ppi * unitwidth / scalesize;
+ if (italic_flag)
+ // convert from thousandths of an em to design units
+ italic_sep = int(italic_sep * upem / 1000 + 0.5);
+
read_char_table(f);
- output_spacewidth();
- output_slant();
- read_and_output_pcltypeface(f);
- output_pclproportional();
- output_pclweight();
- output_pclstyle();
+ if (nchars == 0)
+ fatal("no characters");
+
+ if (!debug_flag) {
+ output_font_name(f);
+ printf("name %s\n", xbasename(argv[optind + 2]));
+ if (special_flag)
+ printf("special\n");
+ output_spacewidth();
+ output_slant();
+ read_and_output_pcltypeface(f);
+ output_pclproportional();
+ output_pclweight();
+ output_pclstyle();
+ }
read_symbol_sets(f);
- output_ligatures();
- read_and_output_kernpairs(f);
- output_charset();
+ if (debug_flag) {
+ printf("Symbols:\n");
+ dump_symbols(tfm_type);
+ }
+ else {
+ output_ligatures();
+ read_and_output_kernpairs(f);
+ output_charset(tfm_type);
+ }
return 0;
}
static
void usage(FILE *stream)
{
- fprintf(stream, "usage: %s [-s] [-i n] tfm_file map_file output_font\n",
+ fprintf(stream,
+ "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n",
program_name);
}
+
static
void usage()
{
@@ -319,7 +453,7 @@ void File::skip(int n)
void File::seek(uint32 n)
{
- if ((uint32)(end_ - buf_) < n)
+ if (uint32(end_ - buf_) < n)
fatal("unexpected end of file");
ptr_ = buf_ + n;
}
@@ -372,31 +506,40 @@ void read_tags(File &f)
}
static
-void check_type()
+int check_type()
{
require_tag(type_tag);
- if (tag_info(type_tag).value != 0) {
- if (tag_info(type_tag).value == 2)
- fatal("cannot handle TrueType tfm files");
- fatal("unknown type tag %1", int(tag_info(type_tag).value));
+ int tfm_type = tag_info(type_tag).value;
+ switch (tfm_type) {
+ case MSL:
+ case UNICODE:
+ break;
+ case SYMSET:
+ fatal("cannot handle Symbol Set TFM files");
+ break;
+ default:
+ fatal("unknown type tag %1", tfm_type);
}
+ return tfm_type;
}
static
-void check_units(File &f)
+void check_units(File &f, const int tfm_type, double *ppi, double *upem)
{
require_tag(design_units_per_em_tag);
f.seek(tag_info(design_units_per_em_tag).value);
uint32 num = f.get_uint32();
uint32 den = f.get_uint32();
- if (num != 8782 || den != 1)
+ if (tfm_type == MSL && (num != 8782 || den != 1))
fatal("design units per em != 8782/1");
+ *upem = double(num) / den;
require_tag(inches_per_point_tag);
f.seek(tag_info(inches_per_point_tag).value);
num = f.get_uint32();
den = f.get_uint32();
- if (num != 100 || den != 7231)
+ if (tfm_type == MSL && (num != 100 || den != 7231))
fatal("inches per point not 100/7231");
+ *ppi = double(den) / num;
}
static
@@ -406,6 +549,36 @@ void require_tag(tag_type t)
fatal("tag %1 missing", int(t));
}
+// Put a human-readable font name in the output, as a `# name' comment line.
+//
+// Does nothing if the TFM has no font name tag or the tag is empty.
+// For a tag of more than 4 bytes the tag value is used as a file offset
+// to the string; otherwise the string bytes are taken from the tag value
+// itself.  Trailing white space is removed before printing.
+static
+void output_font_name(File &f)
+{
+  if (!tag_info(font_name_tag).present)
+    return;
+  int count = tag_info(font_name_tag).count;
+  if (count <= 0)		// nothing to print, and nothing safe to trim
+    return;
+
+  // one extra byte so the buffer can always be NUL-terminated
+  char *font_name = new char[count + 1];
+
+  if (count > 4) {		// value is a file offset to the string
+    f.seek(tag_info(font_name_tag).value);
+    // only the first count - 1 bytes are read; the last byte of the
+    // field is presumably the terminating NUL
+    int n;
+    for (n = 0; n < count - 1; n++)
+      font_name[n] = f.get_byte();
+    font_name[n] = '\0';
+  }
+  else				// value contains the string
+    // Read the characters from the storage of the value; casting the
+    // integer value itself to a pointer would dereference an arbitrary
+    // address.
+    // NOTE(review): assumes the in-memory byte order of `value' matches
+    // the byte order in the TFM file -- confirm against read_tags().
+    sprintf(font_name, "%.*s", count,
+	    (const char *)&(tag_info(font_name_tag).value));
+
+  // remove any trailing space, never stepping before the buffer start
+  int len = int(strlen(font_name));
+  while (len > 0 && csspace(font_name[len - 1]))
+    len--;
+  font_name[len] = '\0';
+
+  printf("# %s\n", font_name);
+  delete[] font_name;		// allocated with new[]
+}
+
static
void output_spacewidth()
{
@@ -422,10 +595,11 @@ void read_symbol_sets(File &f)
unsigned int i;
for (i = 0; i < n_symbol_sets; i++) {
f.seek(tag_info(symbol_set_tag).value + i*14);
- (void)f.get_uint32();
- uint32 off1 = f.get_uint32();
- uint32 off2 = f.get_uint32();
- (void)f.get_uint16(); // what's this for?
+ (void)f.get_uint32(); // offset to symbol set name
+ uint32 off1 = f.get_uint32(); // offset to selection string
+ uint32 off2 = f.get_uint32(); // offset to symbol set index array
+ (void)f.get_uint16(); // index array length
+ // (why is this needed?)
f.seek(off1);
unsigned int j;
uint16 kind = 0;
@@ -440,6 +614,7 @@ void read_symbol_sets(File &f)
for (j = 0; j < 256; j++)
symbol_set_table[i].index[j] = f.get_uint16();
}
+
for (i = 0; i < nchars; i++)
char_table[i].symbol_set = NO_SYMBOL_SET;
@@ -467,14 +642,14 @@ void read_symbol_sets(File &f)
static
void read_char_table(File &f)
{
- require_tag(msl_tag);
- nchars = tag_info(msl_tag).count;
+ require_tag(charcode_tag);
+ nchars = tag_info(charcode_tag).count;
char_table = new char_info[nchars];
- f.seek(tag_info(msl_tag).value);
+ f.seek(tag_info(charcode_tag).value);
uint32 i;
for (i = 0; i < nchars; i++)
- char_table[i].msl = f.get_uint16();
+ char_table[i].charcode = f.get_uint16();
require_tag(width_tag);
f.seek(tag_info(width_tag).value);
@@ -596,10 +771,10 @@ void output_ligatures()
unsigned ligature_mask = 0;
unsigned int i;
for (i = 0; i < nchars; i++) {
- uint16 msl = char_table[i].msl;
- if (msl < msl_name_table_size
+ uint16 charcode = char_table[i].charcode;
+ if (charcode < charcode_name_table_size
&& char_table[i].symbol_set != NO_SYMBOL_SET) {
- for (name_list *p = msl_name_table[msl]; p; p = p->next)
+ for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
if (strcmp(p->name, ligature_chars[j]) == 0) {
ligature_mask |= 1 << j;
@@ -629,22 +804,23 @@ void read_and_output_kernpairs(File &f)
int16 val = int16(f.get_uint16());
if (char_table[i1].symbol_set != NO_SYMBOL_SET
&& char_table[i2].symbol_set != NO_SYMBOL_SET
- && char_table[i1].msl < msl_name_table_size
- && char_table[i2].msl < msl_name_table_size) {
- for (name_list *p = msl_name_table[char_table[i1].msl];
+ && char_table[i1].charcode < charcode_name_table_size
+ && char_table[i2].charcode < charcode_name_table_size) {
+ for (name_list *p = charcode_name_table[char_table[i1].charcode];
p;
p = p->next)
- for (name_list *q = msl_name_table[char_table[i2].msl];
+ for (name_list *q = charcode_name_table[char_table[i2].charcode];
q;
q = q->next)
- printf("%s %s %d\n", p->name, q->name, scale(val));
+ if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
+ printf("%s %s %d\n", p->name, q->name, scale(val));
}
}
}
}
static
-void output_charset()
+void output_charset(const int tfm_type)
{
require_tag(slant_tag);
double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
@@ -657,20 +833,30 @@ void output_charset()
printf("charset\n");
unsigned int i;
for (i = 0; i < nchars; i++) {
- uint16 msl = char_table[i].msl;
- if (msl < msl_name_table_size
- && msl_name_table[msl]) {
- if (char_table[i].symbol_set != NO_SYMBOL_SET) {
- printf("%s\t%d,%d",
- msl_name_table[msl]->name,
- scale(char_table[i].width),
- scale(char_table[i].ascent));
- int depth = scale(- char_table[i].descent);
+ uint16 charcode = char_table[i].charcode;
+
+ // the TFM supports the character
+ if (char_table[i].symbol_set != NO_SYMBOL_SET) {
+ // the character was in the map file
+ if (charcode < charcode_name_table_size && charcode_name_table[charcode])
+ printf("%s", charcode_name_table[charcode]->name);
+ else if (!all_flag)
+ continue;
+ else if (tfm_type == MSL)
+ printf(hp_msl_to_ucode_name(charcode));
+ else
+ printf(unicode_to_ucode_name(charcode));
+
+ printf("\t%d,%d",
+ scale(char_table[i].width), scale(char_table[i].ascent));
+
+ int depth = scale(-char_table[i].descent);
if (depth < 0)
depth = 0;
int italic_correction = 0;
int left_italic_correction = 0;
int subscript_correction = 0;
+
if (italic_flag) {
italic_correction = scale(char_table[i].right_extent
- char_table[i].width
@@ -684,6 +870,7 @@ void output_charset()
left_italic_correction = scale(italic_sep
- char_table[i].left_extent);
}
+
if (subscript_correction != 0)
printf(",%d,%d,%d,%d",
depth, italic_correction, left_italic_correction,
@@ -696,45 +883,390 @@ void output_charset()
printf(",%d", depth);
// This is fairly arbitrary. Fortunately it doesn't much matter.
unsigned type = 0;
- if (char_table[i].ascent > (int16(tag_info(lower_ascent_tag).value)*9)/10)
+ if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
type |= 2;
- if (char_table[i].descent < (int16(tag_info(lower_descent_tag).value)*9)/10)
+ if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
type |= 1;
- printf("\t%d\t%d\n",
- type,
+ printf("\t%d\t%d", type,
char_table[i].symbol_set*256 + char_table[i].code);
- for (name_list *p = msl_name_table[msl]->next; p; p = p->next)
- printf("%s\t\"\n", p->name);
+
+ if (tfm_type == UNICODE) {
+ if (charcode >= 0xE000 && charcode <= 0xF8FF)
+ printf("\t\t-- HP PUA U+%04X\n", charcode);
+ else
+ printf("\t\t-- U+%04X\n", charcode);
+ }
+ else
+ printf("\t\t-- HP MSL %4d\n", charcode);
+
+ if (charcode < charcode_name_table_size
+ && charcode_name_table[charcode])
+ for (name_list *p = charcode_name_table[charcode]->next;
+ p; p = p->next)
+ printf("%s\t\"\n", p->name);
}
- else
- warning("MSL %1 not in any of the searched symbol sets", msl);
+ // warnings about characters in mapfile not found in TFM
+ else if (charcode < charcode_name_table_size
+ && charcode_name_table[charcode]) {
+ char *name = charcode_name_table[charcode]->name;
+ // don't warn about Unicode or unnamed glyphs
+ // that aren't in the TFM file
+ if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
+ && !is_uname(name))
+ fprintf(stderr,
+ "%s: warning: symbol U+%04X (%s) not in any of the searched symbol sets\n",
+ program_name, charcode, name);
+ else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name))
+ warning("MSL %1 (%2) not in any of the searched symbol sets",
+ charcode, name);
}
}
}
+#define em_fract(a) (upem >= 0 ? double(a)/upem : 0)
+
static
void dump_tags(File &f)
{
- int i;
- for (i = min_tag; i <= max_tag; i++) {
+ double upem = -1.0;
+
+ printf("TFM tags\n"
+ "\n"
+ "tag# type count value\n"
+ "---------------------\n");
+
+ for (int i = min_tag; i <= max_tag; i++) {
enum tag_type t = tag_type(i);
if (tag_info(t).present) {
- fprintf(stderr,
- "%d %d %d %d\n", i, tag_info(t).type, tag_info(t).count,
- tag_info(t).value);
- if (tag_info(t).type == FLOAT_TYPE
- && tag_info(t).count == 1) {
- f.seek(tag_info(t).value);
- uint32 num = f.get_uint32();
- uint32 den = f.get_uint32();
- fprintf(stderr, "(%u/%u = %g)\n", num, den, (double)num/den);
+ printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
+ switch (tag_info(t).type) {
+ case BYTE_TYPE:
+ case USHORT_TYPE:
+ printf(" %5u", tag_info(t).value);
+ switch (i) {
+ case type_tag:
+ printf(" Font Type ");
+ switch (tag_info(t).value) {
+ case MSL:
+ case SYMSET:
+ printf("(Intellifont)");
+ break;
+ case UNICODE:
+ printf("(TrueType)");
+ }
+ break;
+ case charcode_tag:
+ printf(" Number of Symbols (%u)", tag_info(t).count);
+ break;
+ case symbol_set_tag:
+ printf(" Symbol Sets (%u): ",
+ tag_info(symbol_set_tag).count / 14);
+ dump_symbol_sets(f);
+ break;
+ case type_structure_tag:
+ printf(" Type Structure (%u)", tag_info(t).value);
+ break;
+ case stroke_weight_tag:
+ printf(" Stroke Weight (%u)", tag_info(t).value);
+ break;
+ case spacing_tag:
+ printf(" Spacing ");
+ switch (tag_info(t).value) {
+ case 0:
+ printf("(Proportional)");
+ break;
+ case 1:
+ printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ }
+ break;
+ case appearance_width_tag:
+ printf(" Appearance Width (%u)", tag_info(t).value);
+ break;
+ case serif_style_tag:
+ printf(" Serif Style (%u)", tag_info(t).value);
+ break;
+ case max_width_tag:
+ printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case word_spacing_tag:
+ printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case recommended_line_spacing_tag:
+ printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case x_height_tag:
+ printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case cap_height_tag:
+ printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case max_ascent_tag:
+ printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case lower_ascent_tag:
+ printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case underscore_thickness_tag:
+ printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case uppercase_accent_height_tag:
+ printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case lowercase_accent_height_tag:
+ printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
+ em_fract(tag_info(t).value));
+ break;
+ case width_tag:
+ printf(" Horizontal Escapement array");
+ break;
+ case vertical_escapement_tag:
+ printf(" Vertical Escapement array");
+ break;
+ case right_extent_tag:
+ printf(" Right Extent array");
+ break;
+ case ascent_tag:
+ printf(" Character Ascent array");
+ break;
+ case pair_kern_tag:
+ f.seek(tag_info(t).value);
+ printf(" Kern Pairs (%u)", f.get_uint16());
+ break;
+ case panose_tag:
+ printf(" PANOSE Classification array");
+ break;
+ }
+ break;
+ case SIGNED_SHORT_TYPE:
+ printf(" %5d", int16(tag_info(t).value));
+ switch (i) {
+ case slant_tag:
+ printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
+ break;
+ case max_descent_tag:
+ printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
+ em_fract(int16(tag_info(t).value)));
+ break;
+ case lower_descent_tag:
+ printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
+ em_fract(int16(tag_info(t).value)));
+ break;
+ case underscore_depth_tag:
+ printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
+ em_fract(int16(tag_info(t).value)));
+ break;
+ case left_extent_tag:
+ printf(" Left Extent array");
+ break;
+ // both signed and unsigned do exist!
+ case ascent_tag:
+ printf(" Character Ascent array");
+ break;
+ case descent_tag:
+ printf(" Character Descent array");
+ break;
+ }
+ break;
+ case RATIONAL_TYPE:
+ printf(" %5u", tag_info(t).value);
+ switch (i) {
+ case inches_per_point_tag:
+ printf(" Inches per Point");
+ break;
+ case nominal_point_size_tag:
+ printf(" Nominal Point Size");
+ break;
+ case design_units_per_em_tag:
+ printf(" Design Units per Em");
+ break;
+ case average_width_tag:
+ printf(" Average Width");
+ break;
+ }
+ if (tag_info(t).count == 1) {
+ f.seek(tag_info(t).value);
+ uint32 num = f.get_uint32();
+ uint32 den = f.get_uint32();
+ if (i == design_units_per_em_tag)
+ upem = double(num) / den;
+ printf(" (%u/%u = %g)", num, den, double(num)/den);
+ }
+ break;
+ case ASCII_TYPE:
+ printf(" %5u ", tag_info(t).value);
+ switch (i) {
+ case comment_tag:
+ printf("Comment ");
+ break;
+ case copyright_tag:
+ printf("Copyright ");
+ break;
+ case unique_identifier_tag:
+ printf("Unique ID ");
+ break;
+ case font_name_tag:
+ printf("Typeface Name ");
+ break;
+ case typeface_source_tag:
+ printf("Typeface Source ");
+ break;
+ case typeface_tag:
+ printf("PCL Typeface ");
+ break;
+ }
+ dump_ascii(f, t);
}
+ putchar('\n');
+ }
+ }
+ putchar('\n');
+}
+#undef em_fract
+
+// Print the ASCII string belonging to tag `t', enclosed in double quotes.
+//
+// For a tag of more than 4 bytes the tag value is used as a file offset
+// and count - 1 bytes are printed from there; otherwise up to 4 characters
+// are taken from the tag value itself.
+static
+void dump_ascii(File &f, tag_type t)
+{
+  putchar('"');
+  if (tag_info(t).count > 4) {
+    int count = tag_info(t).count;
+    f.seek(tag_info(t).value);
+    while (--count)
+      printf("%c", f.get_byte());
+  }
+  else
+    // Print from the storage of the value; casting the integer value
+    // itself to a pointer would dereference an arbitrary address.
+    // NOTE(review): assumes the in-memory byte order of `value' matches
+    // the byte order in the TFM file -- confirm against read_tags().
+    printf("%.4s", (const char *)&(tag_info(t).value));
+  putchar('"');
+}
+
+static
+void dump_symbol_sets(File &f)
+{
+ uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
+ uint32 n_symbol_sets = symbol_set_dir_length / 14;
+
+ for (uint32 i = 0; i < n_symbol_sets; i++) {
+ f.seek(tag_info(symbol_set_tag).value + i * 14);
+ (void)f.get_uint32(); // offset to symbol set name
+ uint32 off1 = f.get_uint32(); // offset to selection string
+ uint32 off2 = f.get_uint32(); // offset to symbol set index array
+ f.seek(off1);
+ for (uint32 j = 0; j < off2 - off1; j++) {
+ unsigned char c = f.get_byte();
+ if ('0' <= c && c <= '9')
+ putchar(c);
+ else if ('A' <= c && c <= 'Z')
+ printf(i < n_symbol_sets - 1 ? "%c," : "%c", c);
}
}
}
+// List every entry of the character table on stdout (debug output).
+//
+// An entry whose code has a name in the map-file table is printed with
+// its symbol set and first name, but only if a symbol set was assigned
+// to it.  An entry without a name is printed as a bare code.
+static
+void dump_symbols(int tfm_type)
+{
+  const int is_unicode = (tfm_type == UNICODE);
+
+  for (uint32 idx = 0; idx < nchars; idx++) {
+    const uint16 code = char_table[idx].charcode;
+
+    // code has no name in the map file: print the bare code only
+    if (!(code < charcode_name_table_size && charcode_name_table[code])) {
+      if (is_unicode)
+	printf("%4d (U+%04X)\n", idx, code);
+      else
+	printf("%4d (HP MSL %4d)\n", idx, code);
+      continue;
+    }
+
+    // named, but not found in any searched symbol set: print nothing
+    if (char_table[idx].symbol_set == NO_SYMBOL_SET)
+      continue;
+
+    if (is_unicode)
+      printf("%4d (%04x) %d (%d%c)\t%s\n",
+	     idx, code, char_table[idx].symbol_set,
+	     char_table[idx].symbol_set / 32,
+	     (char_table[idx].symbol_set % 32) + 64,
+	     charcode_name_table[code]->name);
+    else
+      printf("%4d (%4d) %d (%d%c)\t%s\n",
+	     idx, code, char_table[idx].symbol_set,
+	     char_table[idx].symbol_set / 32,
+	     (char_table[idx].symbol_set % 32) + 64,
+	     charcode_name_table[code]->name);
+  }
+}
+
+// Return a glyph name for the HP MSL code `msl'.
+//
+// The result is UNNAMED if hp_msl_to_unicode_code() has no mapping for
+// the code or maps it into the range 0xE000..0xF8FF; otherwise it is
+// "u" followed by the Unicode string (replaced by its decomposition if
+// decompose_unicode() provides one).
+// The returned string is allocated with new[]; the caller owns it.
+static char *
+hp_msl_to_ucode_name(int msl)
+{
+  char codestr[12];	// room for any 32-bit int in decimal, plus NUL
+
+  sprintf(codestr, "%d", msl);
+  const char *ustr = hp_msl_to_unicode_code(codestr);
+  if (ustr == NULL)
+    ustr = UNNAMED;
+  else {
+    char *nonum;
+    int ucode = int(strtol(ustr, &nonum, 16));
+    // don't allow PUA code points as Unicode names
+    if (ucode >= 0xE000 && ucode <= 0xF8FF)
+      ustr = UNNAMED;
+  }
+  const int named = !equal(ustr, UNNAMED);
+  if (named) {
+    const char *uname_decomposed = decompose_unicode(ustr);
+    if (uname_decomposed)
+      // 1st char is the number of components
+      ustr = uname_decomposed + 1;
+  }
+  // +2: one byte for the optional `u' prefix and one for the NUL
+  // (allocating only strlen + 1 overflowed the buffer for named glyphs)
+  char *value = new char[strlen(ustr) + 2];
+  if (named)
+    sprintf(value, "u%s", ustr);
+  else
+    strcpy(value, ustr);
+  return value;
+}
+
+// Return a glyph name for the Unicode code point `ucode'.
+//
+// The result is UNNAMED for code points in the range 0xE000..0xF8FF;
+// otherwise it is "u" followed by the code as at least four upper-case
+// hex digits (replaced by its decomposition if decompose_unicode()
+// provides one).
+// The returned string is allocated with new[]; the caller owns it.
+static char *
+unicode_to_ucode_name(int ucode)
+{
+  const char *ustr;
+  char codestr[12];	// room for 8 hex digits of a 32-bit int, plus NUL
+
+  // don't allow PUA code points as Unicode names
+  if (ucode >= 0xE000 && ucode <= 0xF8FF)
+    ustr = UNNAMED;
+  else {
+    sprintf(codestr, "%04X", ucode);
+    ustr = codestr;
+  }
+  const int named = !equal(ustr, UNNAMED);
+  if (named) {
+    const char *uname_decomposed = decompose_unicode(ustr);
+    if (uname_decomposed)
+      // 1st char is the number of components
+      ustr = uname_decomposed + 1;
+  }
+  // +2: one byte for the optional `u' prefix and one for the NUL
+  // (allocating only strlen + 1 overflowed the buffer for named glyphs)
+  char *value = new char[strlen(ustr) + 2];
+  if (named)
+    sprintf(value, "u%s", ustr);
+  else
+    strcpy(value, ustr);
+  return value;
+}
+
+// Return 1 if `name' has the form uXXXX or uXXXX_XXXX[_XXXX...], where
+// each X is a hexadecimal digit, and 0 otherwise.
+//
+// Such a name is built from 5-character groups: `u' plus four hex digits
+// for the first group, `_' plus four hex digits for each further group.
+static int
+is_uname(char *name)
+{
+  size_t i;
+  size_t len = strlen(name);
+  // the length must be a whole number of 5-character groups
+  if (len % 5)
+    return 0;
+
+  // this also rejects the empty string
+  if (name[0] != 'u')
+    return 0;
+  // all four digits of the first group, i.e. name[1] .. name[4]
+  // (stopping at i < 4 left name[4] unchecked)
+  for (i = 1; i < 5; i++)
+    if (!csxdigit(name[i]))
+      return 0;
+  // further groups: `_' at every multiple of 5, hex digits elsewhere
+  for (i = 5; i < len; i++)
+    if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
+      return 0;
+
+  return 1;
+}
+
static
-int read_map(const char *file)
+int read_map(const char *file, const int tfm_type)
{
errno = 0;
FILE *fp = fopen(file, "r");
@@ -745,6 +1277,7 @@ int read_map(const char *file)
current_filename = file;
char buf[512];
current_lineno = 0;
+ char *nonum;
while (fgets(buf, int(sizeof(buf)), fp)) {
current_lineno++;
char *ptr = buf;
@@ -755,37 +1288,73 @@ int read_map(const char *file)
ptr = strtok(ptr, " \n\t");
if (!ptr)
continue;
- int n;
- if (sscanf(ptr, "%d", &n) != 1) {
- error("bad map file");
+
+ int msl_code = int(strtol(ptr, &nonum, 10));
+ if (*nonum != '\0') {
+ if (csxdigit(*nonum))
+ error("bad MSL map: got hex code (%1)", ptr);
+ else if (ptr == nonum)
+ error("bad MSL map: bad MSL code (%1)", ptr);
+ else
+ error("bad MSL map");
fclose(fp);
return 0;
}
- if (n < 0) {
- error("negative code");
+
+ ptr = strtok(NULL, " \n\t");
+ if (!ptr)
+ continue;
+ int unicode = int(strtol(ptr, &nonum, 16));
+ if (*nonum != '\0') {
+ if (ptr == nonum)
+ error("bad Unicode value (%1)", ptr);
+ else
+ error("bad Unicode map");
fclose(fp);
return 0;
}
- if ((size_t)n >= msl_name_table_size) {
- size_t old_size = msl_name_table_size;
- name_list **old_table = msl_name_table;
- msl_name_table_size = n + 256;
- msl_name_table = new name_list *[msl_name_table_size];
- if (old_table) {
- memcpy(msl_name_table, old_table, old_size*sizeof(name_list *));
- a_delete old_table;
- }
- for (size_t i = old_size; i < msl_name_table_size; i++)
- msl_name_table[i] = 0;
+ if (strlen(ptr) != 4) {
+ error("bad Unicode value (%1)", ptr);
+ return 0;
+ }
+
+ int n = tfm_type == MSL ? msl_code : unicode;
+ if (tfm_type == UNICODE && n > 0xFFFF) {
+ // greatest value supported by TFM files
+ error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
+ fclose(fp);
+ return 0;
}
- ptr = strtok(0, " \n\t");
- if (!ptr) {
- error("missing names");
+ else if (n < 0) {
+ error("negative code value (%1)", ptr);
fclose(fp);
return 0;
}
- for (; ptr; ptr = strtok(0, " \n\t"))
- msl_name_table[n] = new name_list(ptr, msl_name_table[n]);
+
+ ptr = strtok(NULL, " \n\t");
+ if (!ptr) { // groff name
+ error("missing name(s)");
+ fclose(fp);
+ return 0;
+ }
+ else if (is_uname(ptr))
+ ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
+
+ if (size_t(n) >= charcode_name_table_size) {
+ size_t old_size = charcode_name_table_size;
+ name_list **old_table = charcode_name_table;
+ charcode_name_table_size = n + 256;
+ charcode_name_table = new name_list *[charcode_name_table_size];
+ if (old_table) {
+ memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
+ a_delete old_table;
+ }
+ for (size_t i = old_size; i < charcode_name_table_size; i++)
+ charcode_name_table[i] = NULL;
+ }
+
+ for (; ptr; ptr = strtok(NULL, " \n\t"))
+ charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
}
fclose(fp);
return 1;
diff --git a/src/utils/hpftodit/hpftodit.man b/src/utils/hpftodit/hpftodit.man
index c069752b..b914db9e 100644
--- a/src/utils/hpftodit/hpftodit.man
+++ b/src/utils/hpftodit/hpftodit.man
@@ -1,3 +1,4 @@
+.tr ~
.ig
Copyright (C) 1994-2000, 2001, 2003 Free Software Foundation, Inc.
@@ -23,12 +24,16 @@ the original English.
.el .TP "\\$1"
..
.TH HPFTODIT @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@"
+.\" --------------------------------------------------------------------------
.SH NAME
+.\" --------------------------------------------------------------------------
hpftodit \- create font description files for use with groff \-Tlj4
+.\" --------------------------------------------------------------------------
.SH SYNOPSIS
+.\" --------------------------------------------------------------------------
.B hpftodit
[
-.B \-sv
+.B \-adqsv
]
[
.BI \-i n
@@ -39,33 +44,53 @@ hpftodit \- create font description files for use with groff \-Tlj4
.PP
It is possible to have whitespace between the
.B \-i
-command line option and its parameter.
+option and its parameter.
+.\" --------------------------------------------------------------------------
.SH DESCRIPTION
+.\" --------------------------------------------------------------------------
.B hpftodit
-creates a font file for use with
-.B
-groff \-Tlj4\fR
-from an HP tagged font metric file.
+creates a font file for use with an HP LaserJet printer with
+.BR "groff \-Tlj4" ,
+using data from an HP tagged font metric (TFM) file.
.I tfm_file
-is the name of the tagged font metric file for the font.
+is the name of the tagged font metric file for the font; Intellifont and
+TrueType TFM files are supported, but symbol set TFM files are not.
.I map_file
-is a file giving the groff names for characters in the font;
-this file should consist of a sequence of lines of the form:
+is a file giving the groff names for characters in the font; this file
+should consist of a sequence of lines of the form:
.IP
.I
-n c1 c2 \fR.\|.\|.
+m u c1 c2 \fR.\|.\|.
.LP
where
-.I n
+.I m
is a decimal integer giving the MSL number of the character,
+.I u
+is a hexadecimal integer giving the Unicode value of the character,
and
.IR c1 ,
.IR c2 ,.\|.\|.
are the groff names of the character.
+The values can be separated by any whitespace; the Unicode value must
+use uppercase digits A\^\(en\^F, and must be without a leading `0x', `u', or
+\&`U+'.
+The name for a glyph without a groff name may be given as
+.RI u XXXX
+if the glyph corresponds to a Unicode value, or as an unnamed glyph
+`\-\^\-\^\-'.
+If the given Unicode value is in the Private Use Area
+(0xE000\^\(en\^0xF8FF), the glyph is included as an unnamed glyph.
+Refer to
+.BR groff_diff (@MAN1EXT@)
+for additional information about unnamed glyphs and how to access them.
+.LP
.I font
is the name of the groff font file.
The groff font file is written to
-.IR font .
+.IR font ;
+if
+.I font
+is specified as `\-', the output is written to the standard output.
.LP
The
.B \-s
@@ -75,7 +100,7 @@ option should be given if the font is special
if
.B troff
should search it whenever
-a character is not found in the current font.)
+a character is not found in the current font).
If the font is special,
it should be listed in the
.B fonts
@@ -93,28 +118,97 @@ a left italic correction and a subscript correction
for each character
(the significance of these parameters is explained in
.BR groff_font (@MAN5EXT@)).
+.LP
+The metrics generated by
+.B hpftodit
+assume that the DESC file contains values of 1200 for res and 6350 for
+unitwidth (or any combination (e.g., 2400 and 3175) for which
+res~\(mu~unitwidth~=~7\|620\|000).
+Although HP PCL 5 LaserJet printers support an internal resolution of
+7200 units per inch, they use a 16-bit signed integer for cursor
+positioning; if
+.B devlj4
+is to support U.S. ledger paper (11\(sd~\(mu~17\(sd), the maximum usable
+resolution is 32\|767~/~17, or 1927, units per inch, which rounds down to
+1200 units per inch.
+If the largest required paper size is less (e.g., 8.5\(sd~\(mu~11\(sd or
+A5), a greater resolution (and lesser unitwidth) can be specified.
+.\" --------------------------------------------------------------------------
.SH OPTIONS
+.\" --------------------------------------------------------------------------
+.TP
+.B \-a
+Include characters in the TFM file that do not have groff names and are
+not included in the map file.
+A glyph with corresponding Unicode value is given the name
+.RI u XXXX ;
+a glyph without a Unicode value is included as an unnamed glyph
+`\-\^\-\^\-'.
+A glyph with a Unicode value in the Private Use Area (0xE000\^\(en\^0xF8FF)
+also is included as an unnamed glyph.
+This option has no effect on Unicode-named or unnamed glyphs that are
+specified in the map file.
+.IP
+This option provides a simple means of adding Unicode-named and unnamed
+glyphs to a font without including them in the map file, but it affords
+little control over which glyphs are placed in a regular font and which
+are placed in a special font.
+Although the presence or absence of the
+.B \-s
+option has some effect on which glyphs are included, it isn't very
+selective\(emmany glyphs are placed in both regular and special fonts.
+.\" --------------------------------------------------------------------------
+.TP
+.B \-d
+Dump information about the TFM file to the standard output; this option
+can be useful for ensuring that a TFM file is a proper match for a font,
+and that the contents of the TFM file are suitable.
+The information includes the values of important TFM tags, and a listing
+(by MSL number for Intellifont TFM files or by Unicode value for
+TrueType TFM files) of the glyphs included in the TFM file.
+The unit of measure `DU' for some tags indicates design units; there are
+8782 design units per em for Intellifont fonts, and 2048 design units
+per em for TrueType fonts.
+Note that the accessibility of a glyph depends on its inclusion in a
+symbol set; some TFM files list many glyphs but only a few symbol sets.
+.IP
+With this option,
+.I map_file
+and
+.I font
+are not required, and are ignored if given.
+.\" --------------------------------------------------------------------------
+.TP
+.B \-q
+Suppress warnings about characters in the map file that were not found
+in the TFM file.
+Warnings never are given for unnamed glyphs or for glyphs named by their
+Unicode values.
+This option is useful when sending the output of
+.B hpftodit
+to the standard output.
+.\" --------------------------------------------------------------------------
.TP
.B \-v
Print the version number.
+.\" --------------------------------------------------------------------------
.TP
.B \-s
The font is special.
-The effect of this option is to add the
+This option adds the
.B special
-command to the font file.
+command to the font file, and affects the HP symbol sets that are
+searched for each glyph.
+.\" --------------------------------------------------------------------------
.TP
.BI \-i n
-Generate an italic correction for each character so that
-the character's width plus the character's italic correction
-is equal to
+Generate an italic correction for each character so that the character's
+width plus the character's italic correction is equal to
.I n
-design units
-plus the amount by which the right edge of the character's bounding
-is to the right of the character's origin.
-If this would result in a negative italic correction, use a zero
-italic correction instead.
-There are 8782 design units per em for Intellifont fonts.
+thousandths of an em plus the amount by which the right edge of the character's
+bounding box is to the right of the character's origin.
+If this would result in a negative italic correction, use a zero italic
+correction instead.
.IP
Also generate a subscript correction equal to the
product of the tangent of the slant of the font and
@@ -126,13 +220,15 @@ instead.
Also generate a left italic correction for each character
equal to
.I n
-design units
-plus the amount by which the left edge of the character's bounding box
-is to the left of the character's origin.
+thousandths of an em plus the amount by which the left edge of the
+character's bounding box is to the left of the character's origin.
The left italic correction may be negative.
.IP
-This option is normally needed only with italic (or oblique) fonts.
+This option normally is needed only with italic or oblique fonts;
+a value of 50 (0.05 em) usually is a reasonable choice.
+.\" --------------------------------------------------------------------------
.SH FILES
+.\" --------------------------------------------------------------------------
.Tp \w'\fB@FONTDIR@/devlj4/DESC'u+2n
.B @FONTDIR@/devlj4/DESC
Device description file.
@@ -140,17 +236,53 @@ Device description file.
.BI @FONTDIR@/devlj4/ F
Font description file for font
.IR F .
-.SH BUGS
+.\" --------------------------------------------------------------------------
+.SH LIMITATIONS
+.\" --------------------------------------------------------------------------
+Although
+.B hpftodit
+can read TrueType TFM files, most of the HP-supplied TFM files for
+TrueType fonts support relatively few glyphs and include little, if any,
+kerning information.
+When the HP LaserJet 4 printer was first introduced, most of the
+internal scalable fonts were Agfa Intellifont.
+The TFM files provided for these fonts supported 600+ glyphs and
+contained extensive lists of kern pairs.
+.LP
+All scalable internal fonts on LaserJet printers introduced since
+approximately 1996 have been TrueType.
+With most developers using other means of obtaining font metrics by that
+time, support for new TFM files was very limited.
+The TFM files provided for the TrueType fonts support only the Latin 2
+(ISO 8859-2) symbol set, and include no kerning information;
+consequently, they are of little value for any but the most rudimentary
+documents.
.LP
-This program was written without the benefit of complete, official
-documentation on the tagged font metric format.
-It is therefore likely that it will fail to work on tfm files that are
-dissimilar to those for the internal fonts on the Laserjet 4,
-with which it was tested.
+Because the Intellifont TFM files contain considerably more information,
+they generally are preferable to the TrueType TFM files even for the
+TrueType fonts in the newer printers.
+The metrics for the TrueType fonts are very close, though not identical,
+to those for the earlier Intellifont fonts of the same names.
+Although most output using the Intellifont metrics with the newer
+printers is quite acceptable, a few glyphs may fail to print as expected.
.LP
-TrueType tfm files are not supported.
+At the time HP last supported TFM files, only Version 1 of the Unicode
+standard was available.
+Consequently, many glyphs lacking assigned code points were assigned by
+HP to the Private Use Area (PUA).
+Later versions of the Unicode standard included code points outside the
+PUA for many of these glyphs.
+The HP-supplied TrueType TFM files use the PUA assignments;
+TFM files generated from more recent TrueType font files require the
+later Unicode values to access the same glyphs.
+Consequently, two different mapping files may be required: one for the
+HP-supplied TFM files, and one for more recent TFM files.
+.\" --------------------------------------------------------------------------
.SH "SEE ALSO"
+.\" --------------------------------------------------------------------------
+.ad 0
.BR groff (@MAN1EXT@),
+.BR groff_diff (@MAN1EXT@),
.BR grolj4 (@MAN1EXT@),
.BR groff_font (@MAN5EXT@)
.
diff --git a/src/utils/hpftodit/hpuni.cpp b/src/utils/hpftodit/hpuni.cpp
new file mode 100644
index 00000000..892843ac
--- /dev/null
+++ b/src/utils/hpftodit/hpuni.cpp
@@ -0,0 +1,698 @@
+// -*- C++ -*-
+/* Copyright (C) 2003 Free Software Foundation, Inc.
+ Written by Jeff Conrad (jeff_conrad@msn.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING. If not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "lib.h"
+#include "stringclass.h"
+#include "ptable.h"
+
+#include "unicode.h"
+
+// Value type for hp_msl_to_unicode_table; it wraps a single string
+// pointer.
+// NOTE(review): presumably `value' holds the hexadecimal Unicode code
+// string taken from hp_msl_to_unicode_list -- confirm against the code
+// that fills the table.
+struct hp_msl_to_unicode {
+ char *value;
+};
+
+// Macros from ptable.h; presumably they declare and define the
+// PTABLE(hp_msl_to_unicode) lookup-table class -- confirm in ptable.h.
+declare_ptable(hp_msl_to_unicode)
+implement_ptable(hp_msl_to_unicode)
+
+// The table instance, of type PTABLE(hp_msl_to_unicode).
+PTABLE(hp_msl_to_unicode) hp_msl_to_unicode_table;
+
+struct {
+ const char *key;
+ const char *value;
+} hp_msl_to_unicode_list[] = {
+ { "1", "0021", }, // Exclamation Mark
+ { "2", "0022", }, // Neutral Double Quote
+ { "3", "0023", }, // Number Sign
+ { "4", "0024", }, // Dollar Sign
+ { "5", "0025", }, // Per Cent Sign
+ { "6", "0026", }, // Ampersand
+ { "8", "2019", }, // Single Close Quote (9)
+ { "9", "0028", }, // Left Parenthesis
+ { "10", "0029", }, // Right Parenthesis
+ { "11", "002A", }, // Asterisk
+ { "12", "002B", }, // Plus Sign
+ { "13", "002C", }, // Comma, or Decimal Separator
+ { "14", "002D", }, // Hyphen
+ { "15", "002E", }, // Period, or Full Stop
+ { "16", "002F", }, // Solidus, or Slash
+ { "17", "0030", }, // Numeral Zero
+ { "18", "0031", }, // Numeral One
+ { "19", "0032", }, // Numeral Two
+ { "20", "0033", }, // Numeral Three
+ { "21", "0034", }, // Numeral Four
+ { "22", "0035", }, // Numeral Five
+ { "23", "0036", }, // Numeral Six
+ { "24", "0037", }, // Numeral Seven
+ { "25", "0038", }, // Numeral Eight
+ { "26", "0039", }, // Numeral Nine
+ { "27", "003A", }, // Colon
+ { "28", "003B", }, // Semicolon
+ { "29", "003C", }, // Less Than Sign
+ { "30", "003D", }, // Equals Sign
+ { "31", "003E", }, // Greater Than Sign
+ { "32", "003F", }, // Question Mark
+ { "33", "0040", }, // Commercial At
+ { "34", "0041", }, // Uppercase A
+ { "35", "0042", }, // Uppercase B
+ { "36", "0043", }, // Uppercase C
+ { "37", "0044", }, // Uppercase D
+ { "38", "0045", }, // Uppercase E
+ { "39", "0046", }, // Uppercase F
+ { "40", "0047", }, // Uppercase G
+ { "41", "0048", }, // Uppercase H
+ { "42", "0049", }, // Uppercase I
+ { "43", "004A", }, // Uppercase J
+ { "44", "004B", }, // Uppercase K
+ { "45", "004C", }, // Uppercase L
+ { "46", "004D", }, // Uppercase M
+ { "47", "004E", }, // Uppercase N
+ { "48", "004F", }, // Uppercase O
+ { "49", "0050", }, // Uppercase P
+ { "50", "0051", }, // Uppercase Q
+ { "51", "0052", }, // Uppercase R
+ { "52", "0053", }, // Uppercase S
+ { "53", "0054", }, // Uppercase T
+ { "54", "0055", }, // Uppercase U
+ { "55", "0056", }, // Uppercase V
+ { "56", "0057", }, // Uppercase W
+ { "57", "0058", }, // Uppercase X
+ { "58", "0059", }, // Uppercase Y
+ { "59", "005A", }, // Uppercase Z
+ { "60", "005B", }, // Left Bracket
+ { "61", "005C", }, // Reverse Solidus, or Backslash
+ { "62", "005D", }, // Right Bracket
+ { "63", "005E", }, // Circumflex, Exponent, or Pointer
+ { "64", "005F", }, // Underline or Underscore Character
+ { "66", "2018", }, // Single Open Quote (6)
+ { "67", "0061", }, // Lowercase A
+ { "68", "0062", }, // Lowercase B
+ { "69", "0063", }, // Lowercase C
+ { "70", "0064", }, // Lowercase D
+ { "71", "0065", }, // Lowercase E
+ { "72", "0066", }, // Lowercase F
+ { "73", "0067", }, // Lowercase G
+ { "74", "0068", }, // Lowercase H
+ { "75", "0069", }, // Lowercase I
+ { "76", "006A", }, // Lowercase J
+ { "77", "006B", }, // Lowercase K
+ { "78", "006C", }, // Lowercase L
+ { "79", "006D", }, // Lowercase M
+ { "80", "006E", }, // Lowercase N
+ { "81", "006F", }, // Lowercase O
+ { "82", "0070", }, // Lowercase P
+ { "83", "0071", }, // Lowercase Q
+ { "84", "0072", }, // Lowercase R
+ { "85", "0073", }, // Lowercase S
+ { "86", "0074", }, // Lowercase T
+ { "87", "0075", }, // Lowercase U
+ { "88", "0076", }, // Lowercase V
+ { "89", "0077", }, // Lowercase W
+ { "90", "0078", }, // Lowercase X
+ { "91", "0079", }, // Lowercase Y
+ { "92", "007A", }, // Lowercase Z
+ { "93", "007B", }, // Left Brace
+ { "94", "007C", }, // Long Vertical Mark
+ { "95", "007D", }, // Right Brace
+ { "96", "007E", }, // One Wavy Line Approximate
+ { "97", "2592", }, // Medium Shading Character
+ { "99", "00C0", }, // Uppercase A Grave
+ { "100", "00C2", }, // Uppercase A Circumflex
+ { "101", "00C8", }, // Uppercase E Grave
+ { "102", "00CA", }, // Uppercase E Circumflex
+ { "103", "00CB", }, // Uppercase E Dieresis
+ { "104", "00CE", }, // Uppercase I Circumflex
+ { "105", "00CF", }, // Uppercase I Dieresis
+ { "106", "00B4", }, // Lowercase Acute Accent (Spacing)
+ { "107", "0060", }, // Lowercase Grave Accent (Spacing)
+ { "108", "02C6", }, // Lowercase Circumflex Accent (Spacing)
+ { "109", "00A8", }, // Lowercase Dieresis Accent (Spacing)
+ { "110", "02DC", }, // Lowercase Tilde Accent (Spacing)
+ { "111", "00D9", }, // Uppercase U Grave
+ { "112", "00DB", }, // Uppercase U Circumflex
+ { "113", "00AF", }, // Overline, or Overscore Character
+ { "114", "00DD", }, // Uppercase Y Acute
+ { "115", "00FD", }, // Lowercase Y Acute
+ { "116", "00B0", }, // Degree Sign
+ { "117", "00C7", }, // Uppercase C Cedilla
+ { "118", "00E7", }, // Lowercase C Cedilla
+ { "119", "00D1", }, // Uppercase N Tilde
+ { "120", "00F1", }, // Lowercase N Tilde
+ { "121", "00A1", }, // Inverted Exclamation
+ { "122", "00BF", }, // Inverted Question Mark
+ { "123", "00A4", }, // Currency Symbol
+ { "124", "00A3", }, // Pound Sterling Sign
+ { "125", "00A5", }, // Yen Sign
+ { "126", "00A7", }, // Section Mark
+ { "127", "0192", }, // Florin Sign
+ { "128", "00A2", }, // Cent Sign
+ { "129", "00E2", }, // Lowercase A Circumflex
+ { "130", "00EA", }, // Lowercase E Circumflex
+ { "131", "00F4", }, // Lowercase O Circumflex
+ { "132", "00FB", }, // Lowercase U Circumflex
+ { "133", "00E1", }, // Lowercase A Acute
+ { "134", "00E9", }, // Lowercase E Acute
+ { "135", "00F3", }, // Lowercase O Acute
+ { "136", "00FA", }, // Lowercase U Acute
+ { "137", "00E0", }, // Lowercase A Grave
+ { "138", "00E8", }, // Lowercase E Grave
+ { "139", "00F2", }, // Lowercase O Grave
+ { "140", "00F9", }, // Lowercase U Grave
+ { "141", "00E4", }, // Lowercase A Dieresis
+ { "142", "00EB", }, // Lowercase E Dieresis
+ { "143", "00F6", }, // Lowercase O Dieresis
+ { "144", "00FC", }, // Lowercase U Dieresis
+ { "145", "00C5", }, // Uppercase A Ring
+ { "146", "00EE", }, // Lowercase I Circumflex
+ { "147", "00D8", }, // Uppercase O Oblique
+ { "148", "00C6", }, // Uppercase AE Diphthong
+ { "149", "00E5", }, // Lowercase A Ring
+ { "150", "00ED", }, // Lowercase I Acute
+ { "151", "00F8", }, // Lowercase O Oblique
+ { "152", "00E6", }, // Lowercase AE Diphthong
+ { "153", "00C4", }, // Uppercase A Dieresis
+ { "154", "00EC", }, // Lowercase I Grave
+ { "155", "00D6", }, // Uppercase O Dieresis
+ { "156", "00DC", }, // Uppercase U Dieresis
+ { "157", "00C9", }, // Uppercase E Acute
+ { "158", "00EF", }, // Lowercase I Dieresis
+ { "159", "00DF", }, // Lowercase Es-zet Ligature
+ { "160", "00D4", }, // Uppercase O Circumflex
+ { "161", "00C1", }, // Uppercase A Acute
+ { "162", "00C3", }, // Uppercase A Tilde
+ { "163", "00E3", }, // Lowercase A Tilde
+ { "164", "00D0", }, // Uppercase Eth
+//{ "164", "0110", }, // Uppercase D-Stroke
+ { "165", "00F0", }, // Lowercase Eth
+ { "166", "00CD", }, // Uppercase I Acute
+ { "167", "00CC", }, // Uppercase I Grave
+ { "168", "00D3", }, // Uppercase O Acute
+ { "169", "00D2", }, // Uppercase O Grave
+ { "170", "00D5", }, // Uppercase O Tilde
+ { "171", "00F5", }, // Lowercase O Tilde
+ { "172", "0160", }, // Uppercase S Hacek
+ { "173", "0161", }, // Lowercase S Hacek
+ { "174", "00DA", }, // Uppercase U Acute
+ { "175", "0178", }, // Uppercase Y Dieresis
+ { "176", "00FF", }, // Lowercase Y Dieresis
+ { "177", "00DE", }, // Uppercase Thorn
+ { "178", "00FE", }, // Lowercase Thorn
+ { "180", "00B5", }, // Lowercase Greek Mu, or Micro
+ { "181", "00B6", }, // Pilcrow, or Paragraph Sign
+ { "182", "00BE", }, // Vulgar Fraction 3/4
+ { "183", "2212", }, // Minus Sign
+ { "184", "00BC", }, // Vulgar Fraction 1/4
+ { "185", "00BD", }, // Vulgar Fraction 1/2
+ { "186", "00AA", }, // Female Ordinal
+ { "187", "00BA", }, // Male Ordinal
+ { "188", "00AB", }, // Left Pointing Double Angle Quote
+ { "189", "25A0", }, // Medium Solid Square Box
+ { "190", "00BB", }, // Right Pointing Double Angle Quote
+ { "191", "00B1", }, // Plus Over Minus Sign
+ { "192", "00A6", }, // Broken Vertical Mark
+ { "193", "00A9", }, // Copyright Sign
+ { "194", "00AC", }, // Not Sign
+ { "195", "00AD", }, // Soft Hyphen
+ { "196", "00AE", }, // Registered Sign
+ { "197", "00B2", }, // Superior Numeral 2
+ { "198", "00B3", }, // Superior Numeral 3
+ { "199", "00B8", }, // Lowercase Cedilla (Spacing)
+ { "200", "00B9", }, // Superior Numeral 1
+ { "201", "00D7", }, // Multiply Sign
+ { "202", "00F7", }, // Divide Sign
+ { "203", "263A", }, // Open Smiling Face
+ { "204", "263B", }, // Solid Smiling Face
+ { "205", "2665", }, // Solid Heart, Card Suit
+ { "206", "2666", }, // Solid Diamond, Card Suit
+ { "207", "2663", }, // Solid Club, Card Suit
+ { "208", "2660", }, // Solid Spade, Card Suit
+ { "209", "25CF", }, // Medium Solid Round Bullet
+ { "210", "25D8", }, // Large Solid square with White Dot
+ { "211", "EFFD", }, // Large Open Round Bullet
+ { "212", "25D9", }, // Large Solid square with White Circle
+ { "213", "2642", }, // Male Symbol
+ { "214", "2640", }, // Female Symbol
+ { "215", "266A", }, // Musical Note
+ { "216", "266B", }, // Pair Of Musical Notes
+ { "217", "263C", }, // Compass, or Eight Pointed Sun
+ { "218", "25BA", }, // Right Solid Arrowhead
+ { "219", "25C4", }, // Left Solid Arrowhead
+ { "220", "2195", }, // Up/Down Arrow
+ { "221", "203C", }, // Double Exclamation Mark
+ { "222", "25AC", }, // Thick Horizontal Mark
+ { "223", "21A8", }, // Up/Down Arrow Baseline
+ { "224", "2191", }, // Up Arrow
+ { "225", "2193", }, // Down Arrow
+ { "226", "2192", }, // Right Arrow
+ { "227", "2190", }, // Left Arrow
+ { "229", "2194", }, // Left/Right Arrow
+ { "230", "25B2", }, // Up Solid Arrowhead
+ { "231", "25BC", }, // Down Solid Arrowhead
+ { "232", "20A7", }, // Pesetas Sign
+ { "233", "2310", }, // Reversed Not Sign
+ { "234", "2591", }, // Light Shading Character
+ { "235", "2593", }, // Dark Shading Character
+ { "236", "2502", }, // Box Draw Line, Vert. 1
+ { "237", "2524", }, // Box Draw Right Tee, Vert. 1 Horiz. 1
+ { "238", "2561", }, // Box Draw Right Tee, Vert. 1 Horiz. 2
+ { "239", "2562", }, // Box Draw Right Tee, Vert. 2 Horiz. 1
+ { "240", "2556", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 1
+ { "241", "2555", }, // Box Draw Upper Right Corner, Vert. 1 Horiz. 2
+ { "242", "2563", }, // Box Draw Right Tee, Vert. 2 Horiz. 2
+ { "243", "2551", }, // Box Draw Lines, Vert. 2
+ { "244", "2557", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 2
+ { "245", "255D", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 2
+ { "246", "255C", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 1
+ { "247", "255B", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 2
+ { "248", "2510", }, // Box Draw Upper Right Corner, Vert. 1, Horiz. 1
+ { "249", "2514", }, // Box Draw Lower Left Corner, Vert. 1, Horiz. 1
+ { "250", "2534", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 1
+ { "251", "252C", }, // Box Draw Top Tee, Vert. 1 Horiz. 1
+ { "252", "251C", }, // Box Draw Left Tee, Vert. 1 Horiz. 1
+ { "253", "2500", }, // Box Draw Line, Horiz. 1
+ { "254", "253C", }, // Box Draw Cross, Vert. 1 Horiz. 1
+ { "255", "255E", }, // Box Draw Left Tee, Vert. 1 Horiz. 2
+ { "256", "255F", }, // Box Draw Left Tee, Vert. 2 Horz. 1
+ { "257", "255A", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 2
+ { "258", "2554", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 2
+ { "259", "2569", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 2
+ { "260", "2566", }, // Box Draw Top Tee, Vert. 2 Horiz. 2
+ { "261", "2560", }, // Box Draw Left Tee, Vert. 2 Horiz. 2
+ { "262", "2550", }, // Box Draw Lines, Horiz. 2
+ { "263", "256C", }, // Box Draw Cross Open Center, Vert. 2 Horiz. 2
+ { "264", "2567", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 2
+ { "265", "2568", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 1
+ { "266", "2564", }, // Box Draw Top Tee, Vert. 1 Horiz. 2
+ { "267", "2565", }, // Box Draw Top Tee, Vert. 2 Horiz. 1
+ { "268", "2559", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 1
+ { "269", "2558", }, // Box Draw Lower Left Corner, Vert. 1 Horiz. 2
+ { "270", "2552", }, // Box Draw Upper Left Corner, Vert. 1 Horiz. 2
+ { "271", "2553", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 1
+ { "272", "256B", }, // Box Draw Cross, Vert. 2 Horiz. 1
+ { "273", "256A", }, // Box Draw Cross, Vert. 1 Horiz. 2
+ { "274", "2518", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 1
+ { "275", "250C", }, // Box Draw Upper Left Corner, Vert. 1, Horiz. 1
+ { "276", "2588", }, // Solid Full High/Wide
+ { "277", "2584", }, // Bottom Half Solid Rectangle
+ { "278", "258C", }, // Left Half Solid Rectangle
+ { "279", "2590", }, // Right Half Solid Rectangle
+ { "280", "2580", }, // Top Half Solid Rectangle
+ { "290", "2126", }, // Uppercase Greek Omega, or Ohms
+ { "292", "221E", }, // Infinity Symbol
+ { "295", "2229", }, // Set Intersection Symbol
+ { "296", "2261", }, // Exactly Equals Sign
+ { "297", "2265", }, // Greater Than or Equal Sign
+ { "298", "2264", }, // Less Than or Equal Sign
+ { "299", "2320", }, // Top Integral
+ { "300", "2321", }, // Bottom Integral
+ { "301", "2248", }, // Two Wavy Line Approximate Sign
+//{ "302", "00B7", }, // Middle Dot, or Centered Period (see 2219)
+//{ "302", "2219", }, // Centered Period, Middle Dot
+ { "302", "2219", }, // Math Dot, Centered Period
+ { "303", "221A", }, // Radical Symbol, Standalone Diagonal
+ { "305", "25AA", }, // Small Solid Square Box
+ { "306", "013F", }, // Uppercase L-Dot
+ { "307", "0140", }, // Lowercase L-Dot
+ { "308", "2113", }, // Litre Symbol
+ { "309", "0149", }, // Lowercase Apostrophe-N
+ { "310", "2032", }, // Prime, Minutes, or Feet Symbol
+ { "311", "2033", }, // Double Prime, Seconds, or Inches Symbol
+ { "312", "2020", }, // Dagger Symbol
+ { "313", "2122", }, // Trademark Sign
+ { "314", "2017", }, // Double Underline Character
+ { "315", "02C7", }, // Lowercase Hacek Accent (Spacing)
+ { "316", "02DA", }, // Lowercase Ring Accent (Spacing)
+ { "317", "EFF9", }, // Uppercase Acute Accent (Spacing)
+ { "318", "EFF8", }, // Uppercase Grave Accent (Spacing)
+ { "319", "EFF7", }, // Uppercase Circumflex Accent (Spacing)
+ { "320", "EFF6", }, // Uppercase Dieresis Accent (Spacing)
+ { "321", "EFF5", }, // Uppercase Tilde Accent (Spacing)
+ { "322", "EFF4", }, // Uppercase Hacek Accent (Spacing)
+ { "323", "EFF3", }, // Uppercase Ring Accent (Spacing)
+ { "324", "2215", }, // Vulgar Fraction Bar
+ { "325", "2014", }, // Em Dash
+ { "326", "2013", }, // En Dash
+ { "327", "2021", }, // Double Dagger Symbol
+ { "328", "0131", }, // Lowercase Undotted I
+ { "329", "0027", }, // Neutral Single Quote
+ { "330", "EFF2", }, // Uppercase Cedilla (Spacing)
+ { "331", "2022", }, // Small Solid Round Bullet
+ { "332", "207F", }, // Superior Lowercase N
+ { "333", "2302", }, // Home Plate
+ { "335", "0138", }, // Lowercase Kra
+ { "338", "0166", }, // Uppercase T-Stroke
+ { "339", "0167", }, // Lowercase T-Stroke
+ { "340", "014A", }, // Uppercase Eng
+ { "341", "014B", }, // Lowercase Eng
+ { "342", "0111", }, // Lowercase D-Stroke
+ { "400", "0102", }, // Uppercase A Breve
+ { "401", "0103", }, // Lowercase A Breve
+ { "402", "0100", }, // Uppercase A Macron
+ { "403", "0101", }, // Lowercase A Macron
+ { "404", "0104", }, // Uppercase A Ogonek
+ { "405", "0105", }, // Lowercase A Ogonek
+ { "406", "0106", }, // Uppercase C Acute
+ { "407", "0107", }, // Lowercase C Acute
+ { "410", "010C", }, // Uppercase C Hacek
+ { "411", "010D", }, // Lowercase C Hacek
+ { "414", "010E", }, // Uppercase D Hacek
+ { "415", "010F", }, // Lowercase D Hacek
+ { "416", "011A", }, // Uppercase E Hacek
+ { "417", "011B", }, // Lowercase E Hacek
+ { "418", "0116", }, // Uppercase E Overdot
+ { "419", "0117", }, // Lowercase E Overdot
+ { "420", "0112", }, // Uppercase E Macron
+ { "421", "0113", }, // Lowercase E Macron
+ { "422", "0118", }, // Uppercase E Ogonek
+ { "423", "0119", }, // Lowercase E Ogonek
+ { "428", "0122", }, // Uppercase G Cedilla
+ { "429", "0123", }, // Lowercase G Cedilla
+ { "432", "012E", }, // Uppercase I Ogonek
+ { "433", "012F", }, // Lowercase I Ogonek
+ { "434", "012A", }, // Uppercase I Macron
+ { "435", "012B", }, // Lowercase I Macron
+ { "438", "0136", }, // Uppercase K Cedilla
+ { "439", "0137", }, // Lowercase K Cedilla
+ { "440", "0139", }, // Uppercase L Acute
+ { "441", "013A", }, // Lowercase L Acute
+ { "442", "013D", }, // Uppercase L Hacek
+ { "443", "013E", }, // Lowercase L Hacek
+ { "444", "013B", }, // Uppercase L Cedilla
+ { "445", "013C", }, // Lowercase L Cedilla
+ { "446", "0143", }, // Uppercase N Acute
+ { "447", "0144", }, // Lowercase N Acute
+ { "448", "0147", }, // Uppercase N Hacek
+ { "449", "0148", }, // Lowercase N Hacek
+ { "450", "0145", }, // Uppercase N Cedilla
+ { "451", "0146", }, // Lowercase N Cedilla
+ { "452", "0150", }, // Uppercase O Double Acute
+ { "453", "0151", }, // Lowercase O Double Acute
+ { "454", "014C", }, // Uppercase O Macron
+ { "455", "014D", }, // Lowercase O Macron
+ { "456", "0154", }, // Uppercase R Acute
+ { "457", "0155", }, // Lowercase R Acute
+ { "458", "0158", }, // Uppercase R Hacek
+ { "459", "0159", }, // Lowercase R Hacek
+ { "460", "0156", }, // Uppercase R Cedilla
+ { "461", "0157", }, // Lowercase R Cedilla
+ { "462", "015A", }, // Uppercase S Acute
+ { "463", "015B", }, // Lowercase S Acute
+ { "466", "0164", }, // Uppercase T Hacek
+ { "467", "0165", }, // Lowercase T Hacek
+ { "468", "0162", }, // Uppercase T Cedilla
+ { "469", "0163", }, // Lowercase T Cedilla
+ { "470", "0168", }, // Uppercase U Tilde
+ { "471", "0169", }, // Lowercase U Tilde
+ { "474", "0170", }, // Uppercase U Double Acute
+ { "475", "0171", }, // Lowercase U Double Acute
+ { "476", "016E", }, // Uppercase U Ring
+ { "477", "016F", }, // Lowercase U Ring
+ { "478", "016A", }, // Uppercase U Macron
+ { "479", "016B", }, // Lowercase U Macron
+ { "480", "0172", }, // Uppercase U Ogonek
+ { "481", "0173", }, // Lowercase U Ogonek
+ { "482", "0179", }, // Uppercase Z Acute
+ { "483", "017A", }, // Lowercase Z Acute
+ { "484", "017B", }, // Uppercase Z Overdot
+ { "485", "017C", }, // Lowercase Z Overdot
+ { "486", "0128", }, // Uppercase I Tilde
+ { "487", "0129", }, // Lowercase I Tilde
+ { "500", "EFBF", }, // Radical, Diagonal, Composite
+ { "501", "221D", }, // Proportional To Symbol
+ { "502", "212F", }, // Napierian (italic e)
+ { "503", "03F5", }, // Alternate Lowercase Greek Epsilon
+//{ "503", "EFEC", }, // Alternate Lowercase Greek Epsilon
+ { "504", "2234", }, // Therefore Symbol
+ { "505", "0393", }, // Uppercase Greek Gamma
+ { "506", "2206", }, // Increment Symbol (Delta)
+ { "507", "0398", }, // Uppercase Greek Theta
+ { "508", "039B", }, // Uppercase Greek Lambda
+ { "509", "039E", }, // Uppercase Greek Xi
+ { "510", "03A0", }, // Uppercase Greek Pi
+ { "511", "03A3", }, // Uppercase Greek Sigma
+ { "512", "03A5", }, // Uppercase Greek Upsilon
+ { "513", "03A6", }, // Uppercase Greek Phi
+ { "514", "03A8", }, // Uppercase Greek Psi
+ { "515", "03A9", }, // Uppercase Greek Omega
+ { "516", "2207", }, // Nabla Symbol (inverted Delta)
+ { "517", "2202", }, // Partial Differential Delta Symbol
+ { "518", "03C2", }, // Lowercase Sigma, Terminal
+ { "519", "2260", }, // Not Equal To Symbol
+ { "520", "EFEB", }, // Underline, Composite
+ { "521", "2235", }, // Because Symbol
+ { "522", "03B1", }, // Lowercase Greek Alpha
+ { "523", "03B2", }, // Lowercase Greek Beta
+ { "524", "03B3", }, // Lowercase Greek Gamma
+ { "525", "03B4", }, // Lowercase Greek Delta
+ { "526", "03B5", }, // Lowercase Greek Epsilon
+ { "527", "03B6", }, // Lowercase Greek Zeta
+ { "528", "03B7", }, // Lowercase Greek Eta
+ { "529", "03B8", }, // Lowercase Greek Theta
+ { "530", "03B9", }, // Lowercase Greek Iota
+ { "531", "03BA", }, // Lowercase Greek Kappa
+ { "532", "03BB", }, // Lowercase Greek Lambda
+ { "533", "03BC", }, // Lowercase Greek Mu
+ { "534", "03BD", }, // Lowercase Greek Nu
+ { "535", "03BE", }, // Lowercase Greek Xi
+ { "536", "03BF", }, // Lowercase Greek Omicron
+ { "537", "03C0", }, // Lowercase Greek Pi
+ { "538", "03C1", }, // Lowercase Greek Rho
+ { "539", "03C3", }, // Lowercase Greek Sigma
+ { "540", "03C4", }, // Lowercase Greek Tau
+ { "541", "03C5", }, // Lowercase Greek Upsilon
+ { "542", "03C6", }, // Lowercase Greek Phi
+ { "543", "03C7", }, // Lowercase Greek Chi
+ { "544", "03C8", }, // Lowercase Greek Psi
+ { "545", "03C9", }, // Lowercase Greek Omega
+ { "546", "03D1", }, // Lowercase Greek Theta, Open
+ { "547", "03D5", }, // Lowercase Greek Phi, Open
+ { "548", "03D6", }, // Lowercase Pi, Alternate
+ { "549", "2243", }, // Wavy Over Straight Approximate Symbol
+ { "550", "2262", }, // Not Exactly Equal To Symbol
+ { "551", "21D1", }, // Up Arrow Double Stroke
+ { "552", "21D2", }, // Right Arrow Double Stroke
+ { "553", "21D3", }, // Down Arrow Double Stroke
+ { "554", "21D0", }, // Left Arrow Double Stroke
+ { "555", "21D5", }, // Up/Down Arrow Double Stroke
+ { "556", "21D4", }, // Left/Right Arrow Double Stroke
+ { "557", "21C4", }, // Right Over Left Arrow
+ { "558", "21C6", }, // Left Over Right Arrow
+ { "559", "EFE9", }, // Vector Symbol
+ { "560", "0305", }, // Overline, Composite
+ { "561", "2200", }, // For All Symbol, or Universal (inverted A)
+ { "562", "2203", }, // There Exists Symbol, or Existential (inverted E)
+ { "563", "22A4", }, // Top Symbol
+ { "564", "22A5", }, // Bottom Symbol
+ { "565", "222A", }, // Set Union Symbol
+ { "566", "2208", }, // Element-Of Symbol
+ { "567", "220B", }, // Contains Symbol
+ { "568", "2209", }, // Not-Element-Of Symbol
+ { "569", "2282", }, // Proper Subset Symbol
+ { "570", "2283", }, // Proper Superset Symbol
+ { "571", "2284", }, // Not Proper Subset Symbol
+ { "572", "2285", }, // Not Proper Superset Symbol
+ { "573", "2286", }, // Subset Symbol
+ { "574", "2287", }, // Superset Symbol
+ { "575", "2295", }, // Plus In Circle Symbol
+ { "576", "2299", }, // Dot In Circle Symbol
+ { "577", "2297", }, // Times In Circle Symbol
+ { "578", "2296", }, // Minus In Circle Symbol
+ { "579", "2298", }, // Slash In Circle Symbol
+ { "580", "2227", }, // Logical And Symbol
+ { "581", "2228", }, // Logical Or Symbol
+ { "582", "22BB", }, // Exclusive Or Symbol
+ { "583", "2218", }, // Functional Composition Symbol
+ { "584", "20DD", }, // Large Open Circle
+ { "585", "22A3", }, // Assertion Symbol
+ { "586", "22A2", }, // Backwards Assertion Symbol
+ { "587", "222B", }, // Integral Symbol
+ { "588", "222E", }, // Curvilinear Integral Symbol
+ { "589", "2220", }, // Angle Symbol
+ { "590", "2205", }, // Empty Set Symbol
+ { "591", "2135", }, // Hebrew Aleph
+ { "592", "2136", }, // Hebrew Beth
+ { "593", "2137", }, // Hebrew Gimmel
+ { "594", "212D", }, // Fraktur Uppercase C
+ { "595", "2111", }, // Fraktur Uppercase I
+ { "596", "211C", }, // Fraktur Uppercase R
+ { "597", "2128", }, // Fraktur Uppercase Z
+ { "598", "23A1", }, // Top Segment Left Bracket (Left Square Bracket Upper Corner)
+ { "599", "23A3", }, // Bottom Segment Left Bracket (Left Square Bracket Lower Corner)
+ { "600", "239B", }, // Top Segment Left Brace (Left Parenthesis Upper Hook)
+//{ "600", "23A7", }, // Top Segment Left Brace (Left Curly Bracket Upper Hook)
+ { "601", "23A8", }, // Middle Segment Left Brace (Left Curly Bracket Middle Piece)
+ { "602", "239D", }, // Bottom Segment Left Brace (Left Parenthesis Lower Hook)
+//{ "602", "23A9", }, // Bottom Segment Left Brace (Left Curly Bracket Lower Hook)
+ { "603", "EFD4", }, // Middle Segment Curvilinear Integral
+ { "604", "EFD3", }, // Top Left Segment Summation
+ { "605", "2225", }, // Double Vertical Line, Composite
+ { "606", "EFD2", }, // Bottom Left Segment Summation
+ { "607", "EFD1", }, // Bottom Diagonal Summation
+ { "608", "23A4", }, // Top Segment Right Bracket (Right Square Bracket Upper Corner)
+ { "609", "23A6", }, // Bottom Segment Right Bracket (Right Square Bracket Lower Corner)
+ { "610", "239E", }, // Top Segment Right Brace (Right Parenthesis Upper Hook)
+//{ "610", "23AB", }, // Top Segment Right Brace (Right Curly Bracket Upper Hook)
+ { "611", "23AC", }, // Middle Segment Right Brace (Right Curly Bracket Middle Piece)
+ { "612", "23A0", }, // Bottom Segment Right Brace (Right Parenthesis Lower Hook)
+//{ "612", "23AD", }, // Bottom Segment Right Brace (Right Curly Bracket Lower Hook)
+ { "613", "239C", }, // Thick Vertical Line, Composite (Left Parenthesis Extension)
+//{ "613", "239F", }, // Thick Vertical Line, Composite (Right Parenthesis Extension)
+//{ "613", "23AA", }, // Thick Vertical Line, Composite (Curly Bracket Extension)
+//{ "613", "23AE", }, // Thick Vertical Line, Composite (Integral Extension)
+ { "614", "2223", }, // Thin Vertical Line, Composite
+ { "615", "EFDC", }, // Bottom Segment of Vertical Radical
+ { "616", "EFD0", }, // Top Right Segment Summation
+ { "617", "EFCF", }, // Middle Segment Summation
+ { "618", "EFCE", }, // Bottom Right Segment Summation
+ { "619", "EFCD", }, // Top Diagonal Summation
+ { "620", "2213", }, // Minus Over Plus Sign
+ { "621", "2329", }, // Left Angle Bracket
+ { "622", "232A", }, // Right Angle Bracket
+ { "623", "EFFF", }, // Mask Symbol
+ { "624", "2245", }, // Wavy Over Two Straight Approximate Symbol
+ { "625", "2197", }, // 45 Degree Arrow
+ { "626", "2198", }, // -45 Degree Arrow
+ { "627", "2199", }, // -135 Degree Arrow
+ { "628", "2196", }, // 135 Degree Arrow
+ { "629", "25B5", }, // Up Open Triangle
+ { "630", "25B9", }, // Right Open Triangle
+ { "631", "25BF", }, // Down Open Triangle
+ { "632", "25C3", }, // Left Open Triangle
+ { "633", "226A", }, // Much Less Than Sign
+ { "634", "226B", }, // Much Greater Than Sign
+ { "635", "2237", }, // Proportional To Symbol (4 dots)
+ { "636", "225C", }, // Defined As Symbol
+ { "637", "03DD", }, // Lowercase Greek Digamma
+ { "638", "210F", }, // Planck's Constant divided by 2 pi
+ { "639", "2112", }, // Laplace Transform Symbol
+ { "640", "EFFE", }, // Power Set
+ { "641", "2118", }, // Weierstrassian Symbol
+ { "642", "2211", }, // Summation Symbol (large Sigma)
+ { "643", "301A", }, // Left Double Bracket
+ { "644", "EFC9", }, // Middle Segment Double Bracket
+ { "645", "301B", }, // Right Double Bracket
+ { "646", "256D", }, // Box Draw Left Top Round Corner
+ { "647", "2570", }, // Box Draw Left Bottom Round Corner
+ { "648", "EFC8", }, // Extender Large Union/Product
+ { "649", "EFC7", }, // Bottom Segment Large Union
+ { "650", "EFC6", }, // Top Segment Large Intersection
+ { "651", "EFC5", }, // Top Segment Left Double Bracket
+ { "652", "EFC4", }, // Bottom Segment Left Double Bracket
+ { "653", "EFFC", }, // Large Open Square Box
+ { "654", "25C7", }, // Open Diamond
+ { "655", "256E", }, // Box Draw Right Top Round Corner
+ { "656", "256F", }, // Box Draw Right Bottom Round Corner
+ { "657", "EFC3", }, // Bottom Segment Large Bottom Product
+ { "658", "EFC2", }, // Top Segment Large Top Product
+ { "659", "EFC1", }, // Top Segment Right Double Bracket
+ { "660", "EFC0", }, // Bottom Segment Right Double Bracket
+ { "661", "EFFB", }, // Large Solid Square Box
+ { "662", "25C6", }, // Solid Diamond
+ { "663", "220D", }, // Such That Symbol (rotated lc epsilon)
+ { "664", "2217", }, // Math Asterisk
+ { "665", "23AF", }, // Horizontal Arrow Extender (Horizontal Line Extension)
+ { "666", "EFCB", }, // Double Horizontal Arrow Extender
+ { "667", "EFCC", }, // Inverted Complement of 0xEFCF or MSL 617
+ { "668", "221F", }, // Right Angle Symbol
+ { "669", "220F", }, // Product Symbol (large Pi)
+ { "684", "25CA", }, // Lozenge, Diamond
+ { "1000", "2070", }, // Superior Numeral 0
+ { "1001", "2074", }, // Superior Numeral 4
+ { "1002", "2075", }, // Superior Numeral 5
+ { "1003", "2076", }, // Superior Numeral 6
+ { "1004", "2077", }, // Superior Numeral 7
+ { "1005", "2078", }, // Superior Numeral 8
+ { "1006", "2079", }, // Superior Numeral 9
+ { "1017", "201C", }, // Double Open Quote (6)
+ { "1018", "201D", }, // Double Close Quote (9)
+ { "1019", "201E", }, // Double Baseline Quote (9)
+ { "1020", "2003", }, // Em Space
+ { "1021", "2002", }, // En Space
+ { "1023", "2009", }, // Thin Space
+ { "1028", "2026", }, // Ellipsis
+ { "1030", "EFF1", }, // Uppercase Ogonek (Spacing)
+ { "1031", "017E", }, // Lowercase Z Hacek
+ { "1034", "2120", }, // Service Mark
+ { "1036", "211E", }, // Prescription Sign
+//{ "1040", "F001", }, // Lowercase FI Ligature
+ { "1040", "FB01", }, // Lowercase FI Ligature
+//{ "1041", "F002", }, // Lowercase FL Ligature
+ { "1041", "FB02", }, // Lowercase FL Ligature
+ { "1042", "FB00", }, // Lowercase FF Ligature
+ { "1043", "FB03", }, // Lowercase FFI Ligature
+ { "1044", "FB04", }, // Lowercase FFL Ligature
+ { "1045", "EFF0", }, // Uppercase Double Acute Accent (Spacing)
+ { "1047", "0133", }, // Lowercase IJ Ligature
+ { "1060", "2105", }, // Care Of Symbol
+ { "1061", "011E", }, // Uppercase G Breve
+ { "1062", "011F", }, // Lowercase G Breve
+ { "1063", "015E", }, // Uppercase S Cedilla
+ { "1064", "015F", }, // Lowercase S Cedilla
+ { "1065", "0130", }, // Uppercase I Overdot
+ { "1067", "201A", }, // Single Baseline Quote (9)
+ { "1068", "2030", }, // Per Mill Sign
+ { "1069", "20AC", }, // Euro
+ { "1084", "02C9", }, // Lowercase Macron Accent (Spacing)
+ { "1086", "02D8", }, // Lowercase Breve Accent (Spacing)
+ { "1088", "02D9", }, // Lowercase Overdot Accent (Spacing)
+ { "1090", "0153", }, // Lowercase OE Ligature
+ { "1091", "0152", }, // Uppercase OE Ligature
+ { "1092", "2039", }, // Left Pointing Single Angle Quote
+ { "1093", "203A", }, // Right Pointing Single Angle Quote
+ { "1094", "25A1", }, // Medium Open Square Box
+ { "1095", "0141", }, // Uppercase L-Stroke
+ { "1096", "0142", }, // Lowercase L-Stroke
+ { "1097", "02DD", }, // Lowercase Double Acute Accent (Spacing)
+ { "1098", "02DB", }, // Lowercase Ogonek (Spacing)
+ { "1099", "21B5", }, // Carriage Return Symbol
+ { "1100", "EFDB", }, // Full Size Serif Registered
+ { "1101", "EFDA", }, // Full Size Serif Copyright
+ { "1102", "EFD9", }, // Full Size Serif Trademark
+ { "1103", "EFD8", }, // Full Size Sans Registered
+ { "1104", "EFD7", }, // Full Size Sans Copyright
+ { "1105", "EFD6", }, // Full Size Sans Trademark
+ { "1106", "017D", }, // Uppercase Z Hacek
+ { "1107", "0132", }, // Uppercase IJ Ligature
+ { "1108", "25AB", }, // Small Open Square Box
+ { "1109", "25E6", }, // Small Open Round Bullet
+ { "1110", "25CB", }, // Medium Open Round Bullet
+ { "1111", "EFFA", }, // Large Solid Round Bullet
+ { "3812", "F000", }, // Ornament, Apple
+};
+
+// global constructor: the file-scope object defined here exists only so that its constructor runs and fills hp_msl_to_unicode_table
+static struct hp_msl_to_unicode_init {
+ hp_msl_to_unicode_init(); // defined just below; registers every row of hp_msl_to_unicode_list
+} _hp_msl_to_unicode_init; // the single instance, declared together with its type
+
+hp_msl_to_unicode_init::hp_msl_to_unicode_init() {
+ for (unsigned int i = 0;
+ i < sizeof(hp_msl_to_unicode_list)/sizeof(hp_msl_to_unicode_list[0]); // number of rows in the static list
+ i++) {
+ hp_msl_to_unicode *ptu = new hp_msl_to_unicode[1]; // one heap entry per row, not freed here. NOTE(review): array-new of a single element looks deliberate (presumably to match how the table releases values) -- confirm before changing
+ ptu->value = (char *)hp_msl_to_unicode_list[i].value; // stores the list's own string pointer, no copy is made. NOTE(review): the cast presumably drops const -- verify against the struct declarations
+ hp_msl_to_unicode_table.define(hp_msl_to_unicode_list[i].key, ptu); // register the entry under the row's key string
+ }
+}
+
+const char *hp_msl_to_unicode_code(const char *s) // s: key string to look up; table keys are MSL numbers written in decimal, such as "328"
+{
+ hp_msl_to_unicode *result = hp_msl_to_unicode_table.lookup(s); // entry registered for this key by the global constructor, if any
+ return result ? result->value : 0; // the entry's code string (four hex digits such as "0131"), or 0 when lookup() returned a null entry
+}