summaryrefslogtreecommitdiff
path: root/src/utils/afmtodit/make-afmtodit-tables
blob: ff7a71d5f7f117940ab3060efa14a0443fd57a1f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#! /bin/sh
#
# make-afmtodit-tables -- script for creating the `unicode_decomposed'
#                         and `AGL_to_unicode' tables
#
# Copyright (C) 2005
# Free Software Foundation, Inc.
#      Written by Werner Lemberg <wl@gnu.org>
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2, or (at your option) any later
# version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with groff; see the file COPYING.  If not, write to the Free Software
# Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.

#
# usage:
#
#   make-afmtodit-tables \
#     UnicodeData.txt version-string glyphlist.txt > afmtodit.in
#
# `UnicodeData.txt' is the central database file from the Unicode standard.
# Unfortunately, it doesn't contain a version number, which must therefore
# be provided manually as an additional parameter.
#
# `glyphlist.txt' holds the Adobe Glyph List (AGL).
#
# This program needs a C preprocessor.
#

# Command used to run the C preprocessor over the generated macro file.
CPP=cpp

prog="$0"

# Exactly three arguments are required.  The usage text goes to stderr
# because stdout is meant to be redirected into the generated file; a
# wrong invocation must not end up writing the usage line into it.
if test $# -ne 3; then
  printf 'usage: %s UnicodeData.txt <version-string> glyphlist.txt > afmtodit.in\n' \
    "$prog" >&2
  exit 1
fi

unicode_data="$1"
version_string="$2"
glyph_list="$3"

# Both input files must be regular files before any output is produced.
for input in "$unicode_data" "$glyph_list"; do
  if test ! -f "$input"; then
    printf "File \`%s' doesn't exist\n" "$input" >&2
    exit 2
  fi
done

# Handle UnicodeData.txt.
#
# All intermediate files live in a private directory created by mktemp.
# The exit trap removes it, so nothing is left behind (and nothing in the
# current directory is clobbered) when a step fails or the script is
# interrupted.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/afmtodit.XXXXXX") || {
  echo "Cannot create temporary directory" >&2
  exit 2
}
trap 'rm -rf -- "$tmpdir"' 0
# Turn the usual termination signals into a normal exit so that the
# exit trap above runs for them as well.
trap 'exit 1' 1 2 15

decomp="$tmpdir/decomp"       # `code;decomposition' lines
cpp_in="$tmpdir/cppin"        # macro definitions plus lines to expand
cpp_out="$tmpdir/cppout"      # raw preprocessor output
expanded="$tmpdir/expanded"   # fully decomposed `code;decomposition' lines

# Remove ranges and control characters (second field starts with `<'),
# then reduce each line to `code;decomposition-field',
# then remove lines without decomposition,
# then remove all compatibility decompositions (field starts with `<').
sed -e '/^[^;]*;</d' \
    -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
    -e '/^[^;]*;$/d' \
    -e '/^[^;]*;</d' < "$unicode_data" > "$decomp"

# Prepare input for running cpp.
#
# First part: one `#define uCODE uPART uPART ...' per decomposition; the
# `u' prefix turns the hexadecimal code points into valid macro names.
# Second part: one `CODE uCODE' line per character, whose `uCODE' token
# the preprocessor replaces with the expansion.
{
  sed -e 's/^\([^;]*\);/#define \1 /' \
      -e 's/ / u/g' < "$decomp"
  sed -e 's/^\([^;]*\);.*$/\1 u\1/' < "$decomp"
} > "$cpp_in"

# Run C preprocessor to recursively decompose.
# $CPP is intentionally unquoted so that it may carry options.
$CPP "$cpp_in" "$cpp_out" || {
  echo "C preprocessor \`$CPP' failed" >&2
  exit 3
}

# Convert it back to original format: drop preprocessor line markers and
# empty lines, squeeze runs of spaces, strip trailing spaces, remove the
# `u' prefixes, and restore the `;' after the code point.
# (`  *' is the portable spelling of the GNU-only ` \+'.)
sed -e '/#/d' \
    -e '/^$/d' \
    -e 's/  */ /g' \
    -e 's/ *$//' \
    -e 's/u//g' \
    -e 's/^\([^ ]*\) /\1;/' < "$cpp_out" > "$expanded"

# Generation date used in both table comments.
today=$(date '+%Y-%m-%d')

# Write comment.
cat <<END
# This table has been algorithmically derived from the file
# UnicodeData.txt, version $version_string, available from unicode.org,
# on $today.
END

# Emit first table: each `CODE;P1 P2' line becomes `  "CODE", "P1_P2",'.
echo 'my %unicode_decomposed = ('
sed -e 's/ /_/g' \
    -e 's/\(.*\);\(.*\)/  "\1", "\2",/' < "$expanded"
echo ');'
echo ''

# Write comment.  (The glyph list version is hard-coded here.)
cat <<END
# This table has been algorithmically derived from the file
# glyphlist.txt, version 2.0, available from partners.adobe.com,
# on $today.
END

# Emit second table: skip comment lines and entries whose code starts
# with `E' or `F0'..`F8', and turn `NAME;C1 C2' into `  "NAME", "C1_C2",'.
# The two delete expressions replace the GNU-only `\(E\|F[0-8]\)'.
echo 'my %AGL_to_unicode = ('
sed -e '/#/d' \
    -e 's/ /_/g' \
    -e '/;E/d' \
    -e '/;F[0-8]/d' \
    -e 's/\(.*\);\(.*\)/  "\1", "\2",/' < "$glyph_list"
echo ');'

# Temporary files are removed by the exit trap installed above.

# EOF