1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
BEGIN {
  # Numeric value of each hexadecimal letter digit, in both cases.
  # Decimal digits have no entries here; decode_hex converts those
  # arithmetically.
  tohex["A"] = tohex["a"] = 10;
  tohex["B"] = tohex["b"] = 11;
  tohex["C"] = tohex["c"] = 12;
  tohex["D"] = tohex["d"] = 13;
  tohex["E"] = tohex["e"] = 14;
  tohex["F"] = tohex["f"] = 15;
}
# Convert a string of hexadecimal digits to its numeric value.
#   str - hex digits (0-9, A-F, a-f), most significant first, no prefix.
# Returns the value as a number; an empty string yields 0.
# Characters outside the hex alphabet are not rejected: they look up an
# unset tohex entry and therefore contribute 0 at their position.
# The parameters after str are scratch locals.  Awk scopes a variable to
# a function only when it appears in the parameter list, so they are
# declared there to keep this helper from overwriting globals of the
# same name (i is also the loop counter of the END rule).  Callers pass
# only str.
function decode_hex(str,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = 1; i <= len; i++)
    {
      c = substr(str, i, 1);
      if (c >= "0" && c <= "9")
        n = n * 16 + (c - "0");   # string digit coerced to its number
      else
        n = n * 16 + tohex[c];
    }
  return n;
}
# Map a two-byte GB code to its zero-based table index.
#   gb - the code as a number: first byte * 256 + second byte.
# Returns (first - 129) * 190 + (second - 64), less one when the second
# byte is 128 or more, so that second bytes 126 and 128 receive
# consecutive indices (127 gets no index of its own).
# b0, b1 and idx are scratch locals declared in the parameter list so the
# function does not overwrite globals of the same name; callers pass only gb.
function gb_to_index(gb,    b0, b1, idx) {
  b0 = int(gb / 256);
  b1 = gb % 256;
  idx = (b0 - 129) * 190 + b1 - 64;
  if (b1 >= 128)
    idx--;
  return idx;
}
# Map a zero-based table index back to its two-byte GB code.
#   idx - table index, 190 consecutive indices per first byte.
# Returns first byte * 256 + second byte, where the first byte is
# int(idx / 190) + 129 and the second byte is (idx % 190) + 64, bumped by
# one once it reaches 127 so that value 127 is never produced.
# b0 and b1 are scratch locals declared in the parameter list so the
# function does not overwrite globals of the same name; callers pass only idx.
function index_to_gb(idx,    b0, b1) {
  b0 = int(idx / 190) + 129;
  b1 = (idx % 190) + 64;
  if (b1 >= 127)
    b1++;
  return (b0 * 256 + b1);
}
# Decode a two-byte code written as two \xHH escapes into a number.
#   str - text of the form \xHH\xHH; only the characters at positions
#         3-4 and 7-8 (the two hex pairs) are read, the rest is not checked.
# Returns first byte * 256 + second byte.
# b0 and b1 are scratch locals declared in the parameter list so the
# function does not overwrite globals of the same name; callers pass only str.
function decode_gb(str,    b0, b1) {
  b0 = decode_hex(substr(str, 3, 2));
  b1 = decode_hex(substr(str, 7, 2));
  return (b0 * 256 + b1);
}
# Charmap lines: first field starts with <UXXXX>, second field is exactly
# two \xHH escapes.  Store the Unicode value in gb_table under the table
# index of the two-byte code.  Lines failing either test are ignored.
/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ &&
$2 ~ /^\\x[0-9A-F][0-9A-F]\\x[0-9A-F][0-9A-F]$/ {
  # The four hex digits follow the two-character <U prefix.
  unicode = decode_hex(substr($1, 3, 4));
  idx = gb_to_index(gb = decode_gb($2));
  gb_table[idx] = unicode;
}
# Print gb_table as runs of consecutive indices whose Unicode values also
# rise by exactly one per index.  Each output line gives the GB code of
# the run (a single code, or first-last for longer runs) followed by the
# Unicode value of the first entry of the run.
# Indices that never received an entry read as unset elements; those act
# as 0 in the arithmetic and in the %04X conversion.
END {
  # Walk every index up to the one belonging to code FEFE.
  last_idx = gb_to_index(decode_hex("FEFE"));
  from_idx = 0;
  from_unicode = gb_table[0];
  for (i = 1; i <= last_idx; i++)
    {
      unicode = gb_table[i];
      # The run breaks as soon as the Unicode offset from the start of the
      # run stops matching the index offset.
      if (i - from_idx != unicode - from_unicode)
        {
          if (i - 1 == from_idx)
            printf ("0x%04X 0x%04X\n",
                    index_to_gb(from_idx), from_unicode);
          else
            printf ("0x%04X-0x%04X 0x%04X\n",
                    index_to_gb(from_idx), index_to_gb(i - 1), from_unicode);
          from_idx = i;
          from_unicode = unicode;
        }
    }
  # The run still open when the loop finishes has not been printed yet.
  # Use the same single/range distinction as inside the loop so a final
  # run of length one is not written as a degenerate range.
  if (from_idx == last_idx)
    printf ("0x%04X 0x%04X\n",
            index_to_gb(from_idx), from_unicode);
  else
    printf ("0x%04X-0x%04X 0x%04X\n",
            index_to_gb(from_idx), index_to_gb(last_idx), from_unicode);
}
|