summaryrefslogtreecommitdiff
path: root/strings/utr11-dump.c
blob: 54c1de7b7b81a7ee3419eee2a54babd878330c67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* Copyright (c) 2004, 2006 MySQL AB
   Use is subject to license terms.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
  Read an EastAsianWidth.txt file from stdin and dump it to stdout
  as C lookup tables.
  See http://www.unicode.org/reports/tr11/ for details.
  Character types and the width flag emitted for each of them:
  F  - Full width  = 1
  H  - Half width  = 0
  W  - Wide        = 1
  Na - Narrow      = 0
  A  - Ambiguous   = 0
  N  - Neutral     = 0
*/


/* Sizes of the tables built and dumped by this program */
enum
{
  UTR11_PAGE_SIZE= 256,                 /* Code points per page */
  UTR11_PAGES= 256,                     /* Number of pages that are dumped */
  UTR11_MAX_CODE= 0xFFFF                /* Last code point that is recorded */
};


/*
  Parse one input line and record its width flag in "plane".

  Accepted forms are "XXXX;T" and "XXXX..YYYY;T", where XXXX and YYYY are
  hexadecimal code points and T is the character type. Spaces and tabs are
  allowed at the start of the line and around the ';'. The flag is 1 when
  T starts with 'F' or 'W', and 0 otherwise.

  line   - NUL-terminated input line
  plane  - array of UTR11_MAX_CODE + 1 width flags, updated in place

  Returns 0 if the line was applied or intentionally ignored (blank line,
  comment, first code point outside 0..UTR11_MAX_CODE), and 1 if the line
  is malformed.
*/
static int utr11_parse_line(const char *line, unsigned char *plane)
{
  const char *s= line;
  char *end;
  long first, last, code;
  unsigned char width;

  while (*s == ' ' || *s == '\t')
    s++;
  if (*s == '#' || *s == '\n' || *s == '\r' || *s == '\0')
    return 0;                           /* Comment or blank line */

  first= strtol(s, &end, 16);
  if (end == s)
    return 1;                           /* No code point on the line */
  if (first < 0 || first > UTR11_MAX_CODE)
    return 0;                           /* Nothing to record for this line */

  last= first;                          /* One character, unless a range */
  if (end[0] == '.' && end[1] == '.')   /* Range */
  {
    s= end + 2;
    last= strtol(s, &end, 16);
    if (end == s || last < first)
      return 1;                         /* Missing or reversed range end */
    /*
      Keep the part of the range that fits into the table
      instead of dropping the whole range.
    */
    if (last > UTR11_MAX_CODE)
      last= UTR11_MAX_CODE;
  }

  while (*end == ' ' || *end == '\t')
    end++;
  if (*end != ';')
    return 1;
  end++;
  while (*end == ' ' || *end == '\t')
    end++;

  width= (unsigned char) ((*end == 'F' || *end == 'W') ? 1 : 0);
  for (code= first; code <= last; code++)
    plane[code]= width;
  return 0;
}


/*
  Write the collected width flags to stdout as C source.

  A page whose 256 flags are not all equal is emitted as a "pgXX" array.
  The "utr11_data" index that follows has one entry per page: either
  {flag,NULL} for a uniform page, or {0,pgXX} for a mixed one.

  plane  - array of UTR11_PAGES * UTR11_PAGE_SIZE width flags (0 or 1)
*/
static void utr11_dump(const unsigned char *plane)
{
  int page[UTR11_PAGES];                /* Sum of the flags of each page */
  int i, j;

  for (i= 0; i < UTR11_PAGES; i++)
  {
    const unsigned char *p= plane + UTR11_PAGE_SIZE * i;

    page[i]= 0;
    for (j= 0; j < UTR11_PAGE_SIZE; j++)
      page[i]+= p[j];

    /* A sum of 0 or UTR11_PAGE_SIZE means that all flags are equal */
    if (page[i] != 0 && page[i] != UTR11_PAGE_SIZE)
    {
      printf("static char pg%02X[256]=\n{\n", (unsigned int) i);
      for (j= 0; j < UTR11_PAGE_SIZE; j++)
      {
        printf("%d%s%s", p[j],
               j < UTR11_PAGE_SIZE - 1 ? "," : "",
               (j + 1) % 32 ? "" : "\n");
      }
      printf("};\n\n");
    }
  }

  printf("static struct {int page; char *p;} utr11_data[256]=\n{\n");
  for (i= 0; i < UTR11_PAGES; i++)
  {
    if (page[i] == 0 || page[i] == UTR11_PAGE_SIZE)
      printf("{%d,NULL}", page[i] == UTR11_PAGE_SIZE ? 1 : 0);
    else
      printf("{0,pg%02X}", (unsigned int) i);
    printf("%s%s", i < UTR11_PAGES - 1 ? "," : "", (i + 1) % 8 ? "" : "\n");
  }
  printf("};\n");
}


/*
  Read EastAsianWidth.txt from stdin and write the width tables to stdout.

  Malformed lines are reported on stderr; if there was at least one,
  nothing is written to stdout.

  Returns 0 on success, 1 on malformed input or on an I/O error.
*/
int main(int ac, char **av)
{
  /* Static, so that it is zero-initialized and does not use 64K of stack */
  static unsigned char plane[UTR11_PAGES * UTR11_PAGE_SIZE];
  char str[128];
  int errors= 0;

  (void) ac;
  (void) av;

  while (fgets(str, sizeof(str), stdin))
  {
    int complete= strchr(str, '\n') != NULL;

    if (!complete)
    {
      /*
        The line did not fit into the buffer (or it is an unterminated
        last line). Drop the rest, so that it is not parsed as a line
        of its own; the fields of interest are at the start of the line.
      */
      int c;
      while ((c= getc(stdin)) != EOF && c != '\n')
      {}
    }

    if (utr11_parse_line(str, plane))
    {
      errors++;
      fprintf(stderr, "error: %s%s", str, complete ? "" : "\n");
    }
  }

  if (errors || ferror(stdin))
    return 1;

  utr11_dump(plane);

  /* Do not report success if the tables could not be written completely */
  if (fflush(stdout) != 0 || ferror(stdout))
    return 1;
  return 0;
}