summaryrefslogtreecommitdiff
path: root/src/raptor_nfc.h
blob: 295544a8ec6cc5bf153be43200dbe4ed0f4394b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_nfc.h - Raptor Unicode NFC headers
 *
 * $Id$
 *
 * Copyright (C) 2004-2004, David Beckett http://purl.org/net/dajobe/
 * Institute for Learning and Research Technology http://www.ilrt.bristol.ac.uk/
 * University of Bristol, UK http://www.bristol.ac.uk/
 * 
 * This package is Free Software and part of Redland http://librdf.org/
 * 
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 * 
 * 
 */


#include <sys/types.h>

/* u32 - unsigned integer type whose configured size is 4.
 *
 * The SIZEOF_UNSIGNED_* macros are not defined in this header and must
 * be supplied before it is included (presumably autoconf-style sizeof
 * results from the build configuration - confirm).  The first candidate
 * type whose size macro equals 4 is chosen; if none matches, compilation
 * stops with an #error.
 *
 * NOTE(review): inside #if, identifiers that are not macros evaluate
 * to 0, so this guard also holds when neither u32 nor MISSING is a
 * macro.  Presumably configure defines u32 as MISSING when the platform
 * lacks the type - confirm against the build scripts.
 */
#if u32 == MISSING
#  undef u32
#  if SIZEOF_UNSIGNED_INT == 4
typedef unsigned int u32;
#  elif SIZEOF_UNSIGNED_LONG == 4
typedef unsigned long u32;
#  else
#    error u32 type not defined
#  endif
#endif /* u32 == MISSING */


/* u16 - unsigned integer type whose configured size is 2.
 *
 * The SIZEOF_UNSIGNED_* macros are not defined in this header and must
 * be supplied before it is included (presumably autoconf-style sizeof
 * results from the build configuration - confirm).
 *
 * Each branch typedefs the very type whose size it has just tested:
 * unsigned short when SIZEOF_UNSIGNED_SHORT is 2, otherwise unsigned int
 * when SIZEOF_UNSIGNED_INT is 2.  If neither matches, compilation stops
 * with an #error.
 *
 * NOTE(review): inside #if, identifiers that are not macros evaluate
 * to 0, so this guard also holds when neither u16 nor MISSING is a
 * macro.  Presumably configure defines u16 as MISSING when the platform
 * lacks the type - confirm against the build scripts.
 */
#if u16 == MISSING
  #undef u16
  #if SIZEOF_UNSIGNED_SHORT == 2
    typedef unsigned short u16;
  #elif SIZEOF_UNSIGNED_INT == 2
    typedef unsigned int u16;
  #else
    #error u16 type not defined
  #endif
#endif


/* u8 - unsigned integer type whose configured size is 1.
 *
 * SIZEOF_UNSIGNED_CHAR is not defined in this header and must be
 * supplied before it is included (presumably an autoconf-style sizeof
 * result from the build configuration - confirm).  unsigned char is the
 * only candidate; if its size macro is not 1, compilation stops with an
 * #error.
 *
 * NOTE(review): inside #if, identifiers that are not macros evaluate
 * to 0, so this guard also holds when neither u8 nor MISSING is a
 * macro.  Presumably configure defines u8 as MISSING when the platform
 * lacks the type - confirm against the build scripts.
 */
#if u8 == MISSING
#  undef u8
#  if SIZEOF_UNSIGNED_CHAR == 1
typedef unsigned char u8;
#  else
#    error u8 type not defined
#  endif
#endif /* u8 == MISSING */

/*
 * Definitions for Unicode NFC data tables
 *
 * See Unicode Normalization http://unicode.org/unicode/reports/tr15/
 * for the definition of Unicode Normal Form C (NFC)
 */


/* Unicode combining classes
 *
 * One entry of the combining-class table.  The combining class comes
 * from the 4th field of UnicodeData.txt; most characters have class 0
 * (nothing special), so only the non-0 entries are stored, as a sparse
 * sequence of (key, class) pairs ordered by key.
 *
 * Both members are bit-fields of unsigned int, 24 + 8 = 32 bits in all.
 */
typedef struct {
  unsigned int key:24;             /* the character code; 24 bits cover
                                    * every value up to U+10FFFF */
  unsigned int combining_class:8;  /* the combining class, 0..255; 8 bits
                                    * is enough, ~50-60 values are used */
} raptor_nfc_key_class;


/* Unicode combining characters
 *
 * One entry of the table of valid combinations: a pair of characters
 * (base, follow) that must appear in that order.  Both codes are in the
 * range 0..0xFFFF inclusive.
 *
 * The table is a sparse sequence of these pairs; the same 'base' may
 * occur in several entries, once for each valid 'follow'.
 */
typedef struct {
  u16 base;    /* first character of the pair */
  u16 follow;  /* character that may follow 'base' */
} raptor_nfc_base_follow;


/*
 * Flags for codes U+0 to U+108FF, U+1D000 to U+1D7FF
 *
 * Each code in those ranges is classified as exactly one of the values
 * below.  The numeric values are written out; they are the same 0..12
 * sequence the compiler would assign implicitly in declaration order.
 *
 * NOTE(review): presumably these are the values stored in the
 * raptor_nfc_flags table - confirm before renumbering anything.
 */
typedef enum {
  HIGH =  0,  /* U+D800 to U+DBFF High Surrogates */
  loww =  1,  /* U+DC00 to U+DFFF Low Surrogates */
  NoNo =  2,  /* code that does not exist */
  NOFC =  3,  /* forbidden or excluded in NFC */
  ReCo =  4,  /* class > 0 recombining */
  NoRe =  5,  /* class > 0 not recombining */
  COM0 =  6,  /* class 0 and composing */
  Hang =  7,  /* U+1100 to U+1112 - Hangul Jamo (Korean) initial consonants */
  hAng =  8,  /* U+1161 to U+1175 - Hangul Jamo (Korean) medial vowels */
  haNG =  9,  /* U+11A8 to U+11C2 - Hangul Jamo (Korean) trailing consonants */
  HAng = 10,  /* U+AC00 to U+D7A3 (except for every 28) - Hangul syllables */
  Base = 11,  /* base that combines */
  simp = 12   /* class 0 nothing special */
} raptor_nfc_code_flag;


/* Number of entries in each of the NFC data tables declared below */
#define RAPTOR_NFC_CLASSES_COUNT 352
#define RAPTOR_NFC_RECOMBINERS_COUNT 2177
#define RAPTOR_NFC_CODE_FLAGS_COUNT 34944

/* The tables are only declared here; their definitions live elsewhere. */

/* (key, combining class) entries */
extern raptor_nfc_key_class raptor_nfc_classes[RAPTOR_NFC_CLASSES_COUNT];

/* (base, follow) entries */
extern raptor_nfc_base_follow raptor_nfc_recombiners[RAPTOR_NFC_RECOMBINERS_COUNT];

/* Code flags.
 * NOTE(review): 34944 bytes is half the number of codes in the two
 * flagged ranges (0x10900 + 0x800 = 69888), so each byte presumably
 * packs two 4-bit raptor_nfc_code_flag values - confirm against the
 * code that reads this table.
 */
extern u8 raptor_nfc_flags[RAPTOR_NFC_CODE_FLAGS_COUNT];