blob: 32810dfa80b5e7ae4fd368019720237ed29ea67f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
// -*- C++ -*-
/* Copyright (C) 2002
Free Software Foundation, Inc.
Written by Werner Lemberg <wl@gnu.org>
This file is part of groff.
groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License along
with groff; see the file COPYING. If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
#include "lib.h"
#include "cset.h"
#include "stringclass.h"
#include "unicode.h"
const char *check_unicode_name(const char *u)
{
if (*u != 'u')
return 0;
const char *p = ++u;
for (;;) {
int val = 0;
const char *start = p;
for (;;) {
// only uppercase hex digits allowed
if (!csxdigit(*p))
return 0;
if (csdigit(*p))
val = val*0x10 + (*p-'0');
else if (csupper(*p))
val = val*0x10 + (*p-'A'+10);
else
return 0;
// biggest Unicode value is U+10FFFF
if (val > 0x10FFFF)
return 0;
p++;
if (*p == '\0' || *p == '_')
break;
}
// surrogates not allowed
if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
return 0;
if (val > 0xFFFF) {
if (*start == '0') // no leading zeros allowed if > 0xFFFF
return 0;
}
else if (p - start != 4) // otherwise, check for exactly 4 hex digits
return 0;
if (*p == '\0')
break;
p++;
}
return u;
}
|