blob: d9c269195e9a8ec53349334cc4adbfbb7be862a1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
%%{
# Name the state machine; the grammar section that follows in this file
# contributes its definitions to this machine.
machine emoji_presentation;
# Each input symbol is an unsigned char.
alphtype unsigned char;
# Emit the static tables for the machine. The noerror, nofinal and noentry
# options suppress the generated constants for the error state, the first
# final state and the entry points.
write data noerror nofinal noentry;
}%%
%%{
# Alphabet of the machine: each input symbol is one of the category codes
# below rather than a raw code point.
# NOTE(review): the mapping from code points to these categories is not
# visible in this file; presumably the caller supplies it - confirm.
EMOJI = 0;
EMOJI_TEXT_PRESENTATION = 1;
EMOJI_EMOJI_PRESENTATION = 2;
EMOJI_MODIFIER_BASE = 3;
EMOJI_MODIFIER = 4;
EMOJI_VS_BASE = 5;
REGIONAL_INDICATOR = 6;
KEYCAP_BASE = 7;
COMBINING_ENCLOSING_KEYCAP = 8;
COMBINING_ENCLOSING_CIRCLE_BACKSLASH = 9;
ZWJ = 10;
VS15 = 11;
VS16 = 12;
TAG_BASE = 13;
TAG_SEQUENCE = 14;
TAG_TERM = 15;
# Any single symbol that can act as the base of an emoji sequence.
any_emoji = EMOJI_TEXT_PRESENTATION | EMOJI_EMOJI_PRESENTATION | KEYCAP_BASE |
EMOJI_MODIFIER_BASE | TAG_BASE | EMOJI;
# An emoji base followed by COMBINING_ENCLOSING_CIRCLE_BACKSLASH.
emoji_combining_enclosing_circle_backslash_sequence = any_emoji
COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
# This could be sharper than any_emoji by restricting this only to valid
# variation sequences:
# https://www.unicode.org/Public/emoji/11.0/emoji-variation-sequences.txt
# However, implementing
# https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence is
# sufficient for our purposes here.
emoji_presentation_sequence = any_emoji VS16;
# A modifier base immediately followed by a modifier.
emoji_modifier_sequence = EMOJI_MODIFIER_BASE EMOJI_MODIFIER;
# Exactly two regional indicator symbols in a row.
emoji_flag_sequence = REGIONAL_INDICATOR REGIONAL_INDICATOR;
# Here we only allow the valid tag sequences
# https://www.unicode.org/reports/tr51/#valid-emoji-tag-sequences, instead of
# all well-formed ones defined in
# https://www.unicode.org/reports/tr51/#def_emoji_tag_sequence
emoji_tag_sequence = TAG_BASE TAG_SEQUENCE+ TAG_TERM;
# The VS16 between the keycap base and the enclosing keycap is mandatory here.
emoji_keycap_sequence = KEYCAP_BASE VS16 COMBINING_ENCLOSING_KEYCAP;
# One element of a ZWJ sequence, and two or more such elements joined by ZWJ.
emoji_zwj_element = emoji_presentation_sequence | emoji_modifier_sequence | any_emoji;
emoji_zwj_sequence = emoji_zwj_element ( ZWJ emoji_zwj_element )+;
# Everything reported as emoji: three single-symbol categories plus every
# sequence form defined above. EMOJI, EMOJI_TEXT_PRESENTATION and KEYCAP_BASE
# are not listed as standalone alternatives, so on their own they only match
# text_run below.
emoji_presentation = EMOJI_EMOJI_PRESENTATION | TAG_BASE | EMOJI_MODIFIER_BASE |
emoji_presentation_sequence | emoji_modifier_sequence | emoji_flag_sequence |
emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
emoji_combining_enclosing_circle_backslash_sequence;
emoji_run = emoji_presentation;
# An emoji base followed by VS15 is reported as text.
text_presentation_emoji = any_emoji VS15;
# Fallback: any single input symbol.
text_run = any;
# Scanner definition. Each action stores the classification of the matched
# token in *is_emoji and returns te, the end of that token, so one token is
# consumed per call of the enclosing function.
text_and_emoji_run := |*
# In order to give the VS15 sequences higher priority than detecting
# emoji sequences they are listed first as scanner token here.
text_presentation_emoji => { *is_emoji = false; return te; };
emoji_run => { *is_emoji = true; return te; };
text_run => { *is_emoji = false; return te; };
*|;
}%%
/*
 * Scans one token starting at p and classifies it as emoji or text.
 *
 * p        - start of the input to scan.
 * pe       - end of the input; also used as the end-of-file marker.
 * is_emoji - output; written by the scanner action that matches (true for an
 *            emoji run, false for a text-presentation sequence or plain text).
 *
 * Returns the end of the matched token (te) from inside the scanner actions.
 * If no action fires, *is_emoji is set to false and pe is returned.
 *
 * NOTE(review): emoji_text_iter_t is declared outside this file; presumably
 * it iterates over the category codes used by the grammar above - confirm.
 */
static emoji_text_iter_t
scan_emoji_presentation (emoji_text_iter_t p,
const emoji_text_iter_t pe,
bool* is_emoji)
{
/* ts/te: token start/end maintained by the generated scanner code. */
emoji_text_iter_t ts, te;
/* The supplied range is the whole input, so end-of-file equals pe. */
const emoji_text_iter_t eof = pe;
/* act and cs are state variables required by the generated scanner code. */
unsigned act;
int cs;
%%{
write init;
write exec;
}%%
/* Should not be reached. */
/* NOTE(review): with an empty range (p == pe) it looks like no scanner
 * action would run and control would arrive here - confirm that callers
 * never pass an empty range. */
*is_emoji = false;
return pe;
}
|