diff options
Diffstat (limited to 'pango2/pango-emoji.c')
-rw-r--r-- | pango2/pango-emoji.c | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/pango2/pango-emoji.c b/pango2/pango-emoji.c new file mode 100644 index 00000000..aa82e9ec --- /dev/null +++ b/pango2/pango-emoji.c @@ -0,0 +1,290 @@ +/* Pango2 + * pango-emoji.c: Emoji handling + * + * Copyright (C) 2017 Google, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Implementation of pango2_emoji_iter is based on Chromium's Ragel-based + * parser: + * + * https://chromium-review.googlesource.com/c/chromium/src/+/1264577 + * + * The grammar file emoji_presentation_scanner.rl was just modified to + * adapt the function signature and variables to our usecase. The + * grammar itself was NOT modified: + * + * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/emoji_presentation_scanner.rl + * + * The emoji_presentation_scanner.c is generated from .rl file by + * running ragel on it. + * + * The categorization is also based on: + * + * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/utf16_ragel_iterator.h + * + * The iterator next() is based on: + * + * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/symbols_iterator.cc + * + * // Copyright 2015 The Chromium Authors. All rights reserved. + * // Use of this source code is governed by a BSD-style license that can be + * // found in the LICENSE file. + */ + +#include "config.h" +#include <stdlib.h> +#include <string.h> + +#include "pango-emoji-private.h" +#include "pango-emoji-table.h" + +static inline gboolean +bsearch_interval (gunichar c, + const struct Interval table[], + guint n) +{ + guint lower = 0; + guint upper = n - 1; + + while (lower <= upper) + { + int mid = (lower + upper) / 2; + + if (c < table[mid].start) + upper = mid - 1; + else if (c > table[mid].end) + lower = mid + 1; + else + return TRUE; + } + + return FALSE; +} + +#define DEFINE_pango2_Is_(name) \ +static inline gboolean \ +_pango2_Is_##name (gunichar ch) \ +{ \ + return ch >= _pango2_##name##_table[0].start && \ + bsearch_interval (ch, \ + _pango2_##name##_table, \ + G_N_ELEMENTS (_pango2_##name##_table)); \ +} + +DEFINE_pango2_Is_(Emoji) +DEFINE_pango2_Is_(Emoji_Presentation) +DEFINE_pango2_Is_(Emoji_Modifier) +DEFINE_pango2_Is_(Emoji_Modifier_Base) +DEFINE_pango2_Is_(Extended_Pictographic) + +gboolean +_pango2_Is_Emoji_Base_Character (gunichar ch) +{ + return _pango2_Is_Emoji (ch); +} + +gboolean +_pango2_Is_Emoji_Extended_Pictographic (gunichar ch) +{ + return _pango2_Is_Extended_Pictographic (ch); +} + +static inline gboolean +_pango2_Is_Emoji_Emoji_Default (gunichar ch) +{ + return _pango2_Is_Emoji_Presentation (ch); +} + +static inline gboolean +_pango2_Is_Emoji_Keycap_Base (gunichar ch) +{ + return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*'; +} + +static inline gboolean +_pango2_Is_Regional_Indicator (gunichar ch) +{ + return (ch >= 0x1F1E6 && ch <= 0x1F1FF); +} + + +#define kCombiningEnclosingCircleBackslashCharacter 0x20E0 +#define kCombiningEnclosingKeycapCharacter 0x20E3 +#define kVariationSelector15Character 0xFE0E +#define kVariationSelector16Character 0xFE0F +#define kZeroWidthJoinerCharacter 0x200D + +enum Pango2EmojiScannerCategory { + EMOJI = 0, + EMOJI_TEXT_PRESENTATION = 1, + EMOJI_EMOJI_PRESENTATION = 2, + EMOJI_MODIFIER_BASE = 3, + EMOJI_MODIFIER = 4, + EMOJI_VS_BASE = 5, + REGIONAL_INDICATOR = 6, + KEYCAP_BASE = 7, + COMBINING_ENCLOSING_KEYCAP = 8, + COMBINING_ENCLOSING_CIRCLE_BACKSLASH = 9, + ZWJ = 10, + VS15 = 11, + VS16 = 12, + TAG_BASE = 13, + TAG_SEQUENCE = 14, + TAG_TERM = 15, + kMaxEmojiScannerCategory = 16 +}; + +static inline unsigned char +_pango2_EmojiSegmentationCategory (gunichar codepoint) +{ + /* Specific ones first. */ + if (('a' <= codepoint && codepoint <= 'z') || + ('A' <= codepoint && codepoint <= 'Z') || + codepoint == ' ') + return kMaxEmojiScannerCategory; + + if ('0' <= codepoint && codepoint <= '9') + return KEYCAP_BASE; + + switch (codepoint) + { + case kCombiningEnclosingKeycapCharacter: + return COMBINING_ENCLOSING_KEYCAP; + case kCombiningEnclosingCircleBackslashCharacter: + return COMBINING_ENCLOSING_CIRCLE_BACKSLASH; + case kZeroWidthJoinerCharacter: + return ZWJ; + case kVariationSelector15Character: + return VS15; + case kVariationSelector16Character: + return VS16; + case 0x1F3F4: + return TAG_BASE; + case 0xE007F: + return TAG_TERM; + default: ; + } + + if ((0xE0030 <= codepoint && codepoint <= 0xE0039) || + (0xE0061 <= codepoint && codepoint <= 0xE007A)) + return TAG_SEQUENCE; + + if (_pango2_Is_Emoji_Modifier_Base (codepoint)) + return EMOJI_MODIFIER_BASE; + if (_pango2_Is_Emoji_Modifier (codepoint)) + return EMOJI_MODIFIER; + if (_pango2_Is_Regional_Indicator (codepoint)) + return REGIONAL_INDICATOR; + if (_pango2_Is_Emoji_Keycap_Base (codepoint)) + return KEYCAP_BASE; + if (_pango2_Is_Emoji_Emoji_Default (codepoint)) + return EMOJI_EMOJI_PRESENTATION; + if (_pango2_Is_Emoji (codepoint)) + return EMOJI_TEXT_PRESENTATION; + + /* Ragel state machine will interpret unknown category as "any". */ + return kMaxEmojiScannerCategory; +} + + +typedef gboolean bool; +enum { false = FALSE, true = TRUE }; +typedef unsigned char *emoji_text_iter_t; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-default" +#include "emoji_presentation_scanner.c" +#pragma GCC diagnostic pop + + +Pango2EmojiIter * +_pango2_emoji_iter_init (Pango2EmojiIter *iter, + const char *text, + int length) +{ + unsigned int n_chars = g_utf8_strlen (text, length); + unsigned char *types = g_malloc (n_chars); + unsigned int i; + const char *p; + + p = text; + for (i = 0; i < n_chars; i++) + { + types[i] = _pango2_EmojiSegmentationCategory (g_utf8_get_char (p)); + p = g_utf8_next_char (p); + } + + iter->text_start = iter->start = iter->end = text; + if (length >= 0) + iter->text_end = text + length; + else + iter->text_end = text + strlen (text); + iter->is_emoji = FALSE; + + iter->types = types; + iter->n_chars = n_chars; + iter->cursor = 0; + + _pango2_emoji_iter_next (iter); + + return iter; +} + +void +_pango2_emoji_iter_fini (Pango2EmojiIter *iter) +{ + g_free (iter->types); +} + +gboolean +_pango2_emoji_iter_next (Pango2EmojiIter *iter) +{ + unsigned int old_cursor, cursor; + gboolean is_emoji; + + if (iter->end >= iter->text_end) + return FALSE; + + iter->start = iter->end; + + old_cursor = cursor = iter->cursor; + cursor = scan_emoji_presentation (iter->types + cursor, + iter->types + iter->n_chars, + &is_emoji) - iter->types; + do + { + iter->cursor = cursor; + iter->is_emoji = is_emoji; + + if (cursor == iter->n_chars) + break; + + cursor = scan_emoji_presentation (iter->types + cursor, + iter->types + iter->n_chars, + &is_emoji) - iter->types; + } + while (iter->is_emoji == is_emoji); + + iter->end = g_utf8_offset_to_pointer (iter->start, iter->cursor - old_cursor); + + return TRUE; +} + + +/********************************************************** + * End of code from Chromium + **********************************************************/ |