summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKristian Rietveld <kris@loopnest.org>2015-09-05 22:52:48 +0200
committerKristian Rietveld <kris@loopnest.org>2016-04-24 11:47:17 +0200
commit783544ddfc23225f894a26eb31fcc4bc3990ce22 (patch)
tree392b2116ae80d3836ecbe89b86b36329961e3202
parent626e3c4a4a5dffecb5b6cb9227843db50701f594 (diff)
downloadpango-783544ddfc23225f894a26eb31fcc4bc3990ce22.tar.gz
coretext: properly handle UTF32 characters in CFStrings
-rw-r--r--pango/pangocoretext-shape.c72
1 files changed, 63 insertions, 9 deletions
diff --git a/pango/pangocoretext-shape.c b/pango/pangocoretext-shape.c
index 60f6b166..1f46d8a0 100644
--- a/pango/pangocoretext-shape.c
+++ b/pango/pangocoretext-shape.c
@@ -67,6 +67,8 @@ struct RunIterator
CFIndex total_ct_i;
CFIndex ct_i;
+ CFIndex *chr_idx_lut;
+
int current_run_number;
CTRunRef current_run;
CFIndex *current_indices;
@@ -130,6 +132,35 @@ run_iterator_get_glyph_count (struct RunIterator *iter)
return accumulator;
}
+/* This function generates a lookup table that maps the string indices of
+ * glyphs to actual Unicode character indices. It also accounts for Unicode
+ * characters that are encoded as two UTF-16 code units (a surrogate pair) in
+ * CFStrings. We use the Unicode character index to match up with the Unicode
+ * characters in the UTF-8 string provided by Pango.
+ */
+static CFIndex *
+run_iterator_get_chr_idx_lut (CFStringRef cstr)
+{
+ CFIndex cstr_length = CFStringGetLength (cstr);
+ CFIndex *chr_idx_lut = malloc (sizeof (CFIndex) * cstr_length);
+ CFIndex i;
+ CFIndex current_value = 0;
+
+ for (i = 0; i < cstr_length; i++)
+ {
+ chr_idx_lut[i] = current_value;
+
+ if (CFStringIsSurrogateHighCharacter (CFStringGetCharacterAtIndex (cstr, i)) &&
+ i + 1 < cstr_length &&
+ CFStringIsSurrogateLowCharacter (CFStringGetCharacterAtIndex (cstr, i + 1)))
+ continue;
+
+ current_value++;
+ }
+
+ return chr_idx_lut;
+}
+
/* These functions are commented out to silence the compiler, but
* kept around because they might be of use when fixing the more
* intricate issues noted in the comment in the function
@@ -157,7 +188,18 @@ run_iterator_run_is_non_monotonic (struct RunIterator *iter)
static gunichar
run_iterator_get_character (struct RunIterator *iter)
{
- return CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
+ UniChar ch = CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
+
+ if (CFStringIsSurrogateHighCharacter (ch) &&
+ iter->current_indices[iter->ct_i] + 1 < CFStringGetLength (iter->cstr))
+ {
+ UniChar ch2 = CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]+1);
+
+ if (CFStringIsSurrogateLowCharacter (ch2))
+ return CFStringGetLongCharacterForSurrogatePair (ch, ch2);
+ }
+
+ return ch;
}
static CGGlyph
@@ -169,7 +211,7 @@ run_iterator_get_cgglyph (struct RunIterator *iter)
static CFIndex
run_iterator_get_index (struct RunIterator *iter)
{
- return iter->current_indices[iter->ct_i];
+ return iter->chr_idx_lut[iter->current_indices[iter->ct_i]];
}
static gboolean
@@ -194,6 +236,7 @@ run_iterator_create (struct RunIterator *iter,
iter->current_run_number = -1;
iter->current_run = NULL;
iter->current_indices = NULL;
+ iter->chr_idx_lut = NULL;
iter->current_cgglyphs = NULL;
iter->current_cgglyphs_buffer = NULL;
@@ -228,6 +271,8 @@ run_iterator_create (struct RunIterator *iter,
CFRelease (attstr);
CFRelease (attributes);
+ iter->chr_idx_lut = run_iterator_get_chr_idx_lut (iter->cstr);
+
iter->total_ct_i = 0;
iter->glyph_count = run_iterator_get_glyph_count (iter);
@@ -248,6 +293,8 @@ run_iterator_free (struct RunIterator *iter)
{
run_iterator_free_current_run (iter);
+ free (iter->chr_idx_lut);
+
CFRelease (iter->line);
CFRelease (iter->cstr);
}
@@ -383,11 +430,6 @@ _pango_core_text_shape (PangoFont *font,
* increasing/decreasing.
*
* FIXME items for future fixing:
- * # CoreText strings are UTF16, and the indices *often* refer to characters,
- * but not *always*. Notable exception is when a character is encoded using
- * two UTF16 code points. This are two characters in a CFString. At this point
- * advancing a single character in the CFString and advancing a single character
- * using g_utf8_next_char in the const char string goes out of sync.
* # We currently don't bother about LTR, Pango core appears to fix this up for us.
* (Even when we cared warnings were generated that strings were in the wrong
* order, this should be investigated).
@@ -400,7 +442,7 @@ _pango_core_text_shape (PangoFont *font,
if (!glyph_list)
return;
- /* Translate the glyph list to a PangoGlyphString */
+ /* Set up for translation of the glyph list to a PangoGlyphString. */
n_chars = pango_utf8_strlen (text, length);
pango_glyph_string_set_size (glyphs, n_chars);
@@ -409,6 +451,10 @@ _pango_core_text_shape (PangoFont *font,
coverage = pango_font_get_coverage (PANGO_FONT (cfont),
analysis->language);
+ /* gs_i is the index into the Pango glyph string. gi is the iterator into
+ * the (CoreText) glyph list, gi->index is the index into the CFString.
+ * In matching, we want gs_i and gi->index to match up.
+ */
for (gs_prev_i = -1, gs_i = 0, p = text; gs_i < n_chars;
gs_prev_i = gs_i, gs_i++, p = g_utf8_next_char (p))
{
@@ -416,12 +462,20 @@ _pango_core_text_shape (PangoFont *font,
if (gi == NULL || gi->index > gs_i)
{
- /* gs_i is behind, insert empty glyph */
+ /* The glyph string is behind, insert an empty glyph to catch
+ * up with the CoreText glyph list. This occurs for instance when
+ * CoreText inserts a ligature that covers two characters.
+ */
set_glyph (font, glyphs, gs_i, p - text, PANGO_GLYPH_EMPTY);
continue;
}
else if (gi->index < gs_i)
{
+ /* The CoreText glyph list is behind, fast forward the iterator
+ * to catch up. This can happen when CoreText emits two glyphs
+ * for one character, which is (as noted in the FIXME above)
+ * not handled by us yet.
+ */
while (gi && gi->index < gs_i)
{
glyph_iter = g_slist_next (glyph_iter);