summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthias Clasen <mclasen@redhat.com> 2021-11-21 16:17:37 -0500
committer: Matthias Clasen <mclasen@redhat.com> 2021-11-22 23:32:35 -0500
commit: 6655ceabff26f7830335833ace8e59e78251c296 (patch)
tree: 0355dcc3c767e4ad1de1a34de19c266cf844f7d8
parent: 9d114095bd4c4fb5ab64fff0ed5c1f6680ed2609 (diff)
download: pango-6655ceabff26f7830335833ace8e59e78251c296.tar.gz
break: Fix hyphen condition
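When looking at scripts, we want to look at the script of the *previous* character, since that is the character a break would follow. And then we need to exclude SHY (the soft hyphen, U+00AD) from the common script: it is the one Common-script character after which a break should still insert a hyphen.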
-rw-r--r-- pango/break.c 9
-rw-r--r-- tests/breaks/eight.expected 2
-rw-r--r-- tests/breaks/eleven.expected 14
-rw-r--r-- tests/breaks/fifteen.expected 4
-rw-r--r-- tests/breaks/one.expected 4
-rw-r--r-- tests/breaks/seventeen.expected 2
-rw-r--r-- tests/breaks/sixteen.expected 4
-rw-r--r-- tests/breaks/thirteen.expected 4
-rw-r--r-- tests/breaks/two.break 2
-rw-r--r-- tests/breaks/two.expected 14
10 files changed, 32 insertions, 27 deletions
diff --git a/pango/break.c b/pango/break.c
index 3af083ce..5622ca21 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -170,6 +170,8 @@ default_break (const char *text,
GUnicodeBreakType prev_break_type;
GUnicodeBreakType prev_prev_break_type;
+ PangoScript prev_script;
+
/* See Grapheme_Cluster_Break Property Values table of UAX#29 */
typedef enum
{
@@ -262,6 +264,7 @@ default_break (const char *text,
prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
+ prev_script = PANGO_SCRIPT_COMMON;
prev_jamo = NO_JAMO;
prev_space_or_hyphen = FALSE;
@@ -539,7 +542,6 @@ default_break (const char *text,
}
script = (PangoScript)g_unichar_get_script (wc);
-
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
@@ -1571,9 +1573,11 @@ default_break (const char *text,
attrs[i].break_inserts_hyphen = FALSE;
attrs[i].break_removes_preceding = FALSE;
- switch ((int)script)
+ switch ((int)prev_script)
{
case PANGO_SCRIPT_COMMON:
+ insert_hyphens = prev_wc == 0x00ad;
+ break;
case PANGO_SCRIPT_HAN:
case PANGO_SCRIPT_HANGUL:
case PANGO_SCRIPT_HIRAGANA:
@@ -1634,6 +1638,7 @@ default_break (const char *text,
}
prev_wc = wc;
+ prev_script = script;
/* wc might not be a valid Unicode base character, but really all we
* need to know is the last non-combining character */
diff --git a/tests/breaks/eight.expected b/tests/breaks/eight.expected
index 39794d22..d71bb02f 100644
--- a/tests/breaks/eight.expected
+++ b/tests/breaks/eight.expected
@@ -4,4 +4,4 @@ Whitespace: x x x x
Sentences: bs e b
Words: bs be bs e s be bs be b bs be bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i i
+Hyphens: i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected
index 8df89869..b4a33897 100644
--- a/tests/breaks/eleven.expected
+++ b/tests/breaks/eleven.expected
@@ -1,7 +1,7 @@
-Text: ⁦❤⁩ ⁦️⁩ ⁦︎⁩ ⁦︎⁩ ⁦👨⁩ [0x200d]⁦🦰⁩ ⁦👨⁩⁦🏿⁩ [0x200d]⁦🦱⁩ ⁦0⁩ ⁦️⁩ ⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩ ⁦️⁩ [0x0a]
-Breaks: c lc lc lc lc lc c lc
-Whitespace: w w
-Sentences: bs e b
-Words: b b b bs be b b b
-Graphemes: b b b b b b b b
-Hyphens: i i i i i i i i
+Text: ⁦❤⁩⁦️⁩ ⁦︎⁩ ⁦︎⁩ ⁦👨⁩[0x200d] ⁦🦰⁩ ⁦👨⁩⁦🏿⁩[0x200d] ⁦🦱⁩ ⁦0⁩⁦️⁩ ⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩⁦️⁩ [0x0a]
+Breaks: c lc lc lc lc lc c Lc
+Whitespace: w w
+Sentences: bs e b
+Words: b b b bs be b b b
+Graphemes: b b b b b b b b
+Hyphens: i i i i i
diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected
index 93b37c39..3521a70b 100644
--- a/tests/breaks/fifteen.expected
+++ b/tests/breaks/fifteen.expected
@@ -1,7 +1,7 @@
Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ ⁦-⁩ ⁦t⁩ ⁦h⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦o⁩ [0xad] ⁦u⁩ ⁦r⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c c c lc c c lc c c lc
+Breaks: c c c c lc c c c lc c c c c c lc c c lc c c Lc
Whitespace: x x w w
Sentences: bs e b
Words: bs be bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i
+Hyphens: i i i i i i i i i i
diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected
index 44fee3ef..6c811256 100644
--- a/tests/breaks/one.expected
+++ b/tests/breaks/one.expected
@@ -1,7 +1,7 @@
Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ ⁦/⁩ ⁦d⁩ ⁦e⁩ ⁦f⁩ [ ] ⁦g⁩ ⁦h⁩ ⁦i⁩ [0xad] ⁦j⁩ ⁦k⁩ ⁦l⁩ ⁦.⁩ [ ] ⁦B⁩ ⁦l⁩ ⁦a⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c lc
+Breaks: c c c c lc c c c lc c c c lc c c c c lc c c c Lc
Whitespace: x x w w
Sentences: bs e bs e b
Words: bs be bs be bs be b bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i
+Hyphens: i i i i i i i i i i i i
diff --git a/tests/breaks/seventeen.expected b/tests/breaks/seventeen.expected
index 8f5f2749..35fb5120 100644
--- a/tests/breaks/seventeen.expected
+++ b/tests/breaks/seventeen.expected
@@ -4,4 +4,4 @@ Whitespace: x x x w
Sentences: bs e bs e bs e b
Words: bs be bs be bs be bs e s be bs be bs be bs be bs e s be bs be bs be bs be bs e s e s be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
+Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected
index 0fd06fb2..2868b8e4 100644
--- a/tests/breaks/sixteen.expected
+++ b/tests/breaks/sixteen.expected
@@ -1,7 +1,7 @@
Text: ⁦h⁩ ⁦y⁩ ⁦‧⁩ ⁦p⁩ ⁦h⁩ ⁦e⁩ ⁦n⁩ ⁦|⁩ ⁦a⁩ ⁦t⁩ ⁦i⁩ ⁦o⁩ ⁦n⁩ [ ] ⁦o⁩ ⁦v⁩ ⁦e⁩ ⁦r⁩ [0xad] ⁦l⁩ ⁦o⁩ ⁦a⁩ ⁦d⁩ [0x0a]
-Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c lc
+Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c Lc
Whitespace: x w w
Sentences: bs e b
Words: bs e s be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
-Hyphens: i ri i i i i i i i i i i i i i i
+Hyphens: i ri i i i i i i i i i i i i i i i
diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected
index e6b1787d..25a38150 100644
--- a/tests/breaks/thirteen.expected
+++ b/tests/breaks/thirteen.expected
@@ -1,7 +1,7 @@
Text: ⁦a⁩ [ ] ⁦a⁩ ⁦b⁩ [0x200b] ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ] ⁦d⁩ ⁦e⁩ [0xad] ⁦f⁩ ⁦g⁩ [ ] ⁦b⁩ [0x0a]
-Breaks: c c lc c c lc c c c c lc c c lc c c lc c lc
+Breaks: c c lc c c lc c c c c lc c c lc c c lc c Lc
Whitespace: x x x x w w
Sentences: bs e b
Words: bs be bs be bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b
-Hyphens: i i i i i i
+Hyphens: i i i i i i i i
diff --git a/tests/breaks/two.break b/tests/breaks/two.break
index 6ff0a36e..53c39c5c 100644
--- a/tests/breaks/two.break
+++ b/tests/breaks/two.break
@@ -1,3 +1,3 @@
# Example from https://gitlab.gnome.org/GNOME/pango/issues/218
# This shows difference between word start/end and boundary
-goril·les
+goril‧les
diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected
index 2921d224..58d15186 100644
--- a/tests/breaks/two.expected
+++ b/tests/breaks/two.expected
@@ -1,7 +1,7 @@
-Text: ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦e⁩ ⁦s⁩ [0x0a]
-Breaks: c c c c c c c c c c lc
-Whitespace: w w
-Sentences: bs e b
-Words: bs e s be b
-Graphemes: b b b b b b b b b b b
-Hyphens: i i i i i i i
+Text: ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦‧⁩ ⁦l⁩ ⁦e⁩ ⁦s⁩ [0x0a]
+Breaks: c c c c c c lc c c c Lc
+Whitespace: w w
+Sentences: bs e b
+Words: bs e s be b
+Graphemes: b b b b b b b b b b b
+Hyphens: i i i i ri i i