summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDov Grobgeld <dov.grobgeld@gmail.com>2016-01-25 21:31:13 +0200
committerDov Grobgeld <dov.grobgeld@gmail.com>2016-01-25 21:31:13 +0200
commit6ad87e038329711983b257d5fb44e20c4eb870d5 (patch)
treefb2808068496799508b385660a65729d01e75a0c
parent881b8d891cc61989ab8811b74d0e721f72bf913b (diff)
downloadfribidi-6ad87e038329711983b257d5fb44e20c4eb870d5.tar.gz
Initial unicode63 support.
-rw-r--r--README14
-rw-r--r--bin/fribidi-main.c2
-rw-r--r--charset/fribidi-char-sets-cap-rtl.c36
-rw-r--r--charset/fribidi-char-sets.c2
-rw-r--r--doc/Makefile.am3
-rw-r--r--gen.tab/gen-joining-type-tab.c4
-rw-r--r--gen.tab/unidata/ArabicShaping.txt227
-rw-r--r--gen.tab/unidata/BidiMirroring.txt14
-rw-r--r--gen.tab/unidata/ReadMe.txt8
-rw-r--r--gen.tab/unidata/UnicodeData.txt21
-rw-r--r--lib/common.h2
-rw-r--r--lib/debug.h4
-rw-r--r--lib/fribidi-bidi-types-list.h4
-rw-r--r--lib/fribidi-bidi-types.h29
-rw-r--r--lib/fribidi-bidi.c301
-rw-r--r--lib/fribidi-common.h1
-rw-r--r--lib/fribidi-deprecated.c1
-rw-r--r--lib/fribidi-run.c6
-rw-r--r--lib/fribidi-unicode.h10
-rw-r--r--test/Makefile.am4
-rw-r--r--test/test_CapRTL_isolate.input8
-rw-r--r--test/test_CapRTL_isolate.reference8
22 files changed, 613 insertions, 96 deletions
diff --git a/README b/README
index 16b7a10..f7e4d02 100644
--- a/README
+++ b/README
@@ -6,17 +6,18 @@ Background
One of the missing links stopping the penetration of free software in Middle
East is the lack of support for the Arabic and Hebrew alphabets. In order to
-have proper Arabic and Hebrew support, the BiDi algorithm should have been
+have proper Arabic and Hebrew support, the BiDi algorithm needs to be
implemented. It is our hope that this library will stimulate more free
software in the Middle Eastern countries.
Audience
========
-It is our hope that this library will stimulate the implementation of Hebrew
-and Arabic in lots of free software. Here is a small list of projects that
-would benefit from the use of the GNU FriBidi library, but of course there are
-many more: Wine, Mozilla, Qt, KDE, lynx, OpenOffice.
+It is our hope that this library will stimulate the implementation of
+Hebrew and Arabic support in lots of free software. Here is a small
+list of projects that would benefit from the use of the GNU FriBidi
+library, but of course there are many more: Wine, Mozilla, Qt, KDE,
+lynx, OpenOffice.
GNU FriBidi is already being used in projects like Pango (resulting in GTK+
and GNOME using GNU FriBidi), AbiWord, MLTerm, MPlayer, and BiCon.
@@ -151,3 +152,6 @@ And send your comments to:
Behdad Esfahbod
behdad@gnu.org
+
+Dov Grobgeld
+dov.grobgeld@gmail.com
diff --git a/bin/fribidi-main.c b/bin/fribidi-main.c
index b95e856..d93ccb4 100644
--- a/bin/fribidi-main.c
+++ b/bin/fribidi-main.c
@@ -508,7 +508,7 @@ main (
while (wid > 0 && idx < len)
{
wid -=
- FRIBIDI_IS_EXPLICIT_OR_BN_OR_NSM
+ FRIBIDI_IS_EXPLICIT_OR_ISOLATE_OR_BN_OR_NSM
(fribidi_get_bidi_type (visual[idx])) ? 0
: 1;
idx++;
diff --git a/charset/fribidi-char-sets-cap-rtl.c b/charset/fribidi-char-sets-cap-rtl.c
index 9d70c51..54ffd85 100644
--- a/charset/fribidi-char-sets-cap-rtl.c
+++ b/charset/fribidi-char-sets-cap-rtl.c
@@ -64,7 +64,7 @@ enum
static FriBidiCharType CapRTLCharTypes[] = {
/* *INDENT-OFF* */
ON, ON, ON, ON, LTR,RTL,ON, ON, ON, ON, ON, ON, ON, BS, RLO,RLE, /* 00-0f */
- LRO,LRE,PDF,WS, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, /* 10-1f */
+ LRO,LRE,PDF,WS, LRI, RLI, FSI, PDI, ON, ON, ON, ON, ON, ON, ON, ON, /* 10-1f */
WS, ON, ON, ON, ET, ON, ON, ON, ON, ON, ON, ET, CS, ON, ES, ES, /* 20-2f */
EN, EN, EN, EN, EN, EN, AN, AN, AN, AN, CS, ON, ON, ON, ON, ON, /* 30-3f */
RTL,AL, AL, AL, AL, AL, AL, RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL, /* 40-4f */
@@ -216,6 +216,18 @@ fribidi_cap_rtl_to_unicode (
case 'R':
us[j++] = FRIBIDI_CHAR_RLO;
break;
+ case 'i':
+ us[j++] = FRIBIDI_CHAR_LRI;
+ break;
+ case 'y':
+ us[j++] = FRIBIDI_CHAR_RLI;
+ break;
+ case 'f':
+ us[j++] = FRIBIDI_CHAR_FSI;
+ break;
+ case 'I':
+ us[j++] = FRIBIDI_CHAR_PDI;
+ break;
case '_':
us[j++] = '_';
break;
@@ -248,8 +260,9 @@ fribidi_unicode_to_cap_rtl (
for (i = 0; i < len; i++)
{
FriBidiChar ch = us[i];
- if (!FRIBIDI_IS_EXPLICIT (fribidi_get_bidi_type (ch)) && ch != '_'
- && ch != FRIBIDI_CHAR_LRM && ch != FRIBIDI_CHAR_RLM)
+ if (!FRIBIDI_IS_EXPLICIT (fribidi_get_bidi_type (ch))
+ && !FRIBIDI_IS_ISOLATE (fribidi_get_bidi_type (ch))
+ && ch != '_' && ch != FRIBIDI_CHAR_LRM && ch != FRIBIDI_CHAR_RLM)
s[j++] = fribidi_unicode_to_cap_rtl_c (ch);
else
{
@@ -277,6 +290,18 @@ fribidi_unicode_to_cap_rtl (
case FRIBIDI_CHAR_RLO:
s[j++] = 'R';
break;
+ case FRIBIDI_CHAR_LRI:
+ s[j++] = 'i';
+ break;
+ case FRIBIDI_CHAR_RLI:
+ s[j++] = 'y';
+ break;
+ case FRIBIDI_CHAR_FSI:
+ s[j++] = 'f';
+ break;
+ case FRIBIDI_CHAR_PDI:
+ s[j++] = 'I';
+ break;
case '_':
s[j++] = '_';
break;
@@ -333,7 +358,10 @@ fribidi_char_set_desc_cap_rtl (
" * _> LRM\n" " * _< RLM\n"
" * _l LRE\n" " * _r RLE\n"
" * _L LRO\n" " * _R RLO\n"
- " * _o PDF\n" " * __ `_' itself\n" "\n");
+ " * _o PDF\n" " * _i LRI\n"
+ " * _y RLI\n" " * _f FSI\n"
+ " * _I PDI\n" " * __ `_' itself\n"
+ "\n");
return s;
}
diff --git a/charset/fribidi-char-sets.c b/charset/fribidi-char-sets.c
index 214105f..eedff8f 100644
--- a/charset/fribidi-char-sets.c
+++ b/charset/fribidi-char-sets.c
@@ -114,7 +114,7 @@ static FriBidiCharSetHandler char_sets[FRIBIDI_CHAR_SETS_NUM + 1] = {
};
#if FRIBIDI_USE_GLIB+0
-# include <glib.h>
+# include <glib/gstrfuncs.h>
# define fribidi_strcasecmp g_ascii_strcasecmp
#else /* !FRIBIDI_USE_GLIB */
static char
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 7fe8d37..fd1d160 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -40,8 +40,7 @@ man3 = $(dist_man_MANS) $(dist_noinst_MANS)
C2MANFLAGS = $(includepath) \
-D__FRIBIDI_DOC \
-DDONT_HAVE_FRIBIDI_CONFIG_H \
- -M "Programmer's Manual" \
- -m "$(PACKAGE_NAME) $(PACKAGE_VERSION)"
+ -M "Programmer's Manual"
VPATH += $(includevpath)
diff --git a/gen.tab/gen-joining-type-tab.c b/gen.tab/gen-joining-type-tab.c
index 809d113..b7bb84d 100644
--- a/gen.tab/gen-joining-type-tab.c
+++ b/gen.tab/gen-joining-type-tab.c
@@ -151,6 +151,10 @@ static const char *ignored_bidi_types[] = {
"LRO",
"RLO",
"PDF",
+ "LRI",
+ "RLI",
+ "FSI",
+ "PDI",
NULL
};
diff --git a/gen.tab/unidata/ArabicShaping.txt b/gen.tab/unidata/ArabicShaping.txt
index fd22f5d..8add8a5 100644
--- a/gen.tab/unidata/ArabicShaping.txt
+++ b/gen.tab/unidata/ArabicShaping.txt
@@ -1,5 +1,5 @@
-# ArabicShaping-6.2.0.txt
-# Date: 2012-05-15, 21:05:00 GMT [KW]
+# ArabicShaping-6.3.0.txt
+# Date: 2012-11-14, 21:48:00 GMT [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
@@ -11,15 +11,19 @@
# property values for Arabic, Syriac, N'Ko, and Mandaic
# positional shaping, repeating in machine readable form the
# information exemplified in Tables 8-3, 8-8, 8-9, 8-10, 8-13, 8-14,
-# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.2.
+# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.3.
+# This file also defines Joining_Type values for Mongolian and
+# Phags-pa positional shaping, which is not listed in tables in
+# the standard.
#
-# See sections 8.2, 8.3, 13.5, and 14.12 of The Unicode Standard,
-# Version 6.2 for more information.
+# See sections 8.2, 8.3, 10.4, 13.2, 13.5, and 14.12 of The Unicode Standard,
+# Version 6.3 for more information.
#
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
-# form, of an Arabic, Syriac, N'Ko, or Mandaic character.
+# form, of an Arabic, Syriac, N'Ko, Mandaic, Mongolian,
+# Phags-pa, or other character.
#
# Field 1: gives a short schematic name for that character.
# The schematic name is descriptive of the shape, based as
@@ -35,7 +39,13 @@
# C Join_Causing
# U Non_Joining
# T Transparent
-# See Section 8.2, Arabic for more information on these types.
+#
+# See Section 8.2, Arabic for more information on these joining types.
+# Note that for cursive joining scripts which are typically rendered
+# top-to-bottom, rather than right-to-left, Joining_Type=L conventionally
+# refers to bottom joining, and Joining_Type=R conventionally refers
+# to top joining. See Section 10.4 Phags-pa for more information on the
+# interpretation of joining types in vertical layout.
#
# Field 3: defines the joining group (property name: Joining_Group)
#
@@ -68,8 +78,9 @@
# to jg=No_Joining_Group in this data file. Other, more specific
# joining group values will be defined only if an explicit proposal
# to define those values exactly has been approved by the UTC. This
-# is the convention exemplified by the N'Ko and Mandaic scripts. Only the Arabic
-# and Syriac scripts currently have explicit joining group values defined.
+# is the convention exemplified by the N'Ko, Mandaic, Mongolian,
+# and Phags-pa scripts. Only the Arabic and Syriac scripts
+# currently have explicit joining group values defined.
#
# Note: Code points that are not explicitly listed in this file are
# either of joining type T or U:
@@ -81,8 +92,6 @@
# For an explicit listing of characters of joining type T, see
# the derived property file DerivedJoiningType.txt.
#
-# There are currently no characters of joining type L defined in Unicode.
-#
# #############################################################
# Unicode; Schematic Name; Joining Type; Joining Group
@@ -417,9 +426,205 @@
08AB; WAW WITH DOT WITHIN; R; WAW
08AC; ROHINGYA YEH; R; ROHINGYA YEH
+# Mongolian Characters
+
+1806; MONGOLIAN TODO SOFT HYPHEN; U; No_Joining_Group
+1807; MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER; D; No_Joining_Group
+180A; MONGOLIAN NIRUGU; C; No_Joining_Group
+180E; MONGOLIAN VOWEL SEPARATOR; U; No_Joining_Group
+1820; MONGOLIAN A; D; No_Joining_Group
+1821; MONGOLIAN E; D; No_Joining_Group
+1822; MONGOLIAN I; D; No_Joining_Group
+1823; MONGOLIAN O; D; No_Joining_Group
+1824; MONGOLIAN U; D; No_Joining_Group
+1825; MONGOLIAN OE; D; No_Joining_Group
+1826; MONGOLIAN UE; D; No_Joining_Group
+1827; MONGOLIAN EE; D; No_Joining_Group
+1828; MONGOLIAN NA; D; No_Joining_Group
+1829; MONGOLIAN ANG; D; No_Joining_Group
+182A; MONGOLIAN BA; D; No_Joining_Group
+182B; MONGOLIAN PA; D; No_Joining_Group
+182C; MONGOLIAN QA; D; No_Joining_Group
+182D; MONGOLIAN GA; D; No_Joining_Group
+182E; MONGOLIAN MA; D; No_Joining_Group
+182F; MONGOLIAN LA; D; No_Joining_Group
+1830; MONGOLIAN SA; D; No_Joining_Group
+1831; MONGOLIAN SHA; D; No_Joining_Group
+1832; MONGOLIAN TA; D; No_Joining_Group
+1833; MONGOLIAN DA; D; No_Joining_Group
+1834; MONGOLIAN CHA; D; No_Joining_Group
+1835; MONGOLIAN JA; D; No_Joining_Group
+1836; MONGOLIAN YA; D; No_Joining_Group
+1837; MONGOLIAN RA; D; No_Joining_Group
+1838; MONGOLIAN WA; D; No_Joining_Group
+1839; MONGOLIAN FA; D; No_Joining_Group
+183A; MONGOLIAN KA; D; No_Joining_Group
+183B; MONGOLIAN KHA; D; No_Joining_Group
+183C; MONGOLIAN TSA; D; No_Joining_Group
+183D; MONGOLIAN ZA; D; No_Joining_Group
+183E; MONGOLIAN HAA; D; No_Joining_Group
+183F; MONGOLIAN ZRA; D; No_Joining_Group
+1840; MONGOLIAN LHA; D; No_Joining_Group
+1841; MONGOLIAN ZHI; D; No_Joining_Group
+1842; MONGOLIAN CHI; D; No_Joining_Group
+1843; MONGOLIAN TODO LONG VOWEL SIGN; D; No_Joining_Group
+1844; MONGOLIAN TODO E; D; No_Joining_Group
+1845; MONGOLIAN TODO I; D; No_Joining_Group
+1846; MONGOLIAN TODO O; D; No_Joining_Group
+1847; MONGOLIAN TODO U; D; No_Joining_Group
+1848; MONGOLIAN TODO OE; D; No_Joining_Group
+1849; MONGOLIAN TODO UE; D; No_Joining_Group
+184A; MONGOLIAN TODO ANG; D; No_Joining_Group
+184B; MONGOLIAN TODO BA; D; No_Joining_Group
+184C; MONGOLIAN TODO PA; D; No_Joining_Group
+184D; MONGOLIAN TODO QA; D; No_Joining_Group
+184E; MONGOLIAN TODO GA; D; No_Joining_Group
+184F; MONGOLIAN TODO MA; D; No_Joining_Group
+1850; MONGOLIAN TODO TA; D; No_Joining_Group
+1851; MONGOLIAN TODO DA; D; No_Joining_Group
+1852; MONGOLIAN TODO CHA; D; No_Joining_Group
+1853; MONGOLIAN TODO JA; D; No_Joining_Group
+1854; MONGOLIAN TODO TSA; D; No_Joining_Group
+1855; MONGOLIAN TODO YA; D; No_Joining_Group
+1856; MONGOLIAN TODO WA; D; No_Joining_Group
+1857; MONGOLIAN TODO KA; D; No_Joining_Group
+1858; MONGOLIAN TODO GAA; D; No_Joining_Group
+1859; MONGOLIAN TODO HAA; D; No_Joining_Group
+185A; MONGOLIAN TODO JIA; D; No_Joining_Group
+185B; MONGOLIAN TODO NIA; D; No_Joining_Group
+185C; MONGOLIAN TODO DZA; D; No_Joining_Group
+185D; MONGOLIAN SIBE E; D; No_Joining_Group
+185E; MONGOLIAN SIBE I; D; No_Joining_Group
+185F; MONGOLIAN SIBE IY; D; No_Joining_Group
+1860; MONGOLIAN SIBE UE; D; No_Joining_Group
+1861; MONGOLIAN SIBE U; D; No_Joining_Group
+1862; MONGOLIAN SIBE ANG; D; No_Joining_Group
+1863; MONGOLIAN SIBE KA; D; No_Joining_Group
+1864; MONGOLIAN SIBE GA; D; No_Joining_Group
+1865; MONGOLIAN SIBE HA; D; No_Joining_Group
+1866; MONGOLIAN SIBE PA; D; No_Joining_Group
+1867; MONGOLIAN SIBE SHA; D; No_Joining_Group
+1868; MONGOLIAN SIBE TA; D; No_Joining_Group
+1869; MONGOLIAN SIBE DA; D; No_Joining_Group
+186A; MONGOLIAN SIBE JA; D; No_Joining_Group
+186B; MONGOLIAN SIBE FA; D; No_Joining_Group
+186C; MONGOLIAN SIBE GAA; D; No_Joining_Group
+186D; MONGOLIAN SIBE HAA; D; No_Joining_Group
+186E; MONGOLIAN SIBE TSA; D; No_Joining_Group
+186F; MONGOLIAN SIBE ZA; D; No_Joining_Group
+1870; MONGOLIAN SIBE RAA; D; No_Joining_Group
+1871; MONGOLIAN SIBE CHA; D; No_Joining_Group
+1872; MONGOLIAN SIBE ZHA; D; No_Joining_Group
+1873; MONGOLIAN MANCHU I; D; No_Joining_Group
+1874; MONGOLIAN MANCHU KA; D; No_Joining_Group
+1875; MONGOLIAN MANCHU RA; D; No_Joining_Group
+1876; MONGOLIAN MANCHU FA; D; No_Joining_Group
+1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group
+1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group
+1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group
+1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group
+1883; MONGOLIAN ALI GALI UBADAMA; U; No_Joining_Group
+1884; MONGOLIAN ALI GALI INVERTED UBADAMA; U; No_Joining_Group
+1885; MONGOLIAN ALI GALI BALUDA; U; No_Joining_Group
+1886; MONGOLIAN ALI GALI THREE BALUDA; U; No_Joining_Group
+1887; MONGOLIAN ALI GALI A; D; No_Joining_Group
+1888; MONGOLIAN ALI GALI I; D; No_Joining_Group
+1889; MONGOLIAN ALI GALI KA; D; No_Joining_Group
+188A; MONGOLIAN ALI GALI NGA; D; No_Joining_Group
+188B; MONGOLIAN ALI GALI CA; D; No_Joining_Group
+188C; MONGOLIAN ALI GALI TTA; D; No_Joining_Group
+188D; MONGOLIAN ALI GALI TTHA; D; No_Joining_Group
+188E; MONGOLIAN ALI GALI DDA; D; No_Joining_Group
+188F; MONGOLIAN ALI GALI NNA; D; No_Joining_Group
+1890; MONGOLIAN ALI GALI TA; D; No_Joining_Group
+1891; MONGOLIAN ALI GALI DA; D; No_Joining_Group
+1892; MONGOLIAN ALI GALI PA; D; No_Joining_Group
+1893; MONGOLIAN ALI GALI PHA; D; No_Joining_Group
+1894; MONGOLIAN ALI GALI SSA; D; No_Joining_Group
+1895; MONGOLIAN ALI GALI ZHA; D; No_Joining_Group
+1896; MONGOLIAN ALI GALI ZA; D; No_Joining_Group
+1897; MONGOLIAN ALI GALI AH; D; No_Joining_Group
+1898; MONGOLIAN TODO ALI GALI TA; D; No_Joining_Group
+1899; MONGOLIAN TODO ALI GALI ZHA; D; No_Joining_Group
+189A; MONGOLIAN MANCHU ALI GALI GHA; D; No_Joining_Group
+189B; MONGOLIAN MANCHU ALI GALI NGA; D; No_Joining_Group
+189C; MONGOLIAN MANCHU ALI GALI CA; D; No_Joining_Group
+189D; MONGOLIAN MANCHU ALI GALI JHA; D; No_Joining_Group
+189E; MONGOLIAN MANCHU ALI GALI TTA; D; No_Joining_Group
+189F; MONGOLIAN MANCHU ALI GALI DDHA; D; No_Joining_Group
+18A0; MONGOLIAN MANCHU ALI GALI TA; D; No_Joining_Group
+18A1; MONGOLIAN MANCHU ALI GALI DHA; D; No_Joining_Group
+18A2; MONGOLIAN MANCHU ALI GALI SSA; D; No_Joining_Group
+18A3; MONGOLIAN MANCHU ALI GALI CYA; D; No_Joining_Group
+18A4; MONGOLIAN MANCHU ALI GALI ZHA; D; No_Joining_Group
+18A5; MONGOLIAN MANCHU ALI GALI ZA; D; No_Joining_Group
+18A6; MONGOLIAN ALI GALI HALF U; D; No_Joining_Group
+18A7; MONGOLIAN ALI GALI HALF YA; D; No_Joining_Group
+18A8; MONGOLIAN MANCHU ALI GALI BHA; D; No_Joining_Group
+18AA; MONGOLIAN MANCHU ALI GALI LHA; D; No_Joining_Group
+
# Other
200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
200D; ZERO WIDTH JOINER; C; No_Joining_Group
+2066; LEFT-TO-RIGHT ISOLATE; U; No_Joining_Group
+2067; RIGHT-TO-LEFT ISOLATE; U; No_Joining_Group
+2068; FIRST STRONG ISOLATE; U; No_Joining_Group
+2069; POP DIRECTIONAL ISOLATE; U; No_Joining_Group
+
+# Phags-Pa Characters
+
+A840; PHAGS-PA KA; D; No_Joining_Group
+A841; PHAGS-PA KHA; D; No_Joining_Group
+A842; PHAGS-PA GA; D; No_Joining_Group
+A843; PHAGS-PA NGA; D; No_Joining_Group
+A844; PHAGS-PA CA; D; No_Joining_Group
+A845; PHAGS-PA CHA; D; No_Joining_Group
+A846; PHAGS-PA JA; D; No_Joining_Group
+A847; PHAGS-PA NYA; D; No_Joining_Group
+A848; PHAGS-PA TA; D; No_Joining_Group
+A849; PHAGS-PA THA; D; No_Joining_Group
+A84A; PHAGS-PA DA; D; No_Joining_Group
+A84B; PHAGS-PA NA; D; No_Joining_Group
+A84C; PHAGS-PA PA; D; No_Joining_Group
+A84D; PHAGS-PA PHA; D; No_Joining_Group
+A84E; PHAGS-PA BA; D; No_Joining_Group
+A84F; PHAGS-PA MA; D; No_Joining_Group
+A850; PHAGS-PA TSA; D; No_Joining_Group
+A851; PHAGS-PA TSHA; D; No_Joining_Group
+A852; PHAGS-PA DZA; D; No_Joining_Group
+A853; PHAGS-PA WA; D; No_Joining_Group
+A854; PHAGS-PA ZHA; D; No_Joining_Group
+A855; PHAGS-PA ZA; D; No_Joining_Group
+A856; PHAGS-PA SMALL A; D; No_Joining_Group
+A857; PHAGS-PA YA; D; No_Joining_Group
+A858; PHAGS-PA RA; D; No_Joining_Group
+A859; PHAGS-PA LA; D; No_Joining_Group
+A85A; PHAGS-PA SHA; D; No_Joining_Group
+A85B; PHAGS-PA SA; D; No_Joining_Group
+A85C; PHAGS-PA HA; D; No_Joining_Group
+A85D; PHAGS-PA A; D; No_Joining_Group
+A85E; PHAGS-PA I; D; No_Joining_Group
+A85F; PHAGS-PA U; D; No_Joining_Group
+A860; PHAGS-PA E; D; No_Joining_Group
+A861; PHAGS-PA O; D; No_Joining_Group
+A862; PHAGS-PA QA; D; No_Joining_Group
+A863; PHAGS-PA XA; D; No_Joining_Group
+A864; PHAGS-PA FA; D; No_Joining_Group
+A865; PHAGS-PA GGA; D; No_Joining_Group
+A866; PHAGS-PA EE; D; No_Joining_Group
+A867; PHAGS-PA SUBJOINED WA; D; No_Joining_Group
+A868; PHAGS-PA SUBJOINED YA; D; No_Joining_Group
+A869; PHAGS-PA TTA; D; No_Joining_Group
+A86A; PHAGS-PA TTHA; D; No_Joining_Group
+A86B; PHAGS-PA DDA; D; No_Joining_Group
+A86C; PHAGS-PA NNA; D; No_Joining_Group
+A86D; PHAGS-PA ALTERNATE YA; D; No_Joining_Group
+A86E; PHAGS-PA VOICELESS SHA; D; No_Joining_Group
+A86F; PHAGS-PA VOICED HA; D; No_Joining_Group
+A870; PHAGS-PA ASPIRATED FA; D; No_Joining_Group
+A871; PHAGS-PA SUBJOINED RA; D; No_Joining_Group
+A872; PHAGS-PA SUPERFIXED RA; L; No_Joining_Group
+A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
# EOF
diff --git a/gen.tab/unidata/BidiMirroring.txt b/gen.tab/unidata/BidiMirroring.txt
index ec41b76..d97c0dd 100644
--- a/gen.tab/unidata/BidiMirroring.txt
+++ b/gen.tab/unidata/BidiMirroring.txt
@@ -1,19 +1,19 @@
-# BidiMirroring-6.2.0.txt
-# Date: 2012-05-15, 24:19:00 GMT [KW, LI]
+# BidiMirroring-6.3.0.txt
+# Date: 2013-02-12, 08:20:00 GMT [KW, LI]
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
-# The repertoire covered by the file is Unicode 6.2.0.
+# The repertoire covered by the file is Unicode 6.3.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
@@ -42,7 +42,7 @@
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
-# and for Unicode 6.1 and 6.2 by Ken Whistler and Laurentiu Iancu.
+# and for Unicode 6.1, 6.2, and 6.3 by Ken Whistler and Laurentiu Iancu.
#
# ############################################################
#
@@ -204,8 +204,8 @@
276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
-2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET
-2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET
+2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT
2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT
27C3; 27C4 # OPEN SUBSET
diff --git a/gen.tab/unidata/ReadMe.txt b/gen.tab/unidata/ReadMe.txt
index 370cbd0..f970223 100644
--- a/gen.tab/unidata/ReadMe.txt
+++ b/gen.tab/unidata/ReadMe.txt
@@ -1,7 +1,7 @@
-# Date: 2012-09-24, 22:40:00 GMT [KW]
+# Date: 2013-09-27, 23:05:00 GMT [KW]
#
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For documentation, see NamesList.html,
@@ -9,7 +9,7 @@
# UAX #44, "Unicode Character Database."
#
-This directory contains final data files
-for the Unicode Character Database (UCD) for Unicode 6.2.0.
+This directory contains the final data files
+for the Unicode Character Database (UCD) for Unicode 6.3.0.
diff --git a/gen.tab/unidata/UnicodeData.txt b/gen.tab/unidata/UnicodeData.txt
index 086379e..9fffa71 100644
--- a/gen.tab/unidata/UnicodeData.txt
+++ b/gen.tab/unidata/UnicodeData.txt
@@ -1509,6 +1509,7 @@
0619;ARABIC SMALL DAMMA;Mn;31;NSM;;;;;N;;;;;
061A;ARABIC SMALL KASRA;Mn;32;NSM;;;;;N;;;;;
061B;ARABIC SEMICOLON;Po;0;AL;;;;;N;;;;;
+061C;ARABIC LETTER MARK;Cf;0;AL;;;;;N;;;;;
061E;ARABIC TRIPLE DOT PUNCTUATION MARK;Po;0;AL;;;;;N;;;;;
061F;ARABIC QUESTION MARK;Po;0;AL;;;;;N;;;;;
0620;ARABIC LETTER KASHMIRI YEH;Lo;0;AL;;;;;N;;;;;
@@ -5296,7 +5297,7 @@
180B;MONGOLIAN FREE VARIATION SELECTOR ONE;Mn;0;NSM;;;;;N;;;;;
180C;MONGOLIAN FREE VARIATION SELECTOR TWO;Mn;0;NSM;;;;;N;;;;;
180D;MONGOLIAN FREE VARIATION SELECTOR THREE;Mn;0;NSM;;;;;N;;;;;
-180E;MONGOLIAN VOWEL SEPARATOR;Zs;0;WS;;;;;N;;;;;
+180E;MONGOLIAN VOWEL SEPARATOR;Cf;0;BN;;;;;N;;;;;
1810;MONGOLIAN DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
1811;MONGOLIAN DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
1812;MONGOLIAN DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
@@ -5751,7 +5752,7 @@
1A18;BUGINESE VOWEL SIGN U;Mn;220;NSM;;;;;N;;;;;
1A19;BUGINESE VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
1A1A;BUGINESE VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
-1A1B;BUGINESE VOWEL SIGN AE;Mc;0;L;;;;;N;;;;;
+1A1B;BUGINESE VOWEL SIGN AE;Mn;0;NSM;;;;;N;;;;;
1A1E;BUGINESE PALLAWA;Po;0;L;;;;;N;;;;;
1A1F;BUGINESE END OF SECTION;Po;0;L;;;;;N;;;;;
1A20;TAI THAM LETTER HIGH KA;Lo;0;L;;;;;N;;;;;
@@ -7116,6 +7117,10 @@
2062;INVISIBLE TIMES;Cf;0;BN;;;;;N;;;;;
2063;INVISIBLE SEPARATOR;Cf;0;BN;;;;;N;;;;;
2064;INVISIBLE PLUS;Cf;0;BN;;;;;N;;;;;
+2066;LEFT-TO-RIGHT ISOLATE;Cf;0;LRI;;;;;N;;;;;
+2067;RIGHT-TO-LEFT ISOLATE;Cf;0;RLI;;;;;N;;;;;
+2068;FIRST STRONG ISOLATE;Cf;0;FSI;;;;;N;;;;;
+2069;POP DIRECTIONAL ISOLATE;Cf;0;PDI;;;;;N;;;;;
206A;INHIBIT SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
206B;ACTIVATE SYMMETRIC SWAPPING;Cf;0;BN;;;;;N;;;;;
206C;INHIBIT ARABIC FORM SHAPING;Cf;0;BN;;;;;N;;;;;
@@ -7738,10 +7743,10 @@
2305;PROJECTIVE;So;0;ON;;;;;N;;;;;
2306;PERSPECTIVE;So;0;ON;;;;;N;;;;;
2307;WAVY LINE;So;0;ON;;;;;N;;;;;
-2308;LEFT CEILING;Sm;0;ON;;;;;Y;;;;;
-2309;RIGHT CEILING;Sm;0;ON;;;;;Y;;;;;
-230A;LEFT FLOOR;Sm;0;ON;;;;;Y;;;;;
-230B;RIGHT FLOOR;Sm;0;ON;;;;;Y;;;;;
+2308;LEFT CEILING;Ps;0;ON;;;;;Y;;;;;
+2309;RIGHT CEILING;Pe;0;ON;;;;;Y;;;;;
+230A;LEFT FLOOR;Ps;0;ON;;;;;Y;;;;;
+230B;RIGHT FLOOR;Pe;0;ON;;;;;Y;;;;;
230C;BOTTOM RIGHT CROP;So;0;ON;;;;;N;;;;;
230D;BOTTOM LEFT CROP;So;0;ON;;;;;N;;;;;
230E;TOP RIGHT CROP;So;0;ON;;;;;N;;;;;
@@ -18740,8 +18745,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;;
12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;;
12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;;
-12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;;
-12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;;
+12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;2;N;;;;;
+12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;3;N;;;;;
12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;;
12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;;
1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;;
diff --git a/lib/common.h b/lib/common.h
index 10ca13c..055f935 100644
--- a/lib/common.h
+++ b/lib/common.h
@@ -113,7 +113,7 @@
# define fribidi_free free
#else /* fribidi_malloc */
# ifndef fribidi_free
-# error "lYou should define fribidi_free too when you define fribidi_malloc."
+# error "You should define fribidi_free too when you define fribidi_malloc."
# endif /* !fribidi_free */
#endif /* fribidi_malloc */
diff --git a/lib/debug.h b/lib/debug.h
index 78c659a..feb342e 100644
--- a/lib/debug.h
+++ b/lib/debug.h
@@ -70,6 +70,10 @@
FRIBIDI_BEGIN_STMT \
FRIBIDI_FPRINTF(FRIBIDI_STDERR_ s, t, u, v, w); \
FRIBIDI_END_STMT
+#define MSG6(s, t, u, v, w, z) \
+ FRIBIDI_BEGIN_STMT \
+ FRIBIDI_FPRINTF(FRIBIDI_STDERR_ s, t, u, v, w, z); \
+ FRIBIDI_END_STMT
#endif /* !MSG */
#ifndef DBG
diff --git a/lib/fribidi-bidi-types-list.h b/lib/fribidi-bidi-types-list.h
index fa8c32b..25516c7 100644
--- a/lib/fribidi-bidi-types-list.h
+++ b/lib/fribidi-bidi-types-list.h
@@ -61,6 +61,10 @@ _FRIBIDI_ADD_TYPE (RLE, '+') /* Right-to-Left Embedding */
_FRIBIDI_ADD_TYPE (LRO, '+') /* Left-to-Right Override */
_FRIBIDI_ADD_TYPE (RLO, '+') /* Right-to-Left Override */
_FRIBIDI_ADD_TYPE (PDF, '-') /* Pop Directional Flag */
+_FRIBIDI_ADD_TYPE (LRI, '+') /* Left-to-Right Isolate */
+_FRIBIDI_ADD_TYPE (RLI, '+') /* Right-to-Left Isolate */
+_FRIBIDI_ADD_TYPE (FSI, '+') /* First-Strong Isolate */
+_FRIBIDI_ADD_TYPE (PDI, '-') /* Pop Directional Isolate */
#if defined(_FRIBIDI_ADD_ALIAS)
_FRIBIDI_ADD_ALIAS (L, LTR)
diff --git a/lib/fribidi-bidi-types.h b/lib/fribidi-bidi-types.h
index 0f6abda..4edfdcc 100644
--- a/lib/fribidi-bidi-types.h
+++ b/lib/fribidi-bidi-types.h
@@ -57,17 +57,20 @@ typedef signed char FriBidiLevel;
#define FRIBIDI_MASK_SENTINEL 0x00000080L /* Is sentinel */
/* Sentinels are not valid chars, just identify the start/end of strings. */
-/* Each char can be only one of the five following. */
+/* Each char can be only one of the six following. */
#define FRIBIDI_MASK_LETTER 0x00000100L /* Is letter: L, R, AL */
#define FRIBIDI_MASK_NUMBER 0x00000200L /* Is number: EN, AN */
#define FRIBIDI_MASK_NUMSEPTER 0x00000400L /* Is separator or terminator: ES, ET, CS */
#define FRIBIDI_MASK_SPACE 0x00000800L /* Is space: BN, BS, SS, WS */
-#define FRIBIDI_MASK_EXPLICIT 0x00001000L /* Is expilict mark: LRE, RLE, LRO, RLO, PDF */
+#define FRIBIDI_MASK_EXPLICIT 0x00001000L /* Is explicit mark: LRE, RLE, LRO, RLO, PDF */
+#define FRIBIDI_MASK_ISOLATE 0x00008000L /* Is isolate mark: LRI, RLI, FSI, PDI */
/* Can be set only if FRIBIDI_MASK_SPACE is also set. */
#define FRIBIDI_MASK_SEPARATOR 0x00002000L /* Is text separator: BS, SS */
/* Can be set only if FRIBIDI_MASK_EXPLICIT is also set. */
#define FRIBIDI_MASK_OVERRIDE 0x00004000L /* Is explicit override: LRO, RLO */
+#define FRIBIDI_MASK_FIRST 0x02000000L /* Whether direction is determined by first strong */
+
/* The following exist to make types pairwise different, some of them can
* be removed but are here because of efficiency (make queries faster). */
@@ -167,6 +170,18 @@ typedef signed char FriBidiLevel;
#define FRIBIDI_TYPE_PRIVATE ( FRIBIDI_MASK_PRIVATE )
+/* New types in Unicode 6.3 */
+
+/* Left-to-Right Isolate */
+#define FRIBIDI_TYPE_LRI_VAL ( FRIBIDI_MASK_NEUTRAL | FRIBIDI_MASK_ISOLATE )
+/* Right-to-Left Isolate */
+#define FRIBIDI_TYPE_RLI_VAL ( FRIBIDI_MASK_NEUTRAL | FRIBIDI_MASK_ISOLATE | FRIBIDI_MASK_RTL )
+/* First-Strong Isolate */
+#define FRIBIDI_TYPE_FSI_VAL ( FRIBIDI_MASK_NEUTRAL | FRIBIDI_MASK_ISOLATE | FRIBIDI_MASK_FIRST )
+
+/* Pop Directional Isolate */
+#define FRIBIDI_TYPE_PDI_VAL ( FRIBIDI_MASK_NEUTRAL | FRIBIDI_MASK_WEAK | FRIBIDI_MASK_ISOLATE )
+
/* Define Enums only if sizeof(int) == 4 (UTF-32), and not compiling C++.
* The problem with C++ is that then casts between int32 and enum will fail!
*/
@@ -214,6 +229,10 @@ typedef fribidi_uint32 FriBidiCharType;
# define FRIBIDI_TYPE_LRO FRIBIDI_TYPE_LRO_VAL
# define FRIBIDI_TYPE_RLO FRIBIDI_TYPE_RLO_VAL
# define FRIBIDI_TYPE_PDF FRIBIDI_TYPE_PDF_VAL
+# define FRIBIDI_TYPE_LRI FRIBIDI_TYPE_LRI_VAL /* Left-to-Right Isolate */
+# define FRIBIDI_TYPE_RLI FRIBIDI_TYPE_RLI_VAL /* Right-to-Left Isolate */
+# define FRIBIDI_TYPE_FSI FRIBIDI_TYPE_FSI_VAL /* First-Strong Isolate */
+# define FRIBIDI_TYPE_PDI FRIBIDI_TYPE_PDI_VAL /* Pop Directional Isolate */
typedef fribidi_uint32 FriBidiParType;
# define FRIBIDI_PAR_LTR FRIBIDI_TYPE_LTR_VAL
@@ -273,6 +292,8 @@ typedef fribidi_uint32 FriBidiParType;
#define FRIBIDI_IS_SPACE(p) ((p) & FRIBIDI_MASK_SPACE)
/* Is explicit mark: LRE, RLE, LRO, RLO, PDF? */
#define FRIBIDI_IS_EXPLICIT(p) ((p) & FRIBIDI_MASK_EXPLICIT)
+/* Is isolate mark: LRI, RLI, FSI, PDI? */
+#define FRIBIDI_IS_ISOLATE(p) ((p) & FRIBIDI_MASK_ISOLATE)
/* Is text separator: BS, SS? */
#define FRIBIDI_IS_SEPARATOR(p) ((p) & FRIBIDI_MASK_SEPARATOR)
@@ -303,6 +324,10 @@ typedef fribidi_uint32 FriBidiParType;
#define FRIBIDI_IS_EXPLICIT_OR_BN_OR_NSM(p) \
((p) & (FRIBIDI_MASK_EXPLICIT | FRIBIDI_MASK_BN | FRIBIDI_MASK_NSM))
+/* Is explicit or isolate or BN or NSM: LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, NSM? */
+#define FRIBIDI_IS_EXPLICIT_OR_ISOLATE_OR_BN_OR_NSM(p) \
+ ((p) & (FRIBIDI_MASK_EXPLICIT | FRIBIDI_MASK_ISOLATE | FRIBIDI_MASK_BN | FRIBIDI_MASK_NSM))
+
/* Is explicit or BN or WS: LRE, RLE, LRO, RLO, PDF, BN, WS? */
#define FRIBIDI_IS_EXPLICIT_OR_BN_OR_WS(p) \
((p) & (FRIBIDI_MASK_EXPLICIT | FRIBIDI_MASK_BN | FRIBIDI_MASK_WS))
diff --git a/lib/fribidi-bidi.c b/lib/fribidi-bidi.c
index 00b1848..7fdcdec 100644
--- a/lib/fribidi-bidi.c
+++ b/lib/fribidi-bidi.c
@@ -55,6 +55,7 @@
#define RL_LEN(list) ((list)->len)
#define RL_POS(list) ((list)->pos)
#define RL_LEVEL(list) ((list)->level)
+#define RL_ISOLATE_LEVEL(list) ((list)->isolate_level)
static FriBidiRun *
merge_with_prev (
@@ -111,6 +112,37 @@ compact_neutrals (
}
}
+/* Return the run adjacent to list, going forward (->next) or backward
+ (->prev). Runs at a deeper isolate level, PDI runs when going forward,
+ and (if skip_neutral) non-strong runs are skipped; the walk stops at a
+ sentinel or NULL. This search is O(n), so algorithms using it become
+ O(n^2); it should be replaced with isolate adjacent runs in both directions. */
+static FriBidiRun *get_adjacent_run(FriBidiRun *list, fribidi_boolean forward, fribidi_boolean skip_neutral)
+{
+ FriBidiRun *ppp = forward ? list->next : list->prev;
+ while(ppp)
+ {
+ FriBidiCharType ppp_type = RL_TYPE (ppp);
+
+ if (ppp_type == _FRIBIDI_TYPE_SENTINEL)
+ break;
+
+ /* Note that when sweeping forward we continue one run
+ beyond the PDI to see what lies behind. When looking
+ backwards, this is not necessary as the leading isolate
+ run has already been assigned the resolved level. */
+ if (ppp->isolate_level > list->isolate_level
+ || (forward && ppp_type == FRIBIDI_TYPE_PDI)
+ || (skip_neutral && !FRIBIDI_IS_STRONG(ppp_type)))
+ {
+ ppp = forward ? ppp->next : ppp->prev;
+ continue;
+ }
+ break;
+ }
+ return ppp;
+}
+
#if DEBUG+0
/*======================================================================
* For debugging, define some functions for printing the types and the
@@ -130,6 +162,8 @@ static char char_from_level_array[] = {
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z',
+ /* TBD - insert another 125-64 levels */
+
'@', /* 62 == only must appear after resolving
* implicits. */
@@ -152,8 +186,8 @@ print_types_re (
MSG (" Run types : ");
for_run_list (pp, pp)
{
- MSG5 ("%d:%d(%s)[%d] ",
- pp->pos, pp->len, fribidi_get_bidi_type_name (pp->type), pp->level);
+ MSG6 ("%d:%d(%s)[%d,%d] ",
+ pp->pos, pp->len, fribidi_get_bidi_type_name (pp->type), pp->level, pp->isolate_level);
}
MSG ("\n");
}
@@ -218,20 +252,23 @@ print_bidi_string (
/* There are a few little points in pushing into and popping from the status
stack:
1. when the embedding level is not valid (more than
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=61), you must reject it, and not to push
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=125), you must reject it, and not to push
into the stack, but when you see a PDF, you must find the matching code,
and if it was pushed in the stack, pop it, it means you must pop if and
only if you have pushed the matching code, the over_pushed var counts the
number of rejected codes so far.
+
2. there's a more confusing point too, when the embedding level is exactly
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL-1=60, an LRO or LRE is rejected
- because the new level would be FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL+1=62, that
- is invalid; but an RLO or RLE is accepted because the new level is
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=61, that is valid, so the rejected codes
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL-1=124, an LRO, LRE, or LRI is rejected
+ because the new level would be FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL+1=126, that
+ is invalid; but an RLO, RLE, or RLI is accepted because the new level is
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=125, that is valid, so the rejected codes
may be not continuous in the logical order, in fact there are at most two
- continuous intervals of codes, with an RLO or RLE between them. To support
- this case, the first_interval var counts the number of rejected codes in
- the first interval, when it is 0, means that there is only one interval.
+ continuous intervals of codes, with an RLO, RLE, or RLI between them. To
+ support this case, the first_interval var counts the number of rejected
+ codes in the first interval, when it is 0, means that there is only one
+ interval.
+
*/
/* a. If this new level would be valid, then this embedding code is valid.
@@ -248,11 +285,13 @@ print_bidi_string (
if UNLIKELY(level == FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL - 1) \
first_interval = over_pushed; \
status_stack[stack_size].level = level; \
+ status_stack[stack_size].isolate_level = isolate_level; \
+ status_stack[stack_size].isolate = isolate; \
status_stack[stack_size].override = override; \
stack_size++; \
level = new_level; \
override = new_override; \
- } else \
+ } else if LIKELY(isolate_overflow == 0) \
over_pushed++; \
FRIBIDI_END_STMT
@@ -272,6 +311,8 @@ print_bidi_string (
stack_size--; \
level = status_stack[stack_size].level; \
override = status_stack[stack_size].override; \
+ isolate = status_stack[stack_size].isolate; \
+ isolate_level = status_stack[stack_size].isolate_level; \
} \
} \
FRIBIDI_END_STMT
@@ -365,12 +406,24 @@ fribidi_get_par_embedding_levels (
/* P2. P3. Search for first strong character and use its direction as
base direction */
{
- for_run_list (pp, main_run_list) if (FRIBIDI_IS_LETTER (RL_TYPE (pp)))
- {
- base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (pp));
- *pbase_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
- break;
- }
+ int valid_isolate_count = 0;
+ for_run_list (pp, main_run_list)
+ {
+ if (RL_TYPE(pp) == FRIBIDI_TYPE_PDI)
+ {
+ /* Ignore if there is no matching isolate */
+ if (valid_isolate_count>0)
+ valid_isolate_count--;
+ }
+ else if (FRIBIDI_IS_ISOLATE(RL_TYPE(pp)))
+ valid_isolate_count++;
+ else if (valid_isolate_count==0 && FRIBIDI_IS_LETTER (RL_TYPE (pp)))
+ {
+ base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (pp));
+ *pbase_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
+ break;
+ }
+ }
}
base_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
DBG2 (" base level : %c", fribidi_char_from_level (base_level));
@@ -388,13 +441,19 @@ fribidi_get_par_embedding_levels (
DBG ("explicit levels and directions");
{
FriBidiLevel level, new_level;
+ int isolate_level = 0;
FriBidiCharType override, new_override;
FriBidiStrIndex i;
int stack_size, over_pushed, first_interval;
+ int valid_isolate_count = 0;
+ int isolate_overflow = 0;
+ int isolate = 0; /* The isolate status flag */
struct
{
FriBidiCharType override; /* only LTR, RTL and ON are valid */
FriBidiLevel level;
+ int isolate;
+ int isolate_level;
} *status_stack;
FriBidiRun temp_link;
@@ -407,9 +466,11 @@ fribidi_get_par_embedding_levels (
(!explicits_list) goto out;
/* X1. Begin by setting the current embedding level to the paragraph
- embedding level. Set the directional override status to neutral.
- Process each character iteratively, applying rules X2 through X9.
- Only embedding levels from 0 to 61 are valid in this phase. */
+ embedding level. Set the directional override status to neutral,
+ and directional isolate status to false.
+
+ Process each character iteratively, applying rules X2 through X8.
+ Only embedding levels from 0 to 125 are valid in this phase. */
level = base_level;
override = FRIBIDI_TYPE_ON;
@@ -417,12 +478,16 @@ fribidi_get_par_embedding_levels (
stack_size = 0;
over_pushed = 0;
first_interval = 0;
+ valid_isolate_count = 0;
+ isolate_overflow = 0;
status_stack = fribidi_malloc (sizeof (status_stack[0]) *
FRIBIDI_BIDI_MAX_RESOLVED_LEVELS);
for_run_list (pp, main_run_list)
{
FriBidiCharType this_type = RL_TYPE (pp);
+ RL_ISOLATE_LEVEL (pp) = isolate_level;
+
if (FRIBIDI_IS_EXPLICIT_OR_BN (this_type))
{
if (FRIBIDI_IS_STRONG (this_type))
@@ -443,6 +508,7 @@ fribidi_get_par_embedding_levels (
new_level =
((level + FRIBIDI_DIR_TO_LEVEL (this_type) + 2) & ~1) -
FRIBIDI_DIR_TO_LEVEL (this_type);
+ isolate = 0;
PUSH_STATUS;
}
}
@@ -451,8 +517,12 @@ fribidi_get_par_embedding_levels (
/* 3. Terminating Embeddings and overrides */
/* X7. With each PDF, determine the matching embedding or
override code. */
- for (i = RL_LEN (pp); i; i--)
- POP_STATUS;
+ for (i = RL_LEN (pp); i; i--)
+ {
+ if (stack_size && status_stack[stack_size-1].isolate != 0)
+ break;
+ POP_STATUS;
+ }
}
/* X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes. */
@@ -462,6 +532,87 @@ fribidi_get_par_embedding_levels (
move_node_before (pp, explicits_list);
pp = &temp_link;
}
+ else if (this_type == FRIBIDI_TYPE_PDI)
+ /* X6a. Pop the direction off the stack */
+ {
+ for (i = RL_LEN (pp); i; i--)
+ {
+ if (isolate_overflow > 0)
+ isolate_overflow--;
+ else if (valid_isolate_count > 0)
+ {
+ /* Pop away all LRE,RLE,LRO, RLO levels
+ from the stack, as these are implicitly
+ terminated by the PDI */
+ while (stack_size && !status_stack[stack_size-1].isolate)
+ POP_STATUS;
+ POP_STATUS;
+ isolate_level-- ;
+ valid_isolate_count--;
+ RL_LEVEL (pp) = level;
+ RL_ISOLATE_LEVEL (pp) = isolate_level;
+ }
+ else
+ {
+ /* Ignore unmatched PDIs by turning them into ONs */
+ RL_TYPE (pp) = FRIBIDI_TYPE_ON;
+ RL_LEVEL (pp) = level;
+ }
+ }
+ }
+ else if (FRIBIDI_IS_ISOLATE(this_type))
+ {
+ /* TBD support RL_LEN > 1 */
+ new_override = FRIBIDI_TYPE_ON;
+ isolate = 1;
+ if (this_type == FRIBIDI_TYPE_LRI)
+ new_level = level + 2 - (level%2);
+ else if (this_type == FRIBIDI_TYPE_RLI)
+ new_level = level + 1 + (level%2);
+ else if (this_type == FRIBIDI_TYPE_FSI)
+ {
+ /* Search for a local strong character until we
+ meet the corresponding PDI or the end of the
+ paragraph */
+ FriBidiRun *fsi_pp;
+ int isolate_count = 0;
+ int fsi_base_level = 0;
+ for_run_list (fsi_pp, pp)
+ {
+ if (RL_TYPE(fsi_pp) == FRIBIDI_TYPE_PDI)
+ {
+ isolate_count--;
+ if (valid_isolate_count < 0)
+ break;
+ }
+ else if (FRIBIDI_IS_ISOLATE(RL_TYPE(fsi_pp)))
+ isolate_count++;
+ else if (isolate_count==0 && FRIBIDI_IS_LETTER (RL_TYPE (fsi_pp)))
+ {
+ fsi_base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (fsi_pp));
+ break;
+ }
+ }
+
+ /* Same behavior as RLI and LRI above */
+ if (FRIBIDI_LEVEL_IS_RTL (fsi_base_level))
+ new_level = level + 1 + (level%2);
+ else
+ new_level = level + 2 - (level%2);
+ }
+
+ RL_LEVEL (pp) = level;
+ RL_ISOLATE_LEVEL (pp) = isolate_level++;
+
+ if (new_level <= FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL)
+ {
+ valid_isolate_count++;
+ PUSH_STATUS;
+ level = new_level;
+ }
+ else
+ isolate_overflow += 1;
+ }
else if (this_type == FRIBIDI_TYPE_BS)
{
/* X8. All explicit directional embeddings and overrides are
@@ -517,21 +668,38 @@ fribidi_get_par_embedding_levels (
/* 4. Resolving weak types */
DBG ("resolving weak types");
{
- FriBidiCharType last_strong, prev_type_orig;
+ int *last_strong_stack;
+ FriBidiCharType prev_type_orig;
fribidi_boolean w4;
- last_strong = base_dir;
+ last_strong_stack = fribidi_malloc (sizeof (int)
+ * FRIBIDI_BIDI_MAX_RESOLVED_LEVELS);
+ last_strong_stack[0] = base_dir;
for_run_list (pp, main_run_list)
{
register FriBidiCharType prev_type, this_type, next_type;
+ FriBidiRun *ppp_prev, *ppp_next;
+ int iso_level;
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
- prev_type = PREV_TYPE_OR_SOR (pp);
this_type = RL_TYPE (pp);
- next_type = NEXT_TYPE_OR_EOR (pp);
+ iso_level = RL_ISOLATE_LEVEL(pp);
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = RL_TYPE(ppp_prev);
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = RL_TYPE(ppp_next);
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_STRONG (prev_type))
- last_strong = prev_type;
+ last_strong_stack[iso_level] = prev_type;
/* W1. NSM
Examine each non-spacing mark (NSM) in the level run, and change the
@@ -543,31 +711,41 @@ fribidi_get_par_embedding_levels (
adjacent ETs are in one FriBidiRun. */
if (this_type == FRIBIDI_TYPE_NSM)
{
- if (RL_LEVEL (pp->prev) == RL_LEVEL (pp))
- pp = merge_with_prev (pp);
+ /* New rule in Unicode 6.3 */
+ if (FRIBIDI_IS_ISOLATE (RL_TYPE (pp->prev)))
+ RL_TYPE(pp) = FRIBIDI_TYPE_ON;
+
+ if (RL_LEVEL (ppp_prev) == RL_LEVEL (pp))
+ {
+ if (ppp_prev == pp->prev)
+ pp = merge_with_prev (pp);
+ }
else
- RL_TYPE (pp) = prev_type;
+ RL_TYPE (pp) = prev_type;
+
if (prev_type == next_type && RL_LEVEL (pp) == RL_LEVEL (pp->next))
{
- pp = merge_with_prev (pp->next);
+ if (ppp_next == pp->next)
+ pp = merge_with_prev (pp->next);
}
continue; /* As we know the next condition cannot be true. */
}
/* W2: European numbers. */
- if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_AL)
+ if (this_type == FRIBIDI_TYPE_EN && last_strong_stack[iso_level] == FRIBIDI_TYPE_AL)
{
RL_TYPE (pp) = FRIBIDI_TYPE_AN;
/* Resolving dependency of loops for rules W1 and W2, so we
can merge them in one loop. */
if (next_type == FRIBIDI_TYPE_NSM)
- RL_TYPE (pp->next) = FRIBIDI_TYPE_AN;
+ RL_TYPE (ppp_next) = FRIBIDI_TYPE_AN;
}
}
- last_strong = base_dir;
+ last_strong_stack[0] = base_dir;
+
/* Resolving dependency of loops for rules W4 and W5, W5 may
want to prevent W4 to take effect in the next turn, do this
through "w4". */
@@ -577,16 +755,31 @@ fribidi_get_par_embedding_levels (
so W4 and W5 in next turn can still do their works. */
prev_type_orig = FRIBIDI_TYPE_ON;
+ /* Each isolate level has its own memory of the last strong character */
for_run_list (pp, main_run_list)
{
register FriBidiCharType prev_type, this_type, next_type;
+ int iso_level;
+ FriBidiRun *ppp_prev, *ppp_next;
- prev_type = PREV_TYPE_OR_SOR (pp);
this_type = RL_TYPE (pp);
- next_type = NEXT_TYPE_OR_EOR (pp);
+ iso_level = RL_ISOLATE_LEVEL(pp);
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = RL_TYPE(ppp_prev);
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = RL_TYPE(ppp_next);
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_STRONG (prev_type))
- last_strong = prev_type;
+ last_strong_stack[iso_level] = prev_type;
/* W3: Change ALs to R. */
if (this_type == FRIBIDI_TYPE_AL)
@@ -628,7 +821,7 @@ fribidi_get_par_embedding_levels (
RL_TYPE (pp) = FRIBIDI_TYPE_ON;
/* W7. Change european numbers to L. */
- if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_LTR)
+ if (this_type == FRIBIDI_TYPE_EN && last_strong_stack[iso_level] == FRIBIDI_TYPE_LTR)
{
RL_TYPE (pp) = FRIBIDI_TYPE_LTR;
prev_type_orig = (RL_LEVEL (pp) == RL_LEVEL (pp->next) ?
@@ -637,6 +830,8 @@ fribidi_get_par_embedding_levels (
else
prev_type_orig = PREV_TYPE_OR_SOR (pp->next);
}
+
+ fribidi_free (last_strong_stack);
}
compact_neutrals (main_run_list);
@@ -654,16 +849,31 @@ fribidi_get_par_embedding_levels (
DBG ("resolving neutral types");
{
/* N1. and N2.
- For each neutral, resolve it. */
+ For each neutral, resolve it.
+
+ TBDov: This must be done one isolating run at a time!
+ */
for_run_list (pp, main_run_list)
{
FriBidiCharType prev_type, this_type, next_type;
+ FriBidiRun *ppp_prev, *ppp_next;
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
/* "European and Arabic numbers are treated as though they were R"
FRIBIDI_CHANGE_NUMBER_TO_RTL does this. */
this_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE (pp));
- prev_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (PREV_TYPE_OR_SOR (pp));
- next_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (NEXT_TYPE_OR_EOR (pp));
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE(ppp_prev));
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE(ppp_next));
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_NEUTRAL (this_type))
RL_TYPE (pp) = (prev_type == next_type) ?
@@ -764,7 +974,9 @@ fribidi_get_par_embedding_levels (
1. segment separators,
2. paragraph separators,
3. any sequence of whitespace characters preceding a segment
- separator or paragraph separator, and
+ separator or paragraph separator, and
+ 4. any sequence of whitespace characters and/or isolate formatting
+ characters at the end of the line.
... (to be continued in fribidi_reorder_line()). */
list = new_run_list ();
if UNLIKELY
@@ -784,8 +996,9 @@ fribidi_get_par_embedding_levels (
state = 1;
pos = j;
}
- else if (state && !FRIBIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS
- (char_type))
+ else if (state &&
+ !(FRIBIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS(char_type)
+ || FRIBIDI_IS_ISOLATE(char_type)))
{
state = 0;
p = new_run ();
diff --git a/lib/fribidi-common.h b/lib/fribidi-common.h
index ffcb27a..a63fb7a 100644
--- a/lib/fribidi-common.h
+++ b/lib/fribidi-common.h
@@ -122,6 +122,7 @@ struct _FriBidiRunStruct
FriBidiStrIndex pos, len;
FriBidiCharType type;
FriBidiLevel level;
+ FriBidiLevel isolate_level;
};
diff --git a/lib/fribidi-deprecated.c b/lib/fribidi-deprecated.c
index 6c300a2..6a9ed3c 100644
--- a/lib/fribidi-deprecated.c
+++ b/lib/fribidi-deprecated.c
@@ -153,6 +153,7 @@ fribidi_remove_bidi_marks (
for (i = 0; i < len; i++)
if (!FRIBIDI_IS_EXPLICIT_OR_BN (fribidi_get_bidi_type (str[i]))
+ && !FRIBIDI_IS_ISOLATE (fribidi_get_bidi_type (str[i]))
&& str[i] != FRIBIDI_CHAR_LRM && str[i] != FRIBIDI_CHAR_RLM)
{
str[j] = str[i];
diff --git a/lib/fribidi-run.c b/lib/fribidi-run.c
index 6a8632a..fe53511 100644
--- a/lib/fribidi-run.c
+++ b/lib/fribidi-run.c
@@ -52,7 +52,7 @@ new_run (
if LIKELY
(run)
{
- run->len = run->pos = run->level = 0;
+ run->len = run->pos = run->level = run->isolate_level = 0;
run->next = run->prev = NULL;
}
return run;
@@ -131,7 +131,8 @@ run_list_encode_bidi_types (
for (i = 0; i < len; i++)
{
register FriBidiCharType char_type = bidi_types[i];
- if (char_type != last->type)
+ if (char_type != last->type
+ || FRIBIDI_IS_ISOLATE(char_type))
{
run = new_run ();
if UNLIKELY
@@ -226,6 +227,7 @@ shadow_run_list (
p->next->prev = r;
r->next = p->next;
r->level = p->level;
+ r->isolate_level = p->isolate_level;
r->type = p->type;
r->len = p->pos + p->len - pos2;
r->pos = pos2;
diff --git a/lib/fribidi-unicode.h b/lib/fribidi-unicode.h
index 9174fd0..f68dcdd 100644
--- a/lib/fribidi-unicode.h
+++ b/lib/fribidi-unicode.h
@@ -61,10 +61,10 @@ extern const char *fribidi_unicode_version;
#define FRIBIDI_BIDI_NUM_TYPES 19
/* The maximum embedding level value assigned by explicit marks */
-#define FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL 61
-/* The maximum *number* of different resolved embedding levels: 0-62 */
-#define FRIBIDI_BIDI_MAX_RESOLVED_LEVELS 63
+#define FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL 125
+/* The maximum *number* of different resolved embedding levels: 0-126 */
+#define FRIBIDI_BIDI_MAX_RESOLVED_LEVELS 127
/* A few Unicode characters: */
@@ -76,6 +76,10 @@ extern const char *fribidi_unicode_version;
#define FRIBIDI_CHAR_PDF 0x202C
#define FRIBIDI_CHAR_LRO 0x202D
#define FRIBIDI_CHAR_RLO 0x202E
+#define FRIBIDI_CHAR_LRI 0x2066
+#define FRIBIDI_CHAR_RLI 0x2067
+#define FRIBIDI_CHAR_FSI 0x2068
+#define FRIBIDI_CHAR_PDI 0x2069
/* Line and Paragraph Separators */
#define FRIBIDI_CHAR_LS 0x2028
diff --git a/test/Makefile.am b/test/Makefile.am
index ea7ac77..693d79d 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -27,7 +27,9 @@ EXTRA_DIST = \
$(TESTS) \
$(TEST_DATAS)
-test_%.reference: test_%.input
+SUFFIXES: .reference .input
+
+.reference.input:
testcase="$@" && \
charset="`echo "$@" | sed 's/_[^_]*$$//;s/.*_//'`" && \
$(top_builddir)/bin/fribidi --test --charset "$$charset" "$<" > "$@" \
diff --git a/test/test_CapRTL_isolate.input b/test/test_CapRTL_isolate.input
new file mode 100644
index 0000000..3845185
--- /dev/null
+++ b/test/test_CapRTL_isolate.input
@@ -0,0 +1,8 @@
+_yBEAR_I:how is LION?
+_ibear_I:how is LION?
+_fBEAR_I:how is LION?
+_fbear_I:how is LION?
+_yBEAR_I:HOW IS lion?
+_ibear_I:HOW IS lion?
+_fBEAR_I:HOW IS lion?
+_fbear_I:HOW IS lion?
diff --git a/test/test_CapRTL_isolate.reference b/test/test_CapRTL_isolate.reference
new file mode 100644
index 0000000..c2fc6c3
--- /dev/null
+++ b/test/test_CapRTL_isolate.reference
@@ -0,0 +1,8 @@
+_yBEAR_I:how is LION? => RAEB:how is NOIL?
+_ibear_I:how is LION? => bear:how is NOIL?
+_fBEAR_I:how is LION? => RAEB:how is NOIL?
+_fbear_I:how is LION? => bear:how is NOIL?
+_yBEAR_I:HOW IS lion? => ?lion SI WOH:RAEB
+_ibear_I:HOW IS lion? => ?lion SI WOH:bear
+_fBEAR_I:HOW IS lion? => ?lion SI WOH:RAEB
+_fbear_I:HOW IS lion? => ?lion SI WOH:bear