summary | refs | log | tree | commit | diff
path: root/lib/unicore/ArabicShaping.txt
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unicore/ArabicShaping.txt')
-rw-r--r-- lib/unicore/ArabicShaping.txt | 105
1 file changed, 82 insertions, 23 deletions
diff --git a/lib/unicore/ArabicShaping.txt b/lib/unicore/ArabicShaping.txt
index 84c308ac8a..ec2e024101 100644
--- a/lib/unicore/ArabicShaping.txt
+++ b/lib/unicore/ArabicShaping.txt
@@ -1,8 +1,12 @@
-# ArabicShaping-4.0.1.txt
+# ArabicShaping-4.1.0.txt
+# Date: 2005-03-17, 15:21:00 PST [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
+# Copyright (c) 1991-2005 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
# This file defines the shaping classes for Arabic and Syriac
# positional shaping, repeating in machine readable form the
# information printed in Tables 8-3, 8-7, 8-8, 8-11, 8-12, and
@@ -17,21 +21,42 @@
# form, of an Arabic or Syriac character.
# Field 1: gives a short schematic name for that character,
# abbreviated from the normative Unicode character name.
-# Field 2: defines the joining type
-# R right-joining,
-# L left-joining,
-# D dual-joining,
-# C join-causing
-# U non-joining
-# T transparent
+# Field 2: defines the joining type (property name: Joining_Type)
+# R Right_Joining
+# L Left_Joining
+# D Dual_Joining
+# C Join_Causing
+# U Non_Joining
+# T Transparent
# See the Arabic block description for more information on these types.
-# Field 3: defines the joining group.
+# Field 3: defines the joining group (property name: Joining_Group)
+#
+# The values of the joining group are based schematically on character
+# names. Where a schematic character name consists of two or more parts separated
+# by spaces, the formal Joining_Group property value, as specified in
+# PropertyValueAliases.txt, consists of the same name parts joined by
+# underscores. Hence, the entry:
+#
+# 0629; TEH MARBUTA; R; TEH MARBUTA
+#
+# corresponds to [Joining_Group = Teh_Marbuta].
#
+# Note: For historical reasons, the property value [Joining_Group = Hamza_On_Heh_Goal]
+# is anachronistically named. It used to apply to both of the following characters
+# in earlier versions of the standard:
+#
+# U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
+# U+06C3 ARABIC LETTER TEH MARBUTA GOAL
+#
+# However, it currently applies only to U+06C3, and *not* to U+06C2.
+# To avoid destabilizing existing Joining_Group property aliases, the
+# value Hamza_On_Heh_Goal has not been changed, despite the fact that it
+# no longer applies to Hamza On Heh Goal, but only to Teh Marbuta Goal.
#
# Note: Code points that are not explicitly listed in this file are
-# either of type T or U:
+# either of joining type T or U:
#
-# - Those that not explicitly listed that are of General Category Mn or Cf
+# - Those that are not explicitly listed and that are of General Category Mn, Me, or Cf
# have joining type T.
# - All others not explicitly listed have type U.
#
@@ -46,11 +71,12 @@
# Arabic characters
-0600; ARABIC NUMBER SIGN; U; <no shaping>
-0601; ARABIC SIGN SANAH; U; <no shaping>
-0602; ARABIC FOOTNOTE MARKER; U; <no shaping>
-0603; ARABIC SIGN SAFHA; U; <no shaping>
-0621; HAMZA; U; <no shaping>
+0600; ARABIC NUMBER SIGN; U; No_Joining_Group
+0601; ARABIC SIGN SANAH; U; No_Joining_Group
+0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group
+0603; ARABIC SIGN SAFHA; U; No_Joining_Group
+060B; AFGHANI SIGN; U; No_Joining_Group
+0621; HAMZA; U; No_Joining_Group
0622; MADDA ON ALEF; R; ALEF
0623; HAMZA ON ALEF; R; ALEF
0624; HAMZA ON WAW; R; WAW
@@ -76,7 +102,7 @@
0638; ZAH; D; TAH
0639; AIN; D; AIN
063A; GHAIN; D; AIN
-0640; TATWEEL; C; <no shaping>
+0640; TATWEEL; C; No_Joining_Group
0641; FEH; D; FEH
0642; QAF; D; QAF
0643; KAF; D; KAF
@@ -92,7 +118,7 @@
0671; HAMZAT WASL ON ALEF; R; ALEF
0672; WAVY HAMZA ON ALEF; R; ALEF
0673; WAVY HAMZA UNDER ALEF; R; ALEF
-0674; HIGH HAMZA; U; <no shaping>
+0674; HIGH HAMZA; U; No_Joining_Group
0675; HIGH HAMZA ALEF; R; ALEF
0676; HIGH HAMZA WAW; R; WAW
0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
@@ -145,7 +171,7 @@
06A6; FEH WITH 4 DOTS ABOVE; D; FEH
06A7; QAF WITH DOT ABOVE; D; QAF
06A8; QAF WITH 3 DOTS ABOVE; D; QAF
-06A9; OPEN KAF; D; GAF
+06A9; KEHEH; D; GAF
06AA; SWASH KAF; D; SWASH KAF
06AB; KAF WITH RING; D; GAF
06AC; KAF WITH DOT ABOVE; D; KAF
@@ -170,7 +196,7 @@
06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
06C0; HAMZA ON HEH; R; TEH MARBUTA
06C1; HEH GOAL; D; HEH GOAL
-06C2; HAMZA ON HEH GOAL; R; HAMZA ON HEH GOAL
+06C2; HAMZA ON HEH GOAL; D; HEH GOAL
06C3; TEH MARBUTA GOAL; R; HAMZA ON HEH GOAL
06C4; WAW WITH RING; R; WAW
06C5; WAW WITH BAR; R; WAW
@@ -189,7 +215,7 @@
06D2; YEH BARREE; R; YEH BARREE
06D3; HAMZA ON YEH BARREE; R; YEH BARREE
06D5; AE; R; TEH MARBUTA
-06DD; ARABIC END OF AYAH; U; <no shaping>
+06DD; ARABIC END OF AYAH; U; No_Joining_Group
06EE; DAL WITH INVERTED V; R; DAL
06EF; REH WITH INVERTED V; R; REH
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
@@ -234,7 +260,40 @@
074E; SOGDIAN KHAPH; D; KHAPH
074F; SOGDIAN FE; D; FE
+# Arabic supplement characters
+
+0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH
+0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH
+0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH
+0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH
+0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH
+0755; BEH WITH INVERTED SMALL V BELOW; D; BEH
+0756; BEH WITH SMALL V; D; BEH
+0757; HAH WITH 2 DOTS ABOVE; D; HAH
+0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH
+0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL
+075A; DAL WITH INVERTED SMALL V BELOW; R; DAL
+075B; REH WITH STROKE; R; REH
+075C; SEEN WITH 4 DOTS ABOVE; D; SEEN
+075D; AIN WITH 2 DOTS ABOVE; D; AIN
+075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN
+075F; AIN WITH 2 DOTS VERTICALLY ABOVE; D; AIN
+0760; FEH WITH 2 DOTS BELOW; D; FEH
+0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH
+0762; KEHEH WITH DOT ABOVE; D; GAF
+0763; KEHEH WITH 3 DOTS ABOVE; D; GAF
+0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF
+0765; MEEM WITH DOT ABOVE; D; MEEM
+0766; MEEM WITH DOT BELOW; D; MEEM
+0767; NOON WITH 2 DOTS BELOW; D; NOON
+0768; NOON WITH SMALL TAH; D; NOON
+0769; NOON WITH SMALL V; D; NOON
+076A; LAM WITH BAR; D; LAM
+076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH
+076C; REH WITH HAMZA ABOVE; R; REH
+076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN
+
# Other
-200D; ZERO WIDTH JOINER; C; <no shaping>
-200C; ZERO WIDTH NON-JOINER; U; <no shaping>
+200D; ZERO WIDTH JOINER; C; No_Joining_Group
+200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group