summaryrefslogtreecommitdiff
path: root/chromium/third_party/icu/android/brkitr.patch
blob: 0fdff4fc573f54317452b3fa0933ce01709b124e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk
index 91754f1..ccac4d1 100644
--- a/source/data/brkitr/brklocal.mk
+++ b/source/data/brkitr/brklocal.mk
@@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS)
 
 
 # List of compact trie dictionary files (ctd).
-BRK_CTD_SOURCE = thaidict.txt cjdict.txt
+BRK_CTD_SOURCE = thaidict.txt
 
 
 # List of break iterator files (brk).
-# Chrome change: remove word_ja.txt and line_he.txt
-BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
+# Chrome change: remove line_he.txt
+BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
 
 
 # Ordinary resources
-# Chrome change: remove ja.txt and he.txt
+# Chrome change: remove he.txt
 BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\
- fi.txt   th.txt
+ fi.txt ja.txt th.txt
diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt
index fb83ac3..5d839bd 100644
--- a/source/data/brkitr/root.txt
+++ b/source/data/brkitr/root.txt
@@ -17,8 +17,5 @@ root{
     }
     dictionaries{
         Thai:process(dependency){"thaidict.ctd"}
-        Hani:process(dependency){"cjdict.ctd"}
-        Hira:process(dependency){"cjdict.ctd"}
-        Kata:process(dependency){"cjdict.ctd"}
     }
 }
diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt
index 0b49377..a0e1ceb 100644
--- a/source/data/brkitr/word.txt
+++ b/source/data/brkitr/word.txt
@@ -60,11 +60,10 @@ $Control        = [\p{Grapheme_Cluster_Break = Control}];
 $HangulSyllable = [\uac00-\ud7a3];
 $ComplexContext = [:LineBreak = Complex_Context:];
 $KanaKanji      = [$Han $Hiragana $Katakana];
-$dictionaryCJK  = [$KanaKanji $HangulSyllable];
-$dictionary     = [$ComplexContext $dictionaryCJK];
+$dictionary   = [:LineBreak = Complex_Context:];
 
-# leave CJK scripts out of ALetterPlus
-$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
+$ALetterPlus  = [$ALetter [$dictionary-$Extend-$Control]];   # Note:  default ALetter does not
+                                                             #  include the dictionary characters.
 
 
 #
@@ -99,8 +98,7 @@ $CR $LF;
 #          begins with a group of Format chars, or with a "word" consisting of a single
 #          char that is not in any of the listed word break categories followed by
 #          format char(s).
- #          format char(s), or is not a CJK dictionary character.
-[^$CR $LF $Newline $dictionaryCJK]? ($Extend |  $Format)+;
+[^$CR $LF $Newline]? ($Extend |  $Format)+;
 
 $NumericEx {100};
 $ALetterEx {200};
@@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx  {200};    #  (13b)
 $ExtendNumLetEx $NumericEx  {100};    #  (13b)
 $ExtendNumLetEx $KatakanaEx {400};    #  (13b)
 
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found
 
 
 ## -------------------------------------------------
@@ -179,7 +174,7 @@ $BackHebrewLetEx   = ($Format | $Extend)* $HebrewLet;
 $LF $CR;
 
 # rule 4
-($Format | $Extend)*  [^$CR $LF $Newline $dictionaryCJK]?;
+($Format | $Extend)*  [^$CR $LF $Newline]?;
 
 # rule 5
 
@@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx;
 $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
 ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 
 
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
 ## -------------------------------------------------
 
 !!safe_reverse;