diff options
author | Bruno Haible <bruno@clisp.org> | 2009-07-01 01:49:33 +0200 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-07-01 01:49:33 +0200 |
commit | e43d583475dc364a0816df054471e6bc4bb75106 (patch) | |
tree | b687316cbc6ae6a15a6fca63a1b597a3b1426e23 /lib/unicase | |
parent | a64cbf784415e3dceb15b1a32ea73312d73a8845 (diff) | |
download | gnulib-e43d583475dc364a0816df054471e6bc4bb75106.tar.gz |
Reduce the number of uc_is_cased calls.
Diffstat (limited to 'lib/unicase')
-rw-r--r-- | lib/unicase/context.h | 20 |
-rw-r--r-- | lib/unicase/empty-suffix-context.c | 4 |
-rw-r--r-- | lib/unicase/u-casemap.h | 12 |
-rw-r--r-- | lib/unicase/u-ct-totitle.h | 12 |
-rw-r--r-- | lib/unicase/u-suffix-context.h | 26 |
5 files changed, 40 insertions, 34 deletions
diff --git a/lib/unicase/context.h b/lib/unicase/context.h index c51a5bd436..6a35798d02 100644 --- a/lib/unicase/context.h +++ b/lib/unicase/context.h @@ -44,20 +44,22 @@ casing_suffix_context_t contains the following fields: // For evaluating the FINAL_SIGMA condition: - // Bit 0 is set if the suffix starts with a sequence consisting of a - // case-ignorable sequence and then a cased letter. - // + // First character that was not case-ignorable. + ucs4_t first_char_except_ignorable; + // For evaluating the MORE_ABOVE condition: - // Bit 1 is set if the suffix contains a character of combining class + // Bit 0 is set if the suffix contains a character of combining class // 230 (Above) with no character of combining class 0 or 230 (Above) // before it. // // For evaluating the BEFORE_DOT condition: - // Bit 2 is set if the suffix contains a COMBINING DOT ABOVE (U+0307) + // Bit 1 is set if the suffix contains a COMBINING DOT ABOVE (U+0307) // with no character of combining class 0 or 230 (Above) before it. // uint32_t bits; - */ -#define SCC_FINAL_SIGMA_MASK 1 -#define SCC_MORE_ABOVE_MASK 2 -#define SCC_BEFORE_DOT_MASK 4 + + Three bits would be sufficient to carry the context information, but + that would require to invoke uc_is_cased ahead of time, more often than + actually needed. 
*/ +#define SCC_MORE_ABOVE_MASK 1 +#define SCC_BEFORE_DOT_MASK 2 diff --git a/lib/unicase/empty-suffix-context.c b/lib/unicase/empty-suffix-context.c index 747e65bea9..4e00bffef7 100644 --- a/lib/unicase/empty-suffix-context.c +++ b/lib/unicase/empty-suffix-context.c @@ -22,6 +22,6 @@ const casing_suffix_context_t unicase_empty_suffix_context = { - 0 /* bits */, - 0 /* unused_bits */ + 0xFFFD /* first_char_except_ignorable */, + 0 /* bits */ }; diff --git a/lib/unicase/u-casemap.h b/lib/unicase/u-casemap.h index d904eb432e..ea41444a1f 100644 --- a/lib/unicase/u-casemap.h +++ b/lib/unicase/u-casemap.h @@ -118,18 +118,20 @@ FUNC (const UNIT *s, size_t n, { ucs4_t uc2; int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); - if (uc_is_cased (uc2)) + /* Our uc_is_case_ignorable function is + known to return false for all cased + characters. So we can call + uc_is_case_ignorable first. */ + if (!uc_is_case_ignorable (uc2)) { - applies = false; + applies = ! uc_is_cased (uc2); break; } - if (!uc_is_case_ignorable (uc2)) - break; s2 += count2; } else { - applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0); + applies = ! uc_is_cased (suffix_context.first_char_except_ignorable); break; } } diff --git a/lib/unicase/u-ct-totitle.h b/lib/unicase/u-ct-totitle.h index 181e569bc0..f0d4fd7736 100644 --- a/lib/unicase/u-ct-totitle.h +++ b/lib/unicase/u-ct-totitle.h @@ -194,18 +194,20 @@ FUNC (const UNIT *s, size_t n, { ucs4_t uc2; int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); - if (uc_is_cased (uc2)) + /* Our uc_is_case_ignorable function is + known to return false for all cased + characters. So we can call + uc_is_case_ignorable first. */ + if (!uc_is_case_ignorable (uc2)) { - applies = false; + applies = ! uc_is_cased (uc2); break; } - if (!uc_is_case_ignorable (uc2)) - break; s2 += count2; } else { - applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0); + applies = ! 
uc_is_cased (suffix_context.first_char_except_ignorable); break; } } diff --git a/lib/unicase/u-suffix-context.h b/lib/unicase/u-suffix-context.h index 96c58dae93..a8880953bf 100644 --- a/lib/unicase/u-suffix-context.h +++ b/lib/unicase/u-suffix-context.h @@ -28,7 +28,7 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context) /* Evaluate all three conditions in a single pass through the string S. The three variables are -1 as long as the value of the condition has not been determined. */ - int scc_FINAL_SIGMA = -1; + ucs4_t first_char_except_ignorable = (ucs4_t)(-1); int scc_MORE_ABOVE = -1; int scc_BEFORE_DOT = -1; const UNIT *s_end = s + n; @@ -38,12 +38,10 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context) ucs4_t uc; int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s); - if (scc_FINAL_SIGMA < 0) + if (first_char_except_ignorable == (ucs4_t)(-1)) { - if (uc_is_cased (uc)) - scc_FINAL_SIGMA = SCC_FINAL_SIGMA_MASK; - else if (!uc_is_case_ignorable (uc)) - scc_FINAL_SIGMA = 0; + if (!uc_is_case_ignorable (uc)) + first_char_except_ignorable = uc; } if (scc_MORE_ABOVE < 0) @@ -67,7 +65,8 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context) } } - if ((scc_FINAL_SIGMA | scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0) + if (first_char_except_ignorable != (ucs4_t)(-1) + && (scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0) /* All conditions have been determined. */ break; @@ -76,13 +75,14 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context) /* For those conditions that have not been determined so far, use the value from the argument context. */ + context.first_char_except_ignorable = + (first_char_except_ignorable != (ucs4_t)(-1) + ? first_char_except_ignorable + : a_context.first_char_except_ignorable); context.bits = - (scc_FINAL_SIGMA >= 0 - ? scc_FINAL_SIGMA - : a_context.bits & SCC_FINAL_SIGMA_MASK) - | (scc_MORE_ABOVE >= 0 - ? scc_MORE_ABOVE - : a_context.bits & SCC_MORE_ABOVE_MASK) + (scc_MORE_ABOVE >= 0 + ? 
scc_MORE_ABOVE + : a_context.bits & SCC_MORE_ABOVE_MASK) | (scc_BEFORE_DOT >= 0 ? scc_BEFORE_DOT : a_context.bits & SCC_BEFORE_DOT_MASK); |