summary | refs | log | tree | commit | diff
path: root/lib/unicase
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2009-07-01 01:49:33 +0200
committer Bruno Haible <bruno@clisp.org> 2009-07-01 01:49:33 +0200
commit e43d583475dc364a0816df054471e6bc4bb75106 (patch)
tree b687316cbc6ae6a15a6fca63a1b597a3b1426e23 /lib/unicase
parent a64cbf784415e3dceb15b1a32ea73312d73a8845 (diff)
download gnulib-e43d583475dc364a0816df054471e6bc4bb75106.tar.gz
Reduce the number of uc_is_cased calls.
Diffstat (limited to 'lib/unicase')
-rw-r--r-- lib/unicase/context.h 20
-rw-r--r-- lib/unicase/empty-suffix-context.c 4
-rw-r--r-- lib/unicase/u-casemap.h 12
-rw-r--r-- lib/unicase/u-ct-totitle.h 12
-rw-r--r-- lib/unicase/u-suffix-context.h 26
5 files changed, 40 insertions, 34 deletions
diff --git a/lib/unicase/context.h b/lib/unicase/context.h
index c51a5bd436..6a35798d02 100644
--- a/lib/unicase/context.h
+++ b/lib/unicase/context.h
@@ -44,20 +44,22 @@
casing_suffix_context_t contains the following fields:
// For evaluating the FINAL_SIGMA condition:
- // Bit 0 is set if the suffix starts with a sequence consisting of a
- // case-ignorable sequence and then a cased letter.
- //
+ // First character that was not case-ignorable.
+ ucs4_t first_char_except_ignorable;
+
// For evaluating the MORE_ABOVE condition:
- // Bit 1 is set if the suffix contains a character of combining class
+ // Bit 0 is set if the suffix contains a character of combining class
// 230 (Above) with no character of combining class 0 or 230 (Above)
// before it.
//
// For evaluating the BEFORE_DOT condition:
- // Bit 2 is set if the suffix contains a COMBINING DOT ABOVE (U+0307)
+ // Bit 1 is set if the suffix contains a COMBINING DOT ABOVE (U+0307)
// with no character of combining class 0 or 230 (Above) before it.
//
uint32_t bits;
- */
-#define SCC_FINAL_SIGMA_MASK 1
-#define SCC_MORE_ABOVE_MASK 2
-#define SCC_BEFORE_DOT_MASK 4
+
+ Three bits would be sufficient to carry the context information, but
+ that would require to invoke uc_is_cased ahead of time, more often than
+ actually needed. */
+#define SCC_MORE_ABOVE_MASK 1
+#define SCC_BEFORE_DOT_MASK 2
diff --git a/lib/unicase/empty-suffix-context.c b/lib/unicase/empty-suffix-context.c
index 747e65bea9..4e00bffef7 100644
--- a/lib/unicase/empty-suffix-context.c
+++ b/lib/unicase/empty-suffix-context.c
@@ -22,6 +22,6 @@
const casing_suffix_context_t unicase_empty_suffix_context =
{
- 0 /* bits */,
- 0 /* unused_bits */
+ 0xFFFD /* first_char_except_ignorable */,
+ 0 /* bits */
};
diff --git a/lib/unicase/u-casemap.h b/lib/unicase/u-casemap.h
index d904eb432e..ea41444a1f 100644
--- a/lib/unicase/u-casemap.h
+++ b/lib/unicase/u-casemap.h
@@ -118,18 +118,20 @@ FUNC (const UNIT *s, size_t n,
{
ucs4_t uc2;
int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
- if (uc_is_cased (uc2))
+ /* Our uc_is_case_ignorable function is
+ known to return false for all cased
+ characters. So we can call
+ uc_is_case_ignorable first. */
+ if (!uc_is_case_ignorable (uc2))
{
- applies = false;
+ applies = ! uc_is_cased (uc2);
break;
}
- if (!uc_is_case_ignorable (uc2))
- break;
s2 += count2;
}
else
{
- applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0);
+ applies = ! uc_is_cased (suffix_context.first_char_except_ignorable);
break;
}
}
diff --git a/lib/unicase/u-ct-totitle.h b/lib/unicase/u-ct-totitle.h
index 181e569bc0..f0d4fd7736 100644
--- a/lib/unicase/u-ct-totitle.h
+++ b/lib/unicase/u-ct-totitle.h
@@ -194,18 +194,20 @@ FUNC (const UNIT *s, size_t n,
{
ucs4_t uc2;
int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
- if (uc_is_cased (uc2))
+ /* Our uc_is_case_ignorable function is
+ known to return false for all cased
+ characters. So we can call
+ uc_is_case_ignorable first. */
+ if (!uc_is_case_ignorable (uc2))
{
- applies = false;
+ applies = ! uc_is_cased (uc2);
break;
}
- if (!uc_is_case_ignorable (uc2))
- break;
s2 += count2;
}
else
{
- applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0);
+ applies = ! uc_is_cased (suffix_context.first_char_except_ignorable);
break;
}
}
diff --git a/lib/unicase/u-suffix-context.h b/lib/unicase/u-suffix-context.h
index 96c58dae93..a8880953bf 100644
--- a/lib/unicase/u-suffix-context.h
+++ b/lib/unicase/u-suffix-context.h
@@ -28,7 +28,7 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
/* Evaluate all three conditions in a single pass through the string S.
The three variables are -1 as long as the value of the condition has
not been determined. */
- int scc_FINAL_SIGMA = -1;
+ ucs4_t first_char_except_ignorable = (ucs4_t)(-1);
int scc_MORE_ABOVE = -1;
int scc_BEFORE_DOT = -1;
const UNIT *s_end = s + n;
@@ -38,12 +38,10 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
ucs4_t uc;
int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);
- if (scc_FINAL_SIGMA < 0)
+ if (first_char_except_ignorable == (ucs4_t)(-1))
{
- if (uc_is_cased (uc))
- scc_FINAL_SIGMA = SCC_FINAL_SIGMA_MASK;
- else if (!uc_is_case_ignorable (uc))
- scc_FINAL_SIGMA = 0;
+ if (!uc_is_case_ignorable (uc))
+ first_char_except_ignorable = uc;
}
if (scc_MORE_ABOVE < 0)
@@ -67,7 +65,8 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
}
}
- if ((scc_FINAL_SIGMA | scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0)
+ if (first_char_except_ignorable != (ucs4_t)(-1)
+ && (scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0)
/* All conditions have been determined. */
break;
@@ -76,13 +75,14 @@ FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
/* For those conditions that have not been determined so far, use the
value from the argument context. */
+ context.first_char_except_ignorable =
+ (first_char_except_ignorable != (ucs4_t)(-1)
+ ? first_char_except_ignorable
+ : a_context.first_char_except_ignorable);
context.bits =
- (scc_FINAL_SIGMA >= 0
- ? scc_FINAL_SIGMA
- : a_context.bits & SCC_FINAL_SIGMA_MASK)
- | (scc_MORE_ABOVE >= 0
- ? scc_MORE_ABOVE
- : a_context.bits & SCC_MORE_ABOVE_MASK)
+ (scc_MORE_ABOVE >= 0
+ ? scc_MORE_ABOVE
+ : a_context.bits & SCC_MORE_ABOVE_MASK)
| (scc_BEFORE_DOT >= 0
? scc_BEFORE_DOT
: a_context.bits & SCC_BEFORE_DOT_MASK);