summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Gran <spk121@yahoo.com> 2009-09-02 06:19:21 -0700
committer: Michael Gran <spk121@yahoo.com> 2009-09-02 06:28:42 -0700
commit: f4cdfe6140923b5cf11d7af9a2098514d98a61a5 (patch)
tree: 2330d47c778bc0c1708046251c8d2f2021544bcd
parent: bde543e88b070b5fcd2b08ba8129ec1125ab0861 (diff)
download: guile-f4cdfe6140923b5cf11d7af9a2098514d98a61a5.tar.gz
The charset complement operator should not include surrogates
* libguile/srfi-14.c (charsets_complement): skip over surrogates when making a charset complement
-rw-r--r-- libguile/srfi-14.c 46
1 files changed, 34 insertions, 12 deletions
diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c
index d747973d0..4f0010a62 100644
--- a/libguile/srfi-14.c
+++ b/libguile/srfi-14.c
@@ -471,22 +471,35 @@ charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
return;
}
+#define SCM_ADD_RANGE(low, high) \
+ do { \
+ p->ranges[idx].lo = (low); \
+ p->ranges[idx++].hi = (high); \
+ } while (0)
+#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
+ do { \
+ p->ranges[idx].lo = (low); \
+ p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
+ p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
+ p->ranges[idx++].hi = (high); \
+ } while (0)
+
+
+
/* Make P the compelement of Q. */
static void
charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
{
int k, idx;
+ idx = 0;
if (q->len == 0)
{
/* Fill with all valid codepoints. */
p->len = 2;
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
"character-set");
- p->ranges[0].lo = 0;
- p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
- p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
- p->ranges[1].hi = SCM_CODEPOINT_MAX;
+ SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
return;
}
@@ -494,33 +507,42 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
"character-set");
+ /* Count the number of ranges needed for the output. */
p->len = 0;
if (q->ranges[0].lo > 0)
p->len++;
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
p->len++;
- p->len += q->len - 1;
+ p->len += q->len;
p->ranges =
(scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
"character-set");
- idx = 0;
if (q->ranges[0].lo > 0)
{
- p->ranges[idx].lo = 0;
- p->ranges[idx++].hi = q->ranges[0].lo - 1;
+ if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
+ SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
+ else
+ SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
}
for (k = 1; k < q->len; k++)
{
- p->ranges[idx].lo = q->ranges[k - 1].hi + 1;
- p->ranges[idx++].hi = q->ranges[k].lo - 1;
+ if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
+ && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
+ SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
+ else
+ SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
}
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
{
- p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1;
- p->ranges[idx].hi = SCM_CODEPOINT_MAX;
+ if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
+ SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
+ else
+ SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
}
return;
}
+#undef SCM_ADD_RANGE
+#undef SCM_ADD_RANGE_SKIP_SURROGATES
/* Replace A with elements only found in one of A or B. */
static void