diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2021-04-02 16:46:46 -0400 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2021-04-05 09:21:29 -0400 |
commit | b651dd67a582dca4837159b2fc67951797200d1c (patch) | |
tree | ea33f644e9dc3d6123b73b924fbef14e16ab1663 | |
parent | a085399fd2619c21c9f8e0fedcd542c65d059378 (diff) | |
download | couchdb-b651dd67a582dca4837159b2fc67951797200d1c.tar.gz |
Fix collation issue for older versions of libicu library (branch: fix-centos-7-icu-collation-issue)
Previously, mango tests with objects as keys were failing on CentOS 6 and
CentOS 7. The reason for the failures was that old libicu collation algorithms
didn't consider the `<<255,255,255,255>>` as the highest sortable string as
CouchDB intends it to be. Later versions of libicu, going back at least as far
as version 59, do treat it that way; see
https://www.unicode.org/reports/tr35/tr35-collation.html#tailored_noncharacter_weights.
However, as long as we support CentOS 7 we can fix the issue by explicitly
checking for the highest marker.
-rw-r--r-- | src/couch/priv/couch_ejson_compare/couch_ejson_compare.c | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c index ad3d0cdd6..49d6cd812 100644 --- a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c +++ b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c @@ -13,6 +13,7 @@ */ #include <stdio.h> +#include <string.h> #include <assert.h> #include "erl_nif.h" #include "unicode/ucol.h" @@ -65,6 +66,11 @@ static __inline int compare_lists(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM); static __inline int compare_props(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM); static __inline UCollator* get_collator(); +/* Should match the <<255,255,255,255>> in: + * - src/mango/src/mango_idx_view.hrl#L13 + * - src/couch_mrview/src/couch_mrview_util.erl#L40 */ +static const unsigned char max_utf8_marker[] = {255, 255, 255, 255}; + UCollator* get_collator() @@ -357,12 +363,46 @@ compare_props(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b) int +is_max_utf8_marker(ErlNifBinary bin) +{ + if (bin.size == sizeof(max_utf8_marker)) { + if(memcmp(bin.data, max_utf8_marker, sizeof(max_utf8_marker)) == 0) { + return 1; + } + return 0; + } + return 0; +} + + +int compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b) { UErrorCode status = U_ZERO_ERROR; UCharIterator iterA, iterB; int result; + /* libicu versions earlier than 59 (at least) don't consider the + * {255,255,255,255} to be the highest sortable string as CouchDB expects. + * While we are still shipping CentOS 7 packages with libicu 50, we should + * explicitly check for the marker, later on we can remove the max + * logic */ + + int a_is_max = is_max_utf8_marker(a); + int b_is_max = is_max_utf8_marker(b); + + if(a_is_max && b_is_max) { + return 0; + } + + if(a_is_max) { + return 1; + } + + if(b_is_max) { + return -1; + } + uiter_setUTF8(&iterA, (const char *) a.data, (uint32_t) a.size); uiter_setUTF8(&iterB, (const char *) b.data, (uint32_t) b.size); |