From 642757fb4d3d9705c6590df1f3dbb4beca948503 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 2 Apr 2021 16:46:46 -0400 Subject: Fix collation issue for older versions of libicu library Previously, mango tests with objects as keys were failing on CentOS 6 and CentOS 7. The reason for the failures was that old libicu collation algorithms didn't consider the `<<255,255,255,255>>` as the highest sortable string as CouchDB intends it to be. Later versions of libicu, at least as old as 59, started to do that https://www.unicode.org/reports/tr35/tr35-collation.html#tailored_noncharacter_weights. However, as long as we support CentOS 7 we can fix the issue by explicitly checking for the highest marker. --- .../priv/couch_ejson_compare/couch_ejson_compare.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c index ad3d0cdd6..ca4785314 100644 --- a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c +++ b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c @@ -13,6 +13,7 @@ */ #include +#include #include #include "erl_nif.h" #include "unicode/ucol.h" @@ -65,6 +66,11 @@ static __inline int compare_lists(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM); static __inline int compare_props(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM); static __inline UCollator* get_collator(); +/* Should match the <<255,255,255,255>> in: + * - src/mango/src/mango_idx_view.hrl#L13 + * - src/couch_mrview/src/couch_mrview_util.erl#L40 */ +static const unsigned char max_utf8_marker[] = {255, 255, 255, 255}; + UCollator* get_collator() @@ -356,6 +362,19 @@ compare_props(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b) } +int +is_max_utf8_marker(ErlNifBinary bin) +{ + if (bin.size == sizeof(max_utf8_marker)) { + if(memcmp(bin.data, max_utf8_marker, sizeof(max_utf8_marker)) == 0) { + return 1; + } + return 0; + } + return 0; +} + + int 
compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b) { @@ -363,6 +382,27 @@ compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b) UCharIterator iterA, iterB; int result; + /* libicu versions earlier than 59 (at least) don't consider the + * {255,255,255,255} to be the highest sortable string as CouchDB expects. + * While we are still shipping CentOS 7 packages with libicu 50, we should + * explicitly check for the marker, later on we can remove the max + * logic */ + + int a_is_max = is_max_utf8_marker(a); + int b_is_max = is_max_utf8_marker(b); + + if(a_is_max && b_is_max) { + return 0; + } + + if(a_is_max) { + return 1; + } + + if(b_is_max) { + return -1; + } + uiter_setUTF8(&iterA, (const char *) a.data, (uint32_t) a.size); uiter_setUTF8(&iterB, (const char *) b.data, (uint32_t) b.size); -- cgit v1.2.1