summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2021-04-02 16:46:46 -0400
committerNick Vatamaniuc <vatamane@apache.org>2021-04-05 09:21:29 -0400
commitb651dd67a582dca4837159b2fc67951797200d1c (patch)
treeea33f644e9dc3d6123b73b924fbef14e16ab1663
parenta085399fd2619c21c9f8e0fedcd542c65d059378 (diff)
downloadcouchdb-fix-centos-7-icu-collation-issue.tar.gz
Fix collation issue for older versions of libicu libraryfix-centos-7-icu-collation-issue
Previously, mango tests with objects as keys were failing on CentOS 6 and CentOS 7. The failures occurred because the collation algorithms in old libicu versions do not treat `<<255,255,255,255>>` as the highest sortable string, as CouchDB intends it to be. Later versions of libicu, going back at least as far as version 59, do treat it that way; see https://www.unicode.org/reports/tr35/tr35-collation.html#tailored_noncharacter_weights for details. However, as long as we support CentOS 7, we can fix the issue by explicitly checking for the highest marker.
-rw-r--r--src/couch/priv/couch_ejson_compare/couch_ejson_compare.c40
1 files changed, 40 insertions, 0 deletions
diff --git a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
index ad3d0cdd6..49d6cd812 100644
--- a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
+++ b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
@@ -13,6 +13,7 @@
*/
#include <stdio.h>
+#include <string.h>
#include <assert.h>
#include "erl_nif.h"
#include "unicode/ucol.h"
@@ -65,6 +66,11 @@ static __inline int compare_lists(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM);
static __inline int compare_props(int, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM);
static __inline UCollator* get_collator();
+/* Should match the <<255,255,255,255>> in:
+ * - src/mango/src/mango_idx_view.hrl#L13
+ * - src/couch_mrview/src/couch_mrview_util.erl#L40 */
+static const unsigned char max_utf8_marker[] = {255, 255, 255, 255};
+
UCollator*
get_collator()
@@ -357,12 +363,46 @@ compare_props(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
int
+is_max_utf8_marker(ErlNifBinary bin)
+{
+ if (bin.size == sizeof(max_utf8_marker)) {
+ if(memcmp(bin.data, max_utf8_marker, sizeof(max_utf8_marker)) == 0) {
+ return 1;
+ }
+ return 0;
+ }
+ return 0;
+}
+
+
+int
compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b)
{
UErrorCode status = U_ZERO_ERROR;
UCharIterator iterA, iterB;
int result;
+ /* libicu versions earlier than 59 (at least) don't consider the
+ * {255,255,255,255} to be the highest sortable string as CouchDB expects.
+ * While we are still shipping CentOS 7 packages with libicu 50, we should
+ * explicitly check for the marker, later on we can remove the max
+ * logic */
+
+ int a_is_max = is_max_utf8_marker(a);
+ int b_is_max = is_max_utf8_marker(b);
+
+ if(a_is_max && b_is_max) {
+ return 0;
+ }
+
+ if(a_is_max) {
+ return 1;
+ }
+
+ if(b_is_max) {
+ return -1;
+ }
+
uiter_setUTF8(&iterA, (const char *) a.data, (uint32_t) a.size);
uiter_setUTF8(&iterB, (const char *) b.data, (uint32_t) b.size);