summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robert Newson <rnewson@apache.org> 2023-04-12 14:39:59 +0100
committer Robert Newson <rnewson@apache.org> 2023-04-22 11:20:02 +0100
commit 757c87beaafe8c3df09c489628036830123e28a2 (patch)
tree b1e1282775de89eca8378998da55a0fe94e98b63
parent 4023290b630d103c1d15e1abac996315cb47d26d (diff)
download couchdb-757c87beaafe8c3df09c489628036830123e28a2.tar.gz
send utf-8 ByteRefs as strings
-rw-r--r-- nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java 3
-rw-r--r-- nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java 18
-rw-r--r-- nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java 14
-rw-r--r-- src/nouveau/src/nouveau_bookmark.erl 4
4 files changed, 32 insertions, 7 deletions
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java
index 3635d660a..82fadfe86 100644
--- a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java
@@ -14,6 +14,7 @@
package org.apache.couchdb.nouveau.core.ser;
import java.io.IOException;
+import java.nio.charset.Charset;
import org.apache.couchdb.nouveau.api.After;
@@ -43,7 +44,7 @@ public class AfterDeserializer extends StdDeserializer<After> {
final JsonNode field = fieldNode.get(i);
switch (field.get("@type").asText()) {
case "string":
- fields[i] = field.get("value").asText();
+ fields[i] = field.get("value").asText().getBytes(Charset.forName("UTF-8"));
break;
case "bytes":
fields[i] = field.get("value").binaryValue();
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java
index 8f06f3819..3746f4cc3 100644
--- a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java
@@ -14,6 +14,11 @@
package org.apache.couchdb.nouveau.core.ser;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
import org.apache.couchdb.nouveau.api.After;
@@ -34,6 +39,7 @@ public class AfterSerializer extends StdSerializer<After> {
@Override
public void serialize(final After after, final JsonGenerator gen, final SerializerProvider provider)
throws IOException {
+ final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
// We ignore fieldDoc.score as it will be in the fields array if we're sorting for relevance.
// We ignore fieldDoc.doc as _id is always the last field and is unique.
gen.writeStartArray();
@@ -45,9 +51,15 @@ public class AfterSerializer extends StdSerializer<After> {
gen.writeStringField("value", (String) o);
} else if (o instanceof byte[]) {
final byte[] bytes = (byte[]) o;
- gen.writeStringField("@type", "bytes");
- gen.writeFieldName("value");
- gen.writeBinary(bytes);
+ try {
+ final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap(bytes));
+ gen.writeStringField("@type", "string");
+ gen.writeStringField("value", buf.toString());
+ } catch (final CharacterCodingException e) {
+ gen.writeStringField("@type", "bytes");
+ gen.writeFieldName("value");
+ gen.writeBinary(bytes);
+ }
} else if (o instanceof Float) {
gen.writeStringField("@type", "float");
gen.writeNumberField("value", (Float) o);
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java
index 481af32a3..eed4b0cfa 100644
--- a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java
@@ -15,6 +15,11 @@ package org.apache.couchdb.nouveau.lucene9;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -365,6 +370,8 @@ public class Lucene9Index extends Index {
result.add(new org.apache.lucene.document.StringField("_partition", request.getPartition(), Store.NO));
}
+ final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
+
for (Field field : request.getFields()) {
// Underscore-prefix is reserved.
if (field.getName().startsWith("_")) {
@@ -396,7 +403,12 @@ public class Lucene9Index extends Index {
} else if (val instanceof Number) {
result.add(new org.apache.lucene.document.StoredField(f.getName(), ((Number)val).doubleValue()));
} else if (val instanceof byte[]) {
- result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val));
+ try {
+ final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap((byte[])val));
+ result.add(new org.apache.lucene.document.StoredField(f.getName(), buf.toString()));
+ } catch (final CharacterCodingException e) {
+ result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val));
+ }
} else {
throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST);
}
diff --git a/src/nouveau/src/nouveau_bookmark.erl b/src/nouveau/src/nouveau_bookmark.erl
index 70b42beeb..0721d5a83 100644
--- a/src/nouveau/src/nouveau_bookmark.erl
+++ b/src/nouveau/src/nouveau_bookmark.erl
@@ -42,8 +42,8 @@ range_of(DbName, DocId) when is_binary(DbName), is_binary(DocId) ->
[#shard{range = Range} | _] = mem3_shards:for_docid(DbName, DocId),
Range;
range_of(DbName, Order) when is_binary(DbName), is_list(Order) ->
- #{<<"@type">> := <<"bytes">>, <<"value">> := EncodedDocId} = lists:last(Order),
- range_of(DbName, base64:decode(EncodedDocId)).
+ #{<<"@type">> := <<"string">>, <<"value">> := DocId} = lists:last(Order),
+ range_of(DbName, DocId).
unpack(_DbName, Empty) when Empty == undefined; Empty == nil; Empty == null ->
new();