diff options
author | Robert Newson <rnewson@apache.org> | 2023-04-12 14:39:59 +0100 |
---|---|---|
committer | Robert Newson <rnewson@apache.org> | 2023-04-22 11:20:02 +0100 |
commit | 757c87beaafe8c3df09c489628036830123e28a2 (patch) | |
tree | b1e1282775de89eca8378998da55a0fe94e98b63 | |
parent | 4023290b630d103c1d15e1abac996315cb47d26d (diff) | |
download | couchdb-757c87beaafe8c3df09c489628036830123e28a2.tar.gz |
send utf-8 ByteRefs as strings
4 files changed, 32 insertions, 7 deletions
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java index 3635d660a..82fadfe86 100644 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterDeserializer.java @@ -14,6 +14,7 @@ package org.apache.couchdb.nouveau.core.ser; import java.io.IOException; +import java.nio.charset.Charset; import org.apache.couchdb.nouveau.api.After; @@ -43,7 +44,7 @@ public class AfterDeserializer extends StdDeserializer<After> { final JsonNode field = fieldNode.get(i); switch (field.get("@type").asText()) { case "string": - fields[i] = field.get("value").asText(); + fields[i] = field.get("value").asText().getBytes(Charset.forName("UTF-8")); break; case "bytes": fields[i] = field.get("value").binaryValue(); diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java index 8f06f3819..3746f4cc3 100644 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/AfterSerializer.java @@ -14,6 +14,11 @@ package org.apache.couchdb.nouveau.core.ser; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; import org.apache.couchdb.nouveau.api.After; @@ -34,6 +39,7 @@ public class AfterSerializer extends StdSerializer<After> { @Override public void serialize(final After after, final JsonGenerator gen, final SerializerProvider provider) throws IOException { + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); // We ignore fieldDoc.score as it will be in the fields array if we're sorting for relevance. // We ignore fieldDoc.doc as _id is always the last field and is unique. gen.writeStartArray(); @@ -45,9 +51,15 @@ public class AfterSerializer extends StdSerializer<After> { gen.writeStringField("value", (String) o); } else if (o instanceof byte[]) { final byte[] bytes = (byte[]) o; - gen.writeStringField("@type", "bytes"); - gen.writeFieldName("value"); - gen.writeBinary(bytes); + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap(bytes)); + gen.writeStringField("@type", "string"); + gen.writeStringField("value", buf.toString()); + } catch (final CharacterCodingException e) { + gen.writeStringField("@type", "bytes"); + gen.writeFieldName("value"); + gen.writeBinary(bytes); + } } else if (o instanceof Float) { gen.writeStringField("@type", "float"); gen.writeNumberField("value", (Float) o); diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java index 481af32a3..eed4b0cfa 100644 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java @@ -15,6 +15,11 @@ package org.apache.couchdb.nouveau.lucene9; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Arrays; @@ -365,6 +370,8 @@ public class Lucene9Index extends Index { result.add(new org.apache.lucene.document.StringField("_partition", request.getPartition(), Store.NO)); } + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); + for (Field field : request.getFields()) { // Underscore-prefix is reserved. if (field.getName().startsWith("_")) { @@ -396,7 +403,12 @@ public class Lucene9Index extends Index { } else if (val instanceof Number) { result.add(new org.apache.lucene.document.StoredField(f.getName(), ((Number)val).doubleValue())); } else if (val instanceof byte[]) { - result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val)); + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap((byte[])val)); + result.add(new org.apache.lucene.document.StoredField(f.getName(), buf.toString())); + } catch (final CharacterCodingException e) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val)); + } } else { throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST); } diff --git a/src/nouveau/src/nouveau_bookmark.erl b/src/nouveau/src/nouveau_bookmark.erl index 70b42beeb..0721d5a83 100644 --- a/src/nouveau/src/nouveau_bookmark.erl +++ b/src/nouveau/src/nouveau_bookmark.erl @@ -42,8 +42,8 @@ range_of(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> [#shard{range = Range} | _] = mem3_shards:for_docid(DbName, DocId), Range; range_of(DbName, Order) when is_binary(DbName), is_list(Order) -> - #{<<"@type">> := <<"bytes">>, <<"value">> := EncodedDocId} = lists:last(Order), - range_of(DbName, base64:decode(EncodedDocId)). + #{<<"@type">> := <<"string">>, <<"value">> := DocId} = lists:last(Order), + range_of(DbName, DocId). unpack(_DbName, Empty) when Empty == undefined; Empty == nil; Empty == null -> new(); |