diff options
author | Robert Newson <rnewson@apache.org> | 2023-04-22 13:45:03 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-22 13:45:03 +0000 |
commit | a28b75a9d9fcf3f3eb68fb9f122ad6d21c589898 (patch) | |
tree | 5222fe2513d1640e4b9781586055010997b4f6b6 /nouveau/src | |
parent | c1195e43c0b55f99892bb5d6b593de178499b969 (diff) | |
download | couchdb-a28b75a9d9fcf3f3eb68fb9f122ad6d21c589898.tar.gz |
Import nouveau (#4291)
Nouveau - a new (experimental) full-text indexing feature for Apache CouchDB, using Lucene 9. Requires Java 11 or higher (19 is preferred).
Diffstat (limited to 'nouveau/src')
56 files changed, 4579 insertions, 0 deletions
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java new file mode 100644 index 000000000..9921eaa42 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java @@ -0,0 +1,82 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau; + +import java.util.concurrent.ForkJoinPool; + +import org.apache.couchdb.nouveau.core.IndexManager; +import org.apache.couchdb.nouveau.core.UpdatesOutOfOrderExceptionMapper; +import org.apache.couchdb.nouveau.health.AnalyzeHealthCheck; +import org.apache.couchdb.nouveau.health.IndexHealthCheck; +import org.apache.couchdb.nouveau.lucene9.Lucene9Module; +import org.apache.couchdb.nouveau.lucene9.ParallelSearcherFactory; +import org.apache.couchdb.nouveau.resources.AnalyzeResource; +import org.apache.couchdb.nouveau.resources.IndexResource; +import org.apache.couchdb.nouveau.tasks.CloseAllIndexesTask; +import org.apache.lucene.search.SearcherFactory; + +import io.dropwizard.core.Application; +import io.dropwizard.core.setup.Environment; +import io.swagger.v3.jaxrs2.integration.resources.OpenApiResource; + +/** Dropwizard application entry point for Nouveau; {@link #run(NouveauApplicationConfiguration, Environment)} builds the IndexManager from the configuration and registers the resources, health checks, admin task and OpenAPI resource. */ public class NouveauApplication extends Application<NouveauApplicationConfiguration> { + + /** Creates a NouveauApplication and hands the command-line arguments to its run(String...) method. */ public static void main(String[] args) throws Exception { + new NouveauApplication().run(args); + } + + @Override + public String getName() { + return "Nouveau"; + } + 
+ /** Wires the application into the given environment: exception mapper, index manager (lifecycle-managed, with a 5-thread scheduled executor), Lucene9Module on the object mapper, the analyze and index resources, their health checks, the close-all-indexes admin task, and the OpenAPI resource. */ @Override + public void run(NouveauApplicationConfiguration configuration, Environment environment) throws Exception { + environment.jersey().register(new UpdatesOutOfOrderExceptionMapper()); + + // configure index manager + final IndexManager indexManager = new IndexManager(); + indexManager.setCommitIntervalSeconds(configuration.getCommitIntervalSeconds()); + indexManager.setIdleSeconds(configuration.getIdleSeconds()); + indexManager.setMaxIndexesOpen(configuration.getMaxIndexesOpen()); + indexManager.setMetricRegistry(environment.metrics()); + indexManager.setScheduler(environment.lifecycle().scheduledExecutorService("index-manager-%d").threads(5).build()); + indexManager.setObjectMapper(environment.getObjectMapper()); + indexManager.setRootDir(configuration.getRootDir()); + environment.lifecycle().manage(indexManager); + + // Serialization classes + environment.getObjectMapper().registerModule(new Lucene9Module()); + + // AnalyzeResource + final AnalyzeResource analyzeResource = new AnalyzeResource(); + environment.jersey().register(analyzeResource); + + // IndexResource + final SearcherFactory searcherFactory = new ParallelSearcherFactory(ForkJoinPool.commonPool()); + final IndexResource indexResource = new IndexResource(indexManager, searcherFactory); + environment.jersey().register(indexResource); + + // Health checks + environment.healthChecks().register("analyze", new AnalyzeHealthCheck(analyzeResource)); + environment.healthChecks().register("index", new IndexHealthCheck(indexResource)); + + // configure tasks + environment.admin().addTask(new CloseAllIndexesTask(indexManager)); + + // Swagger + environment.jersey().register(new OpenApiResource()); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java new file mode 100644 index 000000000..212a57cc0 --- /dev/null +++ 
b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java @@ -0,0 +1,74 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau; + +import java.nio.file.Path; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import io.dropwizard.core.Configuration; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotNull; + +/** Configuration bean for Nouveau holding maxIndexesOpen (default 10, min 10), commitIntervalSeconds (default 10, min 10), idleSeconds (default 30, min 30) and a required rootDir (no default); the setters are the JSON-bound properties. */ public class NouveauApplicationConfiguration extends Configuration { + + @Min(10) + private int maxIndexesOpen = 10; + + @Min(10) + private int commitIntervalSeconds = 10; + + @Min(30) + private int idleSeconds = 30; + + /* No default: @NotNull makes validation fail unless a value is supplied. */ @NotNull + private Path rootDir = null; + + @JsonProperty + public void setMaxIndexesOpen(int maxIndexesOpen) { + this.maxIndexesOpen = maxIndexesOpen; + } + + public int getMaxIndexesOpen() { + return maxIndexesOpen; + } + + @JsonProperty + public void setCommitIntervalSeconds(int commitIntervalSeconds) { + this.commitIntervalSeconds = commitIntervalSeconds; + } + + public int getCommitIntervalSeconds() { + return commitIntervalSeconds; + } + + @JsonProperty + public void setIdleSeconds(int idleSeconds) { + this.idleSeconds = idleSeconds; + } + + public int getIdleSeconds() { + return idleSeconds; + } + + @JsonProperty + public void setRootDir(Path rootDir) { + this.rootDir = rootDir; + } + + public Path getRootDir() { + return rootDir; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java 
b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java new file mode 100644 index 000000000..fa22a36f2 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java @@ -0,0 +1,51 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotEmpty; + +/** Request bean carrying an analyzer and a text, both constrained to be non-empty; JSON property names use the snake_case naming strategy. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class AnalyzeRequest { + + @NotEmpty + private String analyzer; + + @NotEmpty + private String text; + + public AnalyzeRequest() { + // Jackson deserialization + } + + /** Creates a request with both fields set; no validation is performed here. */ public AnalyzeRequest(final String analyzer, final String text) { + this.analyzer = analyzer; + this.text = text; + } + + @JsonProperty + public String getAnalyzer() { + return analyzer; + } + + @JsonProperty + public String getText() { + return text; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java new file mode 100644 index 000000000..ce35c75d2 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java @@ -0,0 +1,45 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; + +/** Response bean carrying a list of tokens; the list is constrained to be non-null and each element non-empty. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class AnalyzeResponse { + + @NotNull + private List<@NotEmpty String> tokens; + + public AnalyzeResponse() { + // Jackson deserialization + } + + /** Stores the given list by reference (no copy is made). */ public AnalyzeResponse(List<String> tokens) { + this.tokens = tokens; + } + + @JsonProperty + public List<String> getTokens() { + return tokens; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java new file mode 100644 index 000000000..805cb65cb --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java @@ -0,0 +1,50 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.Positive; + +/** Request bean carrying a single sequence number, seq, constrained to be positive. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class DocumentDeleteRequest { + + @Positive + private long seq; + + public DocumentDeleteRequest() { + // Jackson deserialization + } + + /** Creates a request for the given seq; throws IllegalArgumentException when seq is less than 1. */ public DocumentDeleteRequest(long seq) { + if (seq < 1) { + throw new IllegalArgumentException("seq must be 1 or greater"); + } + this.seq = seq; + } + + @JsonProperty + public long getSeq() { + return seq; + } + + @Override + public String toString() { + return "DocumentDeleteRequest [seq=" + seq + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java new file mode 100644 index 000000000..45b478322 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java @@ -0,0 +1,72 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import java.util.Collection; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Positive; + +/** Request bean carrying a positive seq, an optional partition (null when absent) and a non-empty collection of fields that are validated in turn (@Valid). */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class DocumentUpdateRequest { + + @Positive + private long seq; + + private String partition; + + @NotEmpty + @Valid + private Collection<Field> fields; + + public DocumentUpdateRequest() { + // Jackson deserialization + } + + /** Stores the arguments as given. NOTE(review): unlike the DocumentDeleteRequest(long) constructor, seq is not range-checked here; only the @Positive constraint covers it - confirm this is intended. */ public DocumentUpdateRequest(long seq, String partition, Collection<Field> fields) { + this.seq = seq; + this.partition = partition; + this.fields = fields; + } + + @JsonProperty + public long getSeq() { + return seq; + } + + @JsonProperty + public String getPartition() { + return partition; + } + + /** Returns true when partition is not null. */ public boolean hasPartition() { + return partition != null; + } + + @JsonProperty + public Collection<Field> getFields() { + return fields; + } + + @Override + public String toString() { + return "DocumentUpdateRequest [seq=" + seq + ", partition=" + partition + ", fields=" + fields + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java new file mode 100644 index 000000000..57ff4c858 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java @@ -0,0 +1,61 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; + +/** Immutable Field subtype holding a Double value (constrained to be non-null) together with store and facet flags. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class DoubleField extends Field { + + @NotNull + private final Double value; + + private final boolean store; + + private final boolean facet; + + /** Jackson creator: binds the JSON properties name, value, store and facet to the constructor arguments. */ public DoubleField(@JsonProperty("name") final String name, @JsonProperty("value") final Double value, + @JsonProperty("store") final boolean store, @JsonProperty("facet") final boolean facet) { + super(name); + this.value = value; + this.store = store; + this.facet = facet; + } + + @JsonProperty + public Double getValue() { + return value; + } + + @JsonProperty + public boolean isStore() { + return store; + } + + @JsonProperty + public boolean isFacet() { + return facet; + } + + @Override + public String toString() { + return "DoubleField [name=" + name + ", value=" + value + ", store=" + store + ", facet=" + facet + "]"; + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java new file mode 100644 index 000000000..ac59a286c --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java @@ -0,0 +1,31 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + + +/** Range specialised to Double bounds; adds no state or behaviour of its own. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class DoubleRange extends Range<Double> { + + public DoubleRange() { + } + + /** Forwards all arguments to the Range constructor; note that the first argument, called name here, becomes the range's label. */ public DoubleRange(String name, Double min, boolean minInclusive, Double max, boolean maxInclusive) { + super(name, min, minInclusive, max, maxInclusive); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java new file mode 100644 index 000000000..52d5b815f --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java @@ -0,0 +1,49 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.Pattern; + +/** Abstract base of the field types; the JSON property "@type" selects the concrete subtype (double, stored, string or text). */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = DoubleField.class, name = "double"), + @JsonSubTypes.Type(value = StoredField.class, name = "stored"), + @JsonSubTypes.Type(value = StringField.class, name = "string"), + @JsonSubTypes.Type(value = TextField.class, name = "text"), +}) +public abstract class Field { + + /* Pattern: an optional leading dollar sign, then an ASCII letter, then any number of ASCII letters, digits or underscores. */ @Pattern(regexp = "^\\$?[a-zA-Z][a-zA-Z0-9_]*$") + protected final String name; + + protected Field(final String name) { + this.name = name; + } + + @JsonProperty + public String getName() { + return name; + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java new file mode 100644 index 000000000..7d3919c41 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java @@ -0,0 +1,70 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotEmpty; + +/** Bean holding a default analyzer (constrained to be non-empty) and an optional map of field analyzers whose keys and values are constrained to be non-empty. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class IndexDefinition { + + @NotEmpty + private String defaultAnalyzer; + + private Map<@NotEmpty String, @NotEmpty String> fieldAnalyzers; + + public IndexDefinition() { + // Jackson deserialization + } + + public IndexDefinition(final String defaultAnalyzer, final Map<String, String> fieldAnalyzers) { + this.defaultAnalyzer = defaultAnalyzer; + this.fieldAnalyzers = fieldAnalyzers; + } + + @JsonProperty + public String getDefaultAnalyzer() { + return defaultAnalyzer; + } + + public void setDefaultAnalyzer(String defaultAnalyzer) { + this.defaultAnalyzer = defaultAnalyzer; + } + + @JsonProperty + public Map<String, String> getFieldAnalyzers() { + return fieldAnalyzers; + } + + public void 
setFieldAnalyzers(Map<String, String> fieldAnalyzers) { + this.fieldAnalyzers = fieldAnalyzers; + } + + /** Returns true only when the field-analyzer map is both non-null and non-empty. */ public boolean hasFieldAnalyzers() { + return fieldAnalyzers != null && !fieldAnalyzers.isEmpty(); + } + + @Override + public String toString() { + return "IndexDefinition [defaultAnalyzer=" + defaultAnalyzer + + ", fieldAnalyzers=" + fieldAnalyzers + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java new file mode 100644 index 000000000..2dd072771 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java @@ -0,0 +1,64 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.PositiveOrZero; + +/** Bean reporting three figures for an index - updateSeq, numDocs and diskSize - each constrained to be zero or positive; there are no setters. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class IndexInfo { + + @PositiveOrZero + private long updateSeq; + + @PositiveOrZero + private int numDocs; + + @PositiveOrZero + private long diskSize; + + public IndexInfo() { + } + + public IndexInfo(final long updateSeq, final int numDocs, final long diskSize) { + this.updateSeq = updateSeq; + this.numDocs = numDocs; + this.diskSize = diskSize; + } + + @JsonProperty + public int getNumDocs() { + return numDocs; + } + + @JsonProperty + public long getDiskSize() { + return diskSize; + } + + @JsonProperty + public long getUpdateSeq() { + return updateSeq; + } + + @Override + public String toString() { + return "IndexInfo [updateSeq=" + updateSeq + ", numDocs=" + numDocs + ", diskSize=" + diskSize + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java new file mode 100644 index 000000000..de421402a --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java @@ -0,0 +1,145 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; + +/** Mutable bean describing a labelled range: a non-empty label, non-null min and max bounds of type T, and one inclusive flag per bound (both default to true). */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class Range<T> { + + @NotEmpty + private String label; + + @NotNull + private T min; + + private boolean minInclusive = true; + + @NotNull + private T max; + + private boolean maxInclusive = true; + + public Range() { + } + + public Range(String label, T min, boolean minInclusive, T max, boolean maxInclusive) { + this.label = label; + this.min = min; + this.minInclusive = minInclusive; + this.max = max; + this.maxInclusive = maxInclusive; + } + + @JsonProperty + public String getLabel() { + return label; + } + + public void setLabel(String label) { + this.label = label; + } + + @JsonProperty + public T getMin() { + return min; + } + + public void setMin(T min) { + this.min = min; + } + + @JsonProperty("min_inclusive") + public boolean isMinInclusive() { + return minInclusive; + } + + public void setMinInclusive(boolean minInclusive) { + this.minInclusive = minInclusive; + } + + @JsonProperty + public T getMax() { + return max; + } + + public void setMax(T max) { + this.max = max; + } + + @JsonProperty("max_inclusive") + public boolean isMaxInclusive() { + return maxInclusive; + } + + public void setMaxInclusive(boolean maxInclusive) { + this.maxInclusive = maxInclusive; + } + + /** Combines all five fields (label, min, minInclusive, max, maxInclusive) with a multiplier of 31; null references contribute 0. */ @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((label == null) ? 0 : label.hashCode()); + result = prime * result + ((min == null) ? 0 : min.hashCode()); + result = prime * result + (minInclusive ? 1231 : 1237); + result = prime * result + ((max == null) ? 0 : max.hashCode()); + result = prime * result + (maxInclusive ? 
1231 : 1237); + return result; + } + + /** Two ranges are equal when they have exactly the same runtime class and all five fields match (null-safe comparison for label, min and max). */ @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Range<?> other = (Range<?>) obj; + if (label == null) { + if (other.label != null) + return false; + } else if (!label.equals(other.label)) + return false; + if (min == null) { + if (other.min != null) + return false; + } else if (!min.equals(other.min)) + return false; + if (minInclusive != other.minInclusive) + return false; + if (max == null) { + if (other.max != null) + return false; + } else if (!max.equals(other.max)) + return false; + if (maxInclusive != other.maxInclusive) + return false; + return true; + } + + @Override + public String toString() { + return "Range [label=" + label + ", min=" + min + ", minInclusive=" + minInclusive + ", max=" + max + + ", maxInclusive=" + maxInclusive + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java new file mode 100644 index 000000000..678970e04 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java @@ -0,0 +1,65 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import java.util.Collection; +import java.util.Objects; + +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; + +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; + +/** Bean for a single search hit: a non-empty id, a non-null order array and a non-null collection of non-null stored fields. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchHit { + + @NotEmpty + private String id; + + @NotNull + private PrimitiveWrapper<?>[] order; + + @NotNull + private Collection<@NotNull StoredField> fields; + + public SearchHit() { + } + + /** Creates a hit; throws NullPointerException when order or fields is null (id is not checked here). */ public SearchHit(final String id, final PrimitiveWrapper<?>[] order, final Collection<StoredField> fields) { + this.id = id; + this.order = Objects.requireNonNull(order); + this.fields = Objects.requireNonNull(fields); + } + + public String getId() { + return id; + } + + public PrimitiveWrapper<?>[] getOrder() { + return order; + } + + public Collection<StoredField> getFields() { + return fields; + } + + /** Renders the hit for logging; the order array is printed element by element via Arrays.toString because concatenating an array directly only yields its type and identity hash. */ @Override + public String toString() { + return "SearchHit [id=" + id + ", order=" + java.util.Arrays.toString(order) + ", fields=" + fields + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java new file mode 100644 index 000000000..eb4efa7bf --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java @@ -0,0 +1,151 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.List; +import java.util.Map; + +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.Max; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Positive; + +/** Mutable bean describing a search: a required query, an optional partition, a positive limit (default 25), optional sort, counts, ranges and after values, and topN (default 10, constrained to 1..100). The has* methods report whether the corresponding optional property is non-null. */ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchRequest { + + @NotNull + private String query; + + private String partition; + + @Positive + private int limit = 25; + + private List<@NotEmpty String> sort; + + private List<@NotEmpty String> counts; + + private Map<@NotEmpty String, List<@NotNull DoubleRange>> ranges; + + private PrimitiveWrapper<?>[] after; + + @Min(1) + @Max(100) + private int topN = 10; + + public SearchRequest() { + // Jackson deserialization + } + + public void setQuery(final String query) { + this.query = query; + } + + @JsonProperty + public String getQuery() { + return query; + } + + public void setPartition(final String partition) { + this.partition = partition; + } + + @JsonProperty + public String getPartition() { + return partition; + } + + public boolean hasPartition() { + return partition != null; + } + + public void setLimit(final int limit) { + this.limit = limit; + } + + @JsonProperty + public int getLimit() { + return limit; + } + + public boolean hasSort() { + return sort != null; + } + + @JsonProperty + public List<String> getSort() { + return sort; + } + + public void setSort(List<String> sort) { + this.sort = sort; + } + + public boolean hasCounts() { + return counts != null; + } + + public void setCounts(final 
List<String> counts) { + this.counts = counts; + } + + @JsonProperty + public List<String> getCounts() { + return counts; + } + + public boolean hasRanges() { + return ranges != null; + } + + public void setRanges(final Map<String, List<DoubleRange>> ranges) { + this.ranges = ranges; + } + + @JsonProperty + public Map<String, List<DoubleRange>> getRanges() { + return ranges; + } + + public void setTopN(final int topN) { + this.topN = topN; + } + + @JsonProperty + public int getTopN() { + return topN; + } + + public void setAfter(final PrimitiveWrapper<?>[] after) { + this.after = after; + } + + @JsonProperty + public PrimitiveWrapper<?>[] getAfter() { + return after; + } + + /** Renders the request for logging; the after array is printed element by element via Arrays.toString (which yields "null" for a null array) because concatenating an array directly only yields its type and identity hash. */ @Override + public String toString() { + return "SearchRequest [query=" + query + ", sort=" + sort + ", limit=" + limit + ", after=" + java.util.Arrays.toString(after) + ", counts=" + counts + ", ranges=" + ranges + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java new file mode 100644 index 000000000..6ef8c4bc1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java @@ -0,0 +1,97 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import java.util.List; +import java.util.Map; + +import org.apache.lucene.search.TotalHits.Relation; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.PositiveOrZero; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchResults { + + @PositiveOrZero + private long totalHits; + + @NotNull + private Relation totalHitsRelation; + + @NotNull + private List<@NotNull SearchHit> hits; + + private Map<@NotNull String, Map<@NotNull String, Number>> counts; + + private Map<@NotNull String, Map<@NotNull String, Number>> ranges; + + public SearchResults() { + } + + public void setTotalHits(final long totalHits) { + this.totalHits = totalHits; + } + + @JsonProperty + public long getTotalHits() { + return totalHits; + } + + public Relation getTotalHitsRelation() { + return totalHitsRelation; + } + + public void setTotalHitsRelation(Relation relation) { + this.totalHitsRelation = relation; + } + + public void setHits(final List<SearchHit> hits) { + this.hits = hits; + } + + @JsonProperty + public List<SearchHit> getHits() { + return hits; + } + + public void setCounts(final Map<String, Map<String, Number>> counts) { + this.counts = counts; + } + + @JsonProperty + public Map<String, Map<String, Number>> getCounts() { + return counts; + } + + public void setRanges(final Map<String, Map<String, Number>> ranges) { + this.ranges = ranges; + } + + @JsonProperty + public Map<String, Map<String, Number>> getRanges() { + return ranges; + } + + @Override + public String toString() { + return "SearchResults [hits=" + hits + ", totalHits=" + totalHits + ", counts=" + counts + ", ranges=" + ranges + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java 
b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java new file mode 100644 index 000000000..e8642c530 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java @@ -0,0 +1,48 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class StoredField extends Field { + + @NotNull + @Schema(oneOf = {String.class, Double.class, byte[].class}) + private final Object value; + + public StoredField(@JsonProperty("name") final String name, @JsonProperty("value") final Object value) { + super(name); + if (!(value instanceof String || value instanceof Number || value instanceof byte[])) { + throw new IllegalArgumentException(value + " must be a string, number or byte array"); + } + this.value = value; + } + + public Object getValue() { + return value; + } + + @Override + public String toString() { + return "StoredField [name=" + name + ", value=" + value + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java 
new file mode 100644 index 000000000..d32671ae1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class StringField extends Field { + + @NotNull + private final String value; + + private final boolean store; + + private final boolean facet; + + public StringField(@JsonProperty("name") final String name, @JsonProperty("value") final String value, + @JsonProperty("store") final boolean store, @JsonProperty("facet") final boolean facet) { + super(name); + this.value = Objects.requireNonNull(value); + this.store = store; + this.facet = facet; + } + + @JsonProperty + public String getValue() { + return value; + } + + @JsonProperty + public boolean isStore() { + return store; + } + + @JsonProperty + public boolean isFacet() { + return facet; + } + + @Override + public String toString() { + return "StringField [name=" + name + ", value=" + value + ", store=" + store + ", facet=" + facet + "]"; + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java new file mode 100644 index 000000000..76ee5d86b --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java @@ -0,0 +1,55 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class TextField extends Field { + + @NotNull + private final String value; + + private final boolean store; + + public TextField(@JsonProperty("name") final String name, @JsonProperty("value") final String value, + @JsonProperty("store") final boolean store) { + super(name); + this.value = Objects.requireNonNull(value); + this.store = store; + } + + @JsonProperty + public String getValue() { + return value; + } + + @JsonProperty + public boolean isStore() { + return store; + } + + @Override + public String toString() { + return "TextField [name=" + name + ", value=" + value + ", store=" + store + "]"; + } + +}
/**
 * Small I/O helpers: recursive deletion and "run everything, then report
 * the first failure".
 */
public class IOUtils {

    /**
     * Deletes {@code path}; if it is a directory its contents are deleted
     * first, recursively.
     *
     * Symbolic links are removed but never followed, so nothing outside the
     * tree rooted at {@code path} is touched.
     *
     * @param path the file or directory to delete
     * @throws IOException if listing a directory or deleting any entry fails
     *                     (including when {@code path} does not exist)
     */
    public static void rm(final Path path) throws IOException {
        if (java.nio.file.Files.isDirectory(path, java.nio.file.LinkOption.NOFOLLOW_LINKS)) {
            try (java.nio.file.DirectoryStream<Path> children = java.nio.file.Files.newDirectoryStream(path)) {
                for (final Path child : children) {
                    rm(child);
                }
            }
        }
        // Files.delete reports the reason for a failure in its exception.
        java.nio.file.Files.delete(path);
    }

    /** A {@link Runnable}-like action that may throw {@link IOException}. */
    @FunctionalInterface
    public interface IORunnable {
        public abstract void run() throws IOException;
    }

    /**
     * Runs every action in order, even if earlier ones fail, then rethrows
     * the first failure. Later failures are attached to it as suppressed
     * exceptions so they are not lost.
     *
     * @param runnables the actions to run
     * @throws IOException if the first failure was an IOException; a first
     *                     failure that is a RuntimeException or Error is
     *                     rethrown as is
     */
    public static void runAll(final IORunnable... runnables) throws IOException {
        Throwable thrown = null;
        for (final IORunnable r : runnables) {
            try {
                r.run();
            } catch (final Throwable e) {
                if (thrown == null) {
                    thrown = e;
                } else if (e != thrown) {
                    // addSuppressed rejects self-suppression, hence the identity check.
                    thrown.addSuppressed(e);
                }
            }
        }
        if (thrown != null) {
            if (thrown instanceof IOException) {
                throw (IOException) thrown;
            }
            if (thrown instanceof RuntimeException) {
                throw (RuntimeException) thrown;
            }
            if (thrown instanceof Error) {
                throw (Error) thrown;
            }
        }
    }

}
+ * + * This class also expects a monotonically incrementing update sequence + * associated with each modification. + */ + +public abstract class Index implements Closeable { + + private long updateSeq; + private boolean deleteOnClose = false; + private long lastCommit = now(); + private volatile boolean closed; + private final Semaphore permits = new Semaphore(Integer.MAX_VALUE); + + protected Index(final long updateSeq) { + this.updateSeq = updateSeq; + } + + public final boolean tryAcquire() { + if (permits.tryAcquire() == false) { + return false; + } + if (closed) { + permits.release(); + return false; + } + return true; + } + + public final boolean tryAcquire(long timeout, TimeUnit unit) throws InterruptedException { + if (permits.tryAcquire(timeout, unit) == false) { + return false; + } + if (closed) { + permits.release(); + return false; + } + return true; + } + + public final void release() { + permits.release(); + } + + public final IndexInfo info() throws IOException { + final int numDocs = doNumDocs(); + final long diskSize = doDiskSize(); + return new IndexInfo(updateSeq, numDocs, diskSize); + } + + protected abstract int doNumDocs() throws IOException; + + protected abstract long doDiskSize() throws IOException; + + public final synchronized void update(final String docId, final DocumentUpdateRequest request) + throws IOException { + assertUpdateSeqIsLower(request.getSeq()); + doUpdate(docId, request); + incrementUpdateSeq(request.getSeq()); + } + + protected abstract void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException; + + public final synchronized void delete(final String docId, final DocumentDeleteRequest request) throws IOException { + assertUpdateSeqIsLower(request.getSeq()); + doDelete(docId, request); + incrementUpdateSeq(request.getSeq()); + } + + protected abstract void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException; + + public final SearchResults search(final 
SearchRequest request) throws IOException { + return doSearch(request); + } + + protected abstract SearchResults doSearch(final SearchRequest request) throws IOException; + + public final boolean commit() throws IOException { + final long updateSeq; + synchronized (this) { + updateSeq = this.updateSeq; + } + final boolean result = doCommit(updateSeq); + if (result) { + final long now = now(); + synchronized (this) { + this.lastCommit = now; + } + } + return result; + } + + protected abstract boolean doCommit(final long updateSeq) throws IOException; + + @Override + public final void close() throws IOException { + synchronized (this) { + closed = true; + } + // Ensures exclusive access to the index before closing. + permits.acquireUninterruptibly(Integer.MAX_VALUE); + try { + doClose(); + } finally { + permits.release(Integer.MAX_VALUE); + } + } + + protected abstract void doClose() throws IOException; + + public boolean isDeleteOnClose() { + return deleteOnClose; + } + + public void setDeleteOnClose(final boolean deleteOnClose) { + synchronized (this) { + this.deleteOnClose = deleteOnClose; + } + } + + protected final void assertUpdateSeqIsLower(final long updateSeq) throws UpdatesOutOfOrderException { + assert Thread.holdsLock(this); + if (!(updateSeq > this.updateSeq)) { + throw new UpdatesOutOfOrderException(); + } + } + + protected final void incrementUpdateSeq(final long updateSeq) throws IOException { + assert Thread.holdsLock(this); + assertUpdateSeqIsLower(updateSeq); + this.updateSeq = updateSeq; + } + + public boolean needsCommit(final long duration, final TimeUnit unit) { + final long commitNeededSince = now() - unit.toNanos(duration); + synchronized (this) { + return this.lastCommit < commitNeededSince; + } + } + + private long now() { + return System.nanoTime(); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java new file mode 100644 
index 000000000..987c9303b --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java @@ -0,0 +1,23 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import java.io.IOException; + +@FunctionalInterface +public interface IndexFunction { + + <T> T with(final Index index) throws IOException; + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java new file mode 100644 index 000000000..2f2a03fd1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java @@ -0,0 +1,26 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.core; + +import java.io.IOException; +import java.nio.file.Path; + +import org.apache.couchdb.nouveau.api.IndexDefinition; + +@FunctionalInterface +public interface IndexLoader { + + Index apply(final Path path, final IndexDefinition indexDefinition) throws IOException; + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java new file mode 100644 index 000000000..ddc7c3f7f --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java @@ -0,0 +1,321 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.core; + +import static com.codahale.metrics.MetricRegistry.name; + +import java.io.File; +import java.io.IOException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.eclipse.jetty.io.RuntimeIOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.caffeine.MetricsStatsCounter; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.RemovalListener; +import com.github.benmanes.caffeine.cache.Scheduler; + +import io.dropwizard.lifecycle.Managed; +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +/** + * The central class of Nouveau, responsible for loading and unloading Lucene + * indexes and making them available for query. 
+ */ + +public final class IndexManager implements Managed { + + @FunctionalInterface + public interface IndexFunction<V, R> { + R apply(final V value) throws IOException; + } + + private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class); + + private int maxIndexesOpen; + + private int commitIntervalSeconds; + + private int idleSeconds; + + private Path rootDir; + + private ObjectMapper objectMapper; + + private MetricRegistry metricRegistry; + + private ScheduledExecutorService scheduler; + + private Cache<String, Index> cache; + + public <R> R with(final String name, final IndexLoader loader, final IndexFunction<Index, R> indexFun) + throws IOException, InterruptedException { + while (true) { + if (!exists(name)) { + throw new WebApplicationException("Index does not exist", Status.NOT_FOUND); + } + + final Index index; + try { + index = cache.get(name, (n) -> { + LOGGER.info("opening {}", n); + final Path path = indexPath(n); + try { + final IndexDefinition indexDefinition = loadIndexDefinition(n); + return loader.apply(path, indexDefinition); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); + } catch (final RuntimeIOException e) { + throw (IOException) e.getCause(); + } + + if (index.tryAcquire(1, TimeUnit.SECONDS)) { + try { + final R result = indexFun.apply(index); + if (index.needsCommit(commitIntervalSeconds, TimeUnit.SECONDS)) { + scheduler.execute(() -> { + if (index.tryAcquire()) { + try { + LOGGER.debug("committing {}", name); + try { + index.commit(); + } catch (final IOException e) { + LOGGER.warn("I/O exception while committing " + name, e); + } + } finally { + index.release(); + } + } + }); + } + return result; + } finally { + index.release(); + } + } + } + } + + public void create(final String name, IndexDefinition indexDefinition) throws IOException { + if (exists(name)) { + throw new WebApplicationException("Index already exists", Status.EXPECTATION_FAILED); + } + // Validate index definiton + 
// TODO luceneFor(indexDefinition).validate(indexDefinition); + + // Persist definition + final Path path = indexDefinitionPath(name); + if (Files.exists(path)) { + throw new FileAlreadyExistsException(name + " already exists"); + } + Files.createDirectories(path.getParent()); + objectMapper.writeValue(path.toFile(), indexDefinition); + } + + public boolean exists(final String name) { + return Files.exists(indexDefinitionPath(name)); + } + + public void deleteAll(final String path, final List<String> exclusions) throws IOException { + LOGGER.info("deleting indexes below {} (excluding {})", path, + exclusions == null ? "nothing" : exclusions); + + final Path indexRootPath = indexRootPath(path); + if (!indexRootPath.toFile().exists()) { + return; + } + Stream<Path> stream = Files.find(indexRootPath, 100, + (p, attr) -> attr.isDirectory() && isIndex(p)); + try { + stream.forEach((p) -> { + final String relativeToExclusions = indexRootPath.relativize(p).toString(); + if (exclusions != null && exclusions.indexOf(relativeToExclusions) != -1) { + return; + } + final String relativeName = rootDir.relativize(p).toString(); + try { + deleteIndex(relativeName); + } catch (final IOException e) { + LOGGER.error("I/O exception deleting " + p, e); + } + // Clean any newly empty directories. 
+ do { + final File f = p.toFile(); + if (f.isDirectory() && f.list().length == 0) { + f.delete(); + } + } while ((p = p.getParent()) != null && !rootDir.equals(p)); + }); + } finally { + stream.close(); + } + } + + private void deleteIndex(final String name) throws IOException { + final Index index = cache.asMap().remove(name); + if (index != null) { + index.setDeleteOnClose(true); + close(name, index); + } else { + IOUtils.rm(indexRootPath(name)); + } + } + + @JsonProperty + public int getMaxIndexesOpen() { + return maxIndexesOpen; + } + + public void setMaxIndexesOpen(int maxIndexesOpen) { + this.maxIndexesOpen = maxIndexesOpen; + } + + public int getCommitIntervalSeconds() { + return commitIntervalSeconds; + } + + public void setCommitIntervalSeconds(int commitIntervalSeconds) { + this.commitIntervalSeconds = commitIntervalSeconds; + } + + public int getIdleSeconds() { + return idleSeconds; + } + + public void setIdleSeconds(int idleSeconds) { + this.idleSeconds = idleSeconds; + } + + public void setScheduler(ScheduledExecutorService scheduler) { + this.scheduler = scheduler; + } + + public Path getRootDir() { + return rootDir; + } + + public void setRootDir(Path rootDir) { + this.rootDir = rootDir; + } + + public void setObjectMapper(final ObjectMapper objectMapper) { + this.objectMapper = objectMapper; + } + + public void setMetricRegistry(final MetricRegistry metricRegistry) { + this.metricRegistry = metricRegistry; + } + + @Override + public void start() throws IOException { + cache = Caffeine.newBuilder() + .recordStats(() -> new MetricsStatsCounter(metricRegistry, name(IndexManager.class, "cache"))) + .initialCapacity(maxIndexesOpen) + .maximumSize(maxIndexesOpen) + .expireAfterAccess(Duration.ofSeconds(idleSeconds)) + .scheduler(Scheduler.systemScheduler()) + .evictionListener(new IndexEvictionListener()) + .build(); + } + + @Override + public void stop() throws IOException, InterruptedException { + final var it = cache.asMap().entrySet().iterator(); + 
while (it.hasNext()) { + var e = it.next(); + LOGGER.info("closing {} during shutdown", e.getKey()); + close(e.getKey(), e.getValue()); + it.remove(); + } + } + + private boolean isIndex(final Path path) { + return path.resolve("index_definition.json").toFile().exists(); + } + + private Path indexDefinitionPath(final String name) { + return indexRootPath(name).resolve("index_definition.json"); + } + + private Path indexPath(final String name) { + return indexRootPath(name).resolve("index"); + } + + private IndexDefinition loadIndexDefinition(final String name) throws IOException { + return objectMapper.readValue(indexDefinitionPath(name).toFile(), IndexDefinition.class); + } + + private Path indexRootPath(final String name) { + final Path result = rootDir.resolve(name).normalize(); + if (result.startsWith(rootDir)) { + return result; + } + throw new WebApplicationException(name + " attempts to escape from index root directory", + Status.BAD_REQUEST); + } + + private class IndexEvictionListener implements RemovalListener<String, Index> { + + public void onRemoval(String name, Index index, RemovalCause cause) { + LOGGER.info("closing {} for cause {}", name, cause); + try { + close(name, index); + } catch (final IOException e) { + LOGGER.error("I/O exception when evicting " + name, e); + } + } + } + + private void close(final String name, final Index index) throws IOException { + IOUtils.runAll( + () -> { + if (index.tryAcquire()) { + try { + if (!index.isDeleteOnClose() && index.commit()) { + LOGGER.debug("committed {} before close", name); + } + } finally { + index.release(); + } + } + }, + () -> { + index.close(); + }, + () -> { + if (index.isDeleteOnClose()) { + IOUtils.rm(indexRootPath(name)); + } + }); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java new file mode 100644 index 000000000..3b89f41d2 --- /dev/null +++ 
/**
 * Thrown when a modification arrives with an update sequence that is not
 * greater than the one already applied.
 */
public class UpdatesOutOfOrderException extends IOException {

    // Exceptions are Serializable; pin the stream version explicitly.
    private static final long serialVersionUID = 1L;

    public UpdatesOutOfOrderException() {
        super("Updates applied in the wrong order");
    }

}
+ +package org.apache.couchdb.nouveau.core; + +import io.dropwizard.jersey.errors.ErrorMessage; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.core.Response.Status; +import jakarta.ws.rs.ext.ExceptionMapper; + +public class UpdatesOutOfOrderExceptionMapper implements ExceptionMapper<UpdatesOutOfOrderException> { + + @Override + public Response toResponse(final UpdatesOutOfOrderException exception) { + return Response.status(Status.BAD_REQUEST) + .type(MediaType.APPLICATION_JSON_TYPE) + .entity(new ErrorMessage(Status.BAD_REQUEST.getStatusCode(), exception.getMessage())) + .build(); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java new file mode 100644 index 000000000..875d0d8bb --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class ByteArrayWrapper extends PrimitiveWrapper<byte[]> { + + public ByteArrayWrapper(@JsonProperty("value") byte[] value) { + super(value); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java new file mode 100644 index 000000000..c9ae3b4cd --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class DoubleWrapper extends PrimitiveWrapper<Double> { + + public DoubleWrapper(@JsonProperty("value") Double value) { + super(value); + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java new file mode 100644 index 000000000..490afa6d5 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class FloatWrapper extends PrimitiveWrapper<Float> { + + public FloatWrapper(@JsonProperty("value") float value) { + super(value); + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java new file mode 100644 index 000000000..c179d0705 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class IntWrapper extends PrimitiveWrapper<Integer> { + + public IntWrapper(@JsonProperty("value") Integer value) { + super(value); + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java new file mode 100644 index 000000000..0eda4e786 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class LongWrapper extends PrimitiveWrapper<Long> { + + public LongWrapper(@JsonProperty("value") Long value) { + super(value); + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java new file mode 100644 index 000000000..89877da60 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java @@ -0,0 +1,46 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonTypeInfo.As; +import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; + +@JsonTypeInfo(use = Id.NAME, include = As.PROPERTY, property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = ByteArrayWrapper.class, name = "bytes"), + @JsonSubTypes.Type(value = DoubleWrapper.class, name = "double"), + @JsonSubTypes.Type(value = FloatWrapper.class, name = "float"), + @JsonSubTypes.Type(value = IntWrapper.class, name = "int"), + @JsonSubTypes.Type(value = LongWrapper.class, name = "long"), + @JsonSubTypes.Type(value = StringWrapper.class, name = "string"), +}) +public class PrimitiveWrapper<T> { + + private T value; + + public PrimitiveWrapper(T value) { + this.value = value; + } + + public T getValue() { + return value; + } + + public void setValue(T value) { + this.value = value; + } + +} diff --git 
a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java new file mode 100644 index 000000000..e53f22ca0 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class StringWrapper extends PrimitiveWrapper<String> { + + public StringWrapper(@JsonProperty("value") String value) { + super(value); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java new file mode 100644 index 000000000..4b49a39e0 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java @@ -0,0 +1,46 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.health; + +import java.util.Arrays; +import java.util.List; + +import org.apache.couchdb.nouveau.api.AnalyzeRequest; +import org.apache.couchdb.nouveau.api.AnalyzeResponse; +import org.apache.couchdb.nouveau.resources.AnalyzeResource; + +import com.codahale.metrics.health.HealthCheck; + +public final class AnalyzeHealthCheck extends HealthCheck { + + private AnalyzeResource analyzeResource; + + public AnalyzeHealthCheck(final AnalyzeResource analyzeResource) { + this.analyzeResource = analyzeResource; + } + + @Override + protected Result check() throws Exception { + final AnalyzeRequest request = new AnalyzeRequest("standard", "hello goodbye"); + final AnalyzeResponse response = analyzeResource.analyzeText(request); + final List<String> expected = Arrays.asList("hello", "goodbye"); + final List<String> actual = response.getTokens(); + if (expected.equals(actual)) { + return Result.healthy(); + } else { + return Result.unhealthy("Expected '%s' but got '%s'", expected, actual); + } + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java new file mode 100644 index 000000000..37882043a --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.health; + +import java.io.IOException; +import java.util.Collections; + +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.resources.IndexResource; + +import com.codahale.metrics.health.HealthCheck; + +public final class IndexHealthCheck extends HealthCheck { + + private final IndexResource indexResource; + + public IndexHealthCheck(final IndexResource indexResource) { + this.indexResource = indexResource; + } + + @Override + protected Result check() throws Exception { + final String name = "___test9"; + try { + indexResource.deletePath(name, null); + } catch (IOException e) { + // Ignored, index might not exist yet. + } + + indexResource.createIndex(name, new IndexDefinition("standard", null)); + try { + final DocumentUpdateRequest documentUpdateRequest = + new DocumentUpdateRequest(1, null, Collections.emptyList()); + indexResource.updateDoc(name, "foo", documentUpdateRequest); + + final SearchRequest searchRequest = new SearchRequest(); + searchRequest.setQuery("_id:foo"); + + final SearchResults searchResults = indexResource.searchIndex(name, searchRequest); + if (searchResults.getTotalHits() == 1) { + return Result.healthy(); + } + } finally { + indexResource.deletePath(name, null); + } + return Result.unhealthy(name); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java new file mode 100644 index 000000000..ca6834f5e --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java @@ -0,0 +1,143 @@ +// +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; +import org.apache.lucene.analysis.classic.ClassicAnalyzer; +import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.ga.IrishAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import 
org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.lv.LatvianAnalyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import org.apache.lucene.analysis.pl.PolishAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; + +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +public final class Lucene9AnalyzerFactory { + + private Lucene9AnalyzerFactory() { + } + + public static Analyzer fromDefinition(final IndexDefinition indexDefinition) { + final Analyzer defaultAnalyzer = newAnalyzer(indexDefinition.getDefaultAnalyzer()); + if (!indexDefinition.hasFieldAnalyzers()) { + return defaultAnalyzer; + } + final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>(); + for (Map.Entry<String, String> entry : indexDefinition.getFieldAnalyzers().entrySet()) { + fieldAnalyzers.put(entry.getKey(), newAnalyzer(entry.getValue())); + } + return new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers); + } + + private enum KnownAnalyzer { + + arabic(() -> new ArabicAnalyzer()), + armenian(() -> new ArmenianAnalyzer()), + basque(() -> new BasqueAnalyzer()), + bulgarian(() -> new BulgarianAnalyzer()), + catalan(() -> new CatalanAnalyzer()), + 
chinese(() -> new SmartChineseAnalyzer()), + cjk(() -> new CJKAnalyzer()), + classic(() -> new ClassicAnalyzer()), + czech(() -> new CzechAnalyzer()), + danish(() -> new DanishAnalyzer()), + dutch(() -> new DutchAnalyzer()), + email(() -> new UAX29URLEmailAnalyzer()), + english(() -> new EnglishAnalyzer()), + finnish(() -> new FinnishAnalyzer()), + french(() -> new FrenchAnalyzer()), + galician(() -> new GalicianAnalyzer()), + german(() -> new GermanAnalyzer()), + hindi(() -> new HindiAnalyzer()), + hungarian(() -> new HungarianAnalyzer()), + indonesian(() -> new IndonesianAnalyzer()), + irish(() -> new IrishAnalyzer()), + italian(() -> new ItalianAnalyzer()), + japanese(() -> new JapaneseAnalyzer()), + keyword(() -> new KeywordAnalyzer()), + latvian(() -> new LatvianAnalyzer()), + norwegian(() -> new NorwegianAnalyzer()), + persian(() -> new PersianAnalyzer()), + polish(() -> new PolishAnalyzer()), + portugese(() -> new PortugueseAnalyzer()), + romanian(() -> new RomanianAnalyzer()), + russian(() -> new RussianAnalyzer()), + simple(() -> new SimpleAnalyzer()), + simple_asciifolding(() -> new SimpleAsciiFoldingAnalyzer()), + spanish(() -> new SpanishAnalyzer()), + standard(() -> new StandardAnalyzer()), + swedish(() -> new SwedishAnalyzer()), + thai(() -> new ThaiAnalyzer()), + turkish(() -> new TurkishAnalyzer()), + whitespace(() -> new WhitespaceAnalyzer()); + + private final Supplier<? extends Analyzer> supplier; + + private KnownAnalyzer(final Supplier<? 
extends Analyzer> supplier) { + this.supplier = supplier; + } + + private Analyzer newInstance() { + return supplier.get(); + } + } + + public static Analyzer newAnalyzer(final String name) { + try { + return KnownAnalyzer.valueOf(name).newInstance(); + } catch (IllegalArgumentException e) { + throw new WebApplicationException(name + " is not a valid analyzer name", Status.BAD_REQUEST); + } + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java new file mode 100644 index 000000000..02818f41f --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java @@ -0,0 +1,507 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.lucene9; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.DoubleField; +import org.apache.couchdb.nouveau.api.DoubleRange; +import org.apache.couchdb.nouveau.api.Field; +import org.apache.couchdb.nouveau.api.SearchHit; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.api.StoredField; +import org.apache.couchdb.nouveau.api.StringField; +import org.apache.couchdb.nouveau.api.TextField; +import org.apache.couchdb.nouveau.core.IOUtils; +import org.apache.couchdb.nouveau.core.Index; +import org.apache.couchdb.nouveau.core.ser.ByteArrayWrapper; +import org.apache.couchdb.nouveau.core.ser.DoubleWrapper; +import org.apache.couchdb.nouveau.core.ser.FloatWrapper; +import org.apache.couchdb.nouveau.core.ser.IntWrapper; +import org.apache.couchdb.nouveau.core.ser.LongWrapper; +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; +import org.apache.couchdb.nouveau.core.ser.StringWrapper; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import 
org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollectorManager; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.StringDocValuesReaderState; +import org.apache.lucene.facet.StringValueFacetCounts; +import org.apache.lucene.facet.range.DoubleRangeFacetCounts; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.StoredFields; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiCollectorManager; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; + +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +public class Lucene9Index extends Index { + + private static final Sort DEFAULT_SORT = new Sort(SortField.FIELD_SCORE, + new SortField("_id", SortField.Type.STRING)); + private static final Pattern SORT_FIELD_RE = Pattern.compile("^([-+])?([\\.\\w]+)(?:<(\\w+)>)$"); + + private final Analyzer analyzer; + private final IndexWriter writer; + private final SearcherManager searcherManager; + + public 
Lucene9Index(final Analyzer analyzer, final IndexWriter writer, final long updateSeq, + final SearcherManager searcherManager) { + super(updateSeq); + this.analyzer = Objects.requireNonNull(analyzer); + this.writer = Objects.requireNonNull(writer); + this.searcherManager = Objects.requireNonNull(searcherManager); + } + + @Override + public int doNumDocs() throws IOException { + return writer.getDocStats().numDocs; + } + + @Override + public long doDiskSize() throws IOException { + final Directory dir = writer.getDirectory(); + long result = 0; + for (final String name : dir.listAll()) { + try { + result += dir.fileLength(name); + } catch (final FileNotFoundException | NoSuchFileException e) { + // deleted while we were looping. + } + } + return result; + } + + @Override + public void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException { + final Term docIdTerm = docIdTerm(docId); + final Document doc = toDocument(docId, request); + writer.updateDocument(docIdTerm, doc); + } + + @Override + public void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException { + final Query query = docIdQuery(docId); + writer.deleteDocuments(query); + } + + @Override + public boolean doCommit(final long updateSeq) throws IOException { + if (!writer.hasUncommittedChanges()) { + return false; + } + writer.setLiveCommitData(Collections.singletonMap("update_seq", Long.toString(updateSeq)).entrySet()); + writer.commit(); + return true; + } + + @Override + public void doClose() throws IOException { + IOUtils.runAll( + () -> { + searcherManager.close(); + }, + () -> { + writer.rollback(); + }, + () -> { + if (isDeleteOnClose()) { + var dir = writer.getDirectory(); + for (final String name : dir.listAll()) { + dir.deleteFile(name); + } + } + }); + } + + @Override + public SearchResults doSearch(final SearchRequest request) throws IOException { + final Query query = parse(request); + + // Construct CollectorManagers. 
+ final MultiCollectorManager cm; + final CollectorManager<?, ? extends TopDocs> hits = hitCollector(request); + if (request.hasCounts() || request.hasRanges()) { + cm = new MultiCollectorManager(hits, new FacetsCollectorManager()); + } else { + cm = new MultiCollectorManager(hits); + } + + searcherManager.maybeRefreshBlocking(); + + final IndexSearcher searcher = searcherManager.acquire(); + try { + final Object[] reduces = searcher.search(query, cm); + return toSearchResults(request, searcher, reduces); + } catch (final IllegalStateException e) { + throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); + } finally { + searcherManager.release(searcher); + } + } + + private CollectorManager<?, ? extends TopDocs> hitCollector(final SearchRequest searchRequest) { + final Sort sort = toSort(searchRequest); + + final PrimitiveWrapper<?>[] after = searchRequest.getAfter(); + final FieldDoc fieldDoc; + if (after != null) { + fieldDoc = toFieldDoc(after); + if (getLastSortField(sort).getReverse()) { + fieldDoc.doc = 0; + } else { + fieldDoc.doc = Integer.MAX_VALUE; + } + } else { + fieldDoc = null; + } + + return TopFieldCollector.createSharedManager( + sort, + searchRequest.getLimit(), + fieldDoc, + 1000); + } + + private SortField getLastSortField(final Sort sort) { + final SortField[] sortFields = sort.getSort(); + return sortFields[sortFields.length - 1]; + } + + private SearchResults toSearchResults(final SearchRequest searchRequest, final IndexSearcher searcher, + final Object[] reduces) throws IOException { + final SearchResults result = new SearchResults(); + collectHits(searcher, (TopDocs) reduces[0], result); + if (reduces.length == 2) { + collectFacets(searchRequest, searcher, (FacetsCollector) reduces[1], result); + } + return result; + } + + private void collectHits(final IndexSearcher searcher, final TopDocs topDocs, final SearchResults searchResults) + throws IOException { + final List<SearchHit> hits = new 
ArrayList<SearchHit>(topDocs.scoreDocs.length); + final StoredFields storedFields = searcher.storedFields(); + + for (final ScoreDoc scoreDoc : topDocs.scoreDocs) { + final Document doc = storedFields.document(scoreDoc.doc); + + final List<StoredField> fields = new ArrayList<StoredField>(doc.getFields().size()); + for (IndexableField field : doc.getFields()) { + if (field.name().equals("_id")) { + continue; + } + if (field.numericValue() != null) { + fields.add(new StoredField(field.name(), field.numericValue().doubleValue())); + } else if (field.binaryValue() != null) { + fields.add(new StoredField(field.name(), toBytes(field.binaryValue()))); + } else if (field.stringValue() != null) { + fields.add(new StoredField(field.name(), field.stringValue())); + } + } + + final PrimitiveWrapper<?>[] after = toAfter(((FieldDoc) scoreDoc)); + hits.add(new SearchHit(doc.get("_id"), after, fields)); + } + + searchResults.setTotalHits(topDocs.totalHits.value); + searchResults.setTotalHitsRelation(topDocs.totalHits.relation); + searchResults.setHits(hits); + } + + private void collectFacets(final SearchRequest searchRequest, final IndexSearcher searcher, + final FacetsCollector fc, final SearchResults searchResults) throws IOException { + if (searchRequest.hasCounts()) { + final Map<String, Map<String, Number>> countsMap = new HashMap<String, Map<String, Number>>( + searchRequest.getCounts().size()); + for (final String field : searchRequest.getCounts()) { + final StringDocValuesReaderState state = new StringDocValuesReaderState(searcher.getIndexReader(), + field); + final StringValueFacetCounts counts = new StringValueFacetCounts(state, fc); + countsMap.put(field, collectFacets(counts, searchRequest.getTopN(), field)); + } + searchResults.setCounts(countsMap); + } + + if (searchRequest.hasRanges()) { + final Map<String, Map<String, Number>> rangesMap = new HashMap<String, Map<String, Number>>( + searchRequest.getRanges().size()); + for (final Entry<String, List<DoubleRange>> 
entry : searchRequest.getRanges().entrySet()) { + final DoubleRangeFacetCounts counts = toDoubleRangeFacetCounts(fc, entry.getKey(), entry.getValue()); + rangesMap.put(entry.getKey(), collectFacets(counts, searchRequest.getTopN(), entry.getKey())); + } + searchResults.setRanges(rangesMap); + } + } + + private DoubleRangeFacetCounts toDoubleRangeFacetCounts(final FacetsCollector fc, final String field, + final List<DoubleRange> ranges) throws IOException { + final org.apache.lucene.facet.range.DoubleRange[] luceneRanges = new org.apache.lucene.facet.range.DoubleRange[ranges + .size()]; + for (int i = 0; i < luceneRanges.length; i++) { + final DoubleRange range = ranges.get(i); + luceneRanges[i] = new org.apache.lucene.facet.range.DoubleRange( + range.getLabel(), range.getMin() != null ? range.getMin() : Double.NEGATIVE_INFINITY, + range.isMinInclusive(), range.getMax() != null ? range.getMax() : Double.POSITIVE_INFINITY, + range.isMaxInclusive()); + } + return new DoubleRangeFacetCounts(field, fc, luceneRanges); + } + + private Map<String, Number> collectFacets(final Facets facets, final int topN, final String dim) + throws IOException { + final FacetResult topChildren = facets.getTopChildren(topN, dim); + final Map<String, Number> result = new HashMap<String, Number>(topChildren.childCount); + for (final LabelAndValue lv : topChildren.labelValues) { + result.put(lv.label, lv.value); + } + return result; + } + + // Ensure _id is final sort field so we can paginate. + private Sort toSort(final SearchRequest searchRequest) { + if (!searchRequest.hasSort()) { + return DEFAULT_SORT; + } + + final List<String> sort = new ArrayList<String>(searchRequest.getSort()); + final String last = sort.get(sort.size() - 1); + // Append _id field if not already present. 
+ switch (last) { + case "-_id<string>": + case "_id<string>": + break; + default: + sort.add("_id<string>"); + } + return convertSort(sort); + } + + private Sort convertSort(final List<String> sort) { + final SortField[] fields = new SortField[sort.size()]; + for (int i = 0; i < sort.size(); i++) { + fields[i] = convertSortField(sort.get(i)); + } + return new Sort(fields); + } + + private SortField convertSortField(final String sortString) { + if ("relevance".equals(sortString)) { + return SortField.FIELD_SCORE; + } + final Matcher m = SORT_FIELD_RE.matcher(sortString); + if (!m.matches()) { + throw new WebApplicationException( + sortString + " is not a valid sort parameter", Status.BAD_REQUEST); + } + final boolean reverse = "-".equals(m.group(1)); + switch (m.group(3)) { + case "string": + return new SortedSetSortField(m.group(2), reverse); + case "double": + return new SortedNumericSortField(m.group(2), SortField.Type.DOUBLE, reverse); + default: + throw new WebApplicationException( + m.group(3) + " is not a valid sort type", Status.BAD_REQUEST); + } + } + + private static Document toDocument(final String docId, final DocumentUpdateRequest request) throws IOException { + final Document result = new Document(); + + // id + result.add(new org.apache.lucene.document.StringField("_id", docId, Store.YES)); + result.add(new SortedDocValuesField("_id", new BytesRef(docId))); + + // partition (optional) + if (request.hasPartition()) { + result.add(new org.apache.lucene.document.StringField("_partition", request.getPartition(), Store.NO)); + } + + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); + + for (Field field : request.getFields()) { + // Underscore-prefix is reserved. + if (field.getName().startsWith("_")) { + continue; + } + if (field instanceof TextField) { + var f = (TextField) field; + result.add(new org.apache.lucene.document.TextField(f.getName(), f.getValue(), + f.isStore() ? 
Store.YES : Store.NO)); + } else if (field instanceof StringField) { + // TODO use KeywordField when available. + var f = (StringField) field; + result.add(new org.apache.lucene.document.StringField(f.getName(), f.getValue(), + f.isStore() ? Store.YES : Store.NO)); + result.add(new SortedSetDocValuesField(f.getName(), + new BytesRef(f.getValue()))); + } else if (field instanceof DoubleField) { + var f = (DoubleField) field; + result.add(new org.apache.lucene.document.DoubleField(f.getName(), f.getValue())); + if (f.isStore()) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), f.getValue())); + } + } else if (field instanceof StoredField) { + var f = (StoredField) field; + var val = f.getValue(); + if (val instanceof String) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), (String) val)); + } else if (val instanceof Number) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), ((Number) val).doubleValue())); + } else if (val instanceof byte[]) { + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap((byte[]) val)); + result.add(new org.apache.lucene.document.StoredField(f.getName(), buf.toString())); + } catch (final CharacterCodingException e) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val)); + } + } else { + throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST); + } + } else { + throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST); + } + } + + return result; + } + + private FieldDoc toFieldDoc(final Object[] after) { + final Object[] fields = new Object[after.length]; + for (int i = 0; i < after.length; i++) { + if (after[i] instanceof PrimitiveWrapper<?>) { + fields[i] = ((PrimitiveWrapper<?>) after[i]).getValue(); + } + if (fields[i] instanceof byte[]) { + fields[i] = new BytesRef((byte[]) fields[i]); + } + if (fields[i] instanceof String) { + fields[i] = new BytesRef((String) fields[i]); + } + } + return 
new FieldDoc(0, Float.NaN, fields); + } + + private PrimitiveWrapper<?>[] toAfter(final FieldDoc fieldDoc) { + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); + final PrimitiveWrapper<?>[] fields = new PrimitiveWrapper<?>[fieldDoc.fields.length]; + for (int i = 0; i < fields.length; i++) { + if (fieldDoc.fields[i] instanceof String) { + fields[i] = new StringWrapper((String) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof BytesRef) { + var bytes = toBytes((BytesRef) fieldDoc.fields[i]); + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap(bytes)); + fields[i] = new StringWrapper(buf.toString()); + } catch (final CharacterCodingException e) { + fields[i] = new ByteArrayWrapper(bytes); + } + } else if (fieldDoc.fields[i] instanceof Double) { + fields[i] = new DoubleWrapper((double) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Integer) { + fields[i] = new IntWrapper((int) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Long) { + fields[i] = new LongWrapper((long) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Float) { + fields[i] = new FloatWrapper((float) fieldDoc.fields[i]); + } else { + throw new WebApplicationException(fieldDoc.fields[i].getClass() + " is not valid", Status.BAD_REQUEST); + } + } + return fields; + } + + private static byte[] toBytes(final BytesRef bytesRef) { + return Arrays.copyOfRange(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length); + } + + private static Query docIdQuery(final String docId) { + return new TermQuery(docIdTerm(docId)); + } + + private static Term docIdTerm(final String docId) { + return new Term("_id", docId); + } + + private Query parse(final SearchRequest request) { + var queryParser = new NouveauQueryParser(analyzer); + Query result; + try { + result = queryParser.parse(request.getQuery(), "default"); + if (request.hasPartition()) { + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + 
builder.add(new TermQuery(new Term("_partition", request.getPartition())), Occur.MUST); + builder.add(result, Occur.MUST); + result = builder.build(); + } + } catch (final QueryNodeException e) { + throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); + } + return result; + } + + @Override + public String toString() { + return "Lucene9Index [analyzer=" + analyzer + ", writer=" + writer + ", searcherManager=" + searcherManager + + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java new file mode 100644 index 000000000..8d5555692 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java @@ -0,0 +1,31 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.lucene9; + +import org.apache.lucene.search.Query; + +import com.fasterxml.jackson.core.Version; +import com.fasterxml.jackson.databind.module.SimpleModule; + +public class Lucene9Module extends SimpleModule { + + public Lucene9Module() { + super("lucene9", Version.unknownVersion()); + + // Query + addSerializer(Query.class, new QuerySerializer()); + addDeserializer(Query.class, new QueryDeserializer()); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java new file mode 100644 index 000000000..6aad65cd4 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java @@ -0,0 +1,181 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.lucene9; + +import java.text.NumberFormat; +import java.text.ParseException; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.queryparser.flexible.core.QueryParserHelper; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode; +import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; +import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline; +import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor; +import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder; +import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; +import org.apache.lucene.queryparser.flexible.standard.processors.AllowLeadingWildcardProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.AnalyzerQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.BooleanQuery2ModifierNodeProcessor; +import 
org.apache.lucene.queryparser.flexible.standard.processors.BooleanSingleChildOptimizationQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.BoostQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.DefaultPhraseSlopQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.FuzzyQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.IntervalQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MatchAllDocsQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MultiFieldQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MultiTermRewriteMethodProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.OpenRangeQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.PhraseSlopQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.RegexpQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.RemoveEmptyNonLeafQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.TermRangeQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.WildcardQueryNodeProcessor; +import org.apache.lucene.search.Query; + +public final class NouveauQueryParser extends QueryParserHelper { + + public NouveauQueryParser(final Analyzer analyzer) { + super( + new StandardQueryConfigHandler(), + new StandardSyntaxParser(), + new NouveauQueryNodeProcessorPipeline(), + new StandardQueryTreeBuilder()); + getQueryConfigHandler().set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, true); + getQueryConfigHandler().set(ConfigurationKeys.ANALYZER, analyzer); + } + + @Override + public Query parse(String query, String defaultField) throws QueryNodeException { + return (Query) super.parse(query, defaultField); + } + 
+ /** + * Same pipeline as StandardQueryParser but we substitute + * PointQueryNodeProcessor and PointRangeQueryNodeProcessor for + * NouveauPointProcessor below. + */ + public static class NouveauQueryNodeProcessorPipeline extends QueryNodeProcessorPipeline { + + public NouveauQueryNodeProcessorPipeline() { + super(null); + add(new WildcardQueryNodeProcessor()); + add(new MultiFieldQueryNodeProcessor()); + add(new FuzzyQueryNodeProcessor()); + add(new RegexpQueryNodeProcessor()); + add(new MatchAllDocsQueryNodeProcessor()); + add(new OpenRangeQueryNodeProcessor()); + add(new NouveauPointProcessor()); + add(new TermRangeQueryNodeProcessor()); + add(new AllowLeadingWildcardProcessor()); + add(new AnalyzerQueryNodeProcessor()); + add(new PhraseSlopQueryNodeProcessor()); + add(new BooleanQuery2ModifierNodeProcessor()); + add(new NoChildOptimizationQueryNodeProcessor()); + add(new RemoveDeletedQueryNodesProcessor()); + add(new RemoveEmptyNonLeafQueryNodeProcessor()); + add(new BooleanSingleChildOptimizationQueryNodeProcessor()); + add(new DefaultPhraseSlopQueryNodeProcessor()); + add(new BoostQueryNodeProcessor()); + add(new MultiTermRewriteMethodProcessor()); + add(new IntervalQueryNodeProcessor()); + } + } + + /** + * If it looks like a number, treat it as a number. 
+ */ + public static class NouveauPointProcessor extends QueryNodeProcessorImpl { + + @Override + protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { + final var numberFormat = NumberFormat.getInstance(); + final var pointsConfig = new PointsConfig(numberFormat, Double.class); + + if (node instanceof FieldQueryNode && !(node.getParent() instanceof RangeQueryNode)) { + final var fieldNode = (FieldQueryNode) node; + String text = fieldNode.getTextAsString(); + if (text.length() == 0) { + return node; + } + final Number number; + try { + number = numberFormat.parse(text).doubleValue(); + } catch (final ParseException e) { + return node; + } + final var lowerNode = new PointQueryNode(fieldNode.getField(), number, numberFormat); + final var upperNode = new PointQueryNode(fieldNode.getField(), number, numberFormat); + return new PointRangeQueryNode(lowerNode, upperNode, true, true, pointsConfig); + } + + if (node instanceof TermRangeQueryNode) { + final var termRangeNode = (TermRangeQueryNode) node; + final var lower = termRangeNode.getLowerBound(); + final var upper = termRangeNode.getUpperBound(); + final var lowerText = lower.getTextAsString(); + final var upperText = upper.getTextAsString(); + Number lowerNumber = null, upperNumber = null; + + if (lowerText.length() > 0 && !lowerText.equals("-Infinity")) { + try { + lowerNumber = numberFormat.parse(lowerText).doubleValue(); + } catch (final ParseException e) { + return node; + } + } + + if (upperText.length() > 0 && !upperText.equals("Infinity")) { + try { + upperNumber = numberFormat.parse(upperText).doubleValue(); + } catch (final ParseException e) { + return node; + } + } + + final var lowerNode = new PointQueryNode(termRangeNode.getField(), lowerNumber, numberFormat); + final var upperNode = new PointQueryNode(termRangeNode.getField(), upperNumber, numberFormat); + final var lowerInclusive = termRangeNode.isLowerInclusive(); + final var upperInclusive = 
termRangeNode.isUpperInclusive(); + + return new PointRangeQueryNode( + lowerNode, upperNode, lowerInclusive, upperInclusive, pointsConfig); + } + + return node; + } + + @Override + protected QueryNode preProcessNode(final QueryNode node) throws QueryNodeException { + return node; + } + + @Override + protected List<QueryNode> setChildrenOrder(final List<QueryNode> children) throws QueryNodeException { + return children; + } + + } + +}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java new file mode 100644 index 000000000..91fee1795 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java @@ -0,0 +1,36 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.io.IOException; +import java.util.concurrent.Executor; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SearcherFactory; + +public class ParallelSearcherFactory extends SearcherFactory { + + private Executor executor; + + public ParallelSearcherFactory(Executor executor) { + this.executor = executor; + } + + @Override + public IndexSearcher newSearcher(final IndexReader reader, final IndexReader previousReader) throws IOException { + return new IndexSearcher(reader, executor); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java new file mode 100644 index 000000000..2e3b0278e --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java @@ -0,0 +1,121 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; + +public class QueryDeserializer extends StdDeserializer<Query> { + + public QueryDeserializer() { + this(null); + } + + public QueryDeserializer(Class<?> vc) { + super(vc); + } + + @Override + public Query deserialize(final JsonParser parser, final DeserializationContext context) + throws IOException, JsonProcessingException { + return deserializeNode(parser, context, parser.getCodec().readTree(parser)); + } + + private Query deserializeNode(final JsonParser parser, final DeserializationContext context, final JsonNode node) + throws IOException, JsonProcessingException { + final String type = 
node.get("@type").asText(); + switch (type) { + case "term": { + final String field = node.get("field").asText(); + final String text = node.get("text").asText(); + return new TermQuery(new Term(field, text)); + } + case "boolean": { + if (!node.get("clauses").isArray()) { + throw new JsonParseException(parser, "boolean clauses must be an array"); + } + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + final Iterator<JsonNode> it = node.get("clauses").elements(); + while (it.hasNext()) { + final Query q = deserializeNode(parser, context, it.next()); + builder.add(q, null); + } + return builder.build(); + } + case "wildcard": { + final String field = node.get("field").asText(); + final String text = node.get("text").asText(); + return new WildcardQuery(new Term(field, text)); + } + case "phrase": { + final String field = node.get("field").asText(); + if (!node.get("terms").isArray()) { + throw new JsonParseException(parser, "phrase terms must be an array"); + } + final PhraseQuery.Builder builder = new PhraseQuery.Builder(); + final Iterator<JsonNode> it = node.get("terms").elements(); + while (it.hasNext()) { + builder.add(new Term(field, it.next().asText())); + } + builder.setSlop(node.get("slop").asInt()); + return builder.build(); + } + case "prefix": { + final String field = node.get("field").asText(); + final String text = node.get("text").asText(); + return new PrefixQuery(new Term(field, text)); + } + case "fuzzy": { + final String field = node.get("field").asText(); + final String text = node.get("text").asText(); + final int maxEdits = node.get("max_edits").asInt(); + final int prefixLength = node.get("prefix_length").asInt(); + return new FuzzyQuery(new Term(field, text), maxEdits, prefixLength); + } + case "regexp": { + final String field = node.get("field").asText(); + final String text = node.get("text").asText(); + return new RegexpQuery(new Term(field, text)); + } + case "term_range": { + + } + case "point_range": { + + } + case 
"match_all": + return new MatchAllDocsQuery(); + } + throw new JsonParseException(parser, type + " not a supported query type"); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java new file mode 100644 index 000000000..df2f7a675 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java @@ -0,0 +1,171 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.lucene9; + +import java.io.IOException; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; + +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; + +class QuerySerializer extends StdSerializer<Query> { + + QuerySerializer() { + this(null); + } + + QuerySerializer(Class<Query> vc) { + super(vc); + } + + @Override + public void serialize(final Query query, final JsonGenerator gen, final SerializerProvider provider) + throws IOException { + + if (query instanceof TermQuery) { + final TermQuery termQuery = (TermQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "term"); + gen.writeStringField("field", termQuery.getTerm().field()); + gen.writeStringField("term", termQuery.getTerm().text()); + gen.writeEndObject(); + return; + } + + if (query instanceof BooleanQuery) { + final BooleanQuery booleanQuery = (BooleanQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "boolean"); + gen.writeFieldName("clauses"); + gen.writeStartArray(); + for (final BooleanClause clause : booleanQuery.clauses()) { + gen.writeStartObject(); + gen.writeFieldName("query"); + serialize(clause.getQuery(), gen, provider); + gen.writeStringField("occur", clause.getOccur().name().toLowerCase()); + 
gen.writeEndObject(); + } + gen.writeEndArray(); + gen.writeEndObject(); + return; + } + + if (query instanceof WildcardQuery) { + final WildcardQuery wildcardQuery = (WildcardQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "wildcard"); + gen.writeStringField("field", wildcardQuery.getField()); + gen.writeStringField("text", wildcardQuery.getTerm().text()); + gen.writeEndObject(); + return; + } + + if (query instanceof PhraseQuery) { + final PhraseQuery phraseQuery = (PhraseQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "phrase"); + gen.writeStringField("field", phraseQuery.getField()); + gen.writeFieldName("terms"); + gen.writeStartArray(); + for (final Term term : phraseQuery.getTerms()) { + gen.writeString(term.text()); + } + gen.writeEndArray(); + gen.writeNumberField("slop", phraseQuery.getSlop()); + gen.writeEndObject(); + return; + } + + if (query instanceof PrefixQuery) { + final PrefixQuery prefixQuery = (PrefixQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "prefix"); + gen.writeStringField("field", prefixQuery.getField()); + gen.writeStringField("text", prefixQuery.getPrefix().text()); + gen.writeEndObject(); + return; + } + + if (query instanceof FuzzyQuery) { + final FuzzyQuery fuzzyQuery = (FuzzyQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "fuzzy"); + gen.writeStringField("field", fuzzyQuery.getField()); + gen.writeStringField("text", fuzzyQuery.getTerm().text()); + gen.writeNumberField("max_edits", fuzzyQuery.getMaxEdits()); + gen.writeNumberField("prefix_length", fuzzyQuery.getPrefixLength()); + gen.writeEndObject(); + return; + } + + if (query instanceof RegexpQuery) { + final RegexpQuery regexpQuery = (RegexpQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "regexp"); + gen.writeStringField("field", regexpQuery.getField()); + gen.writeStringField("text", regexpQuery.getRegexp().text()); + gen.writeEndObject(); + return; + } + + 
if (query instanceof TermRangeQuery) { + final TermRangeQuery termRangeQuery = (TermRangeQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "term_range"); + gen.writeStringField("field", termRangeQuery.getField()); + gen.writeStringField("lower", termRangeQuery.getLowerTerm().utf8ToString()); + gen.writeBooleanField("includes_lower", termRangeQuery.includesLower()); + gen.writeStringField("upper", termRangeQuery.getUpperTerm().utf8ToString()); + gen.writeBooleanField("includes_upper", termRangeQuery.includesUpper()); + gen.writeEndObject(); + return; + } + + if (query instanceof PointRangeQuery) { + final PointRangeQuery pointRangeQuery = (PointRangeQuery) query; + gen.writeStartObject(); + gen.writeStringField("@type", "point_range"); + gen.writeStringField("field", pointRangeQuery.getField()); + gen.writeBinaryField("lower", pointRangeQuery.getLowerPoint()); + gen.writeBinaryField("upper", pointRangeQuery.getUpperPoint()); + gen.writeNumberField("num_dims", pointRangeQuery.getNumDims()); + gen.writeEndObject(); + } + + if (query instanceof MatchAllDocsQuery) { + gen.writeStartObject(); + gen.writeStringField("@type", "match_all"); + gen.writeEndObject(); + return; + } + + throw new JsonGenerationException(query.getClass() + " not supported", gen); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java new file mode 100644 index 000000000..ae7e4c261 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java @@ -0,0 +1,39 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LetterTokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; + +class SimpleAsciiFoldingAnalyzer extends Analyzer { + + SimpleAsciiFoldingAnalyzer() { + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new LetterTokenizer(); + return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(new LowerCaseFilter(tokenizer))); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + return new ASCIIFoldingFilter(new LowerCaseFilter(in)); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java new file mode 100644 index 000000000..e29657eab --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java @@ -0,0 +1,74 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.resources; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.couchdb.nouveau.api.AnalyzeRequest; +import org.apache.couchdb.nouveau.api.AnalyzeResponse; +import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +import com.codahale.metrics.annotation.ExceptionMetered; +import com.codahale.metrics.annotation.Metered; +import com.codahale.metrics.annotation.ResponseMetered; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response.Status; + +@Path("/analyze") +@Metered +@ResponseMetered +@ExceptionMetered(cause = IOException.class) +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public final class AnalyzeResource { + + @POST + public AnalyzeResponse analyzeText(@NotNull @Valid AnalyzeRequest request) throws IOException { + try { + final List<String> tokens = tokenize(Lucene9AnalyzerFactory.newAnalyzer(request.getAnalyzer()), + request.getText()); + return new AnalyzeResponse(tokens); + } catch (IllegalArgumentException e) { + throw new WebApplicationException(request.getAnalyzer() + " not a valid 
analyzer", + Status.BAD_REQUEST); + } + } + + private List<String> tokenize(final Analyzer analyzer, final String text) throws IOException { + final List<String> result = new ArrayList<String>(10); + try (final TokenStream tokenStream = analyzer.tokenStream("default", text)) { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + final CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); + result.add(term.toString()); + } + tokenStream.end(); + } + return result; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java new file mode 100644 index 000000000..4273582b6 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java @@ -0,0 +1,147 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.resources; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.couchdb.nouveau.api.IndexInfo; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.core.IndexLoader; +import org.apache.couchdb.nouveau.core.IndexManager; +import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory; +import org.apache.couchdb.nouveau.lucene9.Lucene9Index; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.misc.store.DirectIODirectory; +import org.apache.lucene.search.SearcherFactory; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; + +import com.codahale.metrics.annotation.ExceptionMetered; +import com.codahale.metrics.annotation.Metered; +import com.codahale.metrics.annotation.ResponseMetered; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.DELETE; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.PUT; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType;; + +@Path("/index/{name}") +@Metered +@ResponseMetered +@ExceptionMetered(cause = IOException.class) +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public final class IndexResource { + + private final IndexManager indexManager; + private final SearcherFactory searcherFactory; + + public IndexResource(final IndexManager 
indexManager, final SearcherFactory searcherFactory) { + this.indexManager = Objects.requireNonNull(indexManager); + this.searcherFactory = Objects.requireNonNull(searcherFactory); + } + + @PUT + public void createIndex(@PathParam("name") String name, @NotNull @Valid IndexDefinition indexDefinition) + throws IOException { + indexManager.create(name, indexDefinition); + } + + @DELETE + @Path("/doc/{docId}") + public void deleteDoc(@PathParam("name") String name, @PathParam("docId") String docId, + @NotNull @Valid DocumentDeleteRequest request) throws Exception { + indexManager.with(name, indexLoader(), (index) -> { + index.delete(docId, request); + return null; + }); + } + + @DELETE + public void deletePath(@PathParam("name") String path, @Valid final List<String> exclusions) throws IOException { + indexManager.deleteAll(path, exclusions); + } + + @GET + public IndexInfo indexInfo(@PathParam("name") String name) throws Exception { + return indexManager.with(name, indexLoader(), (index) -> { + return index.info(); + }); + } + + @POST + @Path("/search") + public SearchResults searchIndex(@PathParam("name") String name, + @NotNull @Valid SearchRequest request) + throws Exception { + return indexManager.with(name, indexLoader(), (index) -> { + return index.search(request); + }); + } + + @PUT + @Path("/doc/{docId}") + public void updateDoc(@PathParam("name") String name, @PathParam("docId") String docId, + @NotNull @Valid DocumentUpdateRequest request) + throws Exception { + indexManager.with(name, indexLoader(), (index) -> { + index.update(docId, request); + return null; + }); + } + + private IndexLoader indexLoader() { + return (path, indexDefinition) -> { + final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition); + final Directory dir = new DirectIODirectory(FSDirectory.open(path)); + final IndexWriterConfig config = new IndexWriterConfig(analyzer); + config.setUseCompoundFile(false); + final IndexWriter writer = new IndexWriter(dir, config); 
+ final long updateSeq = getUpdateSeq(writer); + final SearcherManager searcherManager = new SearcherManager(writer, searcherFactory); + return new Lucene9Index(analyzer, writer, updateSeq, searcherManager); + }; + } + + private static long getUpdateSeq(final IndexWriter writer) throws IOException { + final Iterable<Map.Entry<String, String>> commitData = writer.getLiveCommitData(); + if (commitData == null) { + return 0L; + } + for (Map.Entry<String, String> entry : commitData) { + if (entry.getKey().equals("update_seq")) { + return Long.parseLong(entry.getValue()); + } + } + return 0L; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java new file mode 100644 index 000000000..bcc94e34d --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java @@ -0,0 +1,38 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.tasks; + +import java.io.PrintWriter; +import java.util.List; +import java.util.Map; + +import org.apache.couchdb.nouveau.core.IndexManager; + +import io.dropwizard.servlets.tasks.Task; + +public class CloseAllIndexesTask extends Task { + + private final IndexManager indexManager; + + public CloseAllIndexesTask(final IndexManager indexManager) { + super("close-all-indexes"); + this.indexManager = indexManager; + } + + @Override + public void execute(Map<String, List<String>> parameters, PrintWriter output) throws Exception { + indexManager.stop(); + } + +} diff --git a/nouveau/src/main/resources/banner.txt b/nouveau/src/main/resources/banner.txt new file mode 100644 index 000000000..3575b3984 --- /dev/null +++ b/nouveau/src/main/resources/banner.txt @@ -0,0 +1,7 @@ + .-. + / | + /\ | .-._.) ( ) .-..-. .-. ) ( + / \ |( )( )( / ./.-'_( | ( ) + .-' / \| `-' `--': \_/ (__.' `-'-'`--': +(__.' `. + diff --git a/nouveau/src/main/resources/openapi.yaml b/nouveau/src/main/resources/openapi.yaml new file mode 100644 index 000000000..2bc4d73f1 --- /dev/null +++ b/nouveau/src/main/resources/openapi.yaml @@ -0,0 +1,9 @@ +resourcePackages: +- org.apache.couchdb.nouveau.resources + +openAPI: + info: + version: '1.0' + license: + name: Apache 2.0 + url: http://www.apache.org/licenses/LICENSE-2.0.html diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java new file mode 100644 index 000000000..8fb773419 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java @@ -0,0 +1,59 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.api;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Round-trips {@link SearchRequest} through Jackson and compares it with the
 * {@code /fixtures/SearchRequest.json} fixture.
 */
public class SearchRequestTest {

    private static ObjectMapper mapper;

    @BeforeAll
    public static void setupMapper() {
        mapper = new ObjectMapper();
    }

    /** The in-code request must serialise to the same JSON as the re-serialised fixture. */
    @Test
    public void testSerialisation() throws Exception {
        final SearchRequest built = asObject();
        final SearchRequest fromFixture = readFixture();
        final String expectedJson = mapper.writeValueAsString(fromFixture);
        final String actualJson = mapper.writeValueAsString(built);
        assertThat(actualJson).isEqualTo(expectedJson);
    }

    /** The fixture must deserialise to an object whose string form matches the in-code request. */
    @Test
    public void testDeserialisation() throws Exception {
        final SearchRequest built = asObject();
        final String fixtureText = readFixture().toString();
        assertThat(fixtureText).isEqualTo(built.toString());
    }

    /** Parses the JSON fixture from the test classpath. */
    private SearchRequest readFixture() throws Exception {
        return mapper.readValue(getClass().getResource("/fixtures/SearchRequest.json"), SearchRequest.class);
    }

    /** Builds the request that the fixture is compared against. */
    private SearchRequest asObject() {
        final DoubleRange zeroToHundred = new DoubleRange("0 to 100 inc", 0.0, true, 100.0, true);
        final SearchRequest request = new SearchRequest();
        request.setQuery("*:*");
        request.setLimit(10);
        request.setCounts(List.of("bar"));
        request.setRanges(Map.of("foo", List.of(zeroToHundred)));
        return request;
    }

}
diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java
new file mode 100644
index 000000000..8f39b3f39
---
/dev/null
+++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java
@@ -0,0 +1,29 @@
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.health;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.apache.couchdb.nouveau.resources.AnalyzeResource;
import org.junit.jupiter.api.Test;

/** Checks that {@link AnalyzeHealthCheck} reports healthy for a fresh {@link AnalyzeResource}. */
public class AnalyzeHealthCheckTest {

    @Test
    public void testAnalyzeHealthCheck() throws Exception {
        final var healthCheck = new AnalyzeHealthCheck(new AnalyzeResource());
        final var outcome = healthCheck.check();
        assertTrue(outcome.isHealthy());
    }

}
diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java
new file mode 100644
index 000000000..42a3626d6
--- /dev/null
+++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java
@@ -0,0 +1,51 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.health; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Path; +import java.util.concurrent.Executors; + +import org.apache.couchdb.nouveau.core.IndexManager; +import org.apache.couchdb.nouveau.resources.IndexResource; +import org.apache.lucene.search.SearcherFactory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import com.codahale.metrics.MetricRegistry; +import com.fasterxml.jackson.databind.ObjectMapper; + +public class IndexHealthCheckTest { + + @Test + public void testIndexHealthCheck(@TempDir final Path tempDir) throws Exception { + var scheduler = Executors.newSingleThreadScheduledExecutor(); + var manager = new IndexManager(); + manager.setObjectMapper(new ObjectMapper()); + manager.setMetricRegistry(new MetricRegistry()); + manager.setRootDir(tempDir); + manager.setScheduler(scheduler); + manager.start(); + try { + var resource = new IndexResource(manager, new SearcherFactory()); + var check = new IndexHealthCheck(resource); + assertTrue(check.check().isHealthy()); + } finally { + scheduler.shutdown(); + manager.stop(); + } + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java new file mode 100644 index 000000000..693d82918 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java @@ -0,0 +1,283 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.reflect.Method; +import java.util.Map; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; +import org.apache.lucene.analysis.classic.ClassicAnalyzer; +import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.ga.IrishAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import 
org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.lv.LatvianAnalyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import org.apache.lucene.analysis.pl.PolishAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; +import org.junit.jupiter.api.Test; + +import jakarta.ws.rs.WebApplicationException; + +public class Lucene9AnalyzerFactoryTest { + + @Test + public void testkeyword() throws Exception { + assertAnalyzer("keyword", KeywordAnalyzer.class); + } + + @Test + public void testsimple() throws Exception { + assertAnalyzer("simple", SimpleAnalyzer.class); + } + + @Test + public void testwhitespace() throws Exception { + assertAnalyzer("whitespace", WhitespaceAnalyzer.class); + } + + @Test + public void testarabic() throws Exception { + assertAnalyzer("arabic", ArabicAnalyzer.class); + } + + @Test + public void testbulgarian() throws Exception { + assertAnalyzer("bulgarian", BulgarianAnalyzer.class); + } + + @Test + public void testcatalan() throws Exception { + assertAnalyzer("catalan", CatalanAnalyzer.class); + } + + @Test + public void testcjk() throws Exception { + assertAnalyzer("cjk", CJKAnalyzer.class); + } + + @Test + public void testchinese() throws Exception { + assertAnalyzer("chinese", SmartChineseAnalyzer.class); + } + + 
@Test + public void testczech() throws Exception { + assertAnalyzer("czech", CzechAnalyzer.class); + } + + @Test + public void testdanish() throws Exception { + assertAnalyzer("danish", DanishAnalyzer.class); + } + + @Test + public void testgerman() throws Exception { + assertAnalyzer("german", GermanAnalyzer.class); + } + + @Test + public void testenglish() throws Exception { + assertAnalyzer("english", EnglishAnalyzer.class); + } + + @Test + public void testspanish() throws Exception { + assertAnalyzer("spanish", SpanishAnalyzer.class); + } + + @Test + public void testbasque() throws Exception { + assertAnalyzer("basque", BasqueAnalyzer.class); + } + + @Test + public void testpersian() throws Exception { + assertAnalyzer("persian", PersianAnalyzer.class); + } + + @Test + public void testfinnish() throws Exception { + assertAnalyzer("finnish", FinnishAnalyzer.class); + } + + @Test + public void testfrench() throws Exception { + assertAnalyzer("french", FrenchAnalyzer.class); + } + + @Test + public void testirish() throws Exception { + assertAnalyzer("irish", IrishAnalyzer.class); + } + + @Test + public void testgalician() throws Exception { + assertAnalyzer("galician", GalicianAnalyzer.class); + } + + @Test + public void testhindi() throws Exception { + assertAnalyzer("hindi", HindiAnalyzer.class); + } + + @Test + public void testhungarian() throws Exception { + assertAnalyzer("hungarian", HungarianAnalyzer.class); + } + + @Test + public void testarmenian() throws Exception { + assertAnalyzer("armenian", ArmenianAnalyzer.class); + } + + @Test + public void testindonesian() throws Exception { + assertAnalyzer("indonesian", IndonesianAnalyzer.class); + } + + @Test + public void testitalian() throws Exception { + assertAnalyzer("italian", ItalianAnalyzer.class); + } + + @Test + public void testjapanese() throws Exception { + assertAnalyzer("japanese", JapaneseAnalyzer.class); + } + + @Test + public void testlatvian() throws Exception { + assertAnalyzer("latvian", 
LatvianAnalyzer.class); + } + + @Test + public void testdutch() throws Exception { + assertAnalyzer("dutch", DutchAnalyzer.class); + } + + @Test + public void testnorwegian() throws Exception { + assertAnalyzer("norwegian", NorwegianAnalyzer.class); + } + + @Test + public void testpolish() throws Exception { + assertAnalyzer("polish", PolishAnalyzer.class); + } + + @Test + public void testportugese() throws Exception { + assertAnalyzer("portugese", PortugueseAnalyzer.class); + } + + @Test + public void testromanian() throws Exception { + assertAnalyzer("romanian", RomanianAnalyzer.class); + } + + @Test + public void testrussian() throws Exception { + assertAnalyzer("russian", RussianAnalyzer.class); + } + + @Test + public void testclassic() throws Exception { + assertAnalyzer("classic", ClassicAnalyzer.class); + } + + @Test + public void teststandard() throws Exception { + assertAnalyzer("standard", StandardAnalyzer.class); + } + + @Test + public void testemail() throws Exception { + assertAnalyzer("email", UAX29URLEmailAnalyzer.class); + } + + @Test + public void testswedish() throws Exception { + assertAnalyzer("swedish", SwedishAnalyzer.class); + } + + @Test + public void testthai() throws Exception { + assertAnalyzer("thai", ThaiAnalyzer.class); + } + + @Test + public void testturkish() throws Exception { + assertAnalyzer("turkish", TurkishAnalyzer.class); + } + + @Test + public void testFieldAnalyzers() throws Exception { + final IndexDefinition indexDefinition = new IndexDefinition("standard", + Map.of("english", "english", "thai", "thai", "email", "email")); + final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition); + assertThat(analyzer).isInstanceOf(PerFieldAnalyzerWrapper.class); + final Method m = PerFieldAnalyzerWrapper.class.getDeclaredMethod("getWrappedAnalyzer", String.class); + m.setAccessible(true); + assertThat(m.invoke(analyzer, "english")).isInstanceOf(EnglishAnalyzer.class); + assertThat(m.invoke(analyzer, 
"thai")).isInstanceOf(ThaiAnalyzer.class); + assertThat(m.invoke(analyzer, "email")).isInstanceOf(UAX29URLEmailAnalyzer.class); + assertThat(m.invoke(analyzer, "other")).isInstanceOf(StandardAnalyzer.class); + } + + @Test + public void testUnknownAnalyzer() throws Exception { + assertThrows(WebApplicationException.class, () -> Lucene9AnalyzerFactory.newAnalyzer("foo")); + } + + private void assertAnalyzer(final String name, final Class<? extends Analyzer> clazz) throws Exception { + assertThat(Lucene9AnalyzerFactory.newAnalyzer(name)).isInstanceOf(clazz); + assertThat(Lucene9AnalyzerFactory.fromDefinition(new IndexDefinition(name, null))).isInstanceOf(clazz); + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java new file mode 100644 index 000000000..1b28a01f4 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java @@ -0,0 +1,223 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

package org.apache.couchdb.nouveau.lucene9;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
import org.apache.couchdb.nouveau.api.DoubleField;
import org.apache.couchdb.nouveau.api.DoubleRange;
import org.apache.couchdb.nouveau.api.Field;
import org.apache.couchdb.nouveau.api.IndexDefinition;
import org.apache.couchdb.nouveau.api.IndexInfo;
import org.apache.couchdb.nouveau.api.SearchRequest;
import org.apache.couchdb.nouveau.api.SearchResults;
import org.apache.couchdb.nouveau.api.StringField;
import org.apache.couchdb.nouveau.core.Index;
import org.apache.couchdb.nouveau.core.IndexLoader;
import org.apache.couchdb.nouveau.core.UpdatesOutOfOrderException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.misc.store.DirectIODirectory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/**
 * Exercises {@link Lucene9Index} against a temporary directory: open/close,
 * searching, sorting, counts, ranges, update ordering, info and delete.
 */
public class Lucene9IndexTest {

    /** Opens an index at {@code path} with the "standard" default analyzer, flagged delete-on-close. */
    protected final Index setup(final Path path) throws IOException {
        final IndexDefinition definition = new IndexDefinition();
        definition.setDefaultAnalyzer("standard");
        final Index opened = indexLoader().apply(path, definition);
        opened.setDeleteOnClose(true);
        return opened;
    }

    /** Closes the index. */
    protected final void cleanup(final Index index) throws IOException {
        index.close();
    }

    @Test
    public void testOpenClose(@TempDir Path path) throws IOException {
        cleanup(setup(path));
    }

    @Test
    public void testSearching(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final int total = 100;
            for (int seq = 1; seq <= total; seq++) {
                final Collection<Field> docFields = List.of(new StringField("foo", "bar", false, false));
                index.update("doc" + seq, new DocumentUpdateRequest(seq, null, docFields));
            }

            final SearchRequest matchAll = new SearchRequest();
            matchAll.setQuery("*:*");
            final SearchResults found = index.search(matchAll);
            assertThat(found.getTotalHits()).isEqualTo(total);
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testSort(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final int total = 100;
            for (int seq = 1; seq <= total; seq++) {
                final Collection<Field> docFields = List.of(new StringField("foo", "bar", false, false));
                index.update("doc" + seq, new DocumentUpdateRequest(seq, null, docFields));
            }

            final SearchRequest sorted = new SearchRequest();
            sorted.setQuery("*:*");
            sorted.setSort(List.of("foo<string>"));
            final SearchResults found = index.search(sorted);
            assertThat(found.getTotalHits()).isEqualTo(total);
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testCounts(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final int total = 100;
            for (int seq = 1; seq <= total; seq++) {
                final Collection<Field> docFields = List.of(new StringField("bar", "baz", false, true));
                index.update("doc" + seq, new DocumentUpdateRequest(seq, null, docFields));
            }

            final SearchRequest counting = new SearchRequest();
            counting.setQuery("*:*");
            counting.setCounts(List.of("bar"));
            final SearchResults found = index.search(counting);
            assertThat(found.getCounts()).isEqualTo(Map.of("bar", Map.of("baz", total)));
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testRanges(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final int total = 100;
            for (int seq = 1; seq <= total; seq++) {
                final Collection<Field> docFields = List.of(new DoubleField("bar", (double) seq, false, true));
                index.update("doc" + seq, new DocumentUpdateRequest(seq, null, docFields));
            }

            // Both ranges are built with the midpoint as a bound and both flags true.
            final DoubleRange low = new DoubleRange("low", 0.0, true, (double) total / 2, true);
            final DoubleRange high = new DoubleRange("high", (double) total / 2, true, (double) total, true);
            final SearchRequest ranged = new SearchRequest();
            ranged.setQuery("*:*");
            ranged.setRanges(Map.of("bar", List.of(low, high)));

            final SearchResults found = index.search(ranged);
            assertThat(found.getRanges()).isEqualTo(
                    Map.of("bar", Map.of("low", total / 2, "high", total / 2 + 1)));
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testOutOfOrder(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final Collection<Field> noFields = Collections.emptyList();

            // Advance the sequence to 2 first.
            index.update("foo", new DocumentUpdateRequest(2, null, noFields));

            // A later update carrying sequence 1 must be rejected.
            assertThrows(UpdatesOutOfOrderException.class,
                    () -> index.update("foo", new DocumentUpdateRequest(1, null, noFields)));
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testInfo(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final IndexInfo before = index.info();
            assertThat(before.getDiskSize()).isEqualTo(0);
            assertThat(before.getNumDocs()).isEqualTo(0);
            assertThat(before.getUpdateSeq()).isEqualTo(0);

            final Collection<Field> docFields = List.of(new DoubleField("bar", 12.0, false, true));
            index.update("foo", new DocumentUpdateRequest(2, null, docFields));
            index.commit();

            final IndexInfo after = index.info();
            assertThat(after.getDiskSize()).isGreaterThan(0);
            assertThat(after.getNumDocs()).isEqualTo(1);
            assertThat(after.getUpdateSeq()).isEqualTo(2);
        } finally {
            cleanup(index);
        }
    }

    @Test
    public void testDelete(@TempDir Path path) throws IOException {
        final Index index = setup(path);
        try {
            final Collection<Field> docFields = List.of(new DoubleField("bar", 12.0, false, true));
            index.update("foo", new DocumentUpdateRequest(2, null, docFields));
            index.commit();

            final IndexInfo afterUpdate = index.info();
            assertThat(afterUpdate.getNumDocs()).isEqualTo(1);

            index.delete("foo", new DocumentDeleteRequest(3));
            index.commit();

            final IndexInfo afterDelete = index.info();
            assertThat(afterDelete.getNumDocs()).isEqualTo(0);
        } finally {
            cleanup(index);
        }
    }

    /**
     * Loader used by {@link #setup(Path)}: opens a Lucene 9 index with
     * compound files disabled, a starting update sequence of 0 and no
     * searcher factory.
     */
    protected IndexLoader indexLoader() {
        return (path, indexDefinition) -> {
            final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition);
            final Directory directory = new DirectIODirectory(FSDirectory.open(path));
            final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
            writerConfig.setUseCompoundFile(false);
            final IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
            final SearcherManager searchers = new SearcherManager(indexWriter, null);
            return new Lucene9Index(analyzer, indexWriter, 0L, searchers);
        };
    }

}
diff --git
a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java
new file mode 100644
index 000000000..4c1e23d2a
--- /dev/null
+++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java
@@ -0,0 +1,106 @@
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.lucene9;

import static org.assertj.core.api.Assertions.assertThat;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/**
 * Checks which Lucene query {@link NouveauQueryParser} produces for term,
 * prefix, wildcard, string-range and numeric inputs.
 */
public class NouveauQueryParserTest {

    private static final String DEFAULT_FIELD = "foo";

    private static NouveauQueryParser qp;

    @BeforeAll
    public static void setup() {
        qp = new NouveauQueryParser(new StandardAnalyzer());
    }

    @Test
    public void testTermQuery() throws Exception {
        assertParsesTo("foo:bar", new TermQuery(new Term("foo", "bar")));
    }

    @Test
    public void testPrefixQuery() throws Exception {
        assertParsesTo("foo:bar*", new PrefixQuery(new Term("foo", "bar")));
    }

    @Test
    public void testWildcardQuery() throws Exception {
        assertParsesTo("foo:ba*r", new WildcardQuery(new Term("foo", "ba*r")));
    }

    @Test
    public void testStringRangeQuery() throws Exception {
        assertParsesTo("foo:[bar TO foo]",
                new TermRangeQuery("foo", new BytesRef("bar"), new BytesRef("foo"), true, true));
    }

    @Test
    public void testMixedRangeQuery() throws Exception {
        assertParsesTo("foo:[12.0 TO foo]",
                new TermRangeQuery("foo", new BytesRef("12.0"), new BytesRef("foo"), true, true));
    }

    @Test
    public void testInferredPointQuery() throws Exception {
        assertParsesTo("foo:12", DoublePoint.newExactQuery("foo", 12.0));
    }

    @Test
    public void testInferredPointRangeQuery() throws Exception {
        assertParsesTo("foo:[1 TO 12]",
                DoublePoint.newRangeQuery("foo", new double[] { 1 }, new double[] { 12 }));
    }

    @Test
    public void testOpenLeftPointRangeQuery() throws Exception {
        assertParsesTo("foo:[* TO 100.0]",
                DoublePoint.newRangeQuery("foo", new double[] { Double.NEGATIVE_INFINITY }, new double[] { 100 }));
    }

    @Test
    public void testOpenRightPointRangeQuery() throws Exception {
        assertParsesTo("foo:[1.0 TO *]",
                DoublePoint.newRangeQuery("foo", new double[] { 1 }, new double[] { Double.POSITIVE_INFINITY }));
    }

    @Test
    public void testOpenLeftPointRangeQueryLegacy() throws Exception {
        assertParsesTo("foo:[-Infinity TO 100.0]",
                DoublePoint.newRangeQuery("foo", new double[] { Double.NEGATIVE_INFINITY }, new double[] { 100 }));
    }

    @Test
    public void testOpenRightPointRangeQueryLegacy() throws Exception {
        assertParsesTo("foo:[1.0 TO Infinity]",
                DoublePoint.newRangeQuery("foo", new double[] { 1 }, new double[] { Double.POSITIVE_INFINITY }));
    }

    /** Parses {@code queryText} against the default field and asserts it equals {@code expected}. */
    private static void assertParsesTo(final String queryText, final Object expected) throws Exception {
        assertThat(qp.parse(queryText, DEFAULT_FIELD)).isEqualTo(expected);
    }

}
diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java
new file mode 100644
index 000000000..06cfdfad7
--- /dev/null
+++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java
@@ -0,0 +1,47 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package org.apache.couchdb.nouveau.lucene9; + + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + /** Tests that a Jackson ObjectMapper with Lucene9Module registered writes a Lucene query as the expected JSON. */ +public class QuerySerializationTest { + /** Serializes a four-clause BooleanQuery and compares the output with an exact JSON string. */ + @Test + public void basicTest() throws Exception { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new Lucene9Module()); + /* Query under test: the same term (field "foo", text "bar") as MUST, MUST_NOT and SHOULD, plus a MUST phrase on field "bar". */ + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.MUST); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + builder.add(new PhraseQuery("bar", "foo", "bar", "baz"), Occur.MUST); + final Query query = builder.build(); + /* Compared as an exact string, so clause order, key order and the lower-case occur names all matter. */ + final String expected = "{\"@type\":\"boolean\",\"clauses\":[{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"must\"},{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"must_not\"},{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"should\"},{\"query\":{\"@type\":\"phrase\",\"field\":\"bar\",\"terms\":[\"foo\",\"bar\",\"baz\"],\"slop\":0},\"occur\":\"must\"}]}"; + assertThat(mapper.writeValueAsString(query)).isEqualTo(expected); + } + +} diff --git a/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json b/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json new file mode 100644 index 000000000..a22e322d4 --- /dev/null +++ b/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json @@ -0,0 +1,22 @@ +{ + "seq": 12, + "fields": [ + { + "@type": "string", + "name": "stringfoo", + "value": "bar", + "store": true + }, + { +
"@type": "text", + "name": "textfoo", + "value": "hello there", + "store": true + }, + { + "@type": "double", + "name": "doublefoo", + "value": 12 + } + ] +} diff --git a/nouveau/src/test/resources/fixtures/SearchRequest.json b/nouveau/src/test/resources/fixtures/SearchRequest.json new file mode 100644 index 000000000..c588cc16b --- /dev/null +++ b/nouveau/src/test/resources/fixtures/SearchRequest.json @@ -0,0 +1,17 @@ +{ + "query": "*:*", + "limit": 10, + "sort": null, + "counts": [ + "bar" + ], + "ranges": { + "foo": [ + { + "label": "0 to 100 inc", + "min": 0.0, + "max": 100.0 + } + ] + } +}
\ No newline at end of file |