diff options
author | Robert Newson <rnewson@apache.org> | 2023-04-14 11:16:04 +0100 |
---|---|---|
committer | Robert Newson <rnewson@apache.org> | 2023-04-22 11:20:02 +0100 |
commit | bc3c27bbf11960ad3499bab7f586dcf53d3eb4fd (patch) | |
tree | 9e538d5d07d337d49c31b73ea5fc3ffa390a0a5e | |
parent | 755ae73662c328969d715fc103d959eaedb4e397 (diff) | |
download | couchdb-bc3c27bbf11960ad3499bab7f586dcf53d3eb4fd.tar.gz |
Tidy up the query parser
4 files changed, 202 insertions, 208 deletions
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java index 2da5f9912..f9e4ad1a6 100644 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java @@ -68,6 +68,8 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; @@ -167,29 +169,24 @@ public class Lucene9Index extends Index { @Override public SearchResults doSearch(final SearchRequest request) throws IOException { - final Query query; - try { - query = newQueryParser().parse(request); - } catch (final QueryNodeException e) { - throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); - } + final Query query = parse(request); // Construct CollectorManagers. final MultiCollectorManager cm; final CollectorManager<?, ? 
extends TopDocs> hits = hitCollector(request); + if (request.hasCounts() || request.hasRanges()) { + cm = new MultiCollectorManager(hits, new FacetsCollectorManager()); + } else { + cm = new MultiCollectorManager(hits); + } searcherManager.maybeRefreshBlocking(); final IndexSearcher searcher = searcherManager.acquire(); try { - if (request.hasCounts() || request.hasRanges()) { - cm = new MultiCollectorManager(hits, new FacetsCollectorManager()); - } else { - cm = new MultiCollectorManager(hits); - } final Object[] reduces = searcher.search(query, cm); return toSearchResults(request, searcher, reduces); - } catch (IllegalStateException e) { + } catch (final IllegalStateException e) { throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); } finally { searcherManager.release(searcher); @@ -483,8 +480,22 @@ public class Lucene9Index extends Index { return new Term("_id", docId); } - public Lucene9QueryParser newQueryParser() { - return new Lucene9QueryParser("default", analyzer); + private Query parse(final SearchRequest request) { + var queryParser = new NouveauQueryParser(analyzer); + Query result; + try { + result = queryParser.parse(request.getQuery(), "default"); + if (request.hasPartition()) { + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("_partition", request.getPartition())), Occur.MUST); + builder.add(result, Occur.MUST); + result = builder.build(); + } + } catch (final QueryNodeException e) { + throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); + } + + return result; } @Override diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9QueryParser.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9QueryParser.java deleted file mode 100644 index 9b693f2e5..000000000 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9QueryParser.java +++ /dev/null @@ -1,116 +0,0 @@ -// -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.apache.couchdb.nouveau.lucene9; - -import org.apache.couchdb.nouveau.api.SearchRequest; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryparser.flexible.core.QueryNodeException; -import org.apache.lucene.queryparser.flexible.core.QueryParserHelper; -import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; -import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline; -import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor; -import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder; -import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; -import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; -import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; -import org.apache.lucene.queryparser.flexible.standard.processors.AllowLeadingWildcardProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.AnalyzerQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.BooleanQuery2ModifierNodeProcessor; -import 
org.apache.lucene.queryparser.flexible.standard.processors.BooleanSingleChildOptimizationQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.BoostQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.DefaultPhraseSlopQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.FuzzyQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.IntervalQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.MatchAllDocsQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.MultiFieldQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.MultiTermRewriteMethodProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.OpenRangeQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.PhraseSlopQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.PointQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.RegexpQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.RemoveEmptyNonLeafQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.TermRangeQueryNodeProcessor; -import org.apache.lucene.queryparser.flexible.standard.processors.WildcardQueryNodeProcessor; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; - -class Lucene9QueryParser extends QueryParserHelper { - - private static class NouveauQueryNodeProcessorPipeline extends QueryNodeProcessorPipeline { - - public NouveauQueryNodeProcessorPipeline(QueryConfigHandler queryConfig) { - super(queryConfig); - - add(new WildcardQueryNodeProcessor()); - add(new MultiFieldQueryNodeProcessor()); - add(new 
FuzzyQueryNodeProcessor()); - add(new RegexpQueryNodeProcessor()); - add(new MatchAllDocsQueryNodeProcessor()); - add(new OpenRangeQueryNodeProcessor()); - add(new PointQueryNodeProcessor()); - add(new NumericRangeQueryProcessor()); - add(new TermRangeQueryNodeProcessor()); - add(new AllowLeadingWildcardProcessor()); - add(new AnalyzerQueryNodeProcessor()); - add(new PhraseSlopQueryNodeProcessor()); - add(new BooleanQuery2ModifierNodeProcessor()); - add(new NoChildOptimizationQueryNodeProcessor()); - add(new RemoveDeletedQueryNodesProcessor()); - add(new RemoveEmptyNonLeafQueryNodeProcessor()); - add(new BooleanSingleChildOptimizationQueryNodeProcessor()); - add(new DefaultPhraseSlopQueryNodeProcessor()); - add(new BoostQueryNodeProcessor()); - add(new MultiTermRewriteMethodProcessor()); - add(new IntervalQueryNodeProcessor()); - } - - } - - private final String defaultField; - - public Lucene9QueryParser(final String defaultField, final Analyzer analyzer) { - super( - new StandardQueryConfigHandler(), - new StandardSyntaxParser(), - new NouveauQueryNodeProcessorPipeline(null), - new StandardQueryTreeBuilder()); - setEnablePositionIncrements(true); - this.setAnalyzer(analyzer); - this.defaultField = defaultField; - } - - public void setAnalyzer(Analyzer analyzer) { - getQueryConfigHandler().set(ConfigurationKeys.ANALYZER, analyzer); - } - - public void setEnablePositionIncrements(boolean enabled) { - getQueryConfigHandler().set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, enabled); - } - - public Query parse(SearchRequest searchRequest) throws QueryNodeException { - final Query q = (Query) parse(searchRequest.getQuery(), defaultField); - if (searchRequest.hasPartition()) { - final BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new TermQuery(new Term("_partition", searchRequest.getPartition())), Occur.MUST); - builder.add(q, Occur.MUST); - return builder.build(); - } - return q; - } - -} diff --git 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.lucene9;

import java.text.NumberFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.QueryParserHelper;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor;
import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.processors.AllowLeadingWildcardProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.AnalyzerQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.BooleanQuery2ModifierNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.BooleanSingleChildOptimizationQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.BoostQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.DefaultPhraseSlopQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.FuzzyQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.IntervalQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.MatchAllDocsQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.MultiFieldQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.MultiTermRewriteMethodProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.OpenRangeQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.PhraseSlopQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.RegexpQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.RemoveEmptyNonLeafQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.TermRangeQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.standard.processors.WildcardQueryNodeProcessor;
import org.apache.lucene.search.Query;

/**
 * Query parser for Nouveau, built from Lucene's flexible query parser pieces
 * (standard config handler, syntax parser and tree builder) combined with a
 * customised processor pipeline, {@link NouveauQueryNodeProcessorPipeline}.
 */
public final class NouveauQueryParser extends QueryParserHelper {

    /**
     * Creates a parser with position increments enabled.
     *
     * @param analyzer the analyzer stored in the query configuration
     */
    public NouveauQueryParser(final Analyzer analyzer) {
        super(
                new StandardQueryConfigHandler(),
                new StandardSyntaxParser(),
                new NouveauQueryNodeProcessorPipeline(),
                new StandardQueryTreeBuilder());
        getQueryConfigHandler().set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, true);
        getQueryConfigHandler().set(ConfigurationKeys.ANALYZER, analyzer);
    }

    /**
     * Parses a query string, narrowing the result of the inherited method to
     * {@link Query}.
     *
     * @param query        the query text
     * @param defaultField the field used for terms that do not name one
     * @return the parsed Lucene query
     * @throws QueryNodeException if parsing or processing the query fails
     */
    @Override
    public Query parse(String query, String defaultField) throws QueryNodeException {
        return (Query) super.parse(query, defaultField);
    }

    /**
     * Same pipeline as StandardQueryParser, except that PointQueryNodeProcessor
     * and PointRangeQueryNodeProcessor are replaced by
     * {@link NouveauPointProcessor}.
     */
    public static class NouveauQueryNodeProcessorPipeline extends QueryNodeProcessorPipeline {

        public NouveauQueryNodeProcessorPipeline() {
            // No config handler is passed here.
            // NOTE(review): presumably QueryParserHelper installs the handler
            // on the pipeline when the parser is constructed - confirm.
            super(null);
            add(new WildcardQueryNodeProcessor());
            add(new MultiFieldQueryNodeProcessor());
            add(new FuzzyQueryNodeProcessor());
            add(new RegexpQueryNodeProcessor());
            add(new MatchAllDocsQueryNodeProcessor());
            add(new OpenRangeQueryNodeProcessor());
            add(new NouveauPointProcessor());
            add(new TermRangeQueryNodeProcessor());
            add(new AllowLeadingWildcardProcessor());
            add(new AnalyzerQueryNodeProcessor());
            add(new PhraseSlopQueryNodeProcessor());
            add(new BooleanQuery2ModifierNodeProcessor());
            add(new NoChildOptimizationQueryNodeProcessor());
            add(new RemoveDeletedQueryNodesProcessor());
            add(new RemoveEmptyNonLeafQueryNodeProcessor());
            add(new BooleanSingleChildOptimizationQueryNodeProcessor());
            add(new DefaultPhraseSlopQueryNodeProcessor());
            add(new BoostQueryNodeProcessor());
            add(new MultiTermRewriteMethodProcessor());
            add(new IntervalQueryNodeProcessor());
        }
    }

    /**
     * If it looks like a number, treat it as a number.
     *
     * <p>A term or range bound counts as a number only when the <em>entire</em>
     * token parses as one using the locale-independent {@link Locale#ROOT}
     * format. Anything else is left untouched so that it falls through to the
     * regular term / term-range handling.
     */
    public static class NouveauPointProcessor extends QueryNodeProcessorImpl {

        private static final String POSITIVE_INFINITY = "Infinity";
        private static final String NEGATIVE_INFINITY = "-Infinity";

        /**
         * Rewrites numeric-looking field terms and term ranges into point range
         * nodes over doubles.
         *
         * @param node the node being visited
         * @return a {@link PointRangeQueryNode} when the node is numeric,
         *         otherwise {@code node} itself
         * @throws QueryNodeException if the replacement node cannot be built
         */
        @Override
        protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
            // Range bounds are FieldQueryNodes too; they are handled together
            // with their parent range below, never individually.
            if (node instanceof FieldQueryNode && !(node.getParent() instanceof RangeQueryNode)) {
                return toPointQuery((FieldQueryNode) node);
            }
            if (node instanceof TermRangeQueryNode) {
                return toPointRange((TermRangeQueryNode) node);
            }
            return node;
        }

        @Override
        protected QueryNode preProcessNode(final QueryNode node) throws QueryNodeException {
            return node;
        }

        @Override
        protected List<QueryNode> setChildrenOrder(final List<QueryNode> children) throws QueryNodeException {
            return children;
        }

        /** Turns a single numeric term into an inclusive [n, n] point range. */
        private static QueryNode toPointQuery(final FieldQueryNode fieldNode) throws QueryNodeException {
            final var numberFormat = newNumberFormat();
            final var number = toNumber(fieldNode.getTextAsString(), numberFormat);
            if (number == null) {
                return fieldNode;
            }
            final var lowerNode = new PointQueryNode(fieldNode.getField(), number, numberFormat);
            final var upperNode = new PointQueryNode(fieldNode.getField(), number, numberFormat);
            return new PointRangeQueryNode(
                    lowerNode, upperNode, true, true, new PointsConfig(numberFormat, Double.class));
        }

        /** Turns a term range into a point range when both bounds are numeric. */
        private static QueryNode toPointRange(final TermRangeQueryNode rangeNode) throws QueryNodeException {
            final var numberFormat = newNumberFormat();
            final var lowerNumber = toBound(rangeNode.getLowerBound().getTextAsString(), numberFormat);
            final var upperNumber = toBound(rangeNode.getUpperBound().getTextAsString(), numberFormat);
            if (lowerNumber == null || upperNumber == null) {
                return rangeNode;
            }
            final var lowerNode = new PointQueryNode(rangeNode.getField(), lowerNumber, numberFormat);
            final var upperNode = new PointQueryNode(rangeNode.getField(), upperNumber, numberFormat);
            return new PointRangeQueryNode(
                    lowerNode,
                    upperNode,
                    rangeNode.isLowerInclusive(),
                    rangeNode.isUpperInclusive(),
                    new PointsConfig(numberFormat, Double.class));
        }

        /**
         * Creates the formatter used for parsing. A fresh instance is created
         * per rewrite because NumberFormat instances are not synchronized, and
         * Locale.ROOT keeps the result independent of the JVM default locale.
         */
        private static NumberFormat newNumberFormat() {
            return NumberFormat.getInstance(Locale.ROOT);
        }

        /**
         * Like {@link #toNumber} but additionally accepts the literal range
         * bounds {@code Infinity} and {@code -Infinity}. These are recognised
         * for range bounds only, so a plain term search for the word
         * "Infinity" is unaffected.
         */
        private static Number toBound(final String value, final NumberFormat numberFormat) {
            if (POSITIVE_INFINITY.equals(value)) {
                return Double.POSITIVE_INFINITY;
            }
            if (NEGATIVE_INFINITY.equals(value)) {
                return Double.NEGATIVE_INFINITY;
            }
            return toNumber(value, numberFormat);
        }

        /**
         * Returns null if the value is not a number, indicating we should
         * fall back to a regular term / term-range query.
         *
         * <p>The whole of {@code value} must be consumed by the parse. A bare
         * {@code NumberFormat.parse(String)} accepts any numeric prefix, which
         * would turn tokens such as {@code 12abc} or {@code 2023-04-14} into
         * the numbers 12 and 2023.
         */
        private static Number toNumber(final String value, final NumberFormat numberFormat) {
            if (value == null || value.isEmpty()) {
                return null;
            }
            final var position = new ParsePosition(0);
            final Number parsed = numberFormat.parse(value, position);
            if (parsed == null || position.getIndex() != value.length()) {
                return null;
            }
            return parsed.doubleValue();
        }

    }

}
\ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NumericRangeQueryProcessor.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NumericRangeQueryProcessor.java deleted file mode 100644 index fa07e4002..000000000 --- a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NumericRangeQueryProcessor.java +++ /dev/null @@ -1,78 +0,0 @@ -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.apache.couchdb.nouveau.lucene9; - -import java.text.NumberFormat; -import java.text.ParsePosition; -import java.util.List; - -import org.apache.lucene.queryparser.flexible.core.QueryNodeException; -import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; -import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; -import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; -import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; -import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; -import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; -import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; - -class NumericRangeQueryProcessor extends QueryNodeProcessorImpl { - - // TODO don't like this is locale dependent. 
- private final NumberFormat decimalFormat = NumberFormat.getInstance(); - private final PointsConfig doublePointsConfig = new PointsConfig(decimalFormat, Double.class); - - @Override - protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException { - return node; - } - - @Override - protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { - if (node instanceof TermRangeQueryNode) { - final TermRangeQueryNode rangeNode = (TermRangeQueryNode) node; - final Number lowerValue = toNumber(rangeNode.getLowerBound()); - final Number upperValue = toNumber(rangeNode.getUpperBound()); - if (lowerValue != null && upperValue != null) { - return new PointRangeQueryNode( - toPointQueryNode(rangeNode.getField(), lowerValue), - toPointQueryNode(rangeNode.getField(), upperValue), - rangeNode.isLowerInclusive(), - rangeNode.isUpperInclusive(), - doublePointsConfig); - } - } - return node; - } - - @Override - protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException { - return children; - } - - private Number toNumber(final FieldQueryNode node) { - switch (node.getTextAsString()) { - case "Infinity": - return Double.POSITIVE_INFINITY; - case "-Infinity": - return Double.NEGATIVE_INFINITY; - default: - return decimalFormat.parse(node.getTextAsString(), new ParsePosition(0)).doubleValue(); - } - } - - private PointQueryNode toPointQueryNode(final CharSequence field, final Number value) { - return new PointQueryNode(field, value, decimalFormat); - } - -} |