From a28b75a9d9fcf3f3eb68fb9f122ad6d21c589898 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 22 Apr 2023 13:45:03 +0000 Subject: Import nouveau (#4291) Nouveau - a new (experimental) full-text indexing feature for Apache CouchDB, using Lucene 9. Requires Java 11 or higher (19 is preferred). --- .gitignore | 4 +- Makefile | 43 +- configure | 10 + dev/run | 48 +- nouveau/.gitignore | 7 + nouveau/LICENSE | 201 ++++++ nouveau/README.md | 118 ++++ nouveau/TODO | 8 + nouveau/nouveau.yaml | 24 + nouveau/pom.xml | 291 +++++++++ .../apache/couchdb/nouveau/NouveauApplication.java | 82 +++ .../nouveau/NouveauApplicationConfiguration.java | 74 +++ .../apache/couchdb/nouveau/api/AnalyzeRequest.java | 51 ++ .../couchdb/nouveau/api/AnalyzeResponse.java | 45 ++ .../couchdb/nouveau/api/DocumentDeleteRequest.java | 50 ++ .../couchdb/nouveau/api/DocumentUpdateRequest.java | 72 +++ .../apache/couchdb/nouveau/api/DoubleField.java | 61 ++ .../apache/couchdb/nouveau/api/DoubleRange.java | 31 + .../java/org/apache/couchdb/nouveau/api/Field.java | 49 ++ .../couchdb/nouveau/api/IndexDefinition.java | 70 +++ .../org/apache/couchdb/nouveau/api/IndexInfo.java | 64 ++ .../java/org/apache/couchdb/nouveau/api/Range.java | 145 +++++ .../org/apache/couchdb/nouveau/api/SearchHit.java | 65 ++ .../apache/couchdb/nouveau/api/SearchRequest.java | 151 +++++ .../apache/couchdb/nouveau/api/SearchResults.java | 97 +++ .../apache/couchdb/nouveau/api/StoredField.java | 48 ++ .../apache/couchdb/nouveau/api/StringField.java | 63 ++ .../org/apache/couchdb/nouveau/api/TextField.java | 55 ++ .../org/apache/couchdb/nouveau/core/IOUtils.java | 64 ++ .../org/apache/couchdb/nouveau/core/Index.java | 176 ++++++ .../apache/couchdb/nouveau/core/IndexFunction.java | 23 + .../apache/couchdb/nouveau/core/IndexLoader.java | 26 + .../apache/couchdb/nouveau/core/IndexManager.java | 321 ++++++++++ .../nouveau/core/UpdatesOutOfOrderException.java | 24 + .../core/UpdatesOutOfOrderExceptionMapper.java | 32 + .../couchdb/nouveau/core/ser/ByteArrayWrapper.java | 24 + .../couchdb/nouveau/core/ser/DoubleWrapper.java | 24 + .../couchdb/nouveau/core/ser/FloatWrapper.java | 24 + .../couchdb/nouveau/core/ser/IntWrapper.java | 24 + .../couchdb/nouveau/core/ser/LongWrapper.java | 24 + .../couchdb/nouveau/core/ser/PrimitiveWrapper.java | 46 ++ .../couchdb/nouveau/core/ser/StringWrapper.java | 24 + .../couchdb/nouveau/health/AnalyzeHealthCheck.java | 46 ++ .../couchdb/nouveau/health/IndexHealthCheck.java | 63 ++ .../nouveau/lucene9/Lucene9AnalyzerFactory.java | 143 +++++ .../couchdb/nouveau/lucene9/Lucene9Index.java | 507 +++++++++++++++ .../couchdb/nouveau/lucene9/Lucene9Module.java | 31 + .../nouveau/lucene9/NouveauQueryParser.java | 181 ++++++ .../nouveau/lucene9/ParallelSearcherFactory.java | 36 ++ .../couchdb/nouveau/lucene9/QueryDeserializer.java | 121 ++++ .../couchdb/nouveau/lucene9/QuerySerializer.java | 171 +++++ .../lucene9/SimpleAsciiFoldingAnalyzer.java | 39 ++ .../couchdb/nouveau/resources/AnalyzeResource.java | 74 +++ .../couchdb/nouveau/resources/IndexResource.java | 147 +++++ .../couchdb/nouveau/tasks/CloseAllIndexesTask.java | 38 ++ nouveau/src/main/resources/banner.txt | 7 + nouveau/src/main/resources/openapi.yaml | 9 + .../couchdb/nouveau/api/SearchRequestTest.java | 59 ++ .../nouveau/health/AnalyzeHealthCheckTest.java | 29 + .../nouveau/health/IndexHealthCheckTest.java | 51 ++ .../lucene9/Lucene9AnalyzerFactoryTest.java | 283 +++++++++ .../couchdb/nouveau/lucene9/Lucene9IndexTest.java | 223 +++++++ 
.../nouveau/lucene9/NouveauQueryParserTest.java | 106 ++++ .../nouveau/lucene9/QuerySerializationTest.java | 47 ++ .../resources/fixtures/DocumentUpdateRequest.json | 22 + .../src/test/resources/fixtures/SearchRequest.json | 17 + rebar.config.script | 1 + rel/apps/couch_epi.config | 1 + rel/overlay/etc/default.ini | 4 + rel/reltool.config | 2 + share/server/loop.js | 16 +- share/server/nouveau.js | 108 ++++ share/server/state.js | 12 +- src/chttpd/src/chttpd.erl | 35 +- src/chttpd/src/chttpd_misc.erl | 13 +- src/couch/include/couch_db.hrl | 1 + src/docs/src/api/ddoc/index.rst | 1 + src/docs/src/api/ddoc/nouveau.rst | 142 +++++ src/docs/src/api/server/common.rst | 49 +- src/docs/src/config/query-servers.rst | 28 + src/docs/src/ddocs/index.rst | 1 + src/docs/src/ddocs/nouveau.rst | 692 +++++++++++++++++++++ src/docs/src/experimental.rst | 10 + src/docs/src/install/index.rst | 1 + src/docs/src/install/nouveau.rst | 59 ++ src/dreyfus/src/dreyfus_index.erl | 4 +- src/ken/src/ken_server.erl | 49 +- src/mango/src/mango_cursor.erl | 2 + src/mango/src/mango_cursor_nouveau.erl | 293 +++++++++ src/mango/src/mango_cursor_text.erl | 2 +- src/mango/src/mango_error.erl | 20 + src/mango/src/mango_idx.erl | 34 +- src/mango/src/mango_idx_nouveau.erl | 459 ++++++++++++++ src/mango/src/mango_native_proc.erl | 97 ++- src/mem3/src/mem3_reshard_index.erl | 44 +- src/nouveau/include/nouveau.hrl | 23 + src/nouveau/priv/stats_descriptions.cfg | 21 + src/nouveau/src/nouveau.app.src | 29 + src/nouveau/src/nouveau.erl | 20 + src/nouveau/src/nouveau_api.erl | 216 +++++++ src/nouveau/src/nouveau_app.erl | 30 + src/nouveau/src/nouveau_bookmark.erl | 68 ++ src/nouveau/src/nouveau_epi.erl | 49 ++ src/nouveau/src/nouveau_fabric.erl | 36 ++ src/nouveau/src/nouveau_fabric_cleanup.erl | 43 ++ src/nouveau/src/nouveau_fabric_info.erl | 99 +++ src/nouveau/src/nouveau_fabric_search.erl | 221 +++++++ src/nouveau/src/nouveau_httpd.erl | 276 ++++++++ src/nouveau/src/nouveau_httpd_handlers.erl | 35 ++ src/nouveau/src/nouveau_index_manager.erl | 161 +++++ src/nouveau/src/nouveau_index_updater.erl | 138 ++++ src/nouveau/src/nouveau_rpc.erl | 57 ++ src/nouveau/src/nouveau_sup.erl | 31 + src/nouveau/src/nouveau_util.erl | 97 +++ support/build_js.escript | 2 + test/elixir/test/config/nouveau.elixir | 17 + test/elixir/test/config/test-config.ini | 3 + test/elixir/test/nouveau_test.exs | 242 +++++++ 118 files changed, 9387 insertions(+), 40 deletions(-) create mode 100644 nouveau/.gitignore create mode 100644 nouveau/LICENSE create mode 100644 nouveau/README.md create mode 100644 nouveau/TODO create mode 100644 nouveau/nouveau.yaml create mode 100644 nouveau/pom.xml create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java create mode 100644 
nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/IOUtils.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/Index.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderExceptionMapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java create mode 100644 nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java create mode 100644 nouveau/src/main/resources/banner.txt create mode 100644 nouveau/src/main/resources/openapi.yaml create mode 100644 
nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java create mode 100644 nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java create mode 100644 nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json create mode 100644 nouveau/src/test/resources/fixtures/SearchRequest.json create mode 100644 share/server/nouveau.js create mode 100644 src/docs/src/api/ddoc/nouveau.rst create mode 100644 src/docs/src/ddocs/nouveau.rst create mode 100644 src/docs/src/install/nouveau.rst create mode 100644 src/mango/src/mango_cursor_nouveau.erl create mode 100644 src/mango/src/mango_idx_nouveau.erl create mode 100644 src/nouveau/include/nouveau.hrl create mode 100644 src/nouveau/priv/stats_descriptions.cfg create mode 100644 src/nouveau/src/nouveau.app.src create mode 100644 src/nouveau/src/nouveau.erl create mode 100644 src/nouveau/src/nouveau_api.erl create mode 100644 src/nouveau/src/nouveau_app.erl create mode 100644 src/nouveau/src/nouveau_bookmark.erl create mode 100644 src/nouveau/src/nouveau_epi.erl create mode 100644 src/nouveau/src/nouveau_fabric.erl create mode 100644 src/nouveau/src/nouveau_fabric_cleanup.erl create mode 100644 src/nouveau/src/nouveau_fabric_info.erl create mode 100644 src/nouveau/src/nouveau_fabric_search.erl create mode 100644 src/nouveau/src/nouveau_httpd.erl create mode 100644 src/nouveau/src/nouveau_httpd_handlers.erl create mode 100644 src/nouveau/src/nouveau_index_manager.erl create mode 100644 src/nouveau/src/nouveau_index_updater.erl create mode 100644 src/nouveau/src/nouveau_rpc.erl create mode 100644 src/nouveau/src/nouveau_sup.erl create mode 100644 src/nouveau/src/nouveau_util.erl create mode 100644 test/elixir/test/config/nouveau.elixir create mode 100644 test/elixir/test/nouveau_test.exs diff --git a/.gitignore b/.gitignore index 95c05e981..99a5285cd 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,7 @@ .rebar/ .eunit/ cover/ -core +/core debian/ log apache-couchdb-*/ @@ -133,3 +133,5 @@ test/javascript/junit.xml .idea *.lock + +.tool-versions diff --git a/Makefile b/Makefile index 06367bfdc..4d7704389 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ TEST_OPTS="-c 'startup_jitter=0' -c 'default_security=admin_local'" .PHONY: all # target: all - Build everything -all: couch fauxton docs escriptize +all: couch fauxton docs escriptize nouveau .PHONY: help @@ -152,10 +152,12 @@ escriptize: couch .PHONY: check # target: check - Test everything check: all + @$(MAKE) exunit @$(MAKE) eunit @$(MAKE) mango-test @$(MAKE) elixir-suite @$(MAKE) weatherreport-test + @$(MAKE) nouveau-test ifdef apps subdirs = $(apps) @@ -425,6 +427,12 @@ else endif endif +ifeq ($(with_nouveau), 1) + @mkdir -p rel/couchdb/nouveau/ + @cp nouveau/target/server-*-dist.jar rel/couchdb/nouveau/ + @cp nouveau/nouveau.yaml rel/couchdb/nouveau/ +endif + @echo "... done" @echo @echo " You can now copy the rel/couchdb directory anywhere on your system." 
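For orientation, the resulting from-source workflow looks roughly like the sketch below; the target and flag names are taken from this patch's configure and Makefile additions, and Maven plus a Java 11+ JDK are assumed to be installed:

```
./configure --enable-nouveau                 # records with_nouveau = 1
make nouveau                                 # cd nouveau && mvn -D maven.test.skip=true
make nouveau-test                            # Maven suite (-P allTests) plus the Elixir suite
dev/run -n 1 -q -a adm:pass --with-nouveau   # dev cluster with a Nouveau server
```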
@@ -465,6 +473,9 @@ clean: @rm -f src/couch/priv/couch_js/config.h @rm -f dev/*.beam dev/devnode.* dev/pbkdf2.pyc log/crash.log @rm -f dev/erlserver.pem dev/couch_ssl_dist.conf +ifeq ($(with_nouveau), 1) + @cd nouveau && mvn clean +endif .PHONY: distclean @@ -525,3 +536,33 @@ derived: @echo "ON_TAG: $(ON_TAG)" @echo "REL_TAG: $(REL_TAG)" @echo "SUB_VSN: $(SUB_VSN)" + +################################################################################ +# Nouveau +################################################################################ + +.PHONY: nouveau +# Build nouveau +nouveau: +ifeq ($(with_nouveau), 1) + @cd nouveau && mvn -D maven.test.skip=true +endif + +.PHONY: nouveau-test +nouveau-test: nouveau-test-maven nouveau-test-elixir + +.PHONY: nouveau-test-maven +nouveau-test-maven: couch nouveau +ifeq ($(with_nouveau), 1) + @cd nouveau && mvn test -P allTests +endif + +.PHONY: nouveau-test-elixir +nouveau-test-elixir: export MIX_ENV=integration +nouveau-test-elixir: elixir-init devclean +nouveau-test-elixir: couch nouveau +ifeq ($(with_nouveau), 1) + @dev/run -n 1 -q -a adm:pass --with-nouveau \ + --locald-config test/config/test-config.ini \ + --no-eval 'mix test --trace --include test/elixir/test/config/nouveau.elixir' +endif diff --git a/configure b/configure index 8e2057e2d..10d1d57ff 100755 --- a/configure +++ b/configure @@ -27,6 +27,7 @@ REBAR3_BRANCH="main" WITH_PROPER="true" WITH_FAUXTON=1 WITH_DOCS=1 +WITH_NOUVEAU=0 ERLANG_MD5="false" SKIP_DEPS=0 @@ -56,6 +57,7 @@ Options: -u | --user USER set the username to run as (defaults to $COUCHDB_USER) --disable-fauxton do not build Fauxton --disable-docs do not build any documentation or manpages + --enable-nouveau enable the new experimental search module --erlang-md5 use erlang for md5 hash operations --dev alias for --disable-docs --disable-fauxton --spidermonkey-version VSN specify the version of SpiderMonkey to use (defaults to $SM_VSN) @@ -112,6 +114,12 @@ parse_opts() { continue ;; + --enable-nouveau) + WITH_NOUVEAU=1 + shift + continue + ;; + --erlang-md5) ERLANG_MD5="true" shift @@ -121,6 +129,7 @@ parse_opts() { --dev) WITH_DOCS=0 WITH_FAUXTON=0 + WITH_NOUVEAU=1 shift continue ;; @@ -302,6 +311,7 @@ package_author_name = $PACKAGE_AUTHOR_NAME with_fauxton = $WITH_FAUXTON with_docs = $WITH_DOCS +with_nouveau = $WITH_NOUVEAU user = $COUCHDB_USER spidermonkey_version = $SM_VSN diff --git a/dev/run b/dev/run index df1a0b105..707dc709c 100755 --- a/dev/run +++ b/dev/run @@ -24,6 +24,7 @@ import optparse import os import posixpath import re +import signal import socket import subprocess as sp import sys @@ -226,6 +227,13 @@ def get_args_parser(): default=[], help="Path to config to place in 'local.d'. 
Can be repeated", ) + parser.add_option( + "--with-nouveau", + dest="with_nouveau", + default=False, + action="store_true", + help="Start Nouveau server", + ) return parser @@ -255,6 +263,7 @@ def setup_context(opts, args): "procs": [], "auto_ports": opts.auto_ports, "locald_configs": opts.locald_configs, + "with_nouveau": opts.with_nouveau, } @@ -304,9 +313,11 @@ def setup_configs(ctx): ), "node_name": "-name %s@127.0.0.1" % node, "cluster_port": cluster_port, + "clouseau_name": "clouseau%d@127.0.0.1" % (idx + 1), "backend_port": backend_port, "prometheus_port": prometheus_port, "uuid": "fake_uuid_for_dev", + "with_nouveau": str(ctx["with_nouveau"]).lower(), "_default": "", } write_config(ctx, node, env) @@ -451,6 +462,32 @@ def boot_haproxy(ctx): ) +def boot_nouveau(ctx): + if not ctx["with_nouveau"]: + return + + version = "1.0-SNAPSHOT" + cmd = [ + "java", + "-server", + "-jar", + "target/server-%s-dist.jar" % version, + "server", + "nouveau.yaml", + ] + + logfname = os.path.join(ctx["devdir"], "logs", "nouveau.log") + log = open(logfname, "w") + return sp.Popen( + " ".join(cmd), + cwd="nouveau", + shell=True, + stdin=sp.PIPE, + stdout=log, + stderr=sp.STDOUT, + ) + + def hack_default_ini(ctx, node, contents): contents = re.sub( "^\[httpd\]$", @@ -509,6 +546,11 @@ def hashify(pwd, salt=COMMON_SALT, iterations=10, keylen=20): def startup(ctx): + def handler(signalno, frame): + kill_processes(ctx) + sys.exit() + + signal.signal(signal.SIGTERM, handler) atexit.register(kill_processes, ctx) boot_nodes(ctx) ensure_all_nodes_alive(ctx) @@ -525,7 +567,8 @@ def startup(ctx): def kill_processes(ctx): for proc in ctx["procs"]: if proc and proc.returncode is None: - proc.kill() + proc.terminate() + ctx["procs"] = [] def degrade_cluster(ctx): @@ -551,6 +594,9 @@ def boot_nodes(ctx): haproxy_proc = boot_haproxy(ctx) if haproxy_proc is not None: ctx["procs"].append(haproxy_proc) + nouveau_proc = boot_nouveau(ctx) + if nouveau_proc is not None: + ctx["procs"].append(nouveau_proc) def ensure_all_nodes_alive(ctx): diff --git a/nouveau/.gitignore b/nouveau/.gitignore new file mode 100644 index 000000000..89034c41f --- /dev/null +++ b/nouveau/.gitignore @@ -0,0 +1,7 @@ +*~ +.classpath +.project +.settings/ +target/ +.vscode/ +dependency-reduced-pom.xml diff --git a/nouveau/LICENSE b/nouveau/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/nouveau/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/nouveau/README.md b/nouveau/README.md
new file mode 100644
index 000000000..86b1914a7
--- /dev/null
+++ b/nouveau/README.md
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on:
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json,yaml}
+
+## What doesn't work yet?
+
+* results grouping
+* configurable stop words for analyzers
+* Makefile.win or Windows generally
+
+I don't intend to add grouping support; it seems barely used. I would accept a tidy contribution, though.
+
+## Why is this better than dreyfus/clouseau?
+
+* No scalang (or Scala!)
+* Supports any version of Java that Lucene 9 supports
+* memory-mapped I/O for performance (which works best on Java 19)
+* direct I/O used for segment merging (so we don't evict useful data from disk cache)
+
+## Getting started
+
+Configure CouchDB with `--enable-nouveau`
+
+Build Nouveau with:
+
+`make`
+
+Run Nouveau with:
+
+`dev/run --admin=foo:bar --with-nouveau`
+
+Make a database with some data and an index definition:
+
+```
+#!/bin/sh
+
+URL="http://foo:bar@127.0.0.1:15984/foo"
+
+curl -X DELETE "$URL"
+curl -X PUT "$URL?n=3&q=16"
+
+curl -X PUT "$URL/_design/foo" -d '{"nouveau":{"bar":{"default_analyzer":"standard", "field_analyzers":{"foo":"english"}, "index":"function(doc) { index(\"string\", \"foo\", \"bar\"); }"}}}'
+
+# curl "$URL/_index" -Hcontent-type:application/json -d '{"type":"nouveau", "index": {"fields": [{"name": "bar", "type":"number"}]}}'
+
+for I in {1..5}; do
+    DOCID=$RANDOM
+    DOCID=$[ $DOCID % 100000 ]
+    BAR=$RANDOM
+    BAR=$[ $BAR % 100000 ]
+    curl -X PUT "$URL/doc$DOCID" -d "{\"bar\": $BAR}"
+done
+
+while true; do
+    curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*'
+done
+```
+
+To avoid colliding with `dreyfus`, I've hooked Nouveau in with new paths:
+
+`curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*'`
+
+This will cause Nouveau to build indexes for each copy (N) and each
+shard range (Q) and then perform a search and return the results. Much
+of the classic query syntax works, as does sorting on strings and numbers
+(`sort=["fieldnamehere<string>"]` or `sort=["fieldnamehere<number>"]`,
+defaulting to number).
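+
+For example, an illustrative query that sorts all matches by the numeric
+`bar` field populated by the script above (the `-g` flag stops curl from
+globbing the brackets):
+
+```
+curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*&sort=["bar<number>"]' -g
+```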
+
+## Facet support
+
+Counts of string fields and Ranges for numeric fields:
+
+```
+curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*&limit=1&ranges={"bar":[{"label":"cheap","min":0,"max":100}]}&counts=["foo"]' -g
+```
+
+## Index function
+
+| Arguments                                                        | Effect
+| :--------------------------------------------------------------- | :-----
+| index("text", "foo", "bar", {"store": true});                    | analyzes value for full-text searching, optionally stores the value
+| index("string", "foo", "bar", {"store": true, "facet": true});   | indexes value as single token, optionally stores value and/or adds facet
+| index("double", "foo", 12.0, {"store": true, "facet": true});    | indexes value, optionally stores value and/or adds facet
+| index("stored", "foo", "bar");                                   | stores a string, returned with hits
+| index("stored", "foo", 12.0);                                    | stores a number, returned with hits
+
+## Deployment options
+
+All indexes are prefixed with their erlang hostname so you can deploy a
+single nouveau server per cluster if this meets your needs. You can
+also configure a different nouveau server for each couchdb node.
+
+There is no need to co-locate the nouveau server with the couchdb
+cluster, though this is a common option.
diff --git a/nouveau/TODO b/nouveau/TODO
new file mode 100644
index 000000000..f43343af2
--- /dev/null
+++ b/nouveau/TODO
@@ -0,0 +1,8 @@
+targeted dreyfus feature parity
+
+* partitioned db support (partially done)
+
+not targeted
+
+* highlighting
+* drilldown
diff --git a/nouveau/nouveau.yaml b/nouveau/nouveau.yaml
new file mode 100644
index 000000000..59176bb7a
--- /dev/null
+++ b/nouveau/nouveau.yaml
@@ -0,0 +1,24 @@
+maxIndexesOpen: 100
+commitIntervalSeconds: 30
+idleSeconds: 60
+rootDir: target/indexes
+
+server:
+  applicationConnectors:
+    - type: h2c
+      port: 8080
+      bindHost: 127.0.0.1
+      maxConcurrentStreams: 1024
+      initialStreamRecvWindow: 65535
+  adminConnectors:
+    - type: h2c
+      port: 8081
+      bindHost: 127.0.0.1
+  gzip:
+    includedMethods:
+      - GET
+      - POST
+  requestLog:
+    appenders:
+      - type: console
+        target: stderr
diff --git a/nouveau/pom.xml b/nouveau/pom.xml
new file mode 100644
index 000000000..b7c9051ff
--- /dev/null
+++ b/nouveau/pom.xml
@@ -0,0 +1,291 @@
+
+ 4.0.0
+ org.apache.couchdb.nouveau
+ server
+ 1.0-SNAPSHOT
+ ${project.artifactId}
+ Full-text indexing for CouchDB
+ 2022
+
+
+ -Duser.language=en -Duser.region=US -Duser.timezone=UTC
+ 4.0.0
+ 5.8.2
+ 9.5.0
+ 11
+ 11
+ UTF-8
+ UTF-8
+ SlowTest
+ 1.7.32
+ 2.2.8
+
+
+
+
+
+ org.junit
+ junit-bom
+ ${junit5.version}
+ pom
+ import
+
+
+ io.dropwizard
+ dropwizard-dependencies
+ ${dropwizard.version}
+ pom
+ import
+
+
+
+
+
+
+
+
+ io.dropwizard
+ dropwizard-core
+
+
+ io.dropwizard
+ dropwizard-http2
+
+
+ com.fasterxml.jackson.module
+ jackson-module-afterburner
+
+
+
+
+ io.dropwizard.metrics
+ metrics-core
+
+
+ io.dropwizard.metrics
+ metrics-caffeine
+
+
+ io.dropwizard.metrics
+ metrics-jersey2
+
+
+
+
+ org.apache.lucene
+ lucene-core
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-queryparser
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-analysis-common
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-analysis-stempel
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-analysis-smartcn
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-analysis-kuromoji
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-facet
+ ${lucene.version}
+
+
+ org.apache.lucene
+ lucene-misc
+ ${lucene.version}
+
+
+
+
+
+ io.swagger.core.v3
+ swagger-jaxrs2-jakarta
+ ${swagger.version}
+
+
io.swagger.core.v3 + swagger-jaxrs2-servlet-initializer-v2 + ${swagger.version} + + + + + io.dropwizard + dropwizard-testing + test + + + junit + junit + + + + + org.junit.jupiter + junit-jupiter + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.assertj + assertj-core + test + + + org.mockito + mockito-core + test + + + + + package + + + org.apache.maven.plugins + maven-shade-plugin + 3.4.1 + + true + dist + true + + + *:* + + META-INF/*.DSA + META-INF/*.RSA + META-INF/*.SF + + + + + + + org.apache.couchdb.nouveau.NouveauApplication + + true + + + + META-INF/versions + + + + + + package + + shade + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + ${project.tests.exclude} + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.4.2 + + + org.apache.maven.plugins + maven-dependency-plugin + 3.5.0 + + + + sources + resolve + + + javadoc + + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.3.0 + + + + true + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.9 + + + + prepare-agent + + + + + report + test + + report + + + + + + + + + + allTests + + + + + + + diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java new file mode 100644 index 000000000..9921eaa42 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplication.java @@ -0,0 +1,82 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package org.apache.couchdb.nouveau;
+
+import java.util.concurrent.ForkJoinPool;
+
+import org.apache.couchdb.nouveau.core.IndexManager;
+import org.apache.couchdb.nouveau.core.UpdatesOutOfOrderExceptionMapper;
+import org.apache.couchdb.nouveau.health.AnalyzeHealthCheck;
+import org.apache.couchdb.nouveau.health.IndexHealthCheck;
+import org.apache.couchdb.nouveau.lucene9.Lucene9Module;
+import org.apache.couchdb.nouveau.lucene9.ParallelSearcherFactory;
+import org.apache.couchdb.nouveau.resources.AnalyzeResource;
+import org.apache.couchdb.nouveau.resources.IndexResource;
+import org.apache.couchdb.nouveau.tasks.CloseAllIndexesTask;
+import org.apache.lucene.search.SearcherFactory;
+
+import io.dropwizard.core.Application;
+import io.dropwizard.core.setup.Environment;
+import io.swagger.v3.jaxrs2.integration.resources.OpenApiResource;
+
+public class NouveauApplication extends Application<NouveauApplicationConfiguration> {
+
+    public static void main(String[] args) throws Exception {
+        new NouveauApplication().run(args);
+    }
+
+    @Override
+    public String getName() {
+        return "Nouveau";
+    }
+
+    @Override
+    public void run(NouveauApplicationConfiguration configuration, Environment environment) throws Exception {
+        environment.jersey().register(new UpdatesOutOfOrderExceptionMapper());
+
+        // configure index manager
+        final IndexManager indexManager = new IndexManager();
+        indexManager.setCommitIntervalSeconds(configuration.getCommitIntervalSeconds());
+        indexManager.setIdleSeconds(configuration.getIdleSeconds());
+        indexManager.setMaxIndexesOpen(configuration.getMaxIndexesOpen());
+        indexManager.setMetricRegistry(environment.metrics());
+        indexManager.setScheduler(environment.lifecycle().scheduledExecutorService("index-manager-%d").threads(5).build());
+        indexManager.setObjectMapper(environment.getObjectMapper());
+        indexManager.setRootDir(configuration.getRootDir());
+        environment.lifecycle().manage(indexManager);
+
+        // Serialization classes
+        environment.getObjectMapper().registerModule(new Lucene9Module());
+
+        // AnalyzeResource
+        final AnalyzeResource analyzeResource = new AnalyzeResource();
+        environment.jersey().register(analyzeResource);
+
+        // IndexResource
+        final SearcherFactory searcherFactory = new ParallelSearcherFactory(ForkJoinPool.commonPool());
+        final IndexResource indexResource = new IndexResource(indexManager, searcherFactory);
+        environment.jersey().register(indexResource);
+
+        // Health checks
+        environment.healthChecks().register("analyze", new AnalyzeHealthCheck(analyzeResource));
+        environment.healthChecks().register("index", new IndexHealthCheck(indexResource));
+
+        // configure tasks
+        environment.admin().addTask(new CloseAllIndexesTask(indexManager));
+
+        // Swagger
+        environment.jersey().register(new OpenApiResource());
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java
new file mode 100644
index 000000000..212a57cc0
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/NouveauApplicationConfiguration.java
@@ -0,0 +1,74 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau; + +import java.nio.file.Path; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import io.dropwizard.core.Configuration; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotNull; + +public class NouveauApplicationConfiguration extends Configuration { + + @Min(10) + private int maxIndexesOpen = 10; + + @Min(10) + private int commitIntervalSeconds = 10; + + @Min(30) + private int idleSeconds = 30; + + @NotNull + private Path rootDir = null; + + @JsonProperty + public void setMaxIndexesOpen(int maxIndexesOpen) { + this.maxIndexesOpen = maxIndexesOpen; + } + + public int getMaxIndexesOpen() { + return maxIndexesOpen; + } + + @JsonProperty + public void setCommitIntervalSeconds(int commitIntervalSeconds) { + this.commitIntervalSeconds = commitIntervalSeconds; + } + + public int getCommitIntervalSeconds() { + return commitIntervalSeconds; + } + + @JsonProperty + public void setIdleSeconds(int idleSeconds) { + this.idleSeconds = idleSeconds; + } + + public int getIdleSeconds() { + return idleSeconds; + } + + @JsonProperty + public void setRootDir(Path rootDir) { + this.rootDir = rootDir; + } + + public Path getRootDir() { + return rootDir; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java new file mode 100644 index 000000000..fa22a36f2 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeRequest.java @@ -0,0 +1,51 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotEmpty;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class AnalyzeRequest {
+
+    @NotEmpty
+    private String analyzer;
+
+    @NotEmpty
+    private String text;
+
+    public AnalyzeRequest() {
+        // Jackson deserialization
+    }
+
+    public AnalyzeRequest(final String analyzer, final String text) {
+        this.analyzer = analyzer;
+        this.text = text;
+    }
+
+    @JsonProperty
+    public String getAnalyzer() {
+        return analyzer;
+    }
+
+    @JsonProperty
+    public String getText() {
+        return text;
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java
new file mode 100644
index 000000000..ce35c75d2
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/AnalyzeResponse.java
@@ -0,0 +1,45 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class AnalyzeResponse {
+
+    @NotNull
+    private List<@NotEmpty String> tokens;
+
+    public AnalyzeResponse() {
+        // Jackson deserialization
+    }
+
+    public AnalyzeResponse(List<String> tokens) {
+        this.tokens = tokens;
+    }
+
+    @JsonProperty
+    public List<String> getTokens() {
+        return tokens;
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java
new file mode 100644
index 000000000..805cb65cb
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentDeleteRequest.java
@@ -0,0 +1,50 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.Positive;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class DocumentDeleteRequest {
+
+    @Positive
+    private long seq;
+
+    public DocumentDeleteRequest() {
+        // Jackson deserialization
+    }
+
+    public DocumentDeleteRequest(long seq) {
+        if (seq < 1) {
+            throw new IllegalArgumentException("seq must be 1 or greater");
+        }
+        this.seq = seq;
+    }
+
+    @JsonProperty
+    public long getSeq() {
+        return seq;
+    }
+
+    @Override
+    public String toString() {
+        return "DocumentDeleteRequest [seq=" + seq + "]";
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java
new file mode 100644
index 000000000..45b478322
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DocumentUpdateRequest.java
@@ -0,0 +1,72 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import java.util.Collection;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+import jakarta.validation.Valid;
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.Positive;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class DocumentUpdateRequest {
+
+    @Positive
+    private long seq;
+
+    private String partition;
+
+    @NotEmpty
+    @Valid
+    private Collection<Field> fields;
+
+    public DocumentUpdateRequest() {
+        // Jackson deserialization
+    }
+
+    public DocumentUpdateRequest(long seq, String partition, Collection<Field> fields) {
+        this.seq = seq;
+        this.partition = partition;
+        this.fields = fields;
+    }
+
+    @JsonProperty
+    public long getSeq() {
+        return seq;
+    }
+
+    @JsonProperty
+    public String getPartition() {
+        return partition;
+    }
+
+    public boolean hasPartition() {
+        return partition != null;
+    }
+
+    @JsonProperty
+    public Collection<Field> getFields() {
+        return fields;
+    }
+
+    @Override
+    public String toString() {
+        return "DocumentUpdateRequest [seq=" + seq + ", partition=" + partition + ", fields=" + fields + "]";
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java
new file mode 100644
index 000000000..57ff4c858
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleField.java
@@ -0,0 +1,61 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotNull;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class DoubleField extends Field {
+
+    @NotNull
+    private final Double value;
+
+    private final boolean store;
+
+    private final boolean facet;
+
+    public DoubleField(@JsonProperty("name") final String name, @JsonProperty("value") final Double value,
+            @JsonProperty("store") final boolean store, @JsonProperty("facet") final boolean facet) {
+        super(name);
+        this.value = value;
+        this.store = store;
+        this.facet = facet;
+    }
+
+    @JsonProperty
+    public Double getValue() {
+        return value;
+    }
+
+    @JsonProperty
+    public boolean isStore() {
+        return store;
+    }
+
+    @JsonProperty
+    public boolean isFacet() {
+        return facet;
+    }
+
+    @Override
+    public String toString() {
+        return "DoubleField [name=" + name + ", value=" + value + ", store=" + store + ", facet=" + facet + "]";
+    }
+
+}
\ No newline at end of file
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java
new file mode 100644
index 000000000..ac59a286c
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java
@@ -0,0 +1,31 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class DoubleRange extends Range<Double> {
+
+    public DoubleRange() {
+    }
+
+    public DoubleRange(String name, Double min, boolean minInclusive, Double max, boolean maxInclusive) {
+        super(name, min, minInclusive, max, maxInclusive);
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java
new file mode 100644
index 000000000..52d5b815f
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Field.java
@@ -0,0 +1,49 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.Pattern; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = DoubleField.class, name = "double"), + @JsonSubTypes.Type(value = StoredField.class, name = "stored"), + @JsonSubTypes.Type(value = StringField.class, name = "string"), + @JsonSubTypes.Type(value = TextField.class, name = "text"), +}) +public abstract class Field { + + @Pattern(regexp = "^\\$?[a-zA-Z][a-zA-Z0-9_]*$") + protected final String name; + + protected Field(final String name) { + this.name = name; + } + + @JsonProperty + public String getName() { + return name; + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java new file mode 100644 index 000000000..7d3919c41 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexDefinition.java @@ -0,0 +1,70 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package org.apache.couchdb.nouveau.api;
+
+import java.util.Map;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotEmpty;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class IndexDefinition {
+
+    @NotEmpty
+    private String defaultAnalyzer;
+
+    private Map<@NotEmpty String, @NotEmpty String> fieldAnalyzers;
+
+    public IndexDefinition() {
+        // Jackson deserialization
+    }
+
+    public IndexDefinition(final String defaultAnalyzer, final Map<String, String> fieldAnalyzers) {
+        this.defaultAnalyzer = defaultAnalyzer;
+        this.fieldAnalyzers = fieldAnalyzers;
+    }
+
+    @JsonProperty
+    public String getDefaultAnalyzer() {
+        return defaultAnalyzer;
+    }
+
+    public void setDefaultAnalyzer(String defaultAnalyzer) {
+        this.defaultAnalyzer = defaultAnalyzer;
+    }
+
+    @JsonProperty
+    public Map<String, String> getFieldAnalyzers() {
+        return fieldAnalyzers;
+    }
+
+    public void setFieldAnalyzers(Map<String, String> fieldAnalyzers) {
+        this.fieldAnalyzers = fieldAnalyzers;
+    }
+
+    public boolean hasFieldAnalyzers() {
+        return fieldAnalyzers != null && !fieldAnalyzers.isEmpty();
+    }
+
+    @Override
+    public String toString() {
+        return "IndexDefinition [defaultAnalyzer=" + defaultAnalyzer +
+            ", fieldAnalyzers=" + fieldAnalyzers + "]";
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java
new file mode 100644
index 000000000..2dd072771
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/IndexInfo.java
@@ -0,0 +1,64 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.PositiveOrZero; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class IndexInfo { + + @PositiveOrZero + private long updateSeq; + + @PositiveOrZero + private int numDocs; + + @PositiveOrZero + private long diskSize; + + public IndexInfo() { + } + + public IndexInfo(final long updateSeq, final int numDocs, final long diskSize) { + this.updateSeq = updateSeq; + this.numDocs = numDocs; + this.diskSize = diskSize; + } + + @JsonProperty + public int getNumDocs() { + return numDocs; + } + + @JsonProperty + public long getDiskSize() { + return diskSize; + } + + @JsonProperty + public long getUpdateSeq() { + return updateSeq; + } + + @Override + public String toString() { + return "IndexInfo [updateSeq=" + updateSeq + ", numDocs=" + numDocs + ", diskSize=" + diskSize + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java new file mode 100644 index 000000000..de421402a --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java @@ -0,0 +1,145 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class Range<T> { + + @NotEmpty + private String label; + + @NotNull + private T min; + + private boolean minInclusive = true; + + @NotNull + private T max; + + private boolean maxInclusive = true; + + public Range() { + } + + public Range(String label, T min, boolean minInclusive, T max, boolean maxInclusive) { + this.label = label; + this.min = min; + this.minInclusive = minInclusive; + this.max = max; + this.maxInclusive = maxInclusive; + } + + @JsonProperty + public String getLabel() { + return label; + } + + public void setLabel(String label) { + this.label = label; + } + + @JsonProperty + public T getMin() { + return min; + } + + public void setMin(T min) { + this.min = min; + } + + @JsonProperty("min_inclusive") + public boolean isMinInclusive() { + return minInclusive; + } + + public void setMinInclusive(boolean minInclusive) { + this.minInclusive = minInclusive; + } + + @JsonProperty + public T getMax() { + return max; + } + + public void setMax(T max) { + this.max = max; + } + + @JsonProperty("max_inclusive") + public boolean isMaxInclusive() { + return maxInclusive; + } + + public void setMaxInclusive(boolean maxInclusive) { + this.maxInclusive = maxInclusive; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((label == null) ? 0 : label.hashCode()); + result = prime * result + ((min == null) ? 0 : min.hashCode()); + result = prime * result + (minInclusive ? 1231 : 1237); + result = prime * result + ((max == null) ? 0 : max.hashCode()); + result = prime * result + (maxInclusive ? 1231 : 1237); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Range other = (Range) obj; + if (label == null) { + if (other.label != null) + return false; + } else if (!label.equals(other.label)) + return false; + if (min == null) { + if (other.min != null) + return false; + } else if (!min.equals(other.min)) + return false; + if (minInclusive != other.minInclusive) + return false; + if (max == null) { + if (other.max != null) + return false; + } else if (!max.equals(other.max)) + return false; + if (maxInclusive != other.maxInclusive) + return false; + return true; + } + + @Override + public String toString() { + return "Range [label=" + label + ", min=" + min + ", minInclusive=" + minInclusive + ", max=" + max + + ", maxInclusive=" + maxInclusive + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java new file mode 100644 index 000000000..678970e04 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchHit.java @@ -0,0 +1,65 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.Collection; +import java.util.Objects; + +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; + +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchHit { + + @NotEmpty + private String id; + + @NotNull + private PrimitiveWrapper<?>[] order; + + @NotNull + private Collection<@NotNull StoredField> fields; + + public SearchHit() { + } + + public SearchHit(final String id, final PrimitiveWrapper<?>[] order, final Collection<StoredField> fields) { + this.id = id; + this.order = Objects.requireNonNull(order); + this.fields = Objects.requireNonNull(fields); + } + + public String getId() { + return id; + } + + public PrimitiveWrapper<?>[] getOrder() { + return order; + } + + public Collection<StoredField> getFields() { + return fields; + } + + @Override + public String toString() { + return "SearchHit [id=" + id + ", order=" + order + ", fields=" + fields + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java new file mode 100644 index 000000000..eb4efa7bf --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchRequest.java @@ -0,0 +1,151 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package org.apache.couchdb.nouveau.api; + +import java.util.List; +import java.util.Map; + +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + +import jakarta.validation.constraints.Max; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Positive; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchRequest { + + @NotNull + private String query; + + private String partition; + + @Positive + private int limit = 25; + + private List<@NotEmpty String> sort; + + private List<@NotEmpty String> counts; + + private Map<@NotEmpty String, List<@NotNull DoubleRange>> ranges; + + private PrimitiveWrapper<?>[] after; + + @Min(1) + @Max(100) + private int topN = 10; + + public SearchRequest() { + // Jackson deserialization + } + + public void setQuery(final String query) { + this.query = query; + } + + @JsonProperty + public String getQuery() { + return query; + } + + public void setPartition(final String partition) { + this.partition = partition; + } + + @JsonProperty + public String getPartition() { + return partition; + } + + public boolean hasPartition() { + return partition != null; + } + + public void setLimit(final int limit) { + this.limit = limit; + } + + @JsonProperty + public int getLimit() { + return limit; + } + + public boolean hasSort() { + return sort != null; + } + + @JsonProperty + public List<String> getSort() { + return sort; + } + + public void setSort(List<String> sort) { + this.sort = sort; + } + + public boolean hasCounts() { + return counts != null; + } + + public void setCounts(final List<String> counts) { + this.counts = counts; + } + + @JsonProperty + public List<String> getCounts() { + return counts; + } + + public boolean hasRanges() { + return ranges != null; + } + + public void setRanges(final Map<String, List<DoubleRange>> ranges) { + this.ranges = ranges; + } + + @JsonProperty + public Map<String, List<DoubleRange>> getRanges() { + return ranges; + } + + public void setTopN(final int topN) { + this.topN = topN; + } + + @JsonProperty + public int getTopN() { + return topN; + } + + public void setAfter(final PrimitiveWrapper<?>[] after) { + this.after = after; + } + + @JsonProperty + public PrimitiveWrapper<?>[] getAfter() { + return after; + } + + @Override + public String toString() { + return "SearchRequest [query=" + query + ", sort=" + sort + ", limit=" + limit + ", after=" + after + ", counts=" + counts + ", ranges=" + ranges + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java new file mode 100644 index 000000000..6ef8c4bc1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/SearchResults.java @@ -0,0 +1,97 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
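A brief sketch (not part of the patch) of how a caller populates the SearchRequest class above; the field names are invented, and the "-price<double>" sort syntax anticipates the SORT_FIELD_RE pattern that Lucene9Index compiles later in this commit:

    import java.util.List;
    import org.apache.couchdb.nouveau.api.SearchRequest;

    public class SearchRequestExample {
        public static void main(String[] args) {
            final SearchRequest req = new SearchRequest();
            req.setQuery("subject:lucene");         // @NotNull, the only required field
            req.setLimit(10);                       // default is 25
            req.setSort(List.of("-price<double>")); // descending sort on a double field
            req.setCounts(List.of("genre"));        // request facet counts for a field
            System.out.println(req);
        }
    }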
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import java.util.List; +import java.util.Map; + +import org.apache.lucene.search.TotalHits.Relation; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.PositiveOrZero; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public class SearchResults { + + @PositiveOrZero + private long totalHits; + + @NotNull + private Relation totalHitsRelation; + + @NotNull + private List<@NotNull SearchHit> hits; + + private Map<@NotNull String, Map<@NotNull String, Number>> counts; + + private Map<@NotNull String, Map<@NotNull String, Number>> ranges; + + public SearchResults() { + } + + public void setTotalHits(final long totalHits) { + this.totalHits = totalHits; + } + + @JsonProperty + public long getTotalHits() { + return totalHits; + } + + public Relation getTotalHitsRelation() { + return totalHitsRelation; + } + + public void setTotalHitsRelation(Relation relation) { + this.totalHitsRelation = relation; + } + + public void setHits(final List<SearchHit> hits) { + this.hits = hits; + } + + @JsonProperty + public List<SearchHit> getHits() { + return hits; + } + + public void setCounts(final Map<String, Map<String, Number>> counts) { + this.counts = counts; + } + + @JsonProperty + public Map<String, Map<String, Number>> getCounts() { + return counts; + } + + public void setRanges(final Map<String, Map<String, Number>> ranges) { + this.ranges = ranges; + } + + @JsonProperty + public Map<String, Map<String, Number>> getRanges() { + return ranges; + } + + @Override + public String toString() { + return "SearchResults [hits=" + hits + ", totalHits=" + totalHits + ", counts=" + counts + ", ranges=" + ranges + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java new file mode 100644 index 000000000..e8642c530 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StoredField.java @@ -0,0 +1,48 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package org.apache.couchdb.nouveau.api; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class StoredField extends Field { + + @NotNull + @Schema(oneOf = {String.class, Double.class, byte[].class}) + private final Object value; + + public StoredField(@JsonProperty("name") final String name, @JsonProperty("value") final Object value) { + super(name); + if (!(value instanceof String || value instanceof Number || value instanceof byte[])) { + throw new IllegalArgumentException(value + " must be a string, number or byte array"); + } + this.value = value; + } + + public Object getValue() { + return value; + } + + @Override + public String toString() { + return "StoredField [name=" + name + ", value=" + value + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java new file mode 100644 index 000000000..d32671ae1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/StringField.java @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class StringField extends Field { + + @NotNull + private final String value; + + private final boolean store; + + private final boolean facet; + + public StringField(@JsonProperty("name") final String name, @JsonProperty("value") final String value, + @JsonProperty("store") final boolean store, @JsonProperty("facet") final boolean facet) { + super(name); + this.value = Objects.requireNonNull(value); + this.store = store; + this.facet = facet; + } + + @JsonProperty + public String getValue() { + return value; + } + + @JsonProperty + public boolean isStore() { + return store; + } + + @JsonProperty + public boolean isFacet() { + return facet; + } + + @Override + public String toString() { + return "StringField [name=" + name + ", value=" + value + ", store=" + store + ", facet=" + facet + "]"; + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java new file mode 100644 index 000000000..76ee5d86b --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/api/TextField.java @@ -0,0 +1,55 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.api; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; + + +import jakarta.validation.constraints.NotNull; + +@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) +public final class TextField extends Field { + + @NotNull + private final String value; + + private final boolean store; + + public TextField(@JsonProperty("name") final String name, @JsonProperty("value") final String value, + @JsonProperty("store") final boolean store) { + super(name); + this.value = Objects.requireNonNull(value); + this.store = store; + } + + @JsonProperty + public String getValue() { + return value; + } + + @JsonProperty + public boolean isStore() { + return store; + } + + @Override + public String toString() { + return "TextField [name=" + name + ", value=" + value + ", store=" + store + "]"; + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IOUtils.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IOUtils.java new file mode 100644 index 000000000..ab2bb7e35 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IOUtils.java @@ -0,0 +1,64 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +public class IOUtils { + + // From https://www.baeldung.com/java-delete-directory + public static void rm(final Path path) throws IOException { + File[] allContents = path.toFile().listFiles(); + if (allContents != null) { + for (final File file : allContents) { + rm(file.toPath()); + } + } + if (!path.toFile().delete()) { + throw new IOException("failed to delete " + path); + } + } + + @FunctionalInterface + public interface IORunnable { + public abstract void run() throws IOException; + } + + public static void runAll(final IORunnable... runnables) throws IOException { + Throwable thrown = null; + for (final IORunnable r : runnables) { + try { + r.run(); + } catch (final Throwable e) { + if (thrown == null) { + thrown = e; + } + } + } + if (thrown != null) { + if (thrown instanceof IOException) { + throw (IOException) thrown; + } + if (thrown instanceof RuntimeException) { + throw (RuntimeException) thrown; + } + if (thrown instanceof Error) { + throw (Error) thrown; + } + } + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/Index.java new file mode 100644 index 000000000..7d893a9e2 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/Index.java @@ -0,0 +1,176 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.IndexInfo; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; + +/** + * An index that reflects a single `.couch` file shard of some + * database. + * + * The class only permits sequential modification (updates and deletes) + * but allows concurrent searching. + * + * This class also expects a monotonically incrementing update sequence + * associated with each modification. + */ + +public abstract class Index implements Closeable { + + private long updateSeq; + private boolean deleteOnClose = false; + private long lastCommit = now(); + private volatile boolean closed; + private final Semaphore permits = new Semaphore(Integer.MAX_VALUE); + + protected Index(final long updateSeq) { + this.updateSeq = updateSeq; + } + + public final boolean tryAcquire() { + if (permits.tryAcquire() == false) { + return false; + } + if (closed) { + permits.release(); + return false; + } + return true; + } + + public final boolean tryAcquire(long timeout, TimeUnit unit) throws InterruptedException { + if (permits.tryAcquire(timeout, unit) == false) { + return false; + } + if (closed) { + permits.release(); + return false; + } + return true; + } + + public final void release() { + permits.release(); + } + + public final IndexInfo info() throws IOException { + final int numDocs = doNumDocs(); + final long diskSize = doDiskSize(); + return new IndexInfo(updateSeq, numDocs, diskSize); + } + + protected abstract int doNumDocs() throws IOException; + + protected abstract long doDiskSize() throws IOException; + + public final synchronized void update(final String docId, final DocumentUpdateRequest request) + throws IOException { + assertUpdateSeqIsLower(request.getSeq()); + doUpdate(docId, request); + incrementUpdateSeq(request.getSeq()); + } + + protected abstract void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException; + + public final synchronized void delete(final String docId, final DocumentDeleteRequest request) throws IOException { + assertUpdateSeqIsLower(request.getSeq()); + doDelete(docId, request); + incrementUpdateSeq(request.getSeq()); + } + + protected abstract void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException; + + public final SearchResults search(final SearchRequest request) throws IOException { + return doSearch(request); + } + + protected abstract SearchResults doSearch(final SearchRequest request) throws IOException; + + public final boolean commit() throws IOException { + final long updateSeq; + synchronized (this) { + updateSeq = this.updateSeq; + } + final boolean result = doCommit(updateSeq); + if (result) { + final long now = now(); + synchronized (this) { + this.lastCommit = now; + } + } + return result; + } 
+ + protected abstract boolean doCommit(final long updateSeq) throws IOException; + + @Override + public final void close() throws IOException { + synchronized (this) { + closed = true; + } + // Ensures exclusive access to the index before closing. + permits.acquireUninterruptibly(Integer.MAX_VALUE); + try { + doClose(); + } finally { + permits.release(Integer.MAX_VALUE); + } + } + + protected abstract void doClose() throws IOException; + + public boolean isDeleteOnClose() { + return deleteOnClose; + } + + public void setDeleteOnClose(final boolean deleteOnClose) { + synchronized (this) { + this.deleteOnClose = deleteOnClose; + } + } + + protected final void assertUpdateSeqIsLower(final long updateSeq) throws UpdatesOutOfOrderException { + assert Thread.holdsLock(this); + if (!(updateSeq > this.updateSeq)) { + throw new UpdatesOutOfOrderException(); + } + } + + protected final void incrementUpdateSeq(final long updateSeq) throws IOException { + assert Thread.holdsLock(this); + assertUpdateSeqIsLower(updateSeq); + this.updateSeq = updateSeq; + } + + public boolean needsCommit(final long duration, final TimeUnit unit) { + final long commitNeededSince = now() - unit.toNanos(duration); + synchronized (this) { + return this.lastCommit < commitNeededSince; + } + } + + private long now() { + return System.nanoTime(); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java new file mode 100644 index 000000000..987c9303b --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexFunction.java @@ -0,0 +1,23 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import java.io.IOException; + +@FunctionalInterface +public interface IndexFunction<T> { + + T with(final Index index) throws IOException; + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java new file mode 100644 index 000000000..2f2a03fd1 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexLoader.java @@ -0,0 +1,26 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
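To make the Index contract above concrete, here is a hedged caller-side sketch (not part of the patch): acquire a permit before use, release it after, and supply strictly increasing sequence numbers. The DocumentUpdateRequest constructor arguments mirror the (seq, partition, fields) usage in IndexHealthCheck later in this commit:

    import java.util.Collections;
    import java.util.concurrent.TimeUnit;
    import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
    import org.apache.couchdb.nouveau.core.Index;

    public class IndexContractExample {
        static void updateOnce(final Index index) throws Exception {
            if (index.tryAcquire(1, TimeUnit.SECONDS)) { // guards against a concurrent close()
                try {
                    // seq 42 must exceed the index's current update_seq,
                    // otherwise update() throws UpdatesOutOfOrderException.
                    index.update("doc1", new DocumentUpdateRequest(42, null, Collections.emptyList()));
                } finally {
                    index.release();
                }
            }
        }
    }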
+ +package org.apache.couchdb.nouveau.core; + +import java.io.IOException; +import java.nio.file.Path; + +import org.apache.couchdb.nouveau.api.IndexDefinition; + +@FunctionalInterface +public interface IndexLoader { + + Index apply(final Path path, final IndexDefinition indexDefinition) throws IOException; + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java new file mode 100644 index 000000000..ddc7c3f7f --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java @@ -0,0 +1,321 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import static com.codahale.metrics.MetricRegistry.name; + +import java.io.File; +import java.io.IOException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.eclipse.jetty.io.RuntimeIOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.caffeine.MetricsStatsCounter; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.RemovalListener; +import com.github.benmanes.caffeine.cache.Scheduler; + +import io.dropwizard.lifecycle.Managed; +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +/** + * The central class of Nouveau, responsible for loading and unloading Lucene + * indexes and making them available for query. 
+ */ + +public final class IndexManager implements Managed { + + @FunctionalInterface + public interface IndexFunction<V, R> { + R apply(final V value) throws IOException; + } + + private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class); + + private int maxIndexesOpen; + + private int commitIntervalSeconds; + + private int idleSeconds; + + private Path rootDir; + + private ObjectMapper objectMapper; + + private MetricRegistry metricRegistry; + + private ScheduledExecutorService scheduler; + + private Cache<String, Index> cache; + + public <R> R with(final String name, final IndexLoader loader, final IndexFunction<Index, R> indexFun) + throws IOException, InterruptedException { + while (true) { + if (!exists(name)) { + throw new WebApplicationException("Index does not exist", Status.NOT_FOUND); + } + + final Index index; + try { + index = cache.get(name, (n) -> { + LOGGER.info("opening {}", n); + final Path path = indexPath(n); + try { + final IndexDefinition indexDefinition = loadIndexDefinition(n); + return loader.apply(path, indexDefinition); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); + } catch (final RuntimeIOException e) { + throw (IOException) e.getCause(); + } + + if (index.tryAcquire(1, TimeUnit.SECONDS)) { + try { + final R result = indexFun.apply(index); + if (index.needsCommit(commitIntervalSeconds, TimeUnit.SECONDS)) { + scheduler.execute(() -> { + if (index.tryAcquire()) { + try { + LOGGER.debug("committing {}", name); + try { + index.commit(); + } catch (final IOException e) { + LOGGER.warn("I/O exception while committing " + name, e); + } + } finally { + index.release(); + } + } + }); + } + return result; + } finally { + index.release(); + } + } + } + } + + public void create(final String name, IndexDefinition indexDefinition) throws IOException { + if (exists(name)) { + throw new WebApplicationException("Index already exists", Status.EXPECTATION_FAILED); + } + // Validate index definition + // TODO luceneFor(indexDefinition).validate(indexDefinition); + + // Persist definition + final Path path = indexDefinitionPath(name); + if (Files.exists(path)) { + throw new FileAlreadyExistsException(name + " already exists"); + } + Files.createDirectories(path.getParent()); + objectMapper.writeValue(path.toFile(), indexDefinition); + } + + public boolean exists(final String name) { + return Files.exists(indexDefinitionPath(name)); + } + + public void deleteAll(final String path, final List<String> exclusions) throws IOException { + LOGGER.info("deleting indexes below {} (excluding {})", path, + exclusions == null ? "nothing" : exclusions); + + final Path indexRootPath = indexRootPath(path); + if (!indexRootPath.toFile().exists()) { + return; + } + Stream<Path> stream = Files.find(indexRootPath, 100, + (p, attr) -> attr.isDirectory() && isIndex(p)); + try { + stream.forEach((p) -> { + final String relativeToExclusions = indexRootPath.relativize(p).toString(); + if (exclusions != null && exclusions.indexOf(relativeToExclusions) != -1) { + return; + } + final String relativeName = rootDir.relativize(p).toString(); + try { + deleteIndex(relativeName); + } catch (final IOException e) { + LOGGER.error("I/O exception deleting " + p, e); + } + // Clean any newly empty directories.
+ do { + final File f = p.toFile(); + if (f.isDirectory() && f.list().length == 0) { + f.delete(); + } + } while ((p = p.getParent()) != null && !rootDir.equals(p)); + }); + } finally { + stream.close(); + } + } + + private void deleteIndex(final String name) throws IOException { + final Index index = cache.asMap().remove(name); + if (index != null) { + index.setDeleteOnClose(true); + close(name, index); + } else { + IOUtils.rm(indexRootPath(name)); + } + } + + @JsonProperty + public int getMaxIndexesOpen() { + return maxIndexesOpen; + } + + public void setMaxIndexesOpen(int maxIndexesOpen) { + this.maxIndexesOpen = maxIndexesOpen; + } + + public int getCommitIntervalSeconds() { + return commitIntervalSeconds; + } + + public void setCommitIntervalSeconds(int commitIntervalSeconds) { + this.commitIntervalSeconds = commitIntervalSeconds; + } + + public int getIdleSeconds() { + return idleSeconds; + } + + public void setIdleSeconds(int idleSeconds) { + this.idleSeconds = idleSeconds; + } + + public void setScheduler(ScheduledExecutorService scheduler) { + this.scheduler = scheduler; + } + + public Path getRootDir() { + return rootDir; + } + + public void setRootDir(Path rootDir) { + this.rootDir = rootDir; + } + + public void setObjectMapper(final ObjectMapper objectMapper) { + this.objectMapper = objectMapper; + } + + public void setMetricRegistry(final MetricRegistry metricRegistry) { + this.metricRegistry = metricRegistry; + } + + @Override + public void start() throws IOException { + cache = Caffeine.newBuilder() + .recordStats(() -> new MetricsStatsCounter(metricRegistry, name(IndexManager.class, "cache"))) + .initialCapacity(maxIndexesOpen) + .maximumSize(maxIndexesOpen) + .expireAfterAccess(Duration.ofSeconds(idleSeconds)) + .scheduler(Scheduler.systemScheduler()) + .evictionListener(new IndexEvictionListener()) + .build(); + } + + @Override + public void stop() throws IOException, InterruptedException { + final var it = cache.asMap().entrySet().iterator(); + while (it.hasNext()) { + var e = it.next(); + LOGGER.info("closing {} during shutdown", e.getKey()); + close(e.getKey(), e.getValue()); + it.remove(); + } + } + + private boolean isIndex(final Path path) { + return path.resolve("index_definition.json").toFile().exists(); + } + + private Path indexDefinitionPath(final String name) { + return indexRootPath(name).resolve("index_definition.json"); + } + + private Path indexPath(final String name) { + return indexRootPath(name).resolve("index"); + } + + private IndexDefinition loadIndexDefinition(final String name) throws IOException { + return objectMapper.readValue(indexDefinitionPath(name).toFile(), IndexDefinition.class); + } + + private Path indexRootPath(final String name) { + final Path result = rootDir.resolve(name).normalize(); + if (result.startsWith(rootDir)) { + return result; + } + throw new WebApplicationException(name + " attempts to escape from index root directory", + Status.BAD_REQUEST); + } + + private class IndexEvictionListener implements RemovalListener<String, Index> { + + public void onRemoval(String name, Index index, RemovalCause cause) { + LOGGER.info("closing {} for cause {}", name, cause); + try { + close(name, index); + } catch (final IOException e) { + LOGGER.error("I/O exception when evicting " + name, e); + } + } + } + + private void close(final String name, final Index index) throws IOException { + IOUtils.runAll( + () -> { + if (index.tryAcquire()) { + try { + if (!index.isDeleteOnClose() && index.commit()) { + LOGGER.debug("committed {} before close", name); + } + 
+ } finally { + index.release(); + } + } + }, + () -> { + index.close(); + }, + () -> { + if (index.isDeleteOnClose()) { + IOUtils.rm(indexRootPath(name)); + } + }); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java new file mode 100644 index 000000000..3b89f41d2 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderException.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import java.io.IOException; + +public class UpdatesOutOfOrderException extends IOException { + + public UpdatesOutOfOrderException() { + super("Updates applied in the wrong order"); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderExceptionMapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderExceptionMapper.java new file mode 100644 index 000000000..9e54e4453 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/UpdatesOutOfOrderExceptionMapper.java @@ -0,0 +1,32 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core; + +import io.dropwizard.jersey.errors.ErrorMessage; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.core.Response.Status; +import jakarta.ws.rs.ext.ExceptionMapper; + +public class UpdatesOutOfOrderExceptionMapper implements ExceptionMapper<UpdatesOutOfOrderException> { + + @Override + public Response toResponse(final UpdatesOutOfOrderException exception) { + return Response.status(Status.BAD_REQUEST) + .type(MediaType.APPLICATION_JSON_TYPE) + .entity(new ErrorMessage(Status.BAD_REQUEST.getStatusCode(), exception.getMessage())) + .build(); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java new file mode 100644 index 000000000..875d0d8bb --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/ByteArrayWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
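A sketch of how callers might drive the IndexManager.with method above (under assumptions: the index name is illustrative, and the loader body is a placeholder; in the real wiring a Lucene 9 loader is supplied elsewhere in this commit):

    import java.io.IOException;
    import org.apache.couchdb.nouveau.api.SearchRequest;
    import org.apache.couchdb.nouveau.api.SearchResults;
    import org.apache.couchdb.nouveau.core.IndexManager;

    public class IndexManagerExample {
        static SearchResults search(final IndexManager manager, final SearchRequest request)
                throws IOException, InterruptedException {
            return manager.with("db1/idx",       // index name, resolved under the root directory
                    (path, def) -> {             // IndexLoader: opens the index on a cache miss
                        throw new UnsupportedOperationException("placeholder loader");
                    },
                    index -> index.search(request)); // IndexFunction, applied while holding a permit
        }
    }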
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class ByteArrayWrapper extends PrimitiveWrapper<byte[]> { + + public ByteArrayWrapper(@JsonProperty("value") byte[] value) { + super(value); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java new file mode 100644 index 000000000..c9ae3b4cd --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/DoubleWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class DoubleWrapper extends PrimitiveWrapper<Double> { + + public DoubleWrapper(@JsonProperty("value") Double value) { + super(value); + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java new file mode 100644 index 000000000..490afa6d5 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/FloatWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class FloatWrapper extends PrimitiveWrapper<Float> { + + public FloatWrapper(@JsonProperty("value") float value) { + super(value); + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java new file mode 100644 index 000000000..c179d0705 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/IntWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class IntWrapper extends PrimitiveWrapper<Integer> { + + public IntWrapper(@JsonProperty("value") Integer value) { + super(value); + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java new file mode 100644 index 000000000..0eda4e786 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/LongWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class LongWrapper extends PrimitiveWrapper<Long> { + + public LongWrapper(@JsonProperty("value") Long value) { + super(value); + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java new file mode 100644 index 000000000..89877da60 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/PrimitiveWrapper.java @@ -0,0 +1,46 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonTypeInfo.As; +import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; + +@JsonTypeInfo(use = Id.NAME, include = As.PROPERTY, property = "@type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = ByteArrayWrapper.class, name = "bytes"), + @JsonSubTypes.Type(value = DoubleWrapper.class, name = "double"), + @JsonSubTypes.Type(value = FloatWrapper.class, name = "float"), + @JsonSubTypes.Type(value = IntWrapper.class, name = "int"), + @JsonSubTypes.Type(value = LongWrapper.class, name = "long"), + @JsonSubTypes.Type(value = StringWrapper.class, name = "string"), +}) +public class PrimitiveWrapper<T> { + + private T value; + + public PrimitiveWrapper(T value) { + this.value = value; + } + + public T getValue() { + return value; + } + + public void setValue(T value) { + this.value = value; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java new file mode 100644 index 000000000..e53f22ca0 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/core/ser/StringWrapper.java @@ -0,0 +1,24 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.core.ser; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class StringWrapper extends PrimitiveWrapper<String> { + + public StringWrapper(@JsonProperty("value") String value) { + super(value); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java new file mode 100644 index 000000000..4b49a39e0 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheck.java @@ -0,0 +1,46 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
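For illustration (not part of the patch): the @JsonTypeInfo/@JsonSubTypes annotations on PrimitiveWrapper above tag every serialized value with its concrete type, which is what lets a typed "after" bookmark in SearchRequest survive a JSON round trip. A minimal sketch, assuming Jackson's implicit creator detection applies here as it does for the other @JsonProperty-constructed DTOs in this commit:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper;
    import org.apache.couchdb.nouveau.core.ser.StringWrapper;

    public class WrapperExample {
        public static void main(String[] args) throws Exception {
            final ObjectMapper mapper = new ObjectMapper();
            // Serializes to {"@type":"string","value":"foo"}
            final String json = mapper.writeValueAsString(new StringWrapper("foo"));
            // The @type property selects the subclass when reading back.
            final PrimitiveWrapper<?> copy = mapper.readValue(json, PrimitiveWrapper.class);
            System.out.println(copy.getValue()); // foo
        }
    }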
+ +package org.apache.couchdb.nouveau.health; + +import java.util.Arrays; +import java.util.List; + +import org.apache.couchdb.nouveau.api.AnalyzeRequest; +import org.apache.couchdb.nouveau.api.AnalyzeResponse; +import org.apache.couchdb.nouveau.resources.AnalyzeResource; + +import com.codahale.metrics.health.HealthCheck; + +public final class AnalyzeHealthCheck extends HealthCheck { + + private AnalyzeResource analyzeResource; + + public AnalyzeHealthCheck(final AnalyzeResource analyzeResource) { + this.analyzeResource = analyzeResource; + } + + @Override + protected Result check() throws Exception { + final AnalyzeRequest request = new AnalyzeRequest("standard", "hello goodbye"); + final AnalyzeResponse response = analyzeResource.analyzeText(request); + final List<String> expected = Arrays.asList("hello", "goodbye"); + final List<String> actual = response.getTokens(); + if (expected.equals(actual)) { + return Result.healthy(); + } else { + return Result.unhealthy("Expected '%s' but got '%s'", expected, actual); + } + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java new file mode 100644 index 000000000..37882043a --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/health/IndexHealthCheck.java @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.health; + +import java.io.IOException; +import java.util.Collections; + +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.resources.IndexResource; + +import com.codahale.metrics.health.HealthCheck; + +public final class IndexHealthCheck extends HealthCheck { + + private final IndexResource indexResource; + + public IndexHealthCheck(final IndexResource indexResource) { + this.indexResource = indexResource; + } + + @Override + protected Result check() throws Exception { + final String name = "___test9"; + try { + indexResource.deletePath(name, null); + } catch (IOException e) { + // Ignored, index might not exist yet.
+ } + + indexResource.createIndex(name, new IndexDefinition("standard", null)); + try { + final DocumentUpdateRequest documentUpdateRequest = + new DocumentUpdateRequest(1, null, Collections.emptyList()); + indexResource.updateDoc(name, "foo", documentUpdateRequest); + + final SearchRequest searchRequest = new SearchRequest(); + searchRequest.setQuery("_id:foo"); + + final SearchResults searchResults = indexResource.searchIndex(name, searchRequest); + if (searchResults.getTotalHits() == 1) { + return Result.healthy(); + } + } finally { + indexResource.deletePath(name, null); + } + return Result.unhealthy(name); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java new file mode 100644 index 000000000..ca6834f5e --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactory.java @@ -0,0 +1,143 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; +import org.apache.lucene.analysis.classic.ClassicAnalyzer; +import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.ga.IrishAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.lv.LatvianAnalyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import 
org.apache.lucene.analysis.pl.PolishAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; + +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +public final class Lucene9AnalyzerFactory { + + private Lucene9AnalyzerFactory() { + } + + public static Analyzer fromDefinition(final IndexDefinition indexDefinition) { + final Analyzer defaultAnalyzer = newAnalyzer(indexDefinition.getDefaultAnalyzer()); + if (!indexDefinition.hasFieldAnalyzers()) { + return defaultAnalyzer; + } + final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>(); + for (Map.Entry<String, String> entry : indexDefinition.getFieldAnalyzers().entrySet()) { + fieldAnalyzers.put(entry.getKey(), newAnalyzer(entry.getValue())); + } + return new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers); + } + + private enum KnownAnalyzer { + + arabic(() -> new ArabicAnalyzer()), + armenian(() -> new ArmenianAnalyzer()), + basque(() -> new BasqueAnalyzer()), + bulgarian(() -> new BulgarianAnalyzer()), + catalan(() -> new CatalanAnalyzer()), + chinese(() -> new SmartChineseAnalyzer()), + cjk(() -> new CJKAnalyzer()), + classic(() -> new ClassicAnalyzer()), + czech(() -> new CzechAnalyzer()), + danish(() -> new DanishAnalyzer()), + dutch(() -> new DutchAnalyzer()), + email(() -> new UAX29URLEmailAnalyzer()), + english(() -> new EnglishAnalyzer()), + finnish(() -> new FinnishAnalyzer()), + french(() -> new FrenchAnalyzer()), + galician(() -> new GalicianAnalyzer()), + german(() -> new GermanAnalyzer()), + hindi(() -> new HindiAnalyzer()), + hungarian(() -> new HungarianAnalyzer()), + indonesian(() -> new IndonesianAnalyzer()), + irish(() -> new IrishAnalyzer()), + italian(() -> new ItalianAnalyzer()), + japanese(() -> new JapaneseAnalyzer()), + keyword(() -> new KeywordAnalyzer()), + latvian(() -> new LatvianAnalyzer()), + norwegian(() -> new NorwegianAnalyzer()), + persian(() -> new PersianAnalyzer()), + polish(() -> new PolishAnalyzer()), + portugese(() -> new PortugueseAnalyzer()), + romanian(() -> new RomanianAnalyzer()), + russian(() -> new RussianAnalyzer()), + simple(() -> new SimpleAnalyzer()), + simple_asciifolding(() -> new SimpleAsciiFoldingAnalyzer()), + spanish(() -> new SpanishAnalyzer()), + standard(() -> new StandardAnalyzer()), + swedish(() -> new SwedishAnalyzer()), + thai(() -> new ThaiAnalyzer()), + turkish(() -> new TurkishAnalyzer()), + whitespace(() -> new WhitespaceAnalyzer()); + + private final Supplier<Analyzer> supplier; + + private KnownAnalyzer(final Supplier<Analyzer> supplier) { + this.supplier = supplier; + } + + private Analyzer newInstance() { + return supplier.get(); + } + } + + public static Analyzer newAnalyzer(final String name) { + try { + return KnownAnalyzer.valueOf(name).newInstance(); + } catch (IllegalArgumentException e) { + throw new WebApplicationException(name + " is not a valid analyzer name", Status.BAD_REQUEST); + } + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java new file mode 100644 index 000000000..02818f41f --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Index.java @@ -0,0 +1,507 @@ +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.DoubleField; +import org.apache.couchdb.nouveau.api.DoubleRange; +import org.apache.couchdb.nouveau.api.Field; +import org.apache.couchdb.nouveau.api.SearchHit; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.api.StoredField; +import org.apache.couchdb.nouveau.api.StringField; +import org.apache.couchdb.nouveau.api.TextField; +import org.apache.couchdb.nouveau.core.IOUtils; +import org.apache.couchdb.nouveau.core.Index; +import org.apache.couchdb.nouveau.core.ser.ByteArrayWrapper; +import org.apache.couchdb.nouveau.core.ser.DoubleWrapper; +import org.apache.couchdb.nouveau.core.ser.FloatWrapper; +import org.apache.couchdb.nouveau.core.ser.IntWrapper; +import org.apache.couchdb.nouveau.core.ser.LongWrapper; +import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper; +import org.apache.couchdb.nouveau.core.ser.StringWrapper; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollectorManager; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.StringDocValuesReaderState; +import org.apache.lucene.facet.StringValueFacetCounts; +import org.apache.lucene.facet.range.DoubleRangeFacetCounts; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.StoredFields; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiCollectorManager; +import org.apache.lucene.search.Query; +import 
org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; + +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response.Status; + +public class Lucene9Index extends Index { + + private static final Sort DEFAULT_SORT = new Sort(SortField.FIELD_SCORE, + new SortField("_id", SortField.Type.STRING)); + private static final Pattern SORT_FIELD_RE = Pattern.compile("^([-+])?([\\.\\w]+)(?:<(\\w+)>)$"); + + private final Analyzer analyzer; + private final IndexWriter writer; + private final SearcherManager searcherManager; + + public Lucene9Index(final Analyzer analyzer, final IndexWriter writer, final long updateSeq, + final SearcherManager searcherManager) { + super(updateSeq); + this.analyzer = Objects.requireNonNull(analyzer); + this.writer = Objects.requireNonNull(writer); + this.searcherManager = Objects.requireNonNull(searcherManager); + } + + @Override + public int doNumDocs() throws IOException { + return writer.getDocStats().numDocs; + } + + @Override + public long doDiskSize() throws IOException { + final Directory dir = writer.getDirectory(); + long result = 0; + for (final String name : dir.listAll()) { + try { + result += dir.fileLength(name); + } catch (final FileNotFoundException | NoSuchFileException e) { + // deleted while we were looping. + } + } + return result; + } + + @Override + public void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException { + final Term docIdTerm = docIdTerm(docId); + final Document doc = toDocument(docId, request); + writer.updateDocument(docIdTerm, doc); + } + + @Override + public void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException { + final Query query = docIdQuery(docId); + writer.deleteDocuments(query); + } + + @Override + public boolean doCommit(final long updateSeq) throws IOException { + if (!writer.hasUncommittedChanges()) { + return false; + } + writer.setLiveCommitData(Collections.singletonMap("update_seq", Long.toString(updateSeq)).entrySet()); + writer.commit(); + return true; + } + + @Override + public void doClose() throws IOException { + IOUtils.runAll( + () -> { + searcherManager.close(); + }, + () -> { + writer.rollback(); + }, + () -> { + if (isDeleteOnClose()) { + var dir = writer.getDirectory(); + for (final String name : dir.listAll()) { + dir.deleteFile(name); + } + } + }); + } + + @Override + public SearchResults doSearch(final SearchRequest request) throws IOException { + final Query query = parse(request); + + // Construct CollectorManagers. 
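+        // A MultiCollectorManager lets the single IndexSearcher.search() call
+        // below drive both the top-hits collector and, when counts or ranges
+        // are requested, a FacetsCollector, so hits and facet counts are
+        // gathered in one pass over the index.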
+        final MultiCollectorManager cm;
+        final CollectorManager<?, ? extends TopDocs> hits = hitCollector(request);
+        if (request.hasCounts() || request.hasRanges()) {
+            cm = new MultiCollectorManager(hits, new FacetsCollectorManager());
+        } else {
+            cm = new MultiCollectorManager(hits);
+        }
+
+        searcherManager.maybeRefreshBlocking();
+
+        final IndexSearcher searcher = searcherManager.acquire();
+        try {
+            final Object[] reduces = searcher.search(query, cm);
+            return toSearchResults(request, searcher, reduces);
+        } catch (final IllegalStateException e) {
+            throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST);
+        } finally {
+            searcherManager.release(searcher);
+        }
+    }
+
+    private CollectorManager<?, ? extends TopDocs> hitCollector(final SearchRequest searchRequest) {
+        final Sort sort = toSort(searchRequest);
+
+        final PrimitiveWrapper<?>[] after = searchRequest.getAfter();
+        final FieldDoc fieldDoc;
+        if (after != null) {
+            fieldDoc = toFieldDoc(after);
+            if (getLastSortField(sort).getReverse()) {
+                fieldDoc.doc = 0;
+            } else {
+                fieldDoc.doc = Integer.MAX_VALUE;
+            }
+        } else {
+            fieldDoc = null;
+        }
+
+        return TopFieldCollector.createSharedManager(
+                sort,
+                searchRequest.getLimit(),
+                fieldDoc,
+                1000);
+    }
+
+    private SortField getLastSortField(final Sort sort) {
+        final SortField[] sortFields = sort.getSort();
+        return sortFields[sortFields.length - 1];
+    }
+
+    private SearchResults toSearchResults(final SearchRequest searchRequest, final IndexSearcher searcher,
+            final Object[] reduces) throws IOException {
+        final SearchResults result = new SearchResults();
+        collectHits(searcher, (TopDocs) reduces[0], result);
+        if (reduces.length == 2) {
+            collectFacets(searchRequest, searcher, (FacetsCollector) reduces[1], result);
+        }
+        return result;
+    }
+
+    private void collectHits(final IndexSearcher searcher, final TopDocs topDocs, final SearchResults searchResults)
+            throws IOException {
+        final List<SearchHit> hits = new ArrayList<SearchHit>(topDocs.scoreDocs.length);
+        final StoredFields storedFields = searcher.storedFields();
+
+        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
+            final Document doc = storedFields.document(scoreDoc.doc);
+
+            final List<StoredField> fields = new ArrayList<StoredField>(doc.getFields().size());
+            for (IndexableField field : doc.getFields()) {
+                if (field.name().equals("_id")) {
+                    continue;
+                }
+                if (field.numericValue() != null) {
+                    fields.add(new StoredField(field.name(), field.numericValue().doubleValue()));
+                } else if (field.binaryValue() != null) {
+                    fields.add(new StoredField(field.name(), toBytes(field.binaryValue())));
+                } else if (field.stringValue() != null) {
+                    fields.add(new StoredField(field.name(), field.stringValue()));
+                }
+            }
+
+            final PrimitiveWrapper<?>[] after = toAfter(((FieldDoc) scoreDoc));
+            hits.add(new SearchHit(doc.get("_id"), after, fields));
+        }
+
+        searchResults.setTotalHits(topDocs.totalHits.value);
+        searchResults.setTotalHitsRelation(topDocs.totalHits.relation);
+        searchResults.setHits(hits);
+    }
+
+    private void collectFacets(final SearchRequest searchRequest, final IndexSearcher searcher,
+            final FacetsCollector fc, final SearchResults searchResults) throws IOException {
+        if (searchRequest.hasCounts()) {
+            final Map<String, Map<String, Number>> countsMap = new HashMap<String, Map<String, Number>>(
+                    searchRequest.getCounts().size());
+            for (final String field : searchRequest.getCounts()) {
+                final StringDocValuesReaderState state = new StringDocValuesReaderState(searcher.getIndexReader(),
+                        field);
+                final StringValueFacetCounts counts = new StringValueFacetCounts(state, fc);
+                countsMap.put(field, collectFacets(counts, searchRequest.getTopN(), field));
+            }
+            searchResults.setCounts(countsMap);
+        }
+
+        if (searchRequest.hasRanges()) {
+            final Map<String, Map<String, Number>> rangesMap = new HashMap<String, Map<String, Number>>(
+                    searchRequest.getRanges().size());
+            for (final Entry<String, List<DoubleRange>> entry : searchRequest.getRanges().entrySet()) {
+                final DoubleRangeFacetCounts counts = toDoubleRangeFacetCounts(fc, entry.getKey(), entry.getValue());
+                rangesMap.put(entry.getKey(), collectFacets(counts, searchRequest.getTopN(), entry.getKey()));
+            }
+            searchResults.setRanges(rangesMap);
+        }
+    }
+
+    private DoubleRangeFacetCounts toDoubleRangeFacetCounts(final FacetsCollector fc, final String field,
+            final List<DoubleRange> ranges) throws IOException {
+        final org.apache.lucene.facet.range.DoubleRange[] luceneRanges = new org.apache.lucene.facet.range.DoubleRange[ranges
+                .size()];
+        for (int i = 0; i < luceneRanges.length; i++) {
+            final DoubleRange range = ranges.get(i);
+            luceneRanges[i] = new org.apache.lucene.facet.range.DoubleRange(
+                    range.getLabel(), range.getMin() != null ? range.getMin() : Double.NEGATIVE_INFINITY,
+                    range.isMinInclusive(), range.getMax() != null ? range.getMax() : Double.POSITIVE_INFINITY,
+                    range.isMaxInclusive());
+        }
+        return new DoubleRangeFacetCounts(field, fc, luceneRanges);
+    }
+
+    private Map<String, Number> collectFacets(final Facets facets, final int topN, final String dim)
+            throws IOException {
+        final FacetResult topChildren = facets.getTopChildren(topN, dim);
+        final Map<String, Number> result = new HashMap<String, Number>(topChildren.childCount);
+        for (final LabelAndValue lv : topChildren.labelValues) {
+            result.put(lv.label, lv.value);
+        }
+        return result;
+    }
+
+    // Ensure _id is the final sort field so we can paginate.
+    private Sort toSort(final SearchRequest searchRequest) {
+        if (!searchRequest.hasSort()) {
+            return DEFAULT_SORT;
+        }
+
+        final List<String> sort = new ArrayList<String>(searchRequest.getSort());
+        final String last = sort.get(sort.size() - 1);
+        // Append _id field if not already present.
+ switch (last) { + case "-_id": + case "_id": + break; + default: + sort.add("_id"); + } + return convertSort(sort); + } + + private Sort convertSort(final List sort) { + final SortField[] fields = new SortField[sort.size()]; + for (int i = 0; i < sort.size(); i++) { + fields[i] = convertSortField(sort.get(i)); + } + return new Sort(fields); + } + + private SortField convertSortField(final String sortString) { + if ("relevance".equals(sortString)) { + return SortField.FIELD_SCORE; + } + final Matcher m = SORT_FIELD_RE.matcher(sortString); + if (!m.matches()) { + throw new WebApplicationException( + sortString + " is not a valid sort parameter", Status.BAD_REQUEST); + } + final boolean reverse = "-".equals(m.group(1)); + switch (m.group(3)) { + case "string": + return new SortedSetSortField(m.group(2), reverse); + case "double": + return new SortedNumericSortField(m.group(2), SortField.Type.DOUBLE, reverse); + default: + throw new WebApplicationException( + m.group(3) + " is not a valid sort type", Status.BAD_REQUEST); + } + } + + private static Document toDocument(final String docId, final DocumentUpdateRequest request) throws IOException { + final Document result = new Document(); + + // id + result.add(new org.apache.lucene.document.StringField("_id", docId, Store.YES)); + result.add(new SortedDocValuesField("_id", new BytesRef(docId))); + + // partition (optional) + if (request.hasPartition()) { + result.add(new org.apache.lucene.document.StringField("_partition", request.getPartition(), Store.NO)); + } + + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); + + for (Field field : request.getFields()) { + // Underscore-prefix is reserved. + if (field.getName().startsWith("_")) { + continue; + } + if (field instanceof TextField) { + var f = (TextField) field; + result.add(new org.apache.lucene.document.TextField(f.getName(), f.getValue(), + f.isStore() ? Store.YES : Store.NO)); + } else if (field instanceof StringField) { + // TODO use KeywordField when available. + var f = (StringField) field; + result.add(new org.apache.lucene.document.StringField(f.getName(), f.getValue(), + f.isStore() ? 
Store.YES : Store.NO)); + result.add(new SortedSetDocValuesField(f.getName(), + new BytesRef(f.getValue()))); + } else if (field instanceof DoubleField) { + var f = (DoubleField) field; + result.add(new org.apache.lucene.document.DoubleField(f.getName(), f.getValue())); + if (f.isStore()) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), f.getValue())); + } + } else if (field instanceof StoredField) { + var f = (StoredField) field; + var val = f.getValue(); + if (val instanceof String) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), (String) val)); + } else if (val instanceof Number) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), ((Number) val).doubleValue())); + } else if (val instanceof byte[]) { + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap((byte[]) val)); + result.add(new org.apache.lucene.document.StoredField(f.getName(), buf.toString())); + } catch (final CharacterCodingException e) { + result.add(new org.apache.lucene.document.StoredField(f.getName(), (byte[]) val)); + } + } else { + throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST); + } + } else { + throw new WebApplicationException(field + " is not valid", Status.BAD_REQUEST); + } + } + + return result; + } + + private FieldDoc toFieldDoc(final Object[] after) { + final Object[] fields = new Object[after.length]; + for (int i = 0; i < after.length; i++) { + if (after[i] instanceof PrimitiveWrapper) { + fields[i] = ((PrimitiveWrapper) after[i]).getValue(); + } + if (fields[i] instanceof byte[]) { + fields[i] = new BytesRef((byte[]) fields[i]); + } + if (fields[i] instanceof String) { + fields[i] = new BytesRef((String) fields[i]); + } + } + return new FieldDoc(0, Float.NaN, fields); + } + + private PrimitiveWrapper[] toAfter(final FieldDoc fieldDoc) { + final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); + final PrimitiveWrapper[] fields = new PrimitiveWrapper[fieldDoc.fields.length]; + for (int i = 0; i < fields.length; i++) { + if (fieldDoc.fields[i] instanceof String) { + fields[i] = new StringWrapper((String) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof BytesRef) { + var bytes = toBytes((BytesRef) fieldDoc.fields[i]); + try { + final CharBuffer buf = utf8Decoder.decode(ByteBuffer.wrap(bytes)); + fields[i] = new StringWrapper(buf.toString()); + } catch (final CharacterCodingException e) { + fields[i] = new ByteArrayWrapper(bytes); + } + } else if (fieldDoc.fields[i] instanceof Double) { + fields[i] = new DoubleWrapper((double) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Integer) { + fields[i] = new IntWrapper((int) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Long) { + fields[i] = new LongWrapper((long) fieldDoc.fields[i]); + } else if (fieldDoc.fields[i] instanceof Float) { + fields[i] = new FloatWrapper((float) fieldDoc.fields[i]); + } else { + throw new WebApplicationException(fieldDoc.fields[i].getClass() + " is not valid", Status.BAD_REQUEST); + } + } + return fields; + } + + private static byte[] toBytes(final BytesRef bytesRef) { + return Arrays.copyOfRange(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length); + } + + private static Query docIdQuery(final String docId) { + return new TermQuery(docIdTerm(docId)); + } + + private static Term docIdTerm(final String docId) { + return new Term("_id", docId); + } + + private Query parse(final SearchRequest request) { + var queryParser = new 
NouveauQueryParser(analyzer); + Query result; + try { + result = queryParser.parse(request.getQuery(), "default"); + if (request.hasPartition()) { + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("_partition", request.getPartition())), Occur.MUST); + builder.add(result, Occur.MUST); + result = builder.build(); + } + } catch (final QueryNodeException e) { + throw new WebApplicationException(e.getMessage(), e, Status.BAD_REQUEST); + } + return result; + } + + @Override + public String toString() { + return "Lucene9Index [analyzer=" + analyzer + ", writer=" + writer + ", searcherManager=" + searcherManager + + "]"; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java new file mode 100644 index 000000000..8d5555692 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/Lucene9Module.java @@ -0,0 +1,31 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import org.apache.lucene.search.Query; + +import com.fasterxml.jackson.core.Version; +import com.fasterxml.jackson.databind.module.SimpleModule; + +public class Lucene9Module extends SimpleModule { + + public Lucene9Module() { + super("lucene9", Version.unknownVersion()); + + // Query + addSerializer(Query.class, new QuerySerializer()); + addDeserializer(Query.class, new QueryDeserializer()); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java new file mode 100644 index 000000000..6aad65cd4 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParser.java @@ -0,0 +1,181 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
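The Lucene9Module above is what makes Lucene Query objects cross the wire: it registers the custom serializer and deserializer (defined later in this patch) with Jackson. A minimal sketch of using it, not part of the patch itself:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.couchdb.nouveau.lucene9.Lucene9Module;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;

    public class Lucene9ModuleExample {
        public static void main(String[] args) throws Exception {
            // Registering the module lets Query objects round-trip as JSON.
            final ObjectMapper mapper = new ObjectMapper();
            mapper.registerModule(new Lucene9Module());

            final Query query = new TermQuery(new Term("title", "couch"));
            final String json = mapper.writeValueAsString(query);
            // json is roughly {"@type":"term","field":"title","text":"couch"}
            final Query roundTripped = mapper.readValue(json, Query.class);
            System.out.println(roundTripped);
        }
    }
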
+ +package org.apache.couchdb.nouveau.lucene9; + +import java.text.NumberFormat; +import java.text.ParseException; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.queryparser.flexible.core.QueryParserHelper; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode; +import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; +import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline; +import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor; +import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder; +import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; +import org.apache.lucene.queryparser.flexible.standard.processors.AllowLeadingWildcardProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.AnalyzerQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.BooleanQuery2ModifierNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.BooleanSingleChildOptimizationQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.BoostQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.DefaultPhraseSlopQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.FuzzyQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.IntervalQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MatchAllDocsQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MultiFieldQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.MultiTermRewriteMethodProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.OpenRangeQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.PhraseSlopQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.RegexpQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.RemoveEmptyNonLeafQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.TermRangeQueryNodeProcessor; +import org.apache.lucene.queryparser.flexible.standard.processors.WildcardQueryNodeProcessor; +import org.apache.lucene.search.Query; + +public final class NouveauQueryParser extends QueryParserHelper { + + public NouveauQueryParser(final Analyzer analyzer) { + super( + new StandardQueryConfigHandler(), + new StandardSyntaxParser(), + new 
NouveauQueryNodeProcessorPipeline(), + new StandardQueryTreeBuilder()); + getQueryConfigHandler().set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, true); + getQueryConfigHandler().set(ConfigurationKeys.ANALYZER, analyzer); + } + + @Override + public Query parse(String query, String defaultField) throws QueryNodeException { + return (Query) super.parse(query, defaultField); + } + + /** + * Same pipeline as StandardQueryParser but we substitute + * PointQueryNodeProcessor and PointRangeQueryNodeProcessor for + * NouveauPointProcessor below. + */ + public static class NouveauQueryNodeProcessorPipeline extends QueryNodeProcessorPipeline { + + public NouveauQueryNodeProcessorPipeline() { + super(null); + add(new WildcardQueryNodeProcessor()); + add(new MultiFieldQueryNodeProcessor()); + add(new FuzzyQueryNodeProcessor()); + add(new RegexpQueryNodeProcessor()); + add(new MatchAllDocsQueryNodeProcessor()); + add(new OpenRangeQueryNodeProcessor()); + add(new NouveauPointProcessor()); + add(new TermRangeQueryNodeProcessor()); + add(new AllowLeadingWildcardProcessor()); + add(new AnalyzerQueryNodeProcessor()); + add(new PhraseSlopQueryNodeProcessor()); + add(new BooleanQuery2ModifierNodeProcessor()); + add(new NoChildOptimizationQueryNodeProcessor()); + add(new RemoveDeletedQueryNodesProcessor()); + add(new RemoveEmptyNonLeafQueryNodeProcessor()); + add(new BooleanSingleChildOptimizationQueryNodeProcessor()); + add(new DefaultPhraseSlopQueryNodeProcessor()); + add(new BoostQueryNodeProcessor()); + add(new MultiTermRewriteMethodProcessor()); + add(new IntervalQueryNodeProcessor()); + } + } + + /** + * If it looks like a number, treat it as a number. + */ + public static class NouveauPointProcessor extends QueryNodeProcessorImpl { + + @Override + protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { + final var numberFormat = NumberFormat.getInstance(); + final var pointsConfig = new PointsConfig(numberFormat, Double.class); + + if (node instanceof FieldQueryNode && !(node.getParent() instanceof RangeQueryNode)) { + final var fieldNode = (FieldQueryNode) node; + String text = fieldNode.getTextAsString(); + if (text.length() == 0) { + return node; + } + final Number number; + try { + number = numberFormat.parse(text).doubleValue(); + } catch (final ParseException e) { + return node; + } + final var lowerNode = new PointQueryNode(fieldNode.getField(), number, numberFormat); + final var upperNode = new PointQueryNode(fieldNode.getField(), number, numberFormat); + return new PointRangeQueryNode(lowerNode, upperNode, true, true, pointsConfig); + } + + if (node instanceof TermRangeQueryNode) { + final var termRangeNode = (TermRangeQueryNode) node; + final var lower = termRangeNode.getLowerBound(); + final var upper = termRangeNode.getUpperBound(); + final var lowerText = lower.getTextAsString(); + final var upperText = upper.getTextAsString(); + Number lowerNumber = null, upperNumber = null; + + if (lowerText.length() > 0 && !lowerText.equals("-Infinity")) { + try { + lowerNumber = numberFormat.parse(lowerText).doubleValue(); + } catch (final ParseException e) { + return node; + } + } + + if (upperText.length() > 0 && !upperText.equals("Infinity")) { + try { + upperNumber = numberFormat.parse(upperText).doubleValue(); + } catch (final ParseException e) { + return node; + } + } + + final var lowerNode = new PointQueryNode(termRangeNode.getField(), lowerNumber, numberFormat); + final var upperNode = new PointQueryNode(termRangeNode.getField(), upperNumber, numberFormat); + 
final var lowerInclusive = termRangeNode.isLowerInclusive(); + final var upperInclusive = termRangeNode.isUpperInclusive(); + + return new PointRangeQueryNode( + lowerNode, upperNode, lowerInclusive, upperInclusive, pointsConfig); + } + + return node; + } + + @Override + protected QueryNode preProcessNode(final QueryNode node) throws QueryNodeException { + return node; + } + + @Override + protected List setChildrenOrder(final List children) throws QueryNodeException { + return children; + } + + } + +} \ No newline at end of file diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java new file mode 100644 index 000000000..91fee1795 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/ParallelSearcherFactory.java @@ -0,0 +1,36 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import java.io.IOException; +import java.util.concurrent.Executor; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SearcherFactory; + +public class ParallelSearcherFactory extends SearcherFactory { + + private Executor executor; + + public ParallelSearcherFactory(Executor executor) { + this.executor = executor; + } + + @Override + public IndexSearcher newSearcher(final IndexReader reader, final IndexReader previousReader) throws IOException { + return new IndexSearcher(reader, executor); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java new file mode 100644 index 000000000..2e3b0278e --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QueryDeserializer.java @@ -0,0 +1,121 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
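The practical effect of the NouveauPointProcessor above is that anything that parses as a number is searched against double point values rather than terms. A rough illustration (the field name here is made up):

    import org.apache.couchdb.nouveau.lucene9.NouveauQueryParser;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.search.Query;

    public class QueryParserExample {
        public static void main(String[] args) throws Exception {
            final NouveauQueryParser parser = new NouveauQueryParser(new StandardAnalyzer());

            // "12" parses as a number, so this becomes a [12.0, 12.0] point
            // range query rather than a term query.
            final Query exact = parser.parse("price:12", "default");

            // Numeric endpoints in a term range likewise become a point range.
            final Query range = parser.parse("price:[10 TO 20]", "default");

            System.out.println(exact + " / " + range);
        }
    }
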
+
+package org.apache.couchdb.nouveau.lucene9;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.WildcardQuery;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+
+public class QueryDeserializer extends StdDeserializer<Query> {
+
+    public QueryDeserializer() {
+        this(null);
+    }
+
+    public QueryDeserializer(Class<?> vc) {
+        super(vc);
+    }
+
+    @Override
+    public Query deserialize(final JsonParser parser, final DeserializationContext context)
+            throws IOException, JsonProcessingException {
+        return deserializeNode(parser, context, parser.getCodec().readTree(parser));
+    }
+
+    private Query deserializeNode(final JsonParser parser, final DeserializationContext context, final JsonNode node)
+            throws IOException, JsonProcessingException {
+        final String type = node.get("@type").asText();
+        switch (type) {
+            case "term": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new TermQuery(new Term(field, text));
+            }
+            case "boolean": {
+                if (!node.get("clauses").isArray()) {
+                    throw new JsonParseException(parser, "boolean clauses must be an array");
+                }
+                final BooleanQuery.Builder builder = new BooleanQuery.Builder();
+                final Iterator<JsonNode> it = node.get("clauses").elements();
+                while (it.hasNext()) {
+                    // Each clause is written by QuerySerializer as
+                    // {"query": ..., "occur": "must"|"should"|"must_not"|"filter"}.
+                    final JsonNode clause = it.next();
+                    final Query q = deserializeNode(parser, context, clause.get("query"));
+                    final BooleanClause.Occur occur = BooleanClause.Occur
+                            .valueOf(clause.get("occur").asText().toUpperCase());
+                    builder.add(q, occur);
+                }
+                return builder.build();
+            }
+            case "wildcard": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new WildcardQuery(new Term(field, text));
+            }
+            case "phrase": {
+                final String field = node.get("field").asText();
+                if (!node.get("terms").isArray()) {
+                    throw new JsonParseException(parser, "phrase terms must be an array");
+                }
+                final PhraseQuery.Builder builder = new PhraseQuery.Builder();
+                final Iterator<JsonNode> it = node.get("terms").elements();
+                while (it.hasNext()) {
+                    builder.add(new Term(field, it.next().asText()));
+                }
+                builder.setSlop(node.get("slop").asInt());
+                return builder.build();
+            }
+            case "prefix": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new PrefixQuery(new Term(field, text));
+            }
+            case "fuzzy": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                final int maxEdits = node.get("max_edits").asInt();
+                final int prefixLength = node.get("prefix_length").asInt();
+                return new FuzzyQuery(new Term(field, text), maxEdits, prefixLength);
+            }
+            case "regexp": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new RegexpQuery(new Term(field, text));
+            }
+            case "term_range": {
+                // Mirrors the fields written by QuerySerializer for TermRangeQuery.
+                final String field = node.get("field").asText();
+                final String lower = node.get("lower").asText();
+                final boolean includesLower = node.get("includes_lower").asBoolean();
+                final String upper = node.get("upper").asText();
+                final boolean includesUpper = node.get("includes_upper").asBoolean();
+                return TermRangeQuery.newStringRange(field, lower, upper, includesLower, includesUpper);
+            }
+            case "point_range": {
+                // The serialized form records only raw bytes and num_dims, not
+                // the point type, so the query cannot be reconstructed here.
+                throw new JsonParseException(parser, "point_range queries cannot be deserialized");
+            }
+            case "match_all":
+                return new MatchAllDocsQuery();
+        }
+        throw new
JsonParseException(parser, type + " not a supported query type");
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java
new file mode 100644
index 000000000..df2f7a675
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/QuerySerializer.java
@@ -0,0 +1,171 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.lucene9;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.WildcardQuery;
+
+import com.fasterxml.jackson.core.JsonGenerationException;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+class QuerySerializer extends StdSerializer<Query> {
+
+    QuerySerializer() {
+        this(null);
+    }
+
+    QuerySerializer(Class<Query> vc) {
+        super(vc);
+    }
+
+    @Override
+    public void serialize(final Query query, final JsonGenerator gen, final SerializerProvider provider)
+            throws IOException {
+
+        if (query instanceof TermQuery) {
+            final TermQuery termQuery = (TermQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "term");
+            gen.writeStringField("field", termQuery.getTerm().field());
+            // "text" is the key QueryDeserializer reads for term queries.
+            gen.writeStringField("text", termQuery.getTerm().text());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof BooleanQuery) {
+            final BooleanQuery booleanQuery = (BooleanQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "boolean");
+            gen.writeFieldName("clauses");
+            gen.writeStartArray();
+            for (final BooleanClause clause : booleanQuery.clauses()) {
+                gen.writeStartObject();
+                gen.writeFieldName("query");
+                serialize(clause.getQuery(), gen, provider);
+                gen.writeStringField("occur", clause.getOccur().name().toLowerCase());
+                gen.writeEndObject();
+            }
+            gen.writeEndArray();
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof WildcardQuery) {
+            final WildcardQuery wildcardQuery = (WildcardQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "wildcard");
+            gen.writeStringField("field", wildcardQuery.getField());
+            gen.writeStringField("text", wildcardQuery.getTerm().text());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof PhraseQuery) {
+            final PhraseQuery phraseQuery = (PhraseQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "phrase");
+            gen.writeStringField("field", phraseQuery.getField());
+            gen.writeFieldName("terms");
+            gen.writeStartArray();
+            for (final Term term : phraseQuery.getTerms()) {
+                gen.writeString(term.text());
+            }
+            gen.writeEndArray();
+            gen.writeNumberField("slop", phraseQuery.getSlop());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof PrefixQuery) {
+            final PrefixQuery prefixQuery = (PrefixQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "prefix");
+            gen.writeStringField("field", prefixQuery.getField());
+            gen.writeStringField("text", prefixQuery.getPrefix().text());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof FuzzyQuery) {
+            final FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "fuzzy");
+            gen.writeStringField("field", fuzzyQuery.getField());
+            gen.writeStringField("text", fuzzyQuery.getTerm().text());
+            gen.writeNumberField("max_edits", fuzzyQuery.getMaxEdits());
+            gen.writeNumberField("prefix_length", fuzzyQuery.getPrefixLength());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof RegexpQuery) {
+            final RegexpQuery regexpQuery = (RegexpQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "regexp");
+            gen.writeStringField("field", regexpQuery.getField());
+            gen.writeStringField("text", regexpQuery.getRegexp().text());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof TermRangeQuery) {
+            final TermRangeQuery termRangeQuery = (TermRangeQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "term_range");
+            gen.writeStringField("field", termRangeQuery.getField());
+            gen.writeStringField("lower", termRangeQuery.getLowerTerm().utf8ToString());
+            gen.writeBooleanField("includes_lower", termRangeQuery.includesLower());
+            gen.writeStringField("upper", termRangeQuery.getUpperTerm().utf8ToString());
+            gen.writeBooleanField("includes_upper", termRangeQuery.includesUpper());
+            gen.writeEndObject();
+            return;
+        }
+
+        if (query instanceof PointRangeQuery) {
+            final PointRangeQuery pointRangeQuery = (PointRangeQuery) query;
+            gen.writeStartObject();
+            gen.writeStringField("@type", "point_range");
+            gen.writeStringField("field", pointRangeQuery.getField());
+            gen.writeBinaryField("lower", pointRangeQuery.getLowerPoint());
+            gen.writeBinaryField("upper", pointRangeQuery.getUpperPoint());
+            gen.writeNumberField("num_dims", pointRangeQuery.getNumDims());
+            gen.writeEndObject();
+            // Without this return the method would fall through and throw
+            // after already writing the object.
+            return;
+        }
+
+        if (query instanceof MatchAllDocsQuery) {
+            gen.writeStartObject();
+            gen.writeStringField("@type", "match_all");
+            gen.writeEndObject();
+            return;
+        }
+
+        throw new JsonGenerationException(query.getClass() + " not supported", gen);
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java
new file mode 100644
index 000000000..ae7e4c261
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/lucene9/SimpleAsciiFoldingAnalyzer.java
@@ -0,0 +1,39 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LetterTokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; + +class SimpleAsciiFoldingAnalyzer extends Analyzer { + + SimpleAsciiFoldingAnalyzer() { + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new LetterTokenizer(); + return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(new LowerCaseFilter(tokenizer))); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + return new ASCIIFoldingFilter(new LowerCaseFilter(in)); + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java new file mode 100644 index 000000000..e29657eab --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/AnalyzeResource.java @@ -0,0 +1,74 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
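SimpleAsciiFoldingAnalyzer tokenizes on letter boundaries, lower-cases, and folds diacritics to their ASCII equivalents. The class is package-private, but it is reachable through Lucene9AnalyzerFactory under the name simple_asciifolding; a quick sketch of the tokens it produces:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class FoldingExample {
        public static void main(String[] args) throws IOException {
            final Analyzer analyzer = Lucene9AnalyzerFactory.newAnalyzer("simple_asciifolding");
            final List<String> tokens = new ArrayList<>();
            try (TokenStream stream = analyzer.tokenStream("default", "Crème Brûlée")) {
                final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {
                    tokens.add(term.toString());
                }
                stream.end();
            }
            System.out.println(tokens); // [creme, brulee]
        }
    }
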
+ +package org.apache.couchdb.nouveau.resources; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.couchdb.nouveau.api.AnalyzeRequest; +import org.apache.couchdb.nouveau.api.AnalyzeResponse; +import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +import com.codahale.metrics.annotation.ExceptionMetered; +import com.codahale.metrics.annotation.Metered; +import com.codahale.metrics.annotation.ResponseMetered; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response.Status; + +@Path("/analyze") +@Metered +@ResponseMetered +@ExceptionMetered(cause = IOException.class) +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public final class AnalyzeResource { + + @POST + public AnalyzeResponse analyzeText(@NotNull @Valid AnalyzeRequest request) throws IOException { + try { + final List tokens = tokenize(Lucene9AnalyzerFactory.newAnalyzer(request.getAnalyzer()), + request.getText()); + return new AnalyzeResponse(tokens); + } catch (IllegalArgumentException e) { + throw new WebApplicationException(request.getAnalyzer() + " not a valid analyzer", + Status.BAD_REQUEST); + } + } + + private List tokenize(final Analyzer analyzer, final String text) throws IOException { + final List result = new ArrayList(10); + try (final TokenStream tokenStream = analyzer.tokenStream("default", text)) { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + final CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); + result.add(term.toString()); + } + tokenStream.end(); + } + return result; + } + +} diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java new file mode 100644 index 000000000..4273582b6 --- /dev/null +++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java @@ -0,0 +1,147 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
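AnalyzeResource can also be exercised in-process, as the AnalyzeHealthCheck later in this patch does. A minimal sketch; the AnalyzeRequest constructor order (analyzer, text) and the AnalyzeResponse.getTokens() accessor are assumptions, not shown in this excerpt:

    import org.apache.couchdb.nouveau.api.AnalyzeRequest;
    import org.apache.couchdb.nouveau.api.AnalyzeResponse;
    import org.apache.couchdb.nouveau.resources.AnalyzeResource;

    public class AnalyzeExample {
        public static void main(String[] args) throws Exception {
            final AnalyzeResource resource = new AnalyzeResource();
            // Assumed constructor order: (analyzer, text).
            final AnalyzeRequest request = new AnalyzeRequest("english", "The quick brown fox");
            final AnalyzeResponse response = resource.analyzeText(request);
            // EnglishAnalyzer stems and drops stopwords: roughly [quick, brown, fox].
            System.out.println(response.getTokens());
        }
    }
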
+ +package org.apache.couchdb.nouveau.resources; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.couchdb.nouveau.api.IndexInfo; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.core.IndexLoader; +import org.apache.couchdb.nouveau.core.IndexManager; +import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory; +import org.apache.couchdb.nouveau.lucene9.Lucene9Index; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.misc.store.DirectIODirectory; +import org.apache.lucene.search.SearcherFactory; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; + +import com.codahale.metrics.annotation.ExceptionMetered; +import com.codahale.metrics.annotation.Metered; +import com.codahale.metrics.annotation.ResponseMetered; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.DELETE; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.PUT; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType;; + +@Path("/index/{name}") +@Metered +@ResponseMetered +@ExceptionMetered(cause = IOException.class) +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public final class IndexResource { + + private final IndexManager indexManager; + private final SearcherFactory searcherFactory; + + public IndexResource(final IndexManager indexManager, final SearcherFactory searcherFactory) { + this.indexManager = Objects.requireNonNull(indexManager); + this.searcherFactory = Objects.requireNonNull(searcherFactory); + } + + @PUT + public void createIndex(@PathParam("name") String name, @NotNull @Valid IndexDefinition indexDefinition) + throws IOException { + indexManager.create(name, indexDefinition); + } + + @DELETE + @Path("/doc/{docId}") + public void deleteDoc(@PathParam("name") String name, @PathParam("docId") String docId, + @NotNull @Valid DocumentDeleteRequest request) throws Exception { + indexManager.with(name, indexLoader(), (index) -> { + index.delete(docId, request); + return null; + }); + } + + @DELETE + public void deletePath(@PathParam("name") String path, @Valid final List exclusions) throws IOException { + indexManager.deleteAll(path, exclusions); + } + + @GET + public IndexInfo indexInfo(@PathParam("name") String name) throws Exception { + return indexManager.with(name, indexLoader(), (index) -> { + return index.info(); + }); + } + + @POST + @Path("/search") + public SearchResults searchIndex(@PathParam("name") String name, + @NotNull @Valid SearchRequest request) + throws Exception { + return indexManager.with(name, indexLoader(), (index) -> { + return index.search(request); + }); + } + + @PUT + @Path("/doc/{docId}") + public void updateDoc(@PathParam("name") String name, @PathParam("docId") String docId, + @NotNull @Valid DocumentUpdateRequest request) + throws Exception { + indexManager.with(name, indexLoader(), (index) -> { + index.update(docId, 
request);
+            return null;
+        });
+    }
+
+    private IndexLoader<Lucene9Index> indexLoader() {
+        return (path, indexDefinition) -> {
+            final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition);
+            final Directory dir = new DirectIODirectory(FSDirectory.open(path));
+            final IndexWriterConfig config = new IndexWriterConfig(analyzer);
+            config.setUseCompoundFile(false);
+            final IndexWriter writer = new IndexWriter(dir, config);
+            final long updateSeq = getUpdateSeq(writer);
+            final SearcherManager searcherManager = new SearcherManager(writer, searcherFactory);
+            return new Lucene9Index(analyzer, writer, updateSeq, searcherManager);
+        };
+    }
+
+    private static long getUpdateSeq(final IndexWriter writer) throws IOException {
+        final Iterable<Map.Entry<String, String>> commitData = writer.getLiveCommitData();
+        if (commitData == null) {
+            return 0L;
+        }
+        for (Map.Entry<String, String> entry : commitData) {
+            if (entry.getKey().equals("update_seq")) {
+                return Long.parseLong(entry.getValue());
+            }
+        }
+        return 0L;
+    }
+
+}
diff --git a/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java b/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java
new file mode 100644
index 000000000..bcc94e34d
--- /dev/null
+++ b/nouveau/src/main/java/org/apache/couchdb/nouveau/tasks/CloseAllIndexesTask.java
@@ -0,0 +1,38 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.tasks;
+
+import java.io.PrintWriter;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.couchdb.nouveau.core.IndexManager;
+
+import io.dropwizard.servlets.tasks.Task;
+
+public class CloseAllIndexesTask extends Task {
+
+    private final IndexManager indexManager;
+
+    public CloseAllIndexesTask(final IndexManager indexManager) {
+        super("close-all-indexes");
+        this.indexManager = indexManager;
+    }
+
+    @Override
+    public void execute(Map<String, List<String>> parameters, PrintWriter output) throws Exception {
+        indexManager.stop();
+    }
+
+}
diff --git a/nouveau/src/main/resources/banner.txt b/nouveau/src/main/resources/banner.txt
new file mode 100644
index 000000000..3575b3984
--- /dev/null
+++ b/nouveau/src/main/resources/banner.txt
@@ -0,0 +1,7 @@
+ .-. + / | + /\ | .-._.) ( ) .-..-. .-. ) ( + / \ |( )( )( / ./.-'_( | ( ) + .-' / \| `-' `--': \_/ (__.' `-'-'`--': +(__.' `. 
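The update_seq checkpointing used above is a small but load-bearing detail: Lucene9Index.doCommit stores the CouchDB update sequence in the Lucene commit's user data, and IndexResource.getUpdateSeq reads it back when an index is reopened, so indexing can resume where it left off. A standalone sketch of the same pattern (the index path here is illustrative):

    import java.io.IOException;
    import java.nio.file.Path;
    import java.util.Collections;
    import java.util.Map;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.FSDirectory;

    public class UpdateSeqExample {
        public static void main(String[] args) throws IOException {
            try (FSDirectory dir = FSDirectory.open(Path.of("/tmp/example-index"));
                    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                // Commit with the sequence attached as commit user data.
                writer.setLiveCommitData(Collections.singletonMap("update_seq", "42").entrySet());
                writer.commit();

                // On reopen, the sequence is recoverable from the same place.
                for (Map.Entry<String, String> entry : writer.getLiveCommitData()) {
                    if ("update_seq".equals(entry.getKey())) {
                        System.out.println("resume from seq " + entry.getValue());
                    }
                }
            }
        }
    }
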
+ diff --git a/nouveau/src/main/resources/openapi.yaml b/nouveau/src/main/resources/openapi.yaml new file mode 100644 index 000000000..2bc4d73f1 --- /dev/null +++ b/nouveau/src/main/resources/openapi.yaml @@ -0,0 +1,9 @@ +resourcePackages: +- org.apache.couchdb.nouveau.resources + +openAPI: + info: + version: '1.0' + license: + name: Apache 2.0 + url: http://www.apache.org/licenses/LICENSE-2.0.html diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java new file mode 100644 index 000000000..8fb773419 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/api/SearchRequestTest.java @@ -0,0 +1,59 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.api; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import java.util.Map; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public class SearchRequestTest { + + private static ObjectMapper mapper; + + @BeforeAll + public static void setupMapper() { + mapper = new ObjectMapper(); + } + + @Test + public void testSerialisation() throws Exception { + SearchRequest request = asObject(); + final String expected = mapper.writeValueAsString( + mapper.readValue(getClass().getResource("/fixtures/SearchRequest.json"), SearchRequest.class)); + assertThat(mapper.writeValueAsString(request)).isEqualTo(expected); + } + + @Test + public void testDeserialisation() throws Exception { + SearchRequest request = asObject(); + assertThat(mapper.readValue(getClass().getResource("/fixtures/SearchRequest.json"), SearchRequest.class).toString()) + .isEqualTo(request.toString()); + } + + private SearchRequest asObject() { + final SearchRequest result = new SearchRequest(); + result.setQuery("*:*"); + result.setLimit(10); + result.setCounts(List.of("bar")); + result.setRanges(Map.of("foo", List.of(new DoubleRange("0 to 100 inc", 0.0, true, 100.0, true)))); + return result; + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java new file mode 100644 index 000000000..8f39b3f39 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/AnalyzeHealthCheckTest.java @@ -0,0 +1,29 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
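SearchRequestTest above pins the JSON wire format against a fixture. The fixture file is not reproduced in this excerpt, but from asObject() one can reconstruct the request it encodes; a sketch that prints the serialized form (the JSON key names in the comment are inferred, not verified against the fixture):

    import java.util.List;
    import java.util.Map;

    import org.apache.couchdb.nouveau.api.DoubleRange;
    import org.apache.couchdb.nouveau.api.SearchRequest;

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class SearchRequestWireFormat {
        public static void main(String[] args) throws Exception {
            final SearchRequest request = new SearchRequest();
            request.setQuery("*:*");
            request.setLimit(10);
            request.setCounts(List.of("bar"));
            request.setRanges(Map.of("foo",
                    List.of(new DoubleRange("0 to 100 inc", 0.0, true, 100.0, true))));
            // Prints something like:
            // {"query":"*:*","limit":10,"counts":["bar"],"ranges":{"foo":[{...}]}}
            System.out.println(new ObjectMapper().writeValueAsString(request));
        }
    }
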
+ +package org.apache.couchdb.nouveau.health; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.couchdb.nouveau.resources.AnalyzeResource; +import org.junit.jupiter.api.Test; + +public class AnalyzeHealthCheckTest { + + @Test + public void testAnalyzeHealthCheck() throws Exception { + var resource = new AnalyzeResource(); + var check = new AnalyzeHealthCheck(resource); + assertTrue(check.check().isHealthy()); + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java new file mode 100644 index 000000000..42a3626d6 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/health/IndexHealthCheckTest.java @@ -0,0 +1,51 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.health; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Path; +import java.util.concurrent.Executors; + +import org.apache.couchdb.nouveau.core.IndexManager; +import org.apache.couchdb.nouveau.resources.IndexResource; +import org.apache.lucene.search.SearcherFactory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import com.codahale.metrics.MetricRegistry; +import com.fasterxml.jackson.databind.ObjectMapper; + +public class IndexHealthCheckTest { + + @Test + public void testIndexHealthCheck(@TempDir final Path tempDir) throws Exception { + var scheduler = Executors.newSingleThreadScheduledExecutor(); + var manager = new IndexManager(); + manager.setObjectMapper(new ObjectMapper()); + manager.setMetricRegistry(new MetricRegistry()); + manager.setRootDir(tempDir); + manager.setScheduler(scheduler); + manager.start(); + try { + var resource = new IndexResource(manager, new SearcherFactory()); + var check = new IndexHealthCheck(resource); + assertTrue(check.check().isHealthy()); + } finally { + scheduler.shutdown(); + manager.stop(); + } + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java new file mode 100644 index 000000000..693d82918 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9AnalyzerFactoryTest.java @@ -0,0 +1,283 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
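The analyzer tests that follow cover each KnownAnalyzer name, including per-field analyzers. The per-field wiring can be summarized as: the second IndexDefinition argument maps field names to analyzer names, which fromDefinition turns into a PerFieldAnalyzerWrapper (assuming the two-argument constructor seen earlier in this patch takes that map):

    import java.util.Map;

    import org.apache.couchdb.nouveau.api.IndexDefinition;
    import org.apache.couchdb.nouveau.lucene9.Lucene9AnalyzerFactory;
    import org.apache.lucene.analysis.Analyzer;

    public class PerFieldExample {
        public static void main(String[] args) {
            // "standard" is the default; "title" gets the English stemmer instead.
            final IndexDefinition definition = new IndexDefinition(
                    "standard", Map.of("title", "english"));
            // Returns a PerFieldAnalyzerWrapper that delegates by field name.
            final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(definition);
            System.out.println(analyzer);
        }
    }
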
+ +package org.apache.couchdb.nouveau.lucene9; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.reflect.Method; +import java.util.Map; + +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; +import org.apache.lucene.analysis.classic.ClassicAnalyzer; +import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.ga.IrishAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.lv.LatvianAnalyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import org.apache.lucene.analysis.pl.PolishAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; +import org.junit.jupiter.api.Test; + +import jakarta.ws.rs.WebApplicationException; + +public class Lucene9AnalyzerFactoryTest { + + @Test + public void testkeyword() throws Exception { + assertAnalyzer("keyword", KeywordAnalyzer.class); + } + + @Test + public void testsimple() throws Exception { + assertAnalyzer("simple", SimpleAnalyzer.class); + } + + @Test + public void testwhitespace() throws Exception { + assertAnalyzer("whitespace", WhitespaceAnalyzer.class); + } + + @Test + public void testarabic() throws Exception { + assertAnalyzer("arabic", ArabicAnalyzer.class); + } + + @Test + public void testbulgarian() throws Exception { + assertAnalyzer("bulgarian", BulgarianAnalyzer.class); + } + + @Test + public void testcatalan() throws Exception { + assertAnalyzer("catalan", CatalanAnalyzer.class); + } + + @Test + public void testcjk() throws Exception { + assertAnalyzer("cjk", CJKAnalyzer.class); + } + + @Test + public void testchinese() throws Exception { + assertAnalyzer("chinese", 
SmartChineseAnalyzer.class); + } + + @Test + public void testczech() throws Exception { + assertAnalyzer("czech", CzechAnalyzer.class); + } + + @Test + public void testdanish() throws Exception { + assertAnalyzer("danish", DanishAnalyzer.class); + } + + @Test + public void testgerman() throws Exception { + assertAnalyzer("german", GermanAnalyzer.class); + } + + @Test + public void testenglish() throws Exception { + assertAnalyzer("english", EnglishAnalyzer.class); + } + + @Test + public void testspanish() throws Exception { + assertAnalyzer("spanish", SpanishAnalyzer.class); + } + + @Test + public void testbasque() throws Exception { + assertAnalyzer("basque", BasqueAnalyzer.class); + } + + @Test + public void testpersian() throws Exception { + assertAnalyzer("persian", PersianAnalyzer.class); + } + + @Test + public void testfinnish() throws Exception { + assertAnalyzer("finnish", FinnishAnalyzer.class); + } + + @Test + public void testfrench() throws Exception { + assertAnalyzer("french", FrenchAnalyzer.class); + } + + @Test + public void testirish() throws Exception { + assertAnalyzer("irish", IrishAnalyzer.class); + } + + @Test + public void testgalician() throws Exception { + assertAnalyzer("galician", GalicianAnalyzer.class); + } + + @Test + public void testhindi() throws Exception { + assertAnalyzer("hindi", HindiAnalyzer.class); + } + + @Test + public void testhungarian() throws Exception { + assertAnalyzer("hungarian", HungarianAnalyzer.class); + } + + @Test + public void testarmenian() throws Exception { + assertAnalyzer("armenian", ArmenianAnalyzer.class); + } + + @Test + public void testindonesian() throws Exception { + assertAnalyzer("indonesian", IndonesianAnalyzer.class); + } + + @Test + public void testitalian() throws Exception { + assertAnalyzer("italian", ItalianAnalyzer.class); + } + + @Test + public void testjapanese() throws Exception { + assertAnalyzer("japanese", JapaneseAnalyzer.class); + } + + @Test + public void testlatvian() throws Exception { + assertAnalyzer("latvian", LatvianAnalyzer.class); + } + + @Test + public void testdutch() throws Exception { + assertAnalyzer("dutch", DutchAnalyzer.class); + } + + @Test + public void testnorwegian() throws Exception { + assertAnalyzer("norwegian", NorwegianAnalyzer.class); + } + + @Test + public void testpolish() throws Exception { + assertAnalyzer("polish", PolishAnalyzer.class); + } + + @Test + public void testportugese() throws Exception { + assertAnalyzer("portugese", PortugueseAnalyzer.class); + } + + @Test + public void testromanian() throws Exception { + assertAnalyzer("romanian", RomanianAnalyzer.class); + } + + @Test + public void testrussian() throws Exception { + assertAnalyzer("russian", RussianAnalyzer.class); + } + + @Test + public void testclassic() throws Exception { + assertAnalyzer("classic", ClassicAnalyzer.class); + } + + @Test + public void teststandard() throws Exception { + assertAnalyzer("standard", StandardAnalyzer.class); + } + + @Test + public void testemail() throws Exception { + assertAnalyzer("email", UAX29URLEmailAnalyzer.class); + } + + @Test + public void testswedish() throws Exception { + assertAnalyzer("swedish", SwedishAnalyzer.class); + } + + @Test + public void testthai() throws Exception { + assertAnalyzer("thai", ThaiAnalyzer.class); + } + + @Test + public void testturkish() throws Exception { + assertAnalyzer("turkish", TurkishAnalyzer.class); + } + + @Test + public void testFieldAnalyzers() throws Exception { + final IndexDefinition indexDefinition = new 
IndexDefinition("standard", + Map.of("english", "english", "thai", "thai", "email", "email")); + final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition); + assertThat(analyzer).isInstanceOf(PerFieldAnalyzerWrapper.class); + final Method m = PerFieldAnalyzerWrapper.class.getDeclaredMethod("getWrappedAnalyzer", String.class); + m.setAccessible(true); + assertThat(m.invoke(analyzer, "english")).isInstanceOf(EnglishAnalyzer.class); + assertThat(m.invoke(analyzer, "thai")).isInstanceOf(ThaiAnalyzer.class); + assertThat(m.invoke(analyzer, "email")).isInstanceOf(UAX29URLEmailAnalyzer.class); + assertThat(m.invoke(analyzer, "other")).isInstanceOf(StandardAnalyzer.class); + } + + @Test + public void testUnknownAnalyzer() throws Exception { + assertThrows(WebApplicationException.class, () -> Lucene9AnalyzerFactory.newAnalyzer("foo")); + } + + private void assertAnalyzer(final String name, final Class clazz) throws Exception { + assertThat(Lucene9AnalyzerFactory.newAnalyzer(name)).isInstanceOf(clazz); + assertThat(Lucene9AnalyzerFactory.fromDefinition(new IndexDefinition(name, null))).isInstanceOf(clazz); + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java new file mode 100644 index 000000000..1b28a01f4 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/Lucene9IndexTest.java @@ -0,0 +1,223 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.apache.couchdb.nouveau.lucene9; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.couchdb.nouveau.api.DocumentDeleteRequest; +import org.apache.couchdb.nouveau.api.DocumentUpdateRequest; +import org.apache.couchdb.nouveau.api.DoubleField; +import org.apache.couchdb.nouveau.api.DoubleRange; +import org.apache.couchdb.nouveau.api.Field; +import org.apache.couchdb.nouveau.api.IndexDefinition; +import org.apache.couchdb.nouveau.api.IndexInfo; +import org.apache.couchdb.nouveau.api.SearchRequest; +import org.apache.couchdb.nouveau.api.SearchResults; +import org.apache.couchdb.nouveau.api.StringField; +import org.apache.couchdb.nouveau.core.Index; +import org.apache.couchdb.nouveau.core.IndexLoader; +import org.apache.couchdb.nouveau.core.UpdatesOutOfOrderException; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.misc.store.DirectIODirectory; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class Lucene9IndexTest { + + protected final Index setup(final Path path) throws IOException { + final IndexDefinition indexDefinition = new IndexDefinition(); + indexDefinition.setDefaultAnalyzer("standard"); + final Index index = indexLoader().apply(path, indexDefinition); + index.setDeleteOnClose(true); + return index; + } + + protected final void cleanup(final Index index) throws IOException { + index.close(); + } + + @Test + public void testOpenClose(@TempDir Path path) throws IOException { + final Index index = setup(path); + cleanup(index); + } + + @Test + public void testSearching(@TempDir Path path) throws IOException { + final Index index = setup(path); + try { + final int count = 100; + for (int i = 1; i <= count; i++) { + final Collection fields = List.of(new StringField("foo", "bar", false, false)); + final DocumentUpdateRequest request = new DocumentUpdateRequest(i, null, fields); + index.update("doc" + i, request); + } + final SearchRequest request = new SearchRequest(); + request.setQuery("*:*"); + final SearchResults results = index.search(request); + assertThat(results.getTotalHits()).isEqualTo(count); + } finally { + cleanup(index); + } + } + + @Test + public void testSort(@TempDir Path path) throws IOException { + final Index index = setup(path); + try { + final int count = 100; + for (int i = 1; i <= count; i++) { + final Collection fields = List.of(new StringField("foo", "bar", false, false)); + final DocumentUpdateRequest request = new DocumentUpdateRequest(i, null, fields); + index.update("doc" + i, request); + } + final SearchRequest request = new SearchRequest(); + request.setQuery("*:*"); + request.setSort(List.of("foo")); + final SearchResults results = index.search(request); + assertThat(results.getTotalHits()).isEqualTo(count); + } finally { + cleanup(index); + } + } + + @Test + public void testCounts(@TempDir Path path) throws IOException { + Index index = setup(path); + try { + final int count = 100; + for (int i = 1; i <= count; i++) { + final Collection fields = List.of(new StringField("bar", "baz", false, true)); 
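+                // StringField(name, value, store, facet): facet=true here lets
+                // the counts facet requested below see this field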
+ final DocumentUpdateRequest request = new DocumentUpdateRequest(i, null, fields); + index.update("doc" + i, request); + } + final SearchRequest request = new SearchRequest(); + request.setQuery("*:*"); + request.setCounts(List.of("bar")); + final SearchResults results = index.search(request); + assertThat(results.getCounts()).isEqualTo(Map.of("bar", Map.of("baz", count))); + } finally { + cleanup(index); + } + } + + @Test + public void testRanges(@TempDir Path path) throws IOException { + Index index = setup(path); + try { + final int count = 100; + for (int i = 1; i <= count; i++) { + final Collection fields = List.of(new DoubleField("bar", (double) i, false, true)); + final DocumentUpdateRequest request = new DocumentUpdateRequest(i, null, fields); + index.update("doc" + i, request); + } + final SearchRequest request = new SearchRequest(); + request.setQuery("*:*"); + request.setRanges(Map.of("bar", + List.of(new DoubleRange("low", 0.0, true, (double) count / 2, true), + new DoubleRange("high", (double) count / 2, true, (double) count, true)))); + final SearchResults results = index.search(request); + assertThat(results.getRanges()).isEqualTo( + Map.of("bar", Map.of("low", count / 2, "high", count / 2 + 1))); + } finally { + cleanup(index); + } + } + + @Test + public void testOutOfOrder(@TempDir Path path) throws IOException { + Index index = setup(path); + try { + final Collection fields = Collections.emptyList(); + + // Go to 2. + index.update("foo", new DocumentUpdateRequest(2, null, fields)); + + // Should be prevented from going down to 1. + assertThrows(UpdatesOutOfOrderException.class, + () -> index.update("foo", new DocumentUpdateRequest(1, null, fields))); + } finally { + cleanup(index); + } + } + + @Test + public void testInfo(@TempDir Path path) throws IOException { + Index index = setup(path); + try { + IndexInfo info = index.info(); + assertThat(info.getDiskSize()).isEqualTo(0); + assertThat(info.getNumDocs()).isEqualTo(0); + assertThat(info.getUpdateSeq()).isEqualTo(0); + + final Collection fields = List.of(new DoubleField("bar", 12.0, false, true)); + index.update("foo", new DocumentUpdateRequest(2, null, fields)); + index.commit(); + + info = index.info(); + assertThat(info.getDiskSize()).isGreaterThan(0); + assertThat(info.getNumDocs()).isEqualTo(1); + assertThat(info.getUpdateSeq()).isEqualTo(2); + } finally { + cleanup(index); + } + } + + @Test + public void testDelete(@TempDir Path path) throws IOException { + Index index = setup(path); + try { + final Collection fields = List.of(new DoubleField("bar", 12.0, false, true)); + index.update("foo", new DocumentUpdateRequest(2, null, fields)); + index.commit(); + + IndexInfo info = index.info(); + assertThat(info.getNumDocs()).isEqualTo(1); + + index.delete("foo", new DocumentDeleteRequest(3)); + index.commit(); + + info = index.info(); + assertThat(info.getNumDocs()).isEqualTo(0); + } finally { + cleanup(index); + } + } + + protected IndexLoader indexLoader() { + return (path, indexDefinition) -> { + final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition); + final Directory dir = new DirectIODirectory(FSDirectory.open(path)); + final IndexWriterConfig config = new IndexWriterConfig(analyzer); + config.setUseCompoundFile(false); + final IndexWriter writer = new IndexWriter(dir, config); + final SearcherManager searcherManager = new SearcherManager(writer, null); + return new Lucene9Index(analyzer, writer, 0L, searcherManager); + }; + } + +} diff --git 
a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java new file mode 100644 index 000000000..4c1e23d2a --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/NouveauQueryParserTest.java @@ -0,0 +1,106 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.BytesRef; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class NouveauQueryParserTest { + + private static final String DEFAULT_FIELD = "foo"; + + private static NouveauQueryParser qp; + + @BeforeAll + public static void setup() { + qp = new NouveauQueryParser(new StandardAnalyzer()); + } + + @Test + public void testTermQuery() throws Exception { + assertThat(qp.parse("foo:bar", DEFAULT_FIELD)).isEqualTo(new TermQuery(new Term("foo", "bar"))); + } + + @Test + public void testPrefixQuery() throws Exception { + assertThat(qp.parse("foo:bar*", DEFAULT_FIELD)).isEqualTo(new PrefixQuery(new Term("foo", "bar"))); + } + + @Test + public void testWildcardQuery() throws Exception { + assertThat(qp.parse("foo:ba*r", DEFAULT_FIELD)).isEqualTo(new WildcardQuery(new Term("foo", "ba*r"))); + } + + @Test + public void testStringRangeQuery() throws Exception { + assertThat(qp.parse("foo:[bar TO foo]", DEFAULT_FIELD)).isEqualTo(new TermRangeQuery("foo", + new BytesRef("bar"), new BytesRef("foo"), true, true)); + } + + @Test + public void testMixedRangeQuery() throws Exception { + assertThat(qp.parse("foo:[12.0 TO foo]", DEFAULT_FIELD)).isEqualTo(new TermRangeQuery("foo", + new BytesRef("12.0"), new BytesRef("foo"), true, true)); + } + + @Test + public void testInferredPointQuery() throws Exception { + assertThat(qp.parse("foo:12", DEFAULT_FIELD)).isEqualTo(DoublePoint.newExactQuery("foo", 12.0)); + } + + @Test + public void testInferredPointRangeQuery() throws Exception { + assertThat(qp.parse("foo:[1 TO 12]", DEFAULT_FIELD)) + .isEqualTo(DoublePoint.newRangeQuery("foo", new double[] { 1 }, new double[] { 12 })); + } + + @Test + public void testOpenLeftPointRangeQuery() throws Exception { + assertThat(qp.parse("foo:[* TO 100.0]", DEFAULT_FIELD)) + .isEqualTo(DoublePoint.newRangeQuery("foo", new double[] { Double.NEGATIVE_INFINITY }, + new double[] { 100 })); + } + + @Test + public void testOpenRightPointRangeQuery() throws Exception { + assertThat(qp.parse("foo:[1.0 TO *]", DEFAULT_FIELD)) + .isEqualTo(DoublePoint.newRangeQuery("foo", new double[] { 1 }, + new double[] { Double.POSITIVE_INFINITY })); + } + 
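+
+    // The "legacy" tests below use dreyfus-style -Infinity/Infinity bounds,
+    // which should parse the same as the open (*) bounds above.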
+ @Test + public void testOpenLeftPointRangeQueryLegacy() throws Exception { + assertThat(qp.parse("foo:[-Infinity TO 100.0]", DEFAULT_FIELD)) + .isEqualTo(DoublePoint.newRangeQuery("foo", new double[] { Double.NEGATIVE_INFINITY }, + new double[] { 100 })); + } + + @Test + public void testOpenRightPointRangeQueryLegacy() throws Exception { + assertThat(qp.parse("foo:[1.0 TO Infinity]", DEFAULT_FIELD)) + .isEqualTo(DoublePoint.newRangeQuery("foo", new double[] { 1 }, + new double[] { Double.POSITIVE_INFINITY })); + } + +} diff --git a/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java new file mode 100644 index 000000000..06cfdfad7 --- /dev/null +++ b/nouveau/src/test/java/org/apache/couchdb/nouveau/lucene9/QuerySerializationTest.java @@ -0,0 +1,47 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.apache.couchdb.nouveau.lucene9; + + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public class QuerySerializationTest { + + @Test + public void basicTest() throws Exception { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new Lucene9Module()); + + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.MUST); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT); + builder.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD); + builder.add(new PhraseQuery("bar", "foo", "bar", "baz"), Occur.MUST); + final Query query = builder.build(); + + final String expected = "{\"@type\":\"boolean\",\"clauses\":[{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"must\"},{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"must_not\"},{\"query\":{\"@type\":\"term\",\"field\":\"foo\",\"term\":\"bar\"},\"occur\":\"should\"},{\"query\":{\"@type\":\"phrase\",\"field\":\"bar\",\"terms\":[\"foo\",\"bar\",\"baz\"],\"slop\":0},\"occur\":\"must\"}]}"; + assertThat(mapper.writeValueAsString(query)).isEqualTo(expected); + } + +} diff --git a/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json b/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json new file mode 100644 index 000000000..a22e322d4 --- /dev/null +++ b/nouveau/src/test/resources/fixtures/DocumentUpdateRequest.json @@ -0,0 +1,22 @@ +{ + "seq": 12, + "fields": [ + { + "@type": "string", + "name": "stringfoo", + "value": "bar", + "store": true + }, + { + "@type": "text", + "name": "textfoo", + "value": "hello there", + "store": true + }, + { + "@type": "double", + 
"name": "doublefoo", + "value": 12 + } + ] +} diff --git a/nouveau/src/test/resources/fixtures/SearchRequest.json b/nouveau/src/test/resources/fixtures/SearchRequest.json new file mode 100644 index 000000000..c588cc16b --- /dev/null +++ b/nouveau/src/test/resources/fixtures/SearchRequest.json @@ -0,0 +1,17 @@ +{ + "query": "*:*", + "limit": 10, + "sort": null, + "counts": [ + "bar" + ], + "ranges": { + "foo": [ + { + "label": "0 to 100 inc", + "min": 0.0, + "max": 100.0 + } + ] + } +} \ No newline at end of file diff --git a/rebar.config.script b/rebar.config.script index fce6a2d2b..7c8ee3fd7 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -128,6 +128,7 @@ SubDirs = [ "src/custodian", "src/ddoc_cache", "src/dreyfus", + "src/nouveau", "src/fabric", "src/global_changes", "src/ioq", diff --git a/rel/apps/couch_epi.config b/rel/apps/couch_epi.config index a53721a48..882f1841e 100644 --- a/rel/apps/couch_epi.config +++ b/rel/apps/couch_epi.config @@ -18,5 +18,6 @@ global_changes_epi, mango_epi, mem3_epi, + nouveau_epi, setup_epi ]}. diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 41a88fef4..14b2a5362 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -809,6 +809,7 @@ state_dir = {{state_dir}} ; The name and location of the Clouseau Java service required to ; enable Search functionality. ;name = clouseau@127.0.0.1 +name = {{clouseau_name}} ; CouchDB will try to re-connect to Clouseau using a bounded ; exponential backoff with the following number of iterations. @@ -877,3 +878,6 @@ port = {{prometheus_port}} ; `false`, the expected n value is based on the number of available copies in ; the shard map. ;use_cluster_n_as_expected_n = false + +[nouveau] +enable = {{with_nouveau}} diff --git a/rel/reltool.config b/rel/reltool.config index b46885055..b9eb2f136 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -65,6 +65,7 @@ couch_prometheus, %% extra + nouveau, recon ]}, {rel, "start_clean", "", [kernel, stdlib]}, @@ -128,6 +129,7 @@ {app, couch_prometheus, [{incl_cond, include}]}, %% extra + {app, nouveau, [{incl_cond, include}]}, {app, recon, [{incl_cond, include}]} ]}. 
diff --git a/share/server/loop.js b/share/server/loop.js index 3ab303c21..6f8bc5c0f 100644 --- a/share/server/loop.js +++ b/share/server/loop.js @@ -25,7 +25,6 @@ function create_sandbox() { sandbox.send = Render.send; sandbox.getRow = Render.getRow; sandbox.isArray = isArray; - sandbox.index = Dreyfus.index; } catch (e) { var sandbox = {}; } @@ -38,6 +37,18 @@ function create_filter_sandbox() { return sandbox; }; +function create_dreyfus_sandbox() { + var sandbox = create_sandbox(); + sandbox.index = Dreyfus.index; + return sandbox; +} + +function create_nouveau_sandbox() { + var sandbox = create_sandbox(); + sandbox.index = Nouveau.index; + return sandbox; +} + // Commands are in the form of json arrays: // ["commandname",..optional args...]\n // @@ -48,7 +59,7 @@ var DDoc = (function() { "lists" : Render.list, "shows" : Render.show, "filters" : Filter.filter, - "views" : Filter.filter_view, + "views" : Filter.filter_view, "updates" : Render.update, "validate_doc_update" : Validate.validate, "rewrites" : Render.rewrite @@ -119,6 +130,7 @@ var Loop = function() { "add_lib" : State.addLib, "map_doc" : Views.mapDoc, "index_doc": Dreyfus.indexDoc, + "nouveau_index_doc": Nouveau.indexDoc, "reduce" : Views.reduce, "rereduce" : Views.rereduce }; diff --git a/share/server/nouveau.js b/share/server/nouveau.js new file mode 100644 index 000000000..8c75d4a25 --- /dev/null +++ b/share/server/nouveau.js @@ -0,0 +1,108 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +var Nouveau = (function () { + + var index_results = []; // holds temporary emitted values during index + + function handleIndexError(err, doc) { + if (err == "fatal_error") { + throw (["error", "map_runtime_error", "function raised 'fatal_error'"]); + } else if (err[0] == "fatal") { + throw (err); + } + var message = "function raised exception " + err.toSource(); + if (doc) message += " with doc._id " + doc._id; + log(message); + }; + + function assertType(name, expected, actual) { + if (typeof actual !== expected) { + throw ({ name: 'TypeError', message: 'type of ' + name + ' must be a ' + expected + ' not ' + typeof actual }); + } + }; + + function rejectReservedName(name) { + if (name.substring(0, 1) === '_') { + throw ({ name: 'ReservedName', message: 'name must not start with an underscore' }); + } + }; + + return { + index: function (doc) { + var type = arguments[0]; + var name = arguments[1]; + + assertType('type', 'string', type); + assertType('name', 'string', name); + + rejectReservedName(name); + + switch (type) { + case 'double': + case 'string': + var value = arguments[2]; + var options = arguments[3] || {}; + assertType('value', type == 'double' ? 
'number' : 'string', value); + index_results.push({ + '@type': type, + 'name': name, + 'value': value, + 'store': options.store, + 'facet': options.facet + }); + break; + case 'text': + var value = arguments[2]; + var options = arguments[3] || {}; + assertType('value', 'string', value); + index_results.push({ + '@type': type, + 'name': name, + 'value': value, + 'store': options.store + }); + break; + case 'stored': + var value = arguments[2]; + if (typeof value != 'number' && typeof value != 'string') { + throw ({ name: 'TypeError', message: 'type of ' + value + ' must be a string or number' }); + } + index_results.push({ + '@type': type, + 'name': name, + 'value': value + }); + break; + default: + throw ({ name: 'TypeError', message: type + ' not supported' }); + } + }, + + indexDoc: function (doc) { + Couch.recursivelySeal(doc); + var buf = []; + for (var fun in State.funs) { + index_results = []; + try { + State.funs[fun](doc); + buf.push(index_results); + } catch (err) { + handleIndexError(err, doc); + buf.push([]); + } + } + print(JSON.stringify(buf)); + } + + } +})(); diff --git a/share/server/state.js b/share/server/state.js index ff553dd57..a9b2f7ea0 100644 --- a/share/server/state.js +++ b/share/server/state.js @@ -19,9 +19,17 @@ var State = { gc(); print("true"); // indicates success }, - addFun : function(newFun) { + addFun : function(newFun, option) { // Compile to a function and add it to funs array - State.funs.push(Couch.compileFunction(newFun, {views : {lib : State.lib}})); + switch (option) { + case 'nouveau': + var sandbox = create_nouveau_sandbox(); + break; + default: + var sandbox = create_dreyfus_sandbox(); + break; + } + State.funs.push(Couch.compileFunction(newFun, {views : {lib : State.lib}}, undefined, sandbox)); print("true"); }, addLib : function(lib) { diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 03f93ef0e..7223ce4b3 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -39,6 +39,7 @@ error_info/1, parse_form/1, json_body/1, + json_body/2, json_body_obj/1, body/1, doc_etag/1, @@ -249,7 +250,7 @@ handle_request_int(MochiReq) -> P end, - Peer = MochiReq:get(peer), + Peer = peer(MochiReq), Method1 = case MochiReq:get(method) of @@ -789,14 +790,17 @@ body(#httpd{mochi_req = MochiReq, req_body = ReqBody}) -> validate_ctype(Req, Ctype) -> couch_httpd:validate_ctype(Req, Ctype). -json_body(#httpd{req_body = undefined} = Httpd) -> +json_body(#httpd{} = Httpd) -> + json_body(Httpd, []). + +json_body(#httpd{req_body = undefined} = Httpd, JsonDecodeOptions) -> case body(Httpd) of undefined -> throw({bad_request, "Missing request body"}); Body -> - ?JSON_DECODE(maybe_decompress(Httpd, Body)) + ?JSON_DECODE(maybe_decompress(Httpd, Body), JsonDecodeOptions) end; -json_body(#httpd{req_body = ReqBody}) -> +json_body(#httpd{req_body = ReqBody}, _JsonDecodeOptions) -> ReqBody. json_body_obj(Httpd) -> @@ -1079,6 +1083,8 @@ error_info({bad_ctype, Reason}) -> {415, <<"bad_content_type">>, Reason}; error_info(requested_range_not_satisfiable) -> {416, <<"requested_range_not_satisfiable">>, <<"Requested range not satisfiable">>}; +error_info({expectation_failed, Reason}) -> + {417, <<"expectation_failed">>, Reason}; error_info({error, {illegal_database_name, Name}}) -> Message = <<"Name: '", Name/binary, "'. 
Only lowercase characters (a-z), ", @@ -1100,6 +1106,8 @@ error_info({error, <<"endpoint has an invalid url">> = Reason}) -> {400, <<"invalid_replication">>, Reason}; error_info({error, <<"proxy has an invalid url">> = Reason}) -> {400, <<"invalid_replication">>, Reason}; +error_info({method_not_allowed, Reason}) -> + {405, <<"method_not_allowed">>, Reason}; error_info({gone, Reason}) -> {410, <<"gone">>, Reason}; error_info({missing_stub, Reason}) -> @@ -1120,6 +1128,8 @@ error_info(all_workers_died) -> "Nodes are unable to service this " "request due to overloading or maintenance mode." >>}; +error_info({internal_server_error, Reason}) -> + {500, <<"internal_server_error">>, Reason}; error_info(not_implemented) -> {501, <<"not_implemented">>, <<"this feature is not yet implemented">>}; error_info(timeout) -> @@ -1462,6 +1472,23 @@ get_user(#httpd{user_ctx = #user_ctx{name = User}}) -> get_user(#httpd{user_ctx = undefined}) -> "undefined". +peer(MochiReq) -> + Socket = MochiReq:get(socket), + case mochiweb_socket:peername(Socket) of + {ok, {{O1, O2, O3, O4}, Port}} -> + io_lib:format( + "~B.~B.~B.~B:~B", + [O1, O2, O3, O4, Port] + ); + {ok, {{O1, O2, O3, O4, O5, O6, O7, O8}, Port}} -> + io_lib:format( + "~B.~B.~B.~B.~B.~B.~B.~B:~B", + [O1, O2, O3, O4, O5, O6, O7, O8, Port] + ); + {error, _Reason} -> + MochiReq:get(peer) + end. + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index 0dedeba4d..fd3d75510 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -69,11 +69,14 @@ handle_welcome_req(Req, _) -> get_features() -> case dreyfus:available() of - true -> - [search | config:features()]; - false -> - config:features() - end. + true -> [search]; + false -> [] + end ++ + case nouveau:enabled() of + true -> [nouveau]; + false -> [] + end ++ + config:features(). handle_favicon_req(Req) -> handle_favicon_req(Req, get_docroot()). diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl index 0cb01d0f4..df81cfbb0 100644 --- a/src/couch/include/couch_db.hrl +++ b/src/couch/include/couch_db.hrl @@ -24,6 +24,7 @@ -define(JSON_ENCODE(V), couch_util:json_encode(V)). -define(JSON_DECODE(V), couch_util:json_decode(V)). +-define(JSON_DECODE(V, O), couch_util:json_decode(V, O)). -define(IS_OLD_RECORD(V, R), (tuple_size(V) /= tuple_size(R))). diff --git a/src/docs/src/api/ddoc/index.rst b/src/docs/src/api/ddoc/index.rst index 1eeb64760..ea06ca67f 100644 --- a/src/docs/src/api/ddoc/index.rst +++ b/src/docs/src/api/ddoc/index.rst @@ -31,5 +31,6 @@ from your database. common views search + nouveau render rewrites diff --git a/src/docs/src/api/ddoc/nouveau.rst b/src/docs/src/api/ddoc/nouveau.rst new file mode 100644 index 000000000..fb6731a5b --- /dev/null +++ b/src/docs/src/api/ddoc/nouveau.rst @@ -0,0 +1,142 @@ +.. Licensed under the Apache License, Version 2.0 (the "License"); you may not +.. use this file except in compliance with the License. You may obtain a copy of +.. the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +.. License for the specific language governing permissions and limitations under +.. the License. + +.. 
_api/ddoc/nouveau:
+
+=========================================
+``/{db}/_design/{ddoc}/_nouveau/{index}``
+=========================================
+
+.. warning::
+    Nouveau is an experimental feature. Future releases might change how the endpoints
+    work and might invalidate existing indexes.
+
+.. warning::
+    Nouveau endpoints require a running nouveau server.
+    See :ref:`Nouveau Server Installation <install/nouveau>` for details.
+
+.. versionadded:: 4.0
+
+.. http:get:: /{db}/_design/{ddoc}/_nouveau/{index}
+    :synopsis: Returns results for the specified nouveau index
+
+    Executes a nouveau request against the named index in the specified design document.
+
+    :param db: Database name
+    :param ddoc: Design document name
+    :param index: Nouveau index name
+
"`` or ``"-fieldname"`` for descending order, where + fieldname is the name of a string or number field, and ``type`` is either + ``double`` or ``string``. You can use a single string to sort by one field + or an array of strings to sort by several fields in the same order as the + array. + Some examples are ``"relevance"``, ``"bar"``, + ``"-foo"`` and [``"-foo"``, ``"bar"``]. + :query boolean update: Set to ``false`` to allow the use of an out-of-date index. + + :>header Content-Type: - :mimetype:`application/json` + + :>header Transfer-Encoding: ``chunked`` + + :>json array hits: Array of search hits. By default the information + returned contains only the document ID and revision. + :>json number total_hits: Number of matches for the query. + :>json string total_hits_relation: ``EQUAL_TO`` if ``total_hits`` is exact. + ``GREATER_THAN_OR_EQUAL_TO`` if not. + :>json string bookmark: Opaque identifier to enable pagination. + + :code 200: Request completed successfully + :code 400: Invalid request + :code 401: Read permission required + :code 404: Specified database, design document or view is missed + +.. note:: + Faceting is not supported on partitioned searches, so the following + query parameters should not be used on those requests: ``counts`` and + ``ranges``. + +.. seealso:: + For more information about how nouveau works, see the + :ref:`Nouveau User Guide`. + +============================================== +``/{db}/_design/{ddoc}/_nouveau_info/{index}`` +============================================== + +.. warning:: + Nouveau is an experimental feature. Future releases might change how the endpoints + work and might invalidate existing indexes. + +.. warning:: + Nouveau endpoints require a running nouveau server. + See :ref:`Nouveau Server Installation ` for details. + +.. versionadded:: 4.0 + +.. http:get:: /{db}/_design/{ddoc}/_nouveau_info/{index} + :synopsis: Returns metadata for the specified nouveau index + + :param db: Database name + :param ddoc: Design document name + :param index: Search index name + :code 200: Request completed successfully + :code 400: Request body is wrong (malformed or missing one of the mandatory fields) + :code 500: A server error (or other kind of error) occurred + +**Request**: + +.. code-block:: http + + GET /recipes/_design/cookbook/_search_info/ingredients HTTP/1.1 + Accept: application/json + Host: localhost:5984 + +**Response**: + +.. code-block:: http + + HTTP/1.1 200 OK + Content-Type: application/json + + { + "name": "_design/cookbook/ingredients", + "search_index": { + "num_docs": 1000, + "update_seq": 5000, + "disk_size": 1048576 + } + } diff --git a/src/docs/src/api/server/common.rst b/src/docs/src/api/server/common.rst index 95478bf10..9e645f649 100644 --- a/src/docs/src/api/server/common.rst +++ b/src/docs/src/api/server/common.rst @@ -2045,7 +2045,7 @@ See :ref:`Configuration of Prometheus Endpoint ` for details. Tests the results of Lucene analyzer tokenization on sample text. - :param field: Type of analyzer + :param analyzer: Type of analyzer :param text: Analyzer token you want to test :code 200: Request completed successfully :code 400: Request body is wrong (malformed or missing one of the mandatory fields) @@ -2063,6 +2063,53 @@ See :ref:`Configuration of Prometheus Endpoint ` for details. **Response**: +.. code-block:: javascript + + { + "tokens": [ + "run" + ] + } + +.. _api/server/nouveau_analyze: + +========================================== +``/_nouveau_analyze`` +========================================== + +.. 
+.. warning::
+    Nouveau is an experimental feature. Future releases might change how the endpoints
+    work and might invalidate existing indexes.
+
+.. warning::
+    Nouveau endpoints require a running nouveau server.
+    See :ref:`Nouveau Server Installation <install/nouveau>` for details.
+
+.. versionadded:: 4.0
+
+.. http:post:: /_nouveau_analyze
+    :synopsis: Tests the results of analyzer tokenization
+
+    Tests the results of Lucene analyzer tokenization on sample text.
+
+    :param analyzer: Name of analyzer
+    :param text: Analyzer token you want to test
+    :code 200: Request completed successfully
+    :code 400: Request body is wrong (malformed or missing one of the mandatory fields)
+    :code 500: A server error (or other kind of error) occurred
+
+**Request**:
+
+.. code-block:: http
+
+    POST /_nouveau_analyze HTTP/1.1
+    Host: localhost:5984
+    Content-Type: application/json
+
+    {"analyzer":"english", "text":"running"}
+
+**Response**:
+
+.. code-block:: javascript
+
+    {
+        "tokens": [
+            "run"
+        ]
+    }
diff --git a/src/docs/src/config/query-servers.rst b/src/docs/src/config/query-servers.rst
index 3bd99c439..cf6963fdc 100644
--- a/src/docs/src/config/query-servers.rst
+++ b/src/docs/src/config/query-servers.rst
@@ -241,6 +241,34 @@ CouchDB's search subsystem can be configured via the ``dreyfus`` configuration s
     this config setting is not defined, CouchDB will use the value of
     ``max_limit`` instead. If neither is defined, the default is ``2000``.
 
+Nouveau
+=======
+
+CouchDB's experimental search subsystem can be configured via the
+``nouveau`` configuration section.
+
+.. config:section:: nouveau :: Nouveau Server Configuration
+
+    .. config:option:: enable :: Whether nouveau is enabled
+
+        Set to ``true`` to enable Nouveau. If disabled, all nouveau
+        endpoints return 404 Not Found. Defaults to ``false``.
+
+    .. config:option:: url :: Nouveau Server location
+
+        The URL to a running nouveau server. Defaults to
+        ``http://127.0.0.1:8080``.
+
+    .. config:option:: max_sessions :: Maximum number of ibrowse sessions
+
+        Nouveau will configure ibrowse max_sessions to this value for
+        the configured ``url``. Defaults to ``100``.
+
+    .. config:option:: max_pipeline_size :: Max pipeline size
+
+        Nouveau will configure ibrowse max_pipeline_size to this value
+        for the configured ``url``. Defaults to ``1000``.
+
 .. _config/mango:
 
 Mango
diff --git a/src/docs/src/ddocs/index.rst b/src/docs/src/ddocs/index.rst
index ad11d8553..8c8a5449b 100644
--- a/src/docs/src/ddocs/index.rst
+++ b/src/docs/src/ddocs/index.rst
@@ -27,6 +27,7 @@ replications.
     ddocs
     views/index
     search
+    nouveau
 
 *Note*: Previously, the functionality provided by CouchDB's design documents,
 in combination with document attachments, was referred to as "CouchApps." The
diff --git a/src/docs/src/ddocs/nouveau.rst b/src/docs/src/ddocs/nouveau.rst
new file mode 100644
index 000000000..7a9e6d217
--- /dev/null
+++ b/src/docs/src/ddocs/nouveau.rst
@@ -0,0 +1,692 @@
+.. Licensed under the Apache License, Version 2.0 (the "License"); you may not
+.. use this file except in compliance with the License. You may obtain a copy of
+.. the License at
+..
+.. http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+.. License for the specific language governing permissions and limitations under
+.. the License.
+
+.. _ddoc/nouveau:
+
+=======
+Nouveau
+=======
+
+.. warning::
+    Nouveau is an experimental feature. Future releases might change how the endpoints
+    work and might invalidate existing indexes.
+
+Nouveau indexes enable you to query a database by using the
+`Lucene Query Parser Syntax`_.
+A nouveau index uses one or more fields from your documents. You can use a nouveau
+index to run queries to find documents based on the content they contain.
+
+.. warning::
+    Nouveau cannot function without a running nouveau server.
+    See :ref:`Nouveau Server Installation <install/nouveau>` for details.
+
+To create a nouveau index, you add a JavaScript function to a design document in the
+database. An index builds after processing one search request or after the server detects
+a document update. The ``index`` function takes the following parameters:
+
+#. Field type - The type of the field, which can be ``string``, ``text``, ``double``
+   or ``stored``.
+#. Field name - The name of the field you want to use when you query the index.
+   If you set this parameter to ``default``, then this field is queried if no field is
+   specified in the query syntax.
+#. Data that you want to index, for example, ``doc.address.country``.
+#. (Optional) An options object, which includes the following field: ``store``.
+
+By default, a nouveau index response returns 25 rows. The number of hits that are returned
+can be changed by using the ``limit`` parameter. Each response includes a ``bookmark``
+field. You can include the value of the ``bookmark`` field in subsequent queries to fetch
+results from deeper in the result set.
+
+*Example design document that defines a nouveau index:*
+
+.. code-block:: javascript
+
+    {
+        "_id": "_design/nouveau_example",
+        "nouveau": {
+            "animals": {
+                "index": "function(doc){ ... }"
+            }
+        }
+    }
+
+A nouveau index will inherit the partitioning type from the ``options.partitioned`` field
+of the design document that contains it.
+
+Index functions
+===============
+
+Attempting to index by using a data field that does not exist fails. To avoid
+this problem, use the appropriate
+:ref:`guard clause <ddoc/nouveau/index_guard_clauses>`.
+
+.. note::
+    Your indexing functions operate in a memory-constrained environment
+    where the document itself forms a part of the memory that is used
+    in that environment. Your code's stack and document must fit inside this
+    memory. In other words, a document must be loaded in order to be indexed.
+    Documents are limited to a maximum size of 64 MB.
+
+The function that is contained in the index field is a JavaScript function
+that is called for each document in the database.
+The function takes the document as a parameter,
+extracts some data from it, and then calls the function that is defined
+in the ``index`` field to index that data.
+
+The ``index`` function takes four parameters, where the fourth parameter is optional.
+
+#. The first parameter is the type of the field.
+
+#. The second parameter is the name of the field you intend to use
+   when querying the index, and which is specified in the Lucene
+   syntax portion of subsequent queries. An example appears in the
+   following query:
+
+   .. code-block:: javascript
+
+       q=color:red
+
+   The Lucene field name ``color`` is the second parameter of the ``index`` function.
+
+   If the special value ``"default"`` is used when you define the name,
+   you do not have to specify a field name at query time.
+   The effect is that the query can be simplified:
+
+   .. code-block:: javascript
+
+       q=red
+
+#. The third parameter is the data to be indexed.
+   Keep the following information in mind when you index your data:
+
+   - This data must be only a string, number, or boolean. Other types will cause
+     an error to be thrown by the index function call.
+
+   - If an error is thrown when running your function, for this reason or others,
+     the document will not be added to that search index.
+
+#. The fourth, optional, parameter is a JavaScript object with the following field:
+
+   *Index function (optional parameter)*
+
+   * **store** - If ``true``, the value is returned in the search result; otherwise,
+     the value is not returned. Values are ``true`` or ``false``. Default is ``false``.
+
+   .. note::
+
+       If you do not set the ``store`` parameter,
+       the index data results for the document are not returned in response to a query.
+
+*Example search index function:*
+
+.. code-block:: javascript
+
+    function(doc) {
+        if (typeof(doc.min_length) == 'number') {
+            index("double", "min_length", doc.min_length, {"store": true});
+        }
+        if (typeof(doc.diet) == 'string') {
+            index("string", "diet", doc.diet, {"store": true});
+        }
+        if (typeof(doc.latin_name) == 'string') {
+            index("string", "latin_name", doc.latin_name, {"store": true});
+        }
+        if (typeof(doc.class) == 'string') {
+            index("string", "class", doc.class, {"store": true});
+        }
+    }
+
+.. _ddoc/nouveau/index_guard_clauses:
+
+Index guard clauses
+-------------------
+
+Runtime errors in the index function cause the document not to be indexed at all. The
+most common runtime errors are described below:
+
+*Example of failing to check whether the indexed value exists:*
+
+.. warning:: example of bad code
+.. code-block:: javascript
+
+    index("double", "min_length", doc.min_length, {"store": true});
+
+For documents without a ``min_length`` value, this index call will
+pass ``undefined`` as the value. This will be rejected by nouveau's
+validation function and the document will not be indexed.
+
+*Example of failing to check whether the nested indexed value exists:*
+
+.. warning:: example of bad code
+.. code-block:: javascript
+
+    if (doc.foo.bar) {
+        index("string", "bar", doc.foo.bar, {"store": true});
+    }
+
+This bad example fails in a different way if ``doc.foo`` doesn't
+exist; the evaluation of ``doc.foo.bar`` throws an exception.
+
+.. code-block:: javascript
+
+    if (doc.foo && typeof(doc.foo) == 'object' && typeof(doc.foo.bar) == 'string') {
+        index("string", "bar", doc.foo.bar, {"store": true});
+    }
+
+This example correctly checks that ``doc.foo`` is an object and its
+``bar`` entry is a string.
+
+*Example of checking the index value exists but disallowing valid false values:*
+
+.. warning:: example of bad code
+.. code-block:: javascript
+
+    if (doc.min_length) {
+        index("double", "min_length", doc.min_length, {"store": true});
+    }
+
+We correct the previous mistake so documents without ``min_length`` are
+indexed (assuming there are other index calls for values that *do*
+exist) but we've accidentally prevented the indexing of the
+``min_length`` field if ``doc.min_length`` happens to be ``0``.
+
+.. code-block:: javascript
+
+    if (typeof(doc.min_length) == 'number') {
+        index("double", "min_length", doc.min_length, {"store": true});
+    }
+
+This good example ensures we index any document where ``min_length`` is a number.
+
+.. _ddoc/nouveau/analyzers:
+
+Analyzers
+=========
+
+Analyzers convert textual input into ``tokens`` which can be searched
+on. Analyzers typically have different rules for how they break up
+input into tokens: they might convert all text to lower case, omit
+whole words (typically words so common they are unlikely to be useful
+for searching), or omit parts of words (removing ``ing`` suffixes in
+English, for example).
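+
+The ``english`` analyzer, for instance, stems its input: the API reference's
+``_nouveau_analyze`` example analyzes the text ``running`` and returns the
+single token ``run``. You can try it from the command line:
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/_nouveau_analyze' -H 'Content-Type: application/json' \
+        -d '{"analyzer":"english", "text":"running"}'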
+
+We expose a large number of Lucene's analyzers, and we add one of our
+own (``simple_asciifolding``):
+
+* arabic
+* armenian
+* basque
+* bulgarian
+* catalan
+* chinese
+* cjk
+* classic
+* czech
+* danish
+* dutch
+* email
+* english
+* finnish
+* french
+* galician
+* german
+* hindi
+* hungarian
+* indonesian
+* irish
+* italian
+* japanese
+* keyword
+* latvian
+* norwegian
+* persian
+* polish
+* portugese
+* romanian
+* russian
+* simple
+* simple_asciifolding
+* spanish
+* standard
+* swedish
+* thai
+* turkish
+* whitespace
+
+*Example analyzer document:*
+
+.. code-block:: javascript
+
+    {
+        "_id": "_design/analyzer_example",
+        "nouveau": {
+            "INDEX_NAME": {
+                "index": "function (doc) { ... }",
+                "default_analyzer": "$ANALYZER_NAME"
+            }
+        }
+    }
+
+.. _ddoc/nouveau/field-analyzers:
+
+Field analyzers
+---------------
+
+You may optionally specify a different analyzer for a specific field.
+
+*Example of defining different analyzers for different fields:*
+
+.. code-block:: javascript
+
+    {
+        "_id": "_design/analyzer_example",
+        "nouveau": {
+            "INDEX_NAME": {
+                "default_analyzer": "english",
+                "field_analyzers": {
+                    "spanish": "spanish",
+                    "german": "german"
+                },
+                "index": "function (doc) { ... }"
+            }
+        }
+    }
+
+Testing analyzer tokenization
+-----------------------------
+
+You can test the results of analyzer tokenization by posting sample data to the
+``_nouveau_analyze`` endpoint.
+
+*Example of using HTTP to test the keyword analyzer:*
+
+.. code-block:: http
+
+    POST /_nouveau_analyze HTTP/1.1
+    Content-Type: application/json
+
+    {"analyzer":"keyword", "text":"ablanks@renovations.com"}
+
+*Example of using the command line to test the keyword analyzer:*
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/_nouveau_analyze' -H 'Content-Type: application/json' \
+        -d '{"analyzer":"keyword", "text":"ablanks@renovations.com"}'
+
+*Result of testing the keyword analyzer:*
+
+.. code-block:: javascript
+
+    {
+        "tokens": [
+            "ablanks@renovations.com"
+        ]
+    }
+
+*Example of using HTTP to test the standard analyzer:*
+
+.. code-block:: http
+
+    POST /_nouveau_analyze HTTP/1.1
+    Content-Type: application/json
+
+    {"analyzer":"standard", "text":"ablanks@renovations.com"}
+
+*Example of using the command line to test the standard analyzer:*
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/_nouveau_analyze' -H 'Content-Type: application/json' \
+        -d '{"analyzer":"standard", "text":"ablanks@renovations.com"}'
+
+*Result of testing the standard analyzer:*
+
+.. code-block:: javascript
+
+    {
+        "tokens": [
+            "ablanks",
+            "renovations.com"
+        ]
+    }
+
+Queries
+=======
+
+After you create a search index, you can query it.
+
+- Issue a partition query using:
+  ``GET /$DATABASE/_partition/$PARTITION_KEY/_design/$DDOC/_nouveau/$INDEX_NAME``
+- Issue a global query using:
+  ``GET /$DATABASE/_design/$DDOC/_nouveau/$INDEX_NAME``
+
+Specify your search by using the ``q`` parameter.
+
+*Example of using HTTP to query a partitioned index:*
+
+.. code-block:: http
+
+    GET /$DATABASE/_partition/$PARTITION_KEY/_design/$DDOC/_nouveau/$INDEX_NAME?include_docs=true&query="*:*"&limit=1 HTTP/1.1
+    Content-Type: application/json
+
+*Example of using HTTP to query a global index:*
+
+.. code-block:: http
+
+    GET /$DATABASE/_design/$DDOC/_nouveau/$INDEX_NAME?include_docs=true&query="*:*"&limit=1 HTTP/1.1
+    Content-Type: application/json
+
+*Example of using the command line to query a partitioned index:*
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/$DATABASE/_partition/$PARTITION_KEY/_design/$DDOC/_nouveau/$INDEX_NAME?include_docs=true&query="*:*"&limit=1'
+
+*Example of using the command line to query a global index:*
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/$DATABASE/_design/$DDOC/_nouveau/$INDEX_NAME?include_docs=true&query="*:*"&limit=1'
+
+.. _ddoc/nouveau/query_parameters:
+
+Query Parameters
+----------------
+
+A full list of query parameters can be found in the
+:ref:`API Reference <api/ddoc/nouveau>`.
+
+.. note::
+    Do not combine the ``bookmark`` and ``update`` options. These options
+    constrain the choice of shard replicas to use for the response. When used
+    together, the options might cause problems when contact is attempted
+    with replicas that are slow or not available.
+
+Relevance
+---------
+
+When more than one result might be returned, it is possible for them to be sorted. By
+default, the sorting order is determined by 'relevance'.
+
+Relevance is measured according to `Apache Lucene Scoring `_.
+As an example, if you search a simple database for the word
+``example``, two documents might contain the word. If one document
+mentions the word ``example`` 10 times, but the second document
+mentions it only twice, then the first document is considered to be
+more 'relevant'.
+
+If you do not provide a ``sort`` parameter, relevance is used by default. The highest
+scoring matches are returned first.
+
+If you provide a ``sort`` parameter, then matches are returned in that order, ignoring
+relevance.
+
+If you want to use a ``sort`` parameter, and also include ordering by relevance in your
+search results, use the special fields ``-<score>`` or ``<score>`` within the ``sort``
+parameter.
+
+POSTing search queries
+----------------------
+
+Instead of using the ``GET`` HTTP method, you can also use ``POST``. The main advantage of
+``POST`` queries is that they can have a request body, so you can specify the request as a
+JSON object. Each parameter in the query string of a ``GET`` request corresponds to a
+field in the JSON object in the request body.
+
+*Example of using HTTP to POST a search request:*
+
+.. code-block:: http
+
+    POST /db/_design/ddoc/_nouveau/searchname HTTP/1.1
+    Content-Type: application/json
+
+*Example of using the command line to POST a search request:*
+
+.. code-block:: sh
+
+    curl 'https://$HOST:5984/db/_design/ddoc/_nouveau/searchname' -X POST -H 'Content-Type: application/json' -d @search.json
+
+*Example JSON document that contains a search request:*
+
+.. code-block:: javascript
+
+    {
+        "q": "index:my query",
+        "sort": "foo",
+        "limit": 3
+    }
+
+Query syntax
+============
+
+The CouchDB search query syntax is based on the
+`Lucene syntax`_.
+Search queries take the form of ``name:value`` unless the name is omitted, in which case
+they use the default field, as demonstrated in the following examples:
+
+*Example search query expressions:*
+
+.. code-block:: text
+
+    // Birds
+    class:bird
+
+.. code-block:: text
+
+    // Animals that begin with the letter "l"
+    l*
+
+.. code-block:: text
+
+    // Carnivorous birds
+    class:bird AND diet:carnivore
+
+.. code-block:: text
+
+    // Herbivores that start with letter "l"
+    l* AND diet:herbivore
+
+Wildcard searches are supported, for both single (``?``) and multiple (``*``) character
+searches. For example, ``dat?`` would match ``date`` and ``data``, whereas ``dat*`` would
+match ``date``, ``data``, ``database``, and ``dates``. Wildcards must come after the
+search term.
+
+Use ``*:*`` to return all results.
+
+The following characters require escaping if you want to search on them:
+
+.. code-block:: text
+
+    + - && || ! ( ) { } [ ] ^ " ~ * ? : \ /
+
+To escape one of these characters, use a preceding backslash character (``\``).
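+
+For example, to search a hypothetical ``part`` field for the literal
+term ``R2-D2``, escape the hyphen so that it is not parsed as the
+``-`` (NOT) operator:
+
+.. code-block:: text
+
+    // Matches the term "R2-D2" rather than excluding "D2"
+    part:R2\-D2
+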
+The response to a search query contains an ``order`` field for each of the results. The
+``order`` field is an array where the first element is the field or fields that are
+specified in the ``sort`` parameter. See the
+:ref:`sort parameter `. If no ``sort`` parameter is included
+in the query, then the ``order`` field contains the `Lucene relevance score
+`_.
+
+.. _ddoc/nouveau/faceting:
+
+Faceting
+--------
+
+Nouveau Search also supports faceted searching, enabling discovery of aggregate
+information about matches quickly and easily. You can match all documents by using the
+special ``?q=*:*`` query syntax, and use the returned facets to refine your query.
+
+*Example of an index function that defines faceted fields:*
+
+.. code-block:: javascript
+
+    function(doc) {
+        index("string", "type", doc.type);
+        index("double", "price", doc.price);
+    }
+
+To use facets, all the documents in the index must include all the fields that have
+faceting enabled. If your documents do not include all the fields, you receive a
+``bad_request`` error with a reason of the form "The ``field_name`` does not exist."
+If some documents do not contain all the facet fields, either create a separate index
+for each field, or index only those documents that contain all the fields. You can
+verify that the fields exist in each document by using a single ``if`` statement.
+
+*Example if statement to verify that the required fields exist in each document:*
+
+.. code-block:: javascript
+
+    if (typeof doc.town == "string" && typeof doc.name == "string") {
+        index("string", "town", doc.town);
+        index("string", "name", doc.name);
+    }
+
+Counts
+------
+
+.. note::
+    The ``counts`` option is only available when making global queries.
+
+The ``counts`` facet syntax takes a list of fields, and returns the number of query
+results for each unique value of each named field.
+
+.. note::
+    The ``count`` operation works only if the indexed values are strings. The
+    indexed values cannot be mixed types. For example, if 100 strings are
+    indexed, and one number, then the index cannot be used for ``count``
+    operations. You can check the type by using the ``typeof`` operator, and
+    convert it by using the ``parseInt``, ``parseFloat``, or ``.toString()``
+    functions.
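+
+As a sketch (assuming documents that carry ``type`` and ``price``
+fields), an index function can use these checks to make sure each
+field is always indexed with a single type:
+
+.. code-block:: javascript
+
+    function(doc) {
+        // only index "type" when it is a string, so counts=["type"] works
+        if (typeof doc.type == "string") {
+            index("string", "type", doc.type);
+        }
+        // coerce "price" to a number so it remains usable with ranges
+        if (typeof doc.price == "string") {
+            index("double", "price", parseFloat(doc.price));
+        } else if (typeof doc.price == "number") {
+            index("double", "price", doc.price);
+        }
+    }
+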
+*Example of a query using the counts facet syntax:*
+
+.. code-block:: text
+
+    ?q=*:*&counts=["type"]
+
+*Example response after using the counts facet syntax:*
+
+.. code-block:: javascript
+
+    {
+        "total_rows":100000,
+        "bookmark":"g...",
+        "rows":[...],
+        "counts":{
+            "type":{
+                "sofa": 10,
+                "chair": 100,
+                "lamp": 97
+            }
+        }
+    }
+
+Ranges
+------
+
+.. note::
+    The ``ranges`` option is only available when making global queries.
+
+The ``ranges`` facet syntax reuses the standard Lucene syntax for ranges to return counts
+of results that fit into each specified category. Inclusive range queries are denoted by
+brackets (``[``, ``]``). Exclusive range queries are denoted by curly brackets (``{``,
+``}``).
+
+.. note::
+    The ``range`` operation works only if the indexed values are numbers. The indexed
+    values cannot be mixed types. For example, if 100 strings are indexed, and one number,
+    then the index cannot be used for ``range`` operations. You can check the type by
+    using the ``typeof`` operator, and convert it by using the ``parseInt``,
+    ``parseFloat``, or ``.toString()`` functions.
+
+*Example of a request that uses faceted search for matching ranges:*
+
+.. code-block:: text
+
+    ?q=*:*&ranges={"price":{"cheap":"[0 TO 100]","expensive":"{100 TO Infinity}"}}
+
+*Example results after a ranges check on a faceted search:*
+
+.. code-block:: javascript
+
+    {
+        "total_rows":100000,
+        "bookmark":"g...",
+        "rows":[...],
+        "ranges": {
+            "price": {
+                "expensive": 278682,
+                "cheap": 257023
+            }
+        }
+    }
diff --git a/src/docs/src/experimental.rst b/src/docs/src/experimental.rst
index c5dc4d434..9a977e678 100644
--- a/src/docs/src/experimental.rst
+++ b/src/docs/src/experimental.rst
@@ -37,4 +37,14 @@ sent for everything in /_utils.
 
 Then restart CouchDB.
 
+Nouveau Server (new Apache Lucene integration)
+==============================================
+
+Enable nouveau in the config and run the Java service:
+
+.. code-block:: ini
+
+    [nouveau]
+    enable = true
+
 Have fun!
diff --git a/src/docs/src/install/index.rst b/src/docs/src/install/index.rst
index fc8f6f62d..1c20c9027 100644
--- a/src/docs/src/install/index.rst
+++ b/src/docs/src/install/index.rst
@@ -27,5 +27,6 @@ Installation
     snap
     kubernetes
     search
+    nouveau
     upgrading
     troubleshooting
diff --git a/src/docs/src/install/nouveau.rst b/src/docs/src/install/nouveau.rst
new file mode 100644
index 000000000..ca39c9f09
--- /dev/null
+++ b/src/docs/src/install/nouveau.rst
@@ -0,0 +1,59 @@
+.. Licensed under the Apache License, Version 2.0 (the "License"); you may not
+.. use this file except in compliance with the License. You may obtain a copy of
+.. the License at
+..
+.. http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+.. License for the specific language governing permissions and limitations under
+.. the License.
+
+.. _install/nouveau:
+
+===========================
+Nouveau Server Installation
+===========================
+
+.. versionadded:: 4.0
+
+.. highlight:: ini
+
+CouchDB can build and query full-text search indexes using an external Java
+service that embeds `Apache Lucene `_. Typically, this
+service is installed on the same host as CouchDB and communicates with it over
+the loopback network.
+
+Nouveau server is runtime-compatible with Java 11 or higher.
+
+Installation of Binary Packages
+===============================
+
+The nouveau server code is contained in a single ``jar`` file that ships with a
+compatible CouchDB release, named ``server-$version-dist.jar``.
+
+To start the nouveau server::
+
+    java -jar /path/to/server-$version-dist.jar server /path/to/nouveau.yaml
+
+We ship a basic ``nouveau.yaml`` configuration with useful defaults; see that
+file for details.
+
+**nouveau.yaml**::
+
+    maxIndexesOpen: 100
+    commitIntervalSeconds: 30
+    idleSeconds: 60
+    rootDir: target/indexes
+
+As a `DropWizard `_ project, you can also use the many
+configuration options that DropWizard supports. See the `configuration reference
+`_.
+
+By default Nouveau will attempt a clean shutdown when sent a ``TERM``
+signal: it commits any outstanding index updates, completes any
+in-progress segment merges, and finally closes all indexes. This is
+not essential; you may safely kill the JVM without letting it finish,
+though any uncommitted changes are lost. That indexing work will be
+attempted again once the JVM is restarted.
diff --git a/src/dreyfus/src/dreyfus_index.erl b/src/dreyfus/src/dreyfus_index.erl index df3e68f84..69f44b52e 100644 --- a/src/dreyfus/src/dreyfus_index.erl +++ b/src/dreyfus/src/dreyfus_index.erl @@ -362,7 +362,7 @@ index_name(#index{dbname = DbName, ddoc_id = DDocId, name = IndexName}) -> args_to_proplist(#index_query_args{} = Args) -> [ - {'query', Args#index_query_args.q}, + {query, Args#index_query_args.q}, {partition, Args#index_query_args.partition}, {limit, Args#index_query_args.limit}, {refresh, Args#index_query_args.stale =:= false}, @@ -381,7 +381,7 @@ args_to_proplist(#index_query_args{} = Args) -> args_to_proplist2(#index_query_args{} = Args) -> [ - {'query', Args#index_query_args.q}, + {query, Args#index_query_args.q}, {field, Args#index_query_args.grouping#grouping.by}, {refresh, Args#index_query_args.stale =:= false}, {groups, Args#index_query_args.grouping#grouping.groups}, diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl index 3fb8d9031..382c8942e 100644 --- a/src/ken/src/ken_server.erl +++ b/src/ken/src/ken_server.erl @@ -160,6 +160,16 @@ handle_cast({trigger_update, #job{name = {_, _, hastings}, server = GPid, seq = Now = erlang:monotonic_time(), ets:insert(ken_workers, Job#job{worker_pid = Pid, lru = Now}), {noreply, State, 0}; +handle_cast({trigger_update, #job{name = {_, Index, nouveau}} = Job}, State) -> + % nouveau_index_manager:update_index will trigger a search index update. + {Pid, _} = erlang:spawn_monitor( + nouveau_index_manager, + update_index, + [Index] + ), + Now = erlang:monotonic_time(), + ets:insert(ken_workers, Job#job{worker_pid = Pid, lru = Now}), + {noreply, State, 0}; % search index job names have 3 elements. See job record definition. handle_cast({trigger_update, #job{name = {_, _, _}, server = GPid, seq = Seq} = Job}, State) -> % dreyfus_index:await will trigger a search index update. @@ -318,8 +328,9 @@ update_ddoc_indexes(Name, #doc{} = Doc, State) -> end, SearchUpdated = search_updated(Name, Doc, Seq, State), STUpdated = st_updated(Name, Doc, Seq, State), - case {ViewUpdated, SearchUpdated, STUpdated} of - {ok, ok, ok} -> ok; + NouveauUpdated = nouveau_updated(Name, Doc, Seq, State), + case {ViewUpdated, SearchUpdated, STUpdated, NouveauUpdated} of + {ok, ok, ok, ok} -> ok; _ -> resubmit end. @@ -359,6 +370,19 @@ st_updated(_Name, _Doc, _Seq, _State) -> ok. -endif. +nouveau_updated(Name, Doc, Seq, State) -> + case should_update(Doc, <<"indexes">>) of + true -> + try nouveau_util:design_doc_to_indexes(Name, Doc) of + SIndexes -> update_ddoc_nouveau_indexes(Name, SIndexes, Seq, State) + catch + _:_ -> + ok + end; + false -> + ok + end. + should_update(#doc{body = {Props}}, IndexType) -> case couch_util:get_value(<<"autoupdate">>, Props) of false -> @@ -440,6 +464,24 @@ update_ddoc_st_indexes(DbName, Indexes, Seq, State) -> end. -endif. +update_ddoc_nouveau_indexes(DbName, Indexes, Seq, State) -> + if + Indexes =/= [] -> + % Spawn a job for each search index in the ddoc + lists:foldl( + fun(Index, Acc) -> + case maybe_start_job({DbName, Index, nouveau}, nil, Seq, State) of + resubmit -> resubmit; + _ -> Acc + end + end, + ok, + Indexes + ); + true -> + ok + end. 
+ should_start_job(#job{name = Name, seq = Seq, server = Pid}, State) -> Threshold = list_to_integer(config("max_incremental_updates", "1000")), IncrementalChannels = list_to_integer(config("incremental_channels", "80")), @@ -465,6 +507,9 @@ should_start_job(#job{name = Name, seq = Seq, server = Pid}, State) -> {ok, MRSt} = couch_index:get_state(Pid, 0), CurrentSeq = couch_mrview_index:get(update_seq, MRSt), (Seq - CurrentSeq) < Threshold; + % Nouveau has three elements + {_, Index, nouveau} -> + nouveau_index_updater:outdated(Index); % Search name has three elements. {_, _, _} -> {ok, _IndexPid, CurrentSeq} = dreyfus_index:await(Pid, 0), diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index e9db4c3cf..a3805159d 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -31,11 +31,13 @@ -define(CURSOR_MODULES, [ mango_cursor_view, mango_cursor_text, + mango_cursor_nouveau, mango_cursor_special ]). -else. -define(CURSOR_MODULES, [ mango_cursor_view, + mango_cursor_nouveau, mango_cursor_special ]). -endif. diff --git a/src/mango/src/mango_cursor_nouveau.erl b/src/mango/src/mango_cursor_nouveau.erl new file mode 100644 index 000000000..8a6525cca --- /dev/null +++ b/src/mango/src/mango_cursor_nouveau.erl @@ -0,0 +1,293 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mango_cursor_nouveau). + +-export([ + create/4, + explain/1, + execute/3 +]). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("nouveau/include/nouveau.hrl"). +-include("mango_cursor.hrl"). +-include("mango.hrl"). + +-record(cacc, { + selector, + dbname, + ddocid, + idx_name, + query_args, + bookmark, + limit, + skip, + user_fun, + user_acc, + fields, + execution_stats +}). + +create(Db, Indexes, Selector, Opts) -> + Index = + case Indexes of + [Index0] -> + Index0; + _ -> + ?MANGO_ERROR(multiple_nouveau_indexes) + end, + + NouveauLimit = get_nouveau_limit(), + Limit = erlang:min(NouveauLimit, couch_util:get_value(limit, Opts, mango_opts:default_limit())), + Skip = couch_util:get_value(skip, Opts, 0), + Fields = couch_util:get_value(fields, Opts, all_fields), + + {ok, #cursor{ + db = Db, + index = Index, + ranges = null, + selector = Selector, + opts = Opts, + limit = Limit, + skip = Skip, + fields = Fields + }}. + +explain(Cursor) -> + #cursor{ + selector = Selector, + opts = Opts + } = Cursor, + [ + {query, mango_selector_text:convert(Selector)}, + {partition, get_partition(Opts, null)}, + {sort, sort_query(Opts, Selector)} + ]. 
+
+execute(Cursor, UserFun, UserAcc) ->
+    #cursor{
+        db = Db,
+        index = Idx,
+        limit = Limit,
+        skip = Skip,
+        selector = Selector,
+        opts = Opts,
+        execution_stats = Stats
+    } = Cursor,
+    Query = mango_selector_text:convert(Selector),
+    QueryArgs = #{
+        query => Query,
+        partition => get_partition(Opts, null),
+        sort => sort_query(Opts, Selector)
+    },
+    CAcc = #cacc{
+        selector = Selector,
+        dbname = couch_db:name(Db),
+        ddocid = ddocid(Idx),
+        idx_name = mango_idx:name(Idx),
+        bookmark = get_bookmark(Opts),
+        limit = Limit,
+        skip = Skip,
+        query_args = QueryArgs,
+        user_fun = UserFun,
+        user_acc = UserAcc,
+        fields = Cursor#cursor.fields,
+        execution_stats = mango_execution_stats:log_start(Stats)
+    },
+    try
+        case Query of
+            <<>> ->
+                throw({stop, CAcc});
+            _ ->
+                execute(CAcc)
+        end
+    catch
+        throw:{stop, FinalCAcc} ->
+            #cacc{
+                bookmark = FinalBM,
+                user_fun = UserFun,
+                user_acc = LastUserAcc,
+                execution_stats = Stats0
+            } = FinalCAcc,
+            JsonBM = nouveau_bookmark:pack(FinalBM),
+            Arg = {add_key, bookmark, JsonBM},
+            {_Go, FinalUserAcc} = UserFun(Arg, LastUserAcc),
+            FinalUserAcc0 = mango_execution_stats:maybe_add_stats(
+                Opts, UserFun, Stats0, FinalUserAcc
+            ),
+            FinalUserAcc1 = mango_cursor:maybe_add_warning(UserFun, Cursor, Stats0, FinalUserAcc0),
+            {ok, FinalUserAcc1}
+    end.
+
+execute(CAcc) ->
+    case search_docs(CAcc) of
+        {ok, #{bookmark := Bookmark, <<"hits">> := []}} ->
+            % No more results from the query: the request has paged
+            % through all possible results and is over.
+            NewCAcc = CAcc#cacc{bookmark = Bookmark},
+            throw({stop, NewCAcc});
+        {ok, #{bookmark := Bookmark, <<"hits">> := Hits}} ->
+            NewCAcc = CAcc#cacc{bookmark = nouveau_bookmark:to_ejson(Bookmark)},
+            HitDocs = get_json_docs(CAcc#cacc.dbname, Hits),
+            {ok, FinalCAcc} = handle_hits(NewCAcc, HitDocs),
+            execute(FinalCAcc)
+    end.
+
+search_docs(CAcc) ->
+    #cacc{
+        dbname = DbName,
+        ddocid = DDocId,
+        idx_name = IdxName
+    } = CAcc,
+    QueryArgs = update_query_args(CAcc),
+    case nouveau_fabric_search:go(DbName, DDocId, IdxName, QueryArgs) of
+        {ok, SearchResults} ->
+            {ok, SearchResults};
+        {error, Reason} ->
+            ?MANGO_ERROR({nouveau_search_error, {error, Reason}})
+    end.
+
+handle_hits(CAcc, []) ->
+    {ok, CAcc};
+handle_hits(CAcc0, [{Hit, Doc} | Rest]) ->
+    CAcc1 = handle_hit(CAcc0, Hit, Doc),
+    handle_hits(CAcc1, Rest).
+
+handle_hit(CAcc0, Hit, not_found) ->
+    update_bookmark(CAcc0, Hit);
+handle_hit(CAcc0, Hit, Doc) ->
+    #cacc{
+        limit = Limit,
+        skip = Skip,
+        execution_stats = Stats
+    } = CAcc0,
+    CAcc1 = update_bookmark(CAcc0, Hit),
+    Stats1 = mango_execution_stats:incr_docs_examined(Stats),
+    couch_stats:increment_counter([mango, docs_examined]),
+    CAcc2 = CAcc1#cacc{execution_stats = Stats1},
+    case mango_selector:match(CAcc2#cacc.selector, Doc) of
+        true when Skip > 0 ->
+            CAcc2#cacc{skip = Skip - 1};
+        true when Limit == 0 ->
+            % We hit this case if the user specified a zero limit.
+            % Notice that in this case we need to return the bookmark
+            % from before this match.
+            throw({stop, CAcc0});
+        true when Limit == 1 ->
+            NewCAcc = apply_user_fun(CAcc2, Doc),
+            throw({stop, NewCAcc});
+        true when Limit > 1 ->
+            NewCAcc = apply_user_fun(CAcc2, Doc),
+            NewCAcc#cacc{limit = Limit - 1};
+        false ->
+            CAcc2
+    end.
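+
+%% apply_user_fun/2 (below) streams a single matching document to the
+%% user-supplied callback and counts it as a returned result; a stop
+%% reply from the callback aborts the whole cursor via the try/catch
+%% in execute/3 above.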
+ +apply_user_fun(CAcc, Doc) -> + FinalDoc = mango_fields:extract(Doc, CAcc#cacc.fields), + #cacc{ + user_fun = UserFun, + user_acc = UserAcc, + execution_stats = Stats + } = CAcc, + Stats0 = mango_execution_stats:incr_results_returned(Stats), + case UserFun({row, FinalDoc}, UserAcc) of + {ok, NewUserAcc} -> + CAcc#cacc{user_acc = NewUserAcc, execution_stats = Stats0}; + {stop, NewUserAcc} -> + throw({stop, CAcc#cacc{user_acc = NewUserAcc, execution_stats = Stats0}}) + end. + +%% Convert Query to Nouveau sort specifications +%% Convert <<"Field">>, <<"desc">> to <<"-Field">> +%% and append to the nouveau query +sort_query(Opts, Selector) -> + {sort, {Sort}} = lists:keyfind(sort, 1, Opts), + SortList = lists:map( + fun(SortField) -> + {Dir, RawSortField} = + case SortField of + {Field, <<"asc">>} -> {asc, Field}; + {Field, <<"desc">>} -> {desc, Field}; + Field when is_binary(Field) -> {asc, Field} + end, + SField = mango_selector_text:append_sort_type(RawSortField, Selector), + case Dir of + asc -> + SField; + desc -> + <<"-", SField/binary>> + end + end, + Sort + ), + case SortList of + [] -> null; + _ -> SortList + end. + +get_partition(Opts, Default) -> + case couch_util:get_value(partition, Opts) of + <<>> -> Default; + Else -> Else + end. + +get_bookmark(Opts) -> + case lists:keyfind(bookmark, 1, Opts) of + {_, BM} when is_list(BM), BM /= [] -> + BM; + _ -> + nil + end. + +update_bookmark(CAcc, Hit) -> + BM = CAcc#cacc.bookmark, + DbName = CAcc#cacc.dbname, + NewBM = nouveau_bookmark:update(DbName, BM, #{<<"hits">> => [Hit]}), + CAcc#cacc{bookmark = NewBM}. + +ddocid(Idx) -> + case mango_idx:ddoc(Idx) of + <<"_design/", Rest/binary>> -> + Rest; + Else -> + Else + end. + +update_query_args(CAcc) -> + #cacc{ + bookmark = Bookmark, + query_args = QueryArgs + } = CAcc, + QueryArgs#{ + bookmark => nouveau_bookmark:pack(Bookmark), + limit => get_limit(CAcc) + }. + +get_limit(CAcc) -> + erlang:min(get_nouveau_limit(), CAcc#cacc.limit + CAcc#cacc.skip). + +get_nouveau_limit() -> + config:get_integer("nouveau", "max_limit", 200). + +get_json_docs(DbName, Hits) -> + Ids = lists:map( + fun(Hit) -> + maps:get(<<"id">>, Hit) + end, + Hits + ), + % TODO: respect R query parameter (same as json indexes) + {ok, Docs} = nouveau_fabric:get_json_docs(DbName, Ids), + lists:zip(Hits, Docs). diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl index 53bf63edb..959603316 100644 --- a/src/mango/src/mango_cursor_text.erl +++ b/src/mango/src/mango_cursor_text.erl @@ -73,7 +73,7 @@ explain(Cursor) -> opts = Opts } = Cursor, [ - {'query', mango_selector_text:convert(Selector)}, + {query, mango_selector_text:convert(Selector)}, {partition, get_partition(Opts, null)}, {sort, sort_query(Opts, Selector)} ]. 
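+
+%% mango_idx_nouveau adapts nouveau search indexes to mango: it
+%% validates index definitions, adds them to and removes them from
+%% design documents, and decides whether an index is usable for a
+%% given selector (see is_usable/3 and columns/1).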
diff --git a/src/mango/src/mango_error.erl b/src/mango/src/mango_error.erl index d8ae3fcbf..22cb37106 100644 --- a/src/mango/src/mango_error.erl +++ b/src/mango/src/mango_error.erl @@ -74,6 +74,26 @@ info(mango_cursor_text, {text_search_error, {error, Error}}) -> <<"text_search_error">>, fmt("~p", [Error]) }; +info(mango_cursor_nouveau, multiple_nouveau_indexes) -> + { + 400, + <<"multiple_nouveau_indexes">>, + <<"You must specify an index with the `use_index` parameter.">> + }; +info(mango_cursor_nouveau, {nouveau_search_error, {error, {Type, Msg}}}) when + is_binary(Msg) +-> + { + 500, + <<"nouveau_search_error">>, + fmt("~p: ~s", [Type, Msg]) + }; +info(mango_cursor_nouveau, {nouveau_search_error, {error, Error}}) -> + { + 500, + <<"nouveau_search_error">>, + fmt("~p", [Error]) + }; info(mango_fields, {invalid_fields_json, BadFields}) -> { 400, diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index a20d730a2..9ce1ef93c 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -176,11 +176,14 @@ from_ddoc(Db, {Props}) -> end, IdxMods = case dreyfus:available() of - true -> - [mango_idx_view, mango_idx_text]; - false -> - [mango_idx_view] - end, + true -> [mango_idx_text]; + false -> [] + end ++ + case nouveau:enabled() of + true -> [mango_idx_nouveau]; + false -> [] + end ++ + [mango_idx_view], Idxs = lists:flatmap(fun(Mod) -> Mod:from_ddoc({Props}) end, IdxMods), lists:map( fun(Idx) -> @@ -249,6 +252,13 @@ cursor_mod(#idx{type = <<"json">>}) -> mango_cursor_view; cursor_mod(#idx{def = all_docs, type = <<"special">>}) -> mango_cursor_special; +cursor_mod(#idx{type = <<"nouveau">>}) -> + case nouveau:enabled() of + true -> + mango_cursor_nouveau; + false -> + ?MANGO_ERROR({index_service_unavailable, <<"nouveau">>}) + end; cursor_mod(#idx{type = <<"text">>}) -> case dreyfus:available() of true -> @@ -261,6 +271,13 @@ idx_mod(#idx{type = <<"json">>}) -> mango_idx_view; idx_mod(#idx{type = <<"special">>}) -> mango_idx_special; +idx_mod(#idx{type = <<"nouveau">>}) -> + case nouveau:enabled() of + true -> + mango_idx_nouveau; + false -> + ?MANGO_ERROR({index_service_unavailable, <<"nouveau">>}) + end; idx_mod(#idx{type = <<"text">>}) -> case dreyfus:available() of true -> @@ -288,6 +305,13 @@ get_idx_type(Opts) -> case proplists:get_value(type, Opts) of <<"json">> -> <<"json">>; + <<"nouveau">> -> + case nouveau:enabled() of + true -> + <<"nouveau">>; + false -> + ?MANGO_ERROR({index_service_unavailable, <<"nouveau">>}) + end; <<"text">> -> case dreyfus:available() of true -> diff --git a/src/mango/src/mango_idx_nouveau.erl b/src/mango/src/mango_idx_nouveau.erl new file mode 100644 index 000000000..074a755ee --- /dev/null +++ b/src/mango/src/mango_idx_nouveau.erl @@ -0,0 +1,459 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mango_idx_nouveau). + +-export([ + validate_new/2, + validate_fields/1, + validate_index_def/1, + add/2, + remove/2, + from_ddoc/1, + to_json/1, + columns/1, + is_usable/3, + get_default_field_options/1 +]). 
+ +-include_lib("couch/include/couch_db.hrl"). +-include("mango.hrl"). +-include("mango_idx.hrl"). + +validate_new(#idx{} = Idx, Db) -> + {ok, Def} = do_validate(Idx#idx.def), + maybe_reject_index_all_req(Def, Db), + {ok, Idx#idx{def = Def}}. + +validate_index_def(IndexInfo) -> + do_validate(IndexInfo). + +add(#doc{body = {Props0}} = DDoc, Idx) -> + Texts1 = + case proplists:get_value(<<"nouveau">>, Props0) of + {Texts0} -> Texts0; + _ -> [] + end, + NewText = make_text(Idx), + Texts2 = lists:keystore(element(1, NewText), 1, Texts1, NewText), + Props1 = lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}}), + {ok, DDoc#doc{body = {Props1}}}. + +remove(#doc{body = {Props0}} = DDoc, Idx) -> + Texts1 = + case proplists:get_value(<<"nouveau">>, Props0) of + {Texts0} -> + Texts0; + _ -> + ?MANGO_ERROR({index_not_found, Idx#idx.name}) + end, + Texts2 = lists:keydelete(Idx#idx.name, 1, Texts1), + if + Texts2 /= Texts1 -> ok; + true -> ?MANGO_ERROR({index_not_found, Idx#idx.name}) + end, + Props1 = + case Texts2 of + [] -> + lists:keydelete(<<"nouveau">>, 1, Props0); + _ -> + lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}}) + end, + {ok, DDoc#doc{body = {Props1}}}. + +from_ddoc({Props}) -> + case lists:keyfind(<<"nouveau">>, 1, Props) of + {<<"nouveau">>, {Texts}} when is_list(Texts) -> + lists:flatmap( + fun({Name, {VProps}}) -> + case validate_ddoc(VProps) of + invalid_ddoc -> + []; + Def -> + I = #idx{ + type = <<"nouveau">>, + name = Name, + def = Def + }, + [I] + end + end, + Texts + ); + _ -> + [] + end. + +to_json(Idx) -> + {[ + {ddoc, Idx#idx.ddoc}, + {name, Idx#idx.name}, + {type, Idx#idx.type}, + {partitioned, Idx#idx.partitioned}, + {def, {def_to_json(Idx#idx.def)}} + ]}. + +columns(Idx) -> + {Props} = Idx#idx.def, + {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props), + case Fields of + <<"all_fields">> -> + all_fields; + _ -> + {DFProps} = couch_util:get_value(<<"default_field">>, Props, {[]}), + Enabled = couch_util:get_value(<<"enabled">>, DFProps, true), + Default = + case Enabled of + true -> [<<"$default">>]; + false -> [] + end, + Default ++ + lists:map( + fun({FProps}) -> + {_, Name} = lists:keyfind(<<"name">>, 1, FProps), + {_, Type} = lists:keyfind(<<"type">>, 1, FProps), + iolist_to_binary([Name, ":", Type]) + end, + Fields + ) + end. + +is_usable(_, Selector, _) when Selector =:= {[]} -> + false; +is_usable(Idx, Selector, _) -> + case columns(Idx) of + all_fields -> + true; + Cols -> + Fields = indexable_fields(Selector), + sets:is_subset(sets:from_list(Fields), sets:from_list(Cols)) + end. + +do_validate({Props}) -> + {ok, Opts} = mango_opts:validate(Props, opts()), + {ok, {Opts}}; +do_validate(Else) -> + ?MANGO_ERROR({invalid_index_text, Else}). + +def_to_json({Props}) -> + def_to_json(Props); +def_to_json([]) -> + []; +def_to_json([{<<"fields">>, <<"all_fields">>} | Rest]) -> + [{<<"fields">>, []} | def_to_json(Rest)]; +def_to_json([{fields, Fields} | Rest]) -> + [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)]; +def_to_json([{<<"fields">>, Fields} | Rest]) -> + [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)]; +% Don't include partial_filter_selector in the json conversion +% if its the default value +def_to_json([{<<"partial_filter_selector">>, {[]}} | Rest]) -> + def_to_json(Rest); +def_to_json([{Key, Value} | Rest]) -> + [{Key, Value} | def_to_json(Rest)]. 
+ +fields_to_json([]) -> + []; +fields_to_json([{[{<<"name">>, Name}, {<<"type">>, Type0}]} | Rest]) -> + ok = validate_field_name(Name), + Type = validate_field_type(Type0), + [{[{Name, Type}]} | fields_to_json(Rest)]; +fields_to_json([{[{<<"type">>, Type0}, {<<"name">>, Name}]} | Rest]) -> + ok = validate_field_name(Name), + Type = validate_field_type(Type0), + [{[{Name, Type}]} | fields_to_json(Rest)]. + +%% In the future, we can possibly add more restrictive validation. +%% For now, let's make sure the field name is not blank. +validate_field_name(<<"">>) -> + throw(invalid_field_name); +validate_field_name(Else) when is_binary(Else) -> + ok; +validate_field_name(_) -> + throw(invalid_field_name). + +validate_field_type(<<"string">>) -> + <<"string">>; +validate_field_type(<<"number">>) -> + <<"number">>; +validate_field_type(<<"boolean">>) -> + <<"boolean">>. + +validate_fields(<<"all_fields">>) -> + {ok, all_fields}; +validate_fields(Fields) -> + try fields_to_json(Fields) of + _ -> + mango_fields:new(Fields) + catch + error:function_clause -> + ?MANGO_ERROR({invalid_index_fields_definition, Fields}); + throw:invalid_field_name -> + ?MANGO_ERROR({invalid_index_fields_definition, Fields}) + end. + +validate_ddoc(VProps) -> + try + Def = proplists:get_value(<<"index">>, VProps), + validate_index_def(Def), + Def + catch + Error:Reason -> + couch_log:error( + "Invalid Index Def ~p: Error. ~p, Reason: ~p", + [VProps, Error, Reason] + ), + invalid_ddoc + end. + +opts() -> + [ + {<<"default_analyzer">>, [ + {tag, default_analyzer}, + {optional, true}, + {default, <<"keyword">>} + ]}, + {<<"default_field">>, [ + {tag, default_field}, + {optional, true}, + {default, {[]}} + ]}, + {<<"partial_filter_selector">>, [ + {tag, partial_filter_selector}, + {optional, true}, + {default, {[]}}, + {validator, fun mango_opts:validate_selector/1} + ]}, + {<<"selector">>, [ + {tag, selector}, + {optional, true}, + {default, {[]}}, + {validator, fun mango_opts:validate_selector/1} + ]}, + {<<"fields">>, [ + {tag, fields}, + {optional, true}, + {default, []}, + {validator, fun ?MODULE:validate_fields/1} + ]}, + {<<"index_array_lengths">>, [ + {tag, index_array_lengths}, + {optional, true}, + {default, true}, + {validator, fun mango_opts:is_boolean/1} + ]} + ]. + +make_text(Idx) -> + Text = + {[ + {<<"index">>, Idx#idx.def}, + {<<"analyzer">>, construct_analyzer(Idx#idx.def)} + ]}, + {Idx#idx.name, Text}. + +get_default_field_options(Props) -> + Default = couch_util:get_value(default_field, Props, {[]}), + case Default of + Bool when is_boolean(Bool) -> + {Bool, <<"standard">>}; + {[]} -> + {true, <<"standard">>}; + {Opts} -> + Enabled = couch_util:get_value(<<"enabled">>, Opts, true), + Analyzer = couch_util:get_value( + <<"analyzer">>, + Opts, + <<"standard">> + ), + {Enabled, Analyzer} + end. + +construct_analyzer({Props}) -> + DefaultAnalyzer = couch_util:get_value( + default_analyzer, + Props, + <<"keyword">> + ), + {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props), + DefaultAnalyzerDef = + case DefaultField of + true -> + [{<<"$default">>, DefaultFieldAnalyzer}]; + _ -> + [] + end, + case DefaultAnalyzerDef of + [] -> + <<"keyword">>; + _ -> + {[ + {<<"name">>, <<"perfield">>}, + {<<"default">>, DefaultAnalyzer}, + {<<"fields">>, {DefaultAnalyzerDef}} + ]} + end. + +indexable_fields(Selector) -> + TupleTree = mango_selector_text:convert([], Selector), + indexable_fields([], TupleTree). 
+ +indexable_fields(Fields, {op_and, Args}) when is_list(Args) -> + lists:foldl( + fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end, + Fields, + Args + ); +%% For queries that use array element access or $in operations, two +%% fields get generated by mango_selector_text:convert. At index +%% definition time, only one field gets defined. In this situation, we +%% remove the extra generated field so that the index can be used. For +%% all other situations, we include the fields as normal. +indexable_fields( + Fields, + {op_or, [ + {op_field, Field0}, + {op_field, {[Name | _], _}} = Field1 + ]} +) -> + case lists:member(<<"[]">>, Name) of + true -> + indexable_fields(Fields, {op_field, Field0}); + false -> + Fields1 = indexable_fields(Fields, {op_field, Field0}), + indexable_fields(Fields1, Field1) + end; +indexable_fields(Fields, {op_or, Args}) when is_list(Args) -> + lists:foldl( + fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end, + Fields, + Args + ); +indexable_fields(Fields, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) -> + Fields0 = indexable_fields(Fields, ExistsQuery), + indexable_fields(Fields0, Arg); +% forces "$exists" : false to use _all_docs +indexable_fields(_, {op_not, {_, false}}) -> + []; +indexable_fields(Fields, {op_insert, Arg}) when is_binary(Arg) -> + Fields; +%% fieldname.[]:length is not a user defined field. +indexable_fields(Fields, {op_field, {[_, <<":length">>], _}}) -> + Fields; +indexable_fields(Fields, {op_field, {Name, _}}) -> + [iolist_to_binary(Name) | Fields]; +%% In this particular case, the lucene index is doing a field_exists query +%% so it is looking at all sorts of combinations of field:* and field.* +%% We don't add the field because we cannot pre-determine what field will exist. +%% Hence we just return Fields and make it less restrictive. +indexable_fields(Fields, {op_fieldname, {_, _}}) -> + Fields; +%% Similar idea to op_fieldname but with fieldname:null +indexable_fields(Fields, {op_null, {_, _}}) -> + Fields; +indexable_fields(Fields, {op_default, _}) -> + [<<"$default">> | Fields]. + +maybe_reject_index_all_req({Def}, Db) -> + DbName = couch_db:name(Db), + #user_ctx{name = User} = couch_db:get_user_ctx(Db), + Fields = couch_util:get_value(fields, Def), + case {Fields, forbid_index_all()} of + {all_fields, "true"} -> + ?MANGO_ERROR(index_all_disabled); + {all_fields, "warn"} -> + couch_log:warning( + "User ~p is indexing all fields in db ~p", + [User, DbName] + ); + _ -> + ok + end. + +forbid_index_all() -> + config:get("mango", "index_all_disabled", "false"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +setup_all() -> + Ctx = test_util:start_couch(), + meck:expect( + couch_log, + warning, + 2, + fun(_, _) -> + throw({test_error, logged_warning}) + end + ), + Ctx. + +teardown_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + +setup() -> + %default index all def that generates {fields, all_fields} + Index = #idx{def = {[]}}, + DbName = <<"testdb">>, + UserCtx = #user_ctx{name = <<"u1">>}, + {ok, Db} = couch_db:clustered_db(DbName, UserCtx), + {Index, Db}. + +teardown(_) -> + ok. + +index_all_test_() -> + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + fun forbid_index_all/1, + fun default_and_false_index_all/1, + fun warn_index_all/1 + ] + } + }. 
+ +forbid_index_all({Idx, Db}) -> + ?_test(begin + ok = config:set("mango", "index_all_disabled", "true", false), + ?assertThrow( + {mango_error, ?MODULE, index_all_disabled}, + validate_new(Idx, Db) + ) + end). + +default_and_false_index_all({Idx, Db}) -> + ?_test(begin + config:delete("mango", "index_all_disabled", false), + {ok, #idx{def = {Def}}} = validate_new(Idx, Db), + Fields = couch_util:get_value(fields, Def), + ?assertEqual(all_fields, Fields), + ok = config:set("mango", "index_all_disabled", "false", false), + {ok, #idx{def = {Def2}}} = validate_new(Idx, Db), + Fields2 = couch_util:get_value(fields, Def2), + ?assertEqual(all_fields, Fields2) + end). + +warn_index_all({Idx, Db}) -> + ?_test(begin + ok = config:set("mango", "index_all_disabled", "warn", false), + ?assertThrow({test_error, logged_warning}, validate_new(Idx, Db)) + end). + +-endif. diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index d3d200517..8e04ab4b8 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -62,7 +62,7 @@ handle_call({prompt, [<<"reset">>]}, _From, St) -> {reply, true, St#st{indexes = []}}; handle_call({prompt, [<<"reset">>, _QueryConfig]}, _From, St) -> {reply, true, St#st{indexes = []}}; -handle_call({prompt, [<<"add_fun">>, IndexInfo]}, _From, St) -> +handle_call({prompt, [<<"add_fun">>, IndexInfo | _IgnoreRest]}, _From, St) -> Indexes = case validate_index_info(IndexInfo) of true -> @@ -88,6 +88,15 @@ handle_call({prompt, [<<"index_doc">>, Doc]}, _From, St) -> Else end, {reply, Vals, St}; +handle_call({prompt, [<<"nouveau_index_doc">>, Doc]}, _From, St) -> + Vals = + case nouveau_index_doc(St, mango_json:to_binary(Doc)) of + [] -> + [[]]; + Else -> + Else + end, + {reply, Vals, St}; handle_call(Msg, _From, St) -> {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. @@ -111,6 +120,9 @@ map_doc(#st{indexes = Indexes}, Doc) -> index_doc(#st{indexes = Indexes}, Doc) -> lists:map(fun(Idx) -> get_text_entries(Idx, Doc) end, Indexes). +nouveau_index_doc(#st{indexes = Indexes}, Doc) -> + lists:map(fun(Idx) -> get_nouveau_entries(Idx, Doc) end, Indexes). + get_index_entries({IdxProps}, Doc) -> {Fields} = couch_util:get_value(<<"fields">>, IdxProps), Selector = get_index_partial_filter_selector(IdxProps), @@ -146,6 +158,15 @@ get_text_entries({IdxProps}, Doc) -> [] end. +get_nouveau_entries({IdxProps}, Doc) -> + Selector = get_index_partial_filter_selector(IdxProps), + case should_index(Selector, Doc) of + true -> + get_nouveau_entries0(IdxProps, Doc); + false -> + [] + end. + get_index_partial_filter_selector(IdxProps) -> case couch_util:get_value(<<"partial_filter_selector">>, IdxProps, {[]}) of {[]} -> @@ -307,14 +328,78 @@ make_text_field_name([P | Rest], Type) -> Escaped = [mango_util:lucene_escape_field(N) || N <- Parts], iolist_to_binary(mango_util:join(".", Escaped)). +get_nouveau_entries0(IdxProps, Doc) -> + DefaultEnabled = get_default_enabled(IdxProps), + IndexArrayLengths = get_index_array_lengths(IdxProps), + FieldsList = get_text_field_list(IdxProps), + TAcc = #tacc{ + index_array_lengths = IndexArrayLengths, + fields = FieldsList + }, + Fields0 = get_text_field_values(Doc, TAcc), + Fields = + if + not DefaultEnabled -> Fields0; + true -> add_default_text_field(Fields0) + end, + FieldNames0 = get_field_names(Fields), + FieldNames1 = lists:map(fun convert_to_nouveau_string_field/1, FieldNames0), + Converted = convert_nouveau_fields(Fields), + FieldNames1 ++ Converted. 
+ +convert_to_nouveau_string_field([Name, Value, []]) when is_binary(Name), is_binary(Value) -> + {[ + {<<"@type">>, <<"string">>}, + {<<"name">>, Name}, + {<<"value">>, Value} + ]}. + +convert_nouveau_fields([]) -> + []; +convert_nouveau_fields([{Name, <<"string">>, Value} | Rest]) -> + Field = + {[ + {<<"@type">>, <<"text">>}, + {<<"name">>, Name}, + {<<"value">>, Value} + ]}, + [Field | convert_nouveau_fields(Rest)]; +convert_nouveau_fields([{Name, <<"number">>, Value} | Rest]) -> + Field = + {[ + {<<"@type">>, <<"double">>}, + {<<"name">>, Name}, + {<<"value">>, Value} + ]}, + [Field | convert_nouveau_fields(Rest)]; +convert_nouveau_fields([{Name, <<"boolean">>, true} | Rest]) -> + Field = + {[ + {<<"@type">>, <<"string">>}, + {<<"name">>, Name}, + {<<"value">>, <<"true">>} + ]}, + [Field | convert_nouveau_fields(Rest)]; +convert_nouveau_fields([{Name, <<"boolean">>, false} | Rest]) -> + Field = + {[ + {<<"@type">>, <<"string">>}, + {<<"name">>, Name}, + {<<"value">>, <<"false">>} + ]}, + [Field | convert_nouveau_fields(Rest)]. + validate_index_info(IndexInfo) -> IdxTypes = case dreyfus:available() of - true -> - [mango_idx_view, mango_idx_text]; - false -> - [mango_idx_view] - end, + true -> [mango_idx_text]; + false -> [] + end ++ + case nouveau:enabled() of + true -> [mango_idx_nouveau]; + false -> [] + end ++ + [mango_idx_view], Results = lists:foldl( fun(IdxType, Results0) -> try diff --git a/src/mem3/src/mem3_reshard_index.erl b/src/mem3/src/mem3_reshard_index.erl index d45701362..41e225d22 100644 --- a/src/mem3/src/mem3_reshard_index.erl +++ b/src/mem3/src/mem3_reshard_index.erl @@ -22,6 +22,7 @@ -define(MRVIEW, mrview). -define(DREYFUS, dreyfus). -define(HASTINGS, hastings). +-define(NOUVEAU, nouveau). -include_lib("mem3/include/mem3.hrl"). @@ -61,6 +62,7 @@ fabric_design_docs(DbName) -> indices(DbName, Doc) -> mrview_indices(DbName, Doc) ++ + nouveau_indices(DbName, Doc) ++ [dreyfus_indices(DbName, Doc) || has_app(dreyfus)] ++ [hastings_indices(DbName, Doc) || has_app(hastings)]. @@ -81,6 +83,22 @@ mrview_indices(DbName, Doc) -> [] end. +nouveau_indices(DbName, Doc) -> + case nouveau:enabled() of + false -> + []; + true -> + try + Indices = nouveau_util:design_doc_to_indexes(DbName, Doc), + [{?NOUVEAU, DbName, Index} || Index <- Indices] + catch + Tag:Err -> + Msg = "~p couldn't get nouveau indices ~p ~p ~p:~p", + couch_log:error(Msg, [?MODULE, DbName, Doc, Tag, Err]), + [] + end + end. + dreyfus_indices(DbName, Doc) -> try Indices = dreyfus_index:design_doc_to_indexes(Doc), @@ -111,6 +129,9 @@ build_index({?MRVIEW, DbName, MRSt} = Ctx, Try) -> Ctx, Try ); +build_index({?NOUVEAU, _DbName, DIndex} = Ctx, Try) -> + UpdateFun = fun() -> nouveau_index_updater:update(DIndex) end, + retry_loop(Ctx, UpdateFun, Try); build_index({?DREYFUS, DbName, DIndex} = Ctx, Try) -> await_retry( dreyfus_index_manager:get_index(DbName, DIndex), @@ -127,16 +148,25 @@ build_index({?HASTINGS, DbName, HIndex} = Ctx, Try) -> ). await_retry({ok, Pid}, AwaitIndex, {_, DbName, _} = Ctx, Try) -> - try AwaitIndex(Pid, get_update_seq(DbName)) of - {ok, _} -> ok; - {ok, _, _} -> ok; - AwaitError -> maybe_retry(Ctx, AwaitError, Try) + UpdateFun = fun() -> + case AwaitIndex(Pid, get_update_seq(DbName)) of + {ok, _} -> ok; + {ok, _, _} -> ok; + AwaitError -> AwaitError + end + end, + retry_loop(Ctx, UpdateFun, Try); +await_retry(OpenError, _AwaitIndex, Ctx, Try) -> + maybe_retry(Ctx, OpenError, Try). 
+ +retry_loop(Ctx, UpdateFun, Try) -> + try UpdateFun() of + ok -> ok; + UpdateError -> maybe_retry(Ctx, UpdateError, Try) catch _:CatchError -> maybe_retry(Ctx, CatchError, Try) - end; -await_retry(OpenError, _AwaitIndex, Ctx, Try) -> - maybe_retry(Ctx, OpenError, Try). + end. maybe_retry(Ctx, killed = Error, Try) -> retry(Ctx, Error, Try); diff --git a/src/nouveau/include/nouveau.hrl b/src/nouveau/include/nouveau.hrl new file mode 100644 index 000000000..e50cd45d3 --- /dev/null +++ b/src/nouveau/include/nouveau.hrl @@ -0,0 +1,23 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-record(index, { + dbname, + ddoc_id, + default_analyzer, + field_analyzers, + def, + def_lang, + name, + sig=nil +}). diff --git a/src/nouveau/priv/stats_descriptions.cfg b/src/nouveau/priv/stats_descriptions.cfg new file mode 100644 index 000000000..56a00f0c8 --- /dev/null +++ b/src/nouveau/priv/stats_descriptions.cfg @@ -0,0 +1,21 @@ +%% Licensed under the Apache License, Version 2.0 (the "License"); you may not +%% use this file except in compliance with the License. You may obtain a copy of +%% the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations under +%% the License. + +{[nouveau, search_latency], [ + {type, histogram}, + {desc, <<"Distribution of overall search request latency as experienced by the end user">>} +]}. + +{[nouveau, active_searches], [ + {type, counter}, + {desc, <<"number of active search requests">>} +]}. \ No newline at end of file diff --git a/src/nouveau/src/nouveau.app.src b/src/nouveau/src/nouveau.app.src new file mode 100644 index 000000000..0828437c1 --- /dev/null +++ b/src/nouveau/src/nouveau.app.src @@ -0,0 +1,29 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +{application, nouveau, [ + {description, "FuLL tExT SeArcH"}, + {vsn, git}, + {applications, [ + config, + ibrowse, + kernel, + stdlib, + mem3, + rexi + ]}, + {mod, {nouveau_app, []}}, + {registered, [nouveau_index_manager, nouveau_sup]} +]}. 
diff --git a/src/nouveau/src/nouveau.erl b/src/nouveau/src/nouveau.erl new file mode 100644 index 000000000..a8f8fa8ec --- /dev/null +++ b/src/nouveau/src/nouveau.erl @@ -0,0 +1,20 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau). + +-export([enabled/0]). + +enabled() -> + config:get_boolean("nouveau", "enable", false). diff --git a/src/nouveau/src/nouveau_api.erl b/src/nouveau/src/nouveau_api.erl new file mode 100644 index 000000000..5bf6b1731 --- /dev/null +++ b/src/nouveau/src/nouveau_api.erl @@ -0,0 +1,216 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_api). + +-include("nouveau.hrl"). + +-export([ + analyze/2, + index_info/1, + create_index/2, + delete_path/1, + delete_path/2, + delete_doc/3, + update_doc/4, + search/2 +]). + +-define(JSON_CONTENT_TYPE, {"Content-Type", "application/json"}). + +analyze(Text, Analyzer) when + is_binary(Text), is_binary(Analyzer) +-> + ReqBody = {[{<<"text">>, Text}, {<<"analyzer">>, Analyzer}]}, + Resp = send_if_enabled( + nouveau_util:nouveau_url() ++ "/analyze", + [?JSON_CONTENT_TYPE], + post, + jiffy:encode(ReqBody) + ), + case Resp of + {ok, "200", _, RespBody} -> + Json = jiffy:decode(RespBody, [return_maps]), + {ok, maps:get(<<"tokens">>, Json)}; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end; +analyze(_, _) -> + {error, {bad_request, <<"'text' and 'analyzer' fields must be non-empty strings">>}}. + +index_info(#index{} = Index) -> + Resp = send_if_enabled(index_url(Index), [], get), + case Resp of + {ok, "200", _, RespBody} -> + {ok, jiffy:decode(RespBody, [return_maps])}; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +create_index(#index{} = Index, IndexDefinition) -> + Resp = send_if_enabled( + index_url(Index), [?JSON_CONTENT_TYPE], put, jiffy:encode(IndexDefinition) + ), + case Resp of + {ok, "204", _, _} -> + ok; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +delete_path(Path) -> + delete_path(Path, []). 
+ +delete_path(Path, Exclusions) when + is_binary(Path), is_list(Exclusions) +-> + Resp = send_if_enabled( + index_path(Path), [?JSON_CONTENT_TYPE], delete, jiffy:encode(Exclusions) + ), + case Resp of + {ok, "204", _, _} -> + ok; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +delete_doc(#index{} = Index, DocId, UpdateSeq) when + is_binary(DocId), is_integer(UpdateSeq) +-> + ReqBody = {[{<<"seq">>, UpdateSeq}]}, + Resp = send_if_enabled( + doc_url(Index, DocId), [?JSON_CONTENT_TYPE], delete, jiffy:encode(ReqBody) + ), + case Resp of + {ok, "204", _, _} -> + ok; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +update_doc(#index{} = Index, DocId, UpdateSeq, Fields) when + is_binary(DocId), is_integer(UpdateSeq), is_list(Fields) +-> + ReqBody = {[{<<"seq">>, UpdateSeq}, {<<"fields">>, Fields}]}, + Resp = send_if_enabled( + doc_url(Index, DocId), [?JSON_CONTENT_TYPE], put, jiffy:encode(ReqBody) + ), + case Resp of + {ok, "204", _, _} -> + ok; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +search(#index{} = Index, QueryArgs) -> + Resp = send_if_enabled( + search_url(Index), [?JSON_CONTENT_TYPE], post, jiffy:encode(QueryArgs) + ), + case Resp of + {ok, "200", _, RespBody} -> + {ok, jiffy:decode(RespBody, [return_maps])}; + {ok, StatusCode, _, RespBody} -> + {error, jaxrs_error(StatusCode, RespBody)}; + {error, Reason} -> + send_error(Reason) + end. + +%% private functions + +index_path(Path) -> + lists:flatten( + io_lib:format( + "~s/index/~s", + [ + nouveau_util:nouveau_url(), + couch_util:url_encode(Path) + ] + ) + ). + +index_url(#index{} = Index) -> + lists:flatten( + io_lib:format( + "~s/index/~s", + [ + nouveau_util:nouveau_url(), + couch_util:url_encode(nouveau_util:index_name(Index)) + ] + ) + ). + +doc_url(#index{} = Index, DocId) -> + lists:flatten( + io_lib:format( + "~s/index/~s/doc/~s", + [ + nouveau_util:nouveau_url(), + couch_util:url_encode(nouveau_util:index_name(Index)), + couch_util:url_encode(DocId) + ] + ) + ). + +search_url(IndexName) -> + index_url(IndexName) ++ "/search". + +jaxrs_error("400", Body) -> + {bad_request, message(Body)}; +jaxrs_error("404", Body) -> + {not_found, message(Body)}; +jaxrs_error("405", Body) -> + {method_not_allowed, message(Body)}; +jaxrs_error("417", Body) -> + {expectation_failed, message(Body)}; +jaxrs_error("422", Body) -> + {bad_request, lists:join(" and ", errors(Body))}; +jaxrs_error("500", Body) -> + {internal_server_error, message(Body)}. + +send_error({conn_failed, _}) -> + {error, {service_unavailable, <<"Search service unavailable.">>}}; +send_error(Reason) -> + {error, Reason}. + +message(Body) -> + Json = jiffy:decode(Body, [return_maps]), + maps:get(<<"message">>, Json). + +errors(Body) -> + Json = jiffy:decode(Body, [return_maps]), + maps:get(<<"errors">>, Json). + +send_if_enabled(Url, Header, Method) -> + send_if_enabled(Url, Header, Method, []). + +send_if_enabled(Url, Header, Method, Body) -> + case nouveau:enabled() of + true -> + ibrowse:send_req(Url, Header, Method, Body); + false -> + {error, nouveau_not_enabled} + end. 
diff --git a/src/nouveau/src/nouveau_app.erl b/src/nouveau/src/nouveau_app.erl new file mode 100644 index 000000000..a432398b4 --- /dev/null +++ b/src/nouveau/src/nouveau_app.erl @@ -0,0 +1,30 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_app). +-behaviour(application). + +%% Application callbacks +-export([start/2, stop/1]). + +%% =================================================================== +%% Application callbacks +%% =================================================================== + +start(_StartType, _StartArgs) -> + nouveau_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/nouveau/src/nouveau_bookmark.erl b/src/nouveau/src/nouveau_bookmark.erl new file mode 100644 index 000000000..b919534ea --- /dev/null +++ b/src/nouveau/src/nouveau_bookmark.erl @@ -0,0 +1,68 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_bookmark). + +-include_lib("mem3/include/mem3.hrl"). + +-export([new/0, update/3, unpack/2, pack/1, to_ejson/1]). + +new() -> + #{}. + +%% Form a bookmark from the last contribution from each shard range +update(DbName, PreviousBookmark, SearchResults) when is_binary(PreviousBookmark) -> + update(DbName, unpack(DbName, PreviousBookmark), SearchResults); +update(DbName, {EJson}, SearchResults) when is_list(EJson) -> + update(DbName, from_ejson({EJson}), SearchResults); +update(DbName, PreviousBookmark, SearchResults) when is_map(PreviousBookmark) -> + #{<<"hits">> := Hits} = SearchResults, + NewBookmark0 = lists:foldl( + fun(#{<<"id">> := Id, <<"order">> := Order}, Acc) -> + Acc#{range_of(DbName, Id) => Order} + end, + new(), + Hits + ), + maps:merge(PreviousBookmark, NewBookmark0). + +range_of(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> + [#shard{range = Range} | _] = mem3_shards:for_docid(DbName, DocId), + Range; +range_of(DbName, Order) when is_binary(DbName), is_list(Order) -> + #{<<"@type">> := <<"string">>, <<"value">> := DocId} = lists:last(Order), + range_of(DbName, DocId). 
+ +unpack(_DbName, Empty) when Empty == undefined; Empty == nil; Empty == null -> + new(); +unpack(DbName, PackedBookmark) when is_list(PackedBookmark) -> + unpack(DbName, list_to_binary(PackedBookmark)); +unpack(DbName, PackedBookmark) when is_binary(PackedBookmark) -> + Bookmark = jiffy:decode(base64:decode(PackedBookmark), [return_maps]), + maps:from_list([{range_of(DbName, V), V} || V <- Bookmark]). + +pack(nil) -> + null; +pack({EJson}) when is_list(EJson) -> + pack(from_ejson(EJson)); +pack(UnpackedBookmark) when is_map(UnpackedBookmark) -> + base64:encode(jiffy:encode(maps:values(UnpackedBookmark))). + +%% legacy use of ejson within mango +from_ejson({Props}) -> + maps:from_list(Props). + +to_ejson(Bookmark) when is_map(Bookmark) -> + {maps:to_list(Bookmark)}. diff --git a/src/nouveau/src/nouveau_epi.erl b/src/nouveau/src/nouveau_epi.erl new file mode 100644 index 000000000..f42e17970 --- /dev/null +++ b/src/nouveau/src/nouveau_epi.erl @@ -0,0 +1,49 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_epi). + +-behaviour(couch_epi_plugin). + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + +app() -> + nouveau. + +providers() -> + [{chttpd_handlers, nouveau_httpd_handlers}]. + +services() -> + []. + +data_subscriptions() -> + []. + +data_providers() -> + []. + +processes() -> + []. + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/nouveau/src/nouveau_fabric.erl b/src/nouveau/src/nouveau_fabric.erl new file mode 100644 index 000000000..3b8517031 --- /dev/null +++ b/src/nouveau/src/nouveau_fabric.erl @@ -0,0 +1,36 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- +%% inspired by dreyfus_fabric.erl but better + +-module(nouveau_fabric). +-export([get_json_docs/2]). + +get_json_docs(DbName, DocIds) -> + fabric:all_docs(DbName, fun callback/2, [], [{keys, DocIds}, {include_docs, true}]). + +callback({meta, _}, Acc) -> + {ok, Acc}; +callback({error, Reason}, _Acc) -> + {error, Reason}; +callback({row, Row}, Acc) -> + case lists:keyfind(doc, 1, Row) of + false -> + {ok, [not_found | Acc]}; + {doc, Doc} -> + {ok, [Doc | Acc]} + end; +callback(complete, Acc) -> + {ok, lists:reverse(Acc)}; +callback(timeout, _Acc) -> + {error, timeout}. 
diff --git a/src/nouveau/src/nouveau_fabric_cleanup.erl b/src/nouveau/src/nouveau_fabric_cleanup.erl new file mode 100644 index 000000000..cd4128fb1 --- /dev/null +++ b/src/nouveau/src/nouveau_fabric_cleanup.erl @@ -0,0 +1,43 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_fabric_cleanup). + +-include_lib("couch/include/couch_db.hrl"). + +-include("nouveau.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +-export([go/1]). + +go(DbName) -> + {ok, DesignDocs} = fabric:design_docs(DbName), + ActiveSigs = + lists:usort( + lists:flatmap( + fun(Doc) -> active_sigs(DbName, Doc) end, + [couch_doc:from_json_obj(DD) || DD <- DesignDocs] + ) + ), + Shards = mem3:shards(DbName), + lists:foreach( + fun(Shard) -> + rexi:cast(Shard#shard.node, {nouveau_rpc, cleanup, [Shard#shard.name, ActiveSigs]}) + end, + Shards + ). + +active_sigs(DbName, #doc{} = Doc) -> + Indexes = nouveau_util:design_doc_to_indexes(DbName, Doc), + lists:map(fun(Index) -> Index#index.sig end, Indexes). diff --git a/src/nouveau/src/nouveau_fabric_info.erl b/src/nouveau/src/nouveau_fabric_info.erl new file mode 100644 index 000000000..59e47094f --- /dev/null +++ b/src/nouveau/src/nouveau_fabric_info.erl @@ -0,0 +1,99 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_fabric_info). + +-export([go/3]). + +-include_lib("mem3/include/mem3.hrl"). + +go(DbName, DDocId, IndexName) when is_binary(DDocId) -> + {ok, DDoc} = fabric:open_doc(DbName, <<"_design/", DDocId/binary>>, [ejson_body]), + go(DbName, DDoc, IndexName); +go(DbName, DDoc, IndexName) -> + {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName), + Shards = mem3:shards(DbName), + Counters0 = lists:map( + fun(#shard{} = Shard) -> + Ref = rexi:cast( + Shard#shard.node, + {nouveau_rpc, info, [Shard#shard.name, Index]} + ), + Shard#shard{ref = Ref} + end, + Shards + ), + Counters = fabric_dict:init(Counters0, nil), + Workers = fabric_dict:fetch_keys(Counters), + RexiMon = fabric_util:create_monitors(Workers), + + Acc0 = {fabric_dict:init(Workers, nil), #{}}, + try + fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) + after + rexi_monitor:stop(RexiMon), + fabric_util:cleanup(Workers) + end. 
+ +handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _Worker, {Counters, Acc}) -> + case fabric_util:remove_down_workers(Counters, NodeRef) of + {ok, NewCounters} -> + {ok, {NewCounters, Acc}}; + error -> + {error, {nodedown, <<"progress not possible">>}} + end; +handle_message({rexi_EXIT, Reason}, Worker, {Counters, Acc}) -> + NewCounters = fabric_dict:erase(Worker, Counters), + case fabric_ring:is_progress_possible(NewCounters) of + true -> + {ok, {NewCounters, Acc}}; + false -> + {error, Reason} + end; +handle_message({ok, Info}, Worker, {Counters, Acc0}) -> + case fabric_dict:lookup_element(Worker, Counters) of + undefined -> + % already heard from someone else in this range + {ok, {Counters, Acc0}}; + nil -> + C1 = fabric_dict:store(Worker, ok, Counters), + C2 = fabric_view:remove_overlapping_shards(Worker, C1), + Acc1 = maps:merge_with(fun merge_info/3, Info, Acc0), + case fabric_dict:any(nil, C2) of + true -> + {ok, {C2, Acc1}}; + false -> + {stop, Acc1} + end + end; +handle_message({error, Reason}, Worker, {Counters, Acc}) -> + NewCounters = fabric_dict:erase(Worker, Counters), + case fabric_ring:is_progress_possible(NewCounters) of + true -> + {ok, {NewCounters, Acc}}; + false -> + {error, Reason} + end; +handle_message({'EXIT', _}, Worker, {Counters, Acc}) -> + NewCounters = fabric_dict:erase(Worker, Counters), + case fabric_ring:is_progress_possible(NewCounters) of + true -> + {ok, {NewCounters, Acc}}; + false -> + {error, {nodedown, <<"progress not possible">>}} + end. + +merge_info(_Key, Val1, Val2) -> + Val1 + Val2. diff --git a/src/nouveau/src/nouveau_fabric_search.erl b/src/nouveau/src/nouveau_fabric_search.erl new file mode 100644 index 000000000..4e528cc93 --- /dev/null +++ b/src/nouveau/src/nouveau_fabric_search.erl @@ -0,0 +1,221 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_fabric_search). + +-export([go/4]). + +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-record(state, { + limit, + sort, + counters, + search_results +}). 
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    {PackedBookmark, #{limit := Limit, sort := Sort} = QueryArgs1} =
+        maps:take(bookmark, QueryArgs0),
+    Bookmark = nouveau_bookmark:unpack(DbName, PackedBookmark),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = Limit,
+        sort = Sort,
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(
+            Workers,
+            #shard.ref,
+            fun handle_message/3,
+            State,
+            fabric_util:timeout("nouveau", "infinity"),
+            fabric_util:timeout("nouveau_permsg", "3600000")
+        )
+    of
+        {ok, SearchResults} ->
+            NewBookmark = nouveau_bookmark:update(DbName, Bookmark, SearchResults),
+            {ok, simplify_hits(SearchResults#{bookmark => NewBookmark})};
+        {error, Reason} ->
+            {error, Reason}
+    after
+        rexi_monitor:stop(RexiMon),
+        fabric_util:cleanup(Workers)
+    end.
+
+handle_message({ok, Response}, Shard, State) ->
+    case fabric_dict:lookup_element(Shard, State#state.counters) of
+        undefined ->
+            %% already heard from someone else in this range
+            {ok, State};
+        nil ->
+            SearchResults = merge_search_results(State#state.search_results, Response, State),
+            Counters1 = fabric_dict:store(Shard, ok, State#state.counters),
+            Counters2 = fabric_view:remove_overlapping_shards(Shard, Counters1),
+            State1 = State#state{counters = Counters2, search_results = SearchResults},
+            case fabric_dict:any(nil, Counters2) of
+                true ->
+                    {ok, State1};
+                false ->
+                    {stop, SearchResults}
+            end
+    end;
+handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _Shard, State) ->
+    #state{counters = Counters0} = State,
+    case fabric_util:remove_down_workers(Counters0, NodeRef, []) of
+        {ok, Counters1} ->
+            {ok, State#state{counters = Counters1}};
+        error ->
+            {error, {nodedown, <<"progress not possible">>}}
+    end;
+handle_message({error, Reason}, _Shard, _State) ->
+    {error, Reason};
+handle_message(Else, _Shard, _State) ->
+    {error, Else}.
+
+merge_search_results(A, B, #state{} = State) ->
+    #{
+        <<"total_hits">> => merge_total_hits(
+            maps:get(<<"total_hits">>, A, 0), maps:get(<<"total_hits">>, B, 0)
+        ),
+        <<"total_hits_relation">> => merge_total_hits_relation(
+            maps:get(<<"total_hits_relation">>, A, null),
+            maps:get(<<"total_hits_relation">>, B, null)
+        ),
+        <<"hits">> => merge_hits(
+            maps:get(<<"hits">>, A, []),
+            maps:get(<<"hits">>, B, []),
+            State#state.sort,
+            State#state.limit
+        ),
+        <<"counts">> => merge_facets(
+            maps:get(<<"counts">>, A, null), maps:get(<<"counts">>, B, null), State#state.limit
+        ),
+        <<"ranges">> => merge_facets(
+            maps:get(<<"ranges">>, A, null), maps:get(<<"ranges">>, B, null), State#state.limit
+        )
+    }.
+
+merge_total_hits(TotalHitsA, TotalHitsB) ->
+    TotalHitsA + TotalHitsB.
+ +merge_total_hits_relation(A, B) when + A == <<"GREATER_THAN_OR_EQUAL_TO">>; B == <<"GREATER_THAN_OR_EQUAL_TO">> +-> + <<"GREATER_THAN_OR_EQUAL_TO">>; +merge_total_hits_relation(A, B) when A == <<"EQUAL_TO">>; B == <<"EQUAL_TO">> -> + <<"EQUAL_TO">>; +merge_total_hits_relation(null, null) -> + %% not supported in selected Lucene version. + null. + +merge_hits(HitsA, HitsB, Sort, Limit) -> + MergedHits = lists:merge(merge_fun(Sort), HitsA, HitsB), + lists:sublist(MergedHits, Limit). + +simplify_hits(SearchResults) -> + #{<<"hits">> := Hits} = SearchResults, + SearchResults#{<<"hits">> => lists:map(fun simplify_hit/1, Hits)}. + +simplify_hit(#{} = Hit) -> + #{<<"fields">> := Fields} = Hit, + Hit#{<<"fields">> => simplify_fields(Fields)}. + +simplify_fields(Fields) when is_list(Fields) -> + Fun = fun(Field, Acc) -> + {Key, Value} = simplify_field(Field), + Acc#{Key => Value} + end, + lists:foldl(Fun, #{}, Fields). + +simplify_field(#{<<"@type">> := <<"stored">>} = Field) -> + #{<<"name">> := Key, <<"value">> := Value} = Field, + {Key, Value}. + +merge_fun(Sort) -> + fun(HitA, HitB) -> + OrderA = maps:get(<<"order">>, HitA), + OrderB = maps:get(<<"order">>, HitB), + compare_order(Sort, OrderA, OrderB) + end. + +%% no sort order specified +compare_order(null, [A | ARest], [B | BRest]) -> + case couch_ejson_compare:less(convert_item(A), convert_item(B)) of + 0 -> + compare_order(null, ARest, BRest); + Less -> + Less < 1 + end; +%% server-side adds _id on the end of sort order if not present +compare_order([], [A], [B]) -> + couch_ejson_compare:less(convert_item(A), convert_item(B)) < 1; +%% reverse order specified +compare_order([<<"-", _/binary>> | SortRest], [A | ARest], [B | BRest]) -> + case couch_ejson_compare:less(convert_item(B), convert_item(A)) of + 0 -> + compare_order(SortRest, ARest, BRest); + Less -> + Less < 1 + end; +%% forward order specified +compare_order([_ | SortRest], [A | ARest], [B | BRest]) -> + case couch_ejson_compare:less(convert_item(A), convert_item(B)) of + 0 -> + compare_order(SortRest, ARest, BRest); + Less -> + Less < 1 + end. + +convert_item(Item) -> + case maps:get(<<"@type">>, Item) of + <<"bytes">> -> + base64:decode(maps:get(<<"value">>, Item)); + _ -> + maps:get(<<"value">>, Item) + end. + +merge_facets(FacetsA, null, _Limit) -> + FacetsA; +merge_facets(null, FacetsB, _Limit) -> + FacetsB; +merge_facets(FacetsA, FacetsB, _Limit) -> + Combiner = fun(_, V1, V2) -> maps:merge_with(fun(_, V3, V4) -> V3 + V4 end, V1, V2) end, + maps:merge_with(Combiner, FacetsA, FacetsB). diff --git a/src/nouveau/src/nouveau_httpd.erl b/src/nouveau/src/nouveau_httpd.erl new file mode 100644 index 000000000..999acc7ea --- /dev/null +++ b/src/nouveau/src/nouveau_httpd.erl @@ -0,0 +1,276 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_httpd). + +-include_lib("couch/include/couch_db.hrl"). 
+ +-export([ + handle_analyze_req/1, + handle_search_req/3, + handle_info_req/3, + handle_cleanup_req/2 +]). + +-import(chttpd, [ + send_method_not_allowed/2, + send_json/2, send_json/3, + send_error/2 +]). + +-define(RETRY_LIMIT, 20). +-define(RETRY_SLEEP, 500). + +handle_analyze_req(#httpd{method = 'POST'} = Req) -> + check_if_enabled(), + couch_httpd:validate_ctype(Req, "application/json"), + {Fields} = chttpd:json_body_obj(Req), + Analyzer = couch_util:get_value(<<"analyzer">>, Fields), + Text = couch_util:get_value(<<"text">>, Fields), + case nouveau_api:analyze(Text, Analyzer) of + {ok, Tokens} -> + send_json(Req, 200, {[{<<"tokens">>, Tokens}]}); + {error, Reason} -> + send_error(Req, Reason) + end; +handle_analyze_req(Req) -> + send_method_not_allowed(Req, "POST"). + +handle_search_req(Req, Db, DDoc) -> + check_if_enabled(), + couch_stats:increment_counter([nouveau, active_searches]), + T0 = erlang:monotonic_time(), + try + handle_search_req_int(Req, Db, DDoc) + after + T1 = erlang:monotonic_time(), + couch_stats:decrement_counter([nouveau, active_searches]), + RequestTime = erlang:convert_time_unit(T1 - T0, native, millisecond), + couch_stats:update_histogram([nouveau, search_latency], RequestTime) + end. + +handle_search_req_int(#httpd{method = 'GET', path_parts = [_, _, _, _, IndexName]} = Req, Db, DDoc) -> + DbName = couch_db:name(Db), + QueryArgs = validate_query_args(#{ + query => chttpd:qs_value(Req, "q"), + limit => chttpd:qs_value(Req, "limit"), + sort => chttpd:qs_value(Req, "sort"), + ranges => chttpd:qs_value(Req, "ranges"), + counts => chttpd:qs_value(Req, "counts"), + update => chttpd:qs_value(Req, "update"), + bookmark => chttpd:qs_value(Req, "bookmark"), + include_docs => chttpd:qs_value(Req, "include_docs") + }), + handle_search_req(Req, DbName, DDoc, IndexName, QueryArgs, ?RETRY_LIMIT); +handle_search_req_int( + #httpd{method = 'POST', path_parts = [_, _, _, _, IndexName]} = Req, Db, DDoc +) -> + couch_httpd:validate_ctype(Req, "application/json"), + DbName = couch_db:name(Db), + ReqBody = chttpd:json_body(Req, [return_maps]), + QueryArgs = validate_query_args(#{ + query => maps:get(<<"q">>, ReqBody, undefined), + limit => maps:get(<<"limit">>, ReqBody, undefined), + sort => json_or_undefined(<<"sort">>, ReqBody), + ranges => json_or_undefined(<<"ranges">>, ReqBody), + counts => json_or_undefined(<<"counts">>, ReqBody), + update => maps:get(<<"update">>, ReqBody, undefined), + bookmark => maps:get(<<"bookmark">>, ReqBody, undefined), + include_docs => maps:get(<<"include_docs">>, ReqBody, undefined) + }), + handle_search_req(Req, DbName, DDoc, IndexName, QueryArgs, ?RETRY_LIMIT); +handle_search_req_int(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET, POST"). 
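+
+%% For illustration (hypothetical database/design-doc/index names): both
+%% request styles above carry the same arguments and funnel into
+%% validate_query_args/1 further down.
+%%   GET  /db1/_design/foo/_nouveau/bar?q=foo:bar&limit=2
+%%   POST /db1/_design/foo/_nouveau/bar
+%%        {"q": "foo:bar", "limit": 2, "sort": ["-bar"], "include_docs": true}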
+
+handle_search_req(#httpd{} = Req, DbName, DDoc, IndexName, QueryArgs, Retry) ->
+    IncludeDocs = maps:get(include_docs, QueryArgs, false),
+    case nouveau_fabric_search:go(DbName, DDoc, IndexName, QueryArgs) of
+        {ok, SearchResults} ->
+            RespBody = #{
+                <<"bookmark">> => nouveau_bookmark:pack(maps:get(bookmark, SearchResults)),
+                <<"total_hits">> => maps:get(<<"total_hits">>, SearchResults),
+                <<"total_hits_relation">> => maps:get(<<"total_hits_relation">>, SearchResults),
+                <<"hits">> => include_docs(
+                    DbName, maps:get(<<"hits">>, SearchResults), IncludeDocs
+                ),
+                <<"counts">> => maps:get(<<"counts">>, SearchResults, null),
+                <<"ranges">> => maps:get(<<"ranges">>, SearchResults, null)
+            },
+            HitCount = length(maps:get(<<"hits">>, RespBody)),
+            incr_stats(HitCount, IncludeDocs),
+            send_json(Req, 200, RespBody);
+        {error, {service_unavailable, _}} when Retry > 1 ->
+            couch_log:warning("search unavailable, retrying (~p of ~p)", [
+                ?RETRY_LIMIT - Retry + 1, ?RETRY_LIMIT
+            ]),
+            timer:sleep(?RETRY_SLEEP),
+            handle_search_req(Req, DbName, DDoc, IndexName, QueryArgs, Retry - 1);
+        {error, Reason} ->
+            send_error(Req, Reason)
+    end.
+
+handle_info_req(
+    #httpd{method = 'GET', path_parts = [_, _, _, _, IndexName]} = Req,
+    Db,
+    #doc{id = Id} = DDoc
+) ->
+    check_if_enabled(),
+    DbName = couch_db:name(Db),
+    case nouveau_fabric_info:go(DbName, DDoc, IndexName) of
+        {ok, IndexInfo} ->
+            send_json(
+                Req,
+                200,
+                {[
+                    {name, <<Id/binary, "/", IndexName/binary>>},
+                    {search_index, IndexInfo}
+                ]}
+            );
+        {error, Reason} ->
+            send_error(Req, Reason)
+    end;
+handle_info_req(#httpd{path_parts = [_, _, _, _, _]} = Req, _Db, _DDoc) ->
+    check_if_enabled(),
+    send_method_not_allowed(Req, "GET");
+handle_info_req(Req, _Db, _DDoc) ->
+    check_if_enabled(),
+    send_error(Req, {bad_request, "path not recognized"}).
+
+handle_cleanup_req(#httpd{method = 'POST'} = Req, Db) ->
+    couch_httpd:validate_ctype(Req, "application/json"),
+    ok = nouveau_fabric_cleanup:go(couch_db:name(Db)),
+    send_json(Req, 202, {[{ok, true}]});
+handle_cleanup_req(Req, _Db) ->
+    send_method_not_allowed(Req, "POST").
+
+include_docs(_DbName, Hits, false) ->
+    Hits;
+include_docs(DbName, Hits, true) ->
+    Ids = [maps:get(<<"id">>, Hit) || Hit <- Hits],
+    {ok, Docs} = nouveau_fabric:get_json_docs(DbName, Ids),
+    lists:zipwith(fun(Hit, Doc) -> Hit#{<<"doc">> => Doc} end, Hits, Docs).
+
+incr_stats(HitCount, false) ->
+    chttpd_stats:incr_rows(HitCount);
+incr_stats(HitCount, true) ->
+    chttpd_stats:incr_reads(HitCount),
+    incr_stats(HitCount, false).
+
+validate_query_args(#{} = QueryArgs) ->
+    maps:map(fun validate_query_arg/2, QueryArgs).
+
+validate_query_arg(query, undefined) ->
+    throw({query_parse_error, <<"q parameter is mandatory">>});
+validate_query_arg(query, Val) when is_list(Val); is_binary(Val) ->
+    couch_util:to_binary(Val);
+validate_query_arg(limit, undefined) ->
+    25;
+validate_query_arg(limit, Limit) when is_integer(Limit), Limit > 0 ->
+    Limit;
+validate_query_arg(limit, Limit) when is_integer(Limit) ->
+    throw({query_parse_error, <<"limit parameter must be greater than zero">>});
+validate_query_arg(limit, List) when is_list(List) ->
+    try
+        list_to_integer(List)
+    catch
+        error:badarg ->
+            throw({query_parse_error, <<"limit parameter must be an integer">>})
+    end;
+validate_query_arg(sort, undefined) ->
+    null;
+validate_query_arg(sort, {json, Sort}) when is_binary(Sort) ->
+    [Sort];
+validate_query_arg(sort, {json, Sort}) ->
+    ok = is_list_of_strings(<<"sort">>, Sort),
+    Sort;
+validate_query_arg(sort, Sort) ->
+    validate_query_arg(sort, {json, ?JSON_DECODE(Sort, [return_maps])});
+validate_query_arg(ranges, undefined) ->
+    null;
+validate_query_arg(ranges, {json, Ranges}) when is_map(Ranges) ->
+    maps:foreach(fun is_valid_range/2, Ranges),
+    Ranges;
+validate_query_arg(ranges, Ranges) ->
+    validate_query_arg(ranges, {json, ?JSON_DECODE(Ranges, [return_maps])});
+validate_query_arg(counts, undefined) ->
+    null;
+validate_query_arg(counts, {json, Counts}) when is_list(Counts) ->
+    ok = is_list_of_strings(<<"counts">>, Counts),
+    Counts;
+validate_query_arg(counts, Counts) ->
+    validate_query_arg(counts, {json, ?JSON_DECODE(Counts, [return_maps])});
+validate_query_arg(update, undefined) ->
+    true;
+validate_query_arg(update, Bool) when is_boolean(Bool) ->
+    Bool;
+validate_query_arg(update, "false") ->
+    false;
+validate_query_arg(update, "true") ->
+    true;
+validate_query_arg(bookmark, undefined) ->
+    null;
+validate_query_arg(bookmark, Bookmark) ->
+    Bookmark;
+validate_query_arg(include_docs, Bool) when is_boolean(Bool) ->
+    Bool;
+validate_query_arg(include_docs, undefined) ->
+    false;
+validate_query_arg(include_docs, "false") ->
+    false;
+validate_query_arg(include_docs, "true") ->
+    true;
+validate_query_arg(Key, Val) ->
+    Msg = io_lib:format("Invalid value for ~p: ~p", [Key, Val]),
+    throw({query_parse_error, ?l2b(Msg)}).
+
+json_or_undefined(Key, Map) when is_binary(Key), is_map(Map) ->
+    case maps:get(Key, Map, undefined) of
+        undefined ->
+            undefined;
+        Val ->
+            {json, Val}
+    end.
+
+is_list_of_strings(Name, Val) when is_list(Val) ->
+    AllBinaries = lists:all(fun is_binary/1, Val),
+    if
+        AllBinaries ->
+            ok;
+        true ->
+            throw(
+                {query_parse_error, <<"all items in ", Name/binary, " parameter must be strings">>}
+            )
+    end;
+is_list_of_strings(Name, _Val) ->
+    throw({query_parse_error, <<Name/binary, " parameter must be a list of strings">>}).
+
+is_valid_range(FieldName, _Ranges) when not is_binary(FieldName) ->
+    throw({query_parse_error, <<"range keys must be strings">>});
+is_valid_range(_FieldName, Ranges) when not is_list(Ranges) ->
+    throw({query_parse_error, <<"range values must be lists of objects">>});
+is_valid_range(FieldName, Ranges) when is_binary(FieldName), is_list(Ranges) ->
+    AllMaps = lists:all(fun is_map/1, Ranges),
+    if
+        AllMaps -> ok;
+        true -> throw({query_parse_error, <<"all values in ranges parameter must be objects">>})
+    end.
+
+check_if_enabled() ->
+    case nouveau:enabled() of
+        true ->
+            ok;
+        false ->
+            throw(not_found)
+    end.
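The clause list above amounts to a defaults table: q is the only mandatory argument, query-string values arrive as strings and are coerced, and everything else falls back to a fixed default. A sketch of the net effect (illustration only; validate_query_args/1 is internal to the module above, shown here as if it were exported):

    %% Only q is mandatory; everything else defaults.
    #{
        query := <<"foo:bar">>,   %% coerced to binary
        limit := 25,              %% default when undefined
        sort := null,
        ranges := null,
        counts := null,
        update := true,
        bookmark := null,
        include_docs := false
    } = nouveau_httpd:validate_query_args(#{
        query => "foo:bar",
        limit => undefined,
        sort => undefined,
        ranges => undefined,
        counts => undefined,
        update => undefined,
        bookmark => undefined,
        include_docs => undefined
    }).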
diff --git a/src/nouveau/src/nouveau_httpd_handlers.erl b/src/nouveau/src/nouveau_httpd_handlers.erl new file mode 100644 index 000000000..971833d8c --- /dev/null +++ b/src/nouveau/src/nouveau_httpd_handlers.erl @@ -0,0 +1,35 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_httpd_handlers). + +-export([url_handler/1, db_handler/1, design_handler/1]). + +url_handler(<<"_nouveau_analyze">>) -> + fun nouveau_httpd:handle_analyze_req/1; +url_handler(_) -> + no_match. + +db_handler(<<"_nouveau_cleanup">>) -> + fun nouveau_httpd:handle_cleanup_req/2; +db_handler(_) -> + no_match. + +design_handler(<<"_nouveau">>) -> + fun nouveau_httpd:handle_search_req/3; +design_handler(<<"_nouveau_info">>) -> + fun nouveau_httpd:handle_info_req/3; +design_handler(_) -> + no_match. diff --git a/src/nouveau/src/nouveau_index_manager.erl b/src/nouveau/src/nouveau_index_manager.erl new file mode 100644 index 000000000..bfbd74990 --- /dev/null +++ b/src/nouveau/src/nouveau_index_manager.erl @@ -0,0 +1,161 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +%% index manager ensures only one process is updating a nouveau index at a time. +%% calling update_index will block until at least one attempt has been made to +%% make the index as current as the database at the time update_index was called. + +-module(nouveau_index_manager). +-behaviour(gen_server). +-behaviour(config_listener). +-include("nouveau.hrl"). + +%% public api +-export([ + update_index/1 +]). + +%% gen_server bits +-export([ + start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +% config_listener api +-export([handle_config_change/5, handle_config_terminate/3]). + +-export([handle_db_event/3]). + +-define(BY_DBSIG, nouveau_by_dbsig). +-define(BY_REF, nouveau_by_ref). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +update_index(#index{} = Index) -> + case nouveau:enabled() of + true -> + gen_server:call(?MODULE, {update, Index}, infinity); + false -> + {error, nouveau_not_enabled} + end. + +init(_) -> + couch_util:set_mqd_off_heap(?MODULE), + ets:new(?BY_DBSIG, [set, named_table]), + ets:new(?BY_REF, [set, named_table]), + couch_event:link_listener(?MODULE, handle_db_event, nil, [all_dbs]), + configure_ibrowse(nouveau_util:nouveau_url()), + ok = config:listen_for_changes(?MODULE, nil), + {ok, nil}. 
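+
+%% ?BY_DBSIG maps {DbName, Sig} to {Index, Queue}, where Queue holds the
+%% callers waiting on the in-flight update; ?BY_REF maps the updater's
+%% monitor reference back to that {DbName, Sig} key, so the 'DOWN' clause
+%% below can reply to the oldest waiter and, if the queue is not yet
+%% empty, spawn the next updater.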
+ +handle_call({update, #index{} = Index0}, From, State) -> + DbSig = {Index0#index.dbname, Index0#index.sig}, + case ets:lookup(?BY_DBSIG, DbSig) of + [] -> + {_IndexerPid, IndexerRef} = spawn_monitor(nouveau_index_updater, update, [Index0]), + Queue = queue:in(From, queue:new()), + true = ets:insert(?BY_DBSIG, {DbSig, Index0, Queue}), + true = ets:insert(?BY_REF, {IndexerRef, DbSig}); + [{_DbSig, Index1, Queue}] -> + ets:insert(?BY_DBSIG, {DbSig, Index1, queue:in(From, Queue)}) + end, + {noreply, State}; +handle_call(_Msg, _From, State) -> + {reply, unexpected_msg, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', IndexerRef, process, _Pid, Reason}, State) -> + case ets:lookup(?BY_REF, IndexerRef) of + [] -> + % not one of ours, somehow... + {noreply, State}; + [{_, DbSig}] -> + true = ets:delete(?BY_REF, IndexerRef), + [{_, Index, Queue0}] = ets:lookup(?BY_DBSIG, DbSig), + {{value, From}, Queue1} = queue:out(Queue0), + case Reason of + normal -> + gen_server:reply(From, ok); + {error, Msg} -> + couch_log:error( + "~p: db:~s ddoc:~s index:~s failed with: ~p", + [ + ?MODULE, + mem3:dbname(Index#index.dbname), + Index#index.ddoc_id, + Index#index.name, + Msg + ] + ), + gen_server:reply(From, {error, Msg}) + end, + case queue:is_empty(Queue1) of + true -> + true = ets:delete(?BY_DBSIG, DbSig); + false -> + {_IndexerPid, NewIndexerRef} = spawn_monitor(nouveau_index_updater, update, [ + Index + ]), + true = ets:insert(?BY_DBSIG, {DbSig, Index, Queue1}), + true = ets:insert(?BY_REF, {NewIndexerRef, DbSig}) + end, + {noreply, State} + end; +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; +handle_info(_Msg, State) -> + {noreply, State}. + +handle_db_event(DbName, deleted, State) -> + couch_log:notice("Deleting indexes for ~s as database was deleted", [DbName]), + nouveau_api:delete_path(nouveau_util:index_name(DbName)), + {ok, State}; +handle_db_event(_DbName, _Event, State) -> + {ok, State}. + +handle_config_change("nouveau", "url", URL, _Persist, State) -> + configure_ibrowse(URL), + {ok, State}; +handle_config_change(_Section, _Key, _Value, _Persist, State) -> + {ok, State}. + +handle_config_terminate(_Server, stop, _State) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + erlang:send_after( + 5000, + whereis(?MODULE), + restart_config_listener + ). + +configure_ibrowse(URL) -> + #{host := Host, port := Port} = uri_string:parse(URL), + ibrowse:set_max_sessions( + Host, + Port, + config:get_integer("nouveau", "max_sessions", 100) + ), + ibrowse:set_max_pipeline_size( + Host, + Port, + config:get_integer("nouveau", "max_pipeline_size", 1000) + ). diff --git a/src/nouveau/src/nouveau_index_updater.erl b/src/nouveau/src/nouveau_index_updater.erl new file mode 100644 index 000000000..af39faecf --- /dev/null +++ b/src/nouveau/src/nouveau_index_updater.erl @@ -0,0 +1,138 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+ +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_index_updater). +-include_lib("couch/include/couch_db.hrl"). +-include("nouveau.hrl"). + +%% public api +-export([outdated/1]). + +%% callbacks +-export([update/1]). + +-import(couch_query_servers, [get_os_process/1, ret_os_process/1, proc_prompt/2]). +-import(nouveau_util, [index_path/1]). + +outdated(#index{} = Index) -> + case open_or_create_index(Index) of + {ok, IndexSeq} -> + DbSeq = get_db_seq(Index), + DbSeq > IndexSeq; + {error, Reason} -> + {error, Reason} + end. + +update(#index{} = Index) -> + {ok, Db} = couch_db:open_int(Index#index.dbname, []), + try + case open_or_create_index(Index) of + {error, Reason} -> + exit({error, Reason}); + {ok, CurSeq} -> + TotalChanges = couch_db:count_changes_since(Db, CurSeq), + couch_task_status:add_task([ + {type, search_indexer}, + {database, Index#index.dbname}, + {design_document, Index#index.ddoc_id}, + {index, Index#index.name}, + {progress, 0}, + {changes_done, 0}, + {total_changes, TotalChanges} + ]), + + %% update status every half second + couch_task_status:set_update_frequency(500), + + Proc = get_os_process(Index#index.def_lang), + try + true = proc_prompt(Proc, [<<"add_fun">>, Index#index.def, <<"nouveau">>]), + Acc0 = {Db, Index, Proc, 0, TotalChanges}, + {ok, _} = couch_db:fold_changes(Db, CurSeq, fun load_docs/2, Acc0, []) + after + ret_os_process(Proc) + end + end + after + couch_db:close(Db) + end. + +load_docs(#full_doc_info{id = <<"_design/", _/binary>>}, Acc) -> + {ok, Acc}; +load_docs(FDI, {Db, Index, Proc, ChangesDone, TotalChanges}) -> + couch_task_status:update([ + {changes_done, ChangesDone}, {progress, (ChangesDone * 100) div TotalChanges} + ]), + + DI = couch_doc:to_doc_info(FDI), + #doc_info{id = Id, high_seq = Seq, revs = [#rev_info{deleted = Del} | _]} = DI, + + case Del of + true -> + ok = nouveau_api:delete_doc(Index, Id, Seq); + false -> + {ok, Doc} = couch_db:open_doc(Db, DI, []), + Json = couch_doc:to_json_obj(Doc, []), + [Fields | _] = proc_prompt(Proc, [<<"nouveau_index_doc">>, Json]), + case Fields of + [] -> + ok = nouveau_api:delete_doc(Index, Id, Seq); + _ -> + case nouveau_api:update_doc(Index, Id, Seq, Fields) of + ok -> + ok; + {error, Reason} -> + exit({error, Reason}) + end + end + end, + {ok, {Db, Index, Proc, ChangesDone + 1, TotalChanges}}. + +open_or_create_index(#index{} = Index) -> + case get_index_seq(Index) of + {ok, UpdateSeq} -> + {ok, UpdateSeq}; + {error, {not_found, _}} -> + case nouveau_api:create_index(Index, index_definition(Index)) of + ok -> + {ok, 0}; + {error, Reason} -> + {error, Reason} + end; + {error, Reason} -> + {error, Reason} + end. + +get_db_seq(#index{} = Index) -> + {ok, Db} = couch_db:open_int(Index#index.dbname, []), + try + couch_db:get_update_seq(Db) + after + couch_db:close(Db) + end. + +get_index_seq(#index{} = Index) -> + case nouveau_api:index_info(Index) of + {ok, #{<<"update_seq">> := Seq}} -> + {ok, Seq}; + {error, Reason} -> + {error, Reason} + end. + +index_definition(#index{} = Index) -> + #{ + <<"default_analyzer">> => Index#index.default_analyzer, + <<"field_analyzers">> => Index#index.field_analyzers + }. diff --git a/src/nouveau/src/nouveau_rpc.erl b/src/nouveau/src/nouveau_rpc.erl new file mode 100644 index 000000000..02f9ce90b --- /dev/null +++ b/src/nouveau/src/nouveau_rpc.erl @@ -0,0 +1,57 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_rpc). + +-export([ + search/3, + info/2, + cleanup/2 +]). + +-include("nouveau.hrl"). +-import(nouveau_util, [index_path/1]). + +search(DbName, #index{} = Index0, QueryArgs) -> + %% Incorporate the shard name into the record. + Index1 = Index0#index{dbname = DbName}, + Update = maps:get(update, QueryArgs, true), + + %% check if index is up to date + case Update andalso nouveau_index_updater:outdated(Index1) of + true -> + case nouveau_index_manager:update_index(Index1) of + ok -> + ok; + {error, Reason} -> + rexi:reply({error, Reason}) + end; + false -> + ok; + {error, Reason} -> + rexi:reply({error, Reason}) + end, + + %% Run the search + rexi:reply(nouveau_api:search(Index1, QueryArgs)). + +info(DbName, #index{} = Index0) -> + %% Incorporate the shard name into the record. + Index1 = Index0#index{dbname = DbName}, + rexi:reply(nouveau_api:index_info(Index1)). + +cleanup(DbName, Exclusions) -> + nouveau_api:delete_path(nouveau_util:index_name(DbName), Exclusions), + rexi:reply(ok). diff --git a/src/nouveau/src/nouveau_sup.erl b/src/nouveau/src/nouveau_sup.erl new file mode 100644 index 000000000..3547b43fa --- /dev/null +++ b/src/nouveau/src/nouveau_sup.erl @@ -0,0 +1,31 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- + +-module(nouveau_sup). +-behaviour(supervisor). + +-export([start_link/0, init/1]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init(_Args) -> + Children = [ + child(nouveau_index_manager) + ], + {ok, {{one_for_one, 10, 1}, couch_epi:register_service(nouveau_epi, Children)}}. + +child(Child) -> + {Child, {Child, start_link, []}, permanent, 1000, worker, [Child]}. diff --git a/src/nouveau/src/nouveau_util.erl b/src/nouveau/src/nouveau_util.erl new file mode 100644 index 000000000..5015b8f14 --- /dev/null +++ b/src/nouveau/src/nouveau_util.erl @@ -0,0 +1,97 @@ +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).
+
+%% copied from dreyfus_index.erl
+design_doc_to_indexes(DbName, #doc{body = {Fields}} = Doc) ->
+    RawIndexes = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    case RawIndexes of
+        {IndexList} when is_list(IndexList) ->
+            {IndexNames, _} = lists:unzip(IndexList),
+            lists:flatmap(
+                fun(IndexName) ->
+                    case (catch design_doc_to_index(DbName, Doc, IndexName)) of
+                        {ok, #index{} = Index} -> [Index];
+                        _ -> []
+                    end
+                end,
+                IndexNames
+            );
+        _ ->
+            []
+    end.
+
+%% copied from dreyfus_index.erl
+design_doc_to_index(DbName, #doc{id = Id, body = {Fields}}, IndexName) ->
+    Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>),
+    {RawIndexes} = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    InvalidDDocError =
+        {invalid_design_doc, <<"index `", IndexName/binary, "` must have parameter `index`">>},
+    case lists:keyfind(IndexName, 1, RawIndexes) of
+        false ->
+            {error, {not_found, <<IndexName/binary, " not found.">>}};
+        {IndexName, {Index}} ->
+            DefaultAnalyzer = couch_util:get_value(<<"default_analyzer">>, Index, <<"standard">>),
+            FieldAnalyzers = couch_util:get_value(<<"field_analyzers">>, Index, #{}),
+            case couch_util:get_value(<<"index">>, Index) of
+                undefined ->
+                    {error, InvalidDDocError};
+                Def ->
+                    Sig = ?l2b(
+                        couch_util:to_hex(
+                            crypto:hash(
+                                sha256,
+                                term_to_binary(
+                                    {DefaultAnalyzer, FieldAnalyzers, Def}
+                                )
+                            )
+                        )
+                    ),
+                    {ok, #index{
+                        dbname = DbName,
+                        default_analyzer = DefaultAnalyzer,
+                        field_analyzers = FieldAnalyzers,
+                        ddoc_id = Id,
+                        def = Def,
+                        def_lang = Language,
+                        name = IndexName,
+                        sig = Sig
+                    }}
+            end;
+        _ ->
+            {error, InvalidDDocError}
+    end.
+
+nouveau_url() ->
+    config:get("nouveau", "url", "http://127.0.0.1:8080").
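To make design_doc_to_index/3 concrete, this is the design-document shape it parses, mirroring the Elixir test fixture later in this patch (database and index names hypothetical):

    DDoc = couch_doc:from_json_obj({[
        {<<"_id">>, <<"_design/foo">>},
        {<<"nouveau">>, {[
            {<<"bar">>, {[
                {<<"default_analyzer">>, <<"standard">>},
                {<<"index">>,
                    <<"function (doc) { index(\"string\", \"foo\", doc.foo, {store: true}); }">>}
            ]}}
        ]}}
    ]}),
    {ok, Index} = nouveau_util:design_doc_to_index(<<"db1">>, DDoc, <<"bar">>).

The resulting #index{} record carries a sig derived from the analyzers and the index function, so editing any of them produces a new index rather than reusing the old one.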
diff --git a/support/build_js.escript b/support/build_js.escript index 194201a1e..b7e78bf78 100644 --- a/support/build_js.escript +++ b/support/build_js.escript @@ -35,6 +35,7 @@ main([]) -> JsFiles = [ "share/server/dreyfus.js", + "share/server/nouveau.js", "share/server/filter.js", "share/server/mimeparse.js", "share/server/render.js", @@ -47,6 +48,7 @@ main([]) -> CoffeeFiles = [ "share/server/dreyfus.js", + "share/server/nouveau.js", "share/server/filter.js", "share/server/mimeparse.js", "share/server/render.js", diff --git a/test/elixir/test/config/nouveau.elixir b/test/elixir/test/config/nouveau.elixir new file mode 100644 index 000000000..90390a9d6 --- /dev/null +++ b/test/elixir/test/config/nouveau.elixir @@ -0,0 +1,17 @@ +%{ + "NouveauTest": [ + "search analyze", + "search info", + "search returns all items for GET", + "search returns all items for POST", + "search returns all items (paginated)", + "search for foo:bar", + "sort by string field (asc)", + "sort by string field (desc)", + "sort by numeric field (asc)", + "sort by numeric field (desc)", + "counts", + "ranges", + "ranges (open)" + ] +} diff --git a/test/elixir/test/config/test-config.ini b/test/elixir/test/config/test-config.ini index 190067643..fb47c5a4c 100644 --- a/test/elixir/test/config/test-config.ini +++ b/test/elixir/test/config/test-config.ini @@ -3,3 +3,6 @@ authentication_handlers = {chttpd_auth, jwt_authentication_handler}, {chttpd_aut [dreyfus] name = clouseau@127.0.0.1 + +[nouveau] +enable = true diff --git a/test/elixir/test/nouveau_test.exs b/test/elixir/test/nouveau_test.exs new file mode 100644 index 000000000..ee5d20542 --- /dev/null +++ b/test/elixir/test/nouveau_test.exs @@ -0,0 +1,242 @@ +defmodule NouveauTest do + use CouchTestCase + + @moduletag :search + + @moduledoc """ + Test search + """ + + def create_search_docs(db_name) do + resp = Couch.post("/#{db_name}/_bulk_docs", + headers: ["Content-Type": "application/json"], + body: %{:docs => [ + %{"_id" => "doc4", "foo" => "foo", "bar" => 42}, + %{"_id" => "doc3", "foo" => "bar", "bar" => 12.0}, + %{"_id" => "doc1", "foo" => "baz", "bar" => 0}, + %{"_id" => "doc2", "foo" => "foobar", "bar" => 100}, + ]} + ) + assert resp.status_code in [201] + end + + def create_ddoc(db_name, opts \\ %{}) do + default_ddoc = %{ + nouveau: %{ + bar: %{ + default_analyzer: "standard", + index: """ + function (doc) { + index("string", "foo", doc.foo, {store: true}); + index("double", "bar", doc.bar, {store: true}); + } + """ + } + } + } + + ddoc = Enum.into(opts, default_ddoc) + + resp = Couch.put("/#{db_name}/_design/foo", body: ddoc) + assert resp.status_code in [201] + assert Map.has_key?(resp.body, "ok") == true + end + + def get_ids(resp) do + %{:body => %{"hits" => hits}} = resp + Enum.map(hits, fn hit -> hit["doc"]["_id"] end) + end + + def get_bookmark(resp) do + %{:body => %{"bookmark" => bookmark}} = resp + bookmark + end + + test "search analyze", context do + url = "/_nouveau_analyze" + resp = Couch.post(url, + headers: ["Content-Type": "application/json"], + body: %{analyzer: "standard", text: "hello there"}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + assert resp.body == %{"tokens" => ["hello", "there"]} + end + + @tag :with_db + test "search info", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + # query it so it builds + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.get(url, query: %{q: "*:*", include_docs: true}) + assert resp.status_code 
== 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + + url = "/#{db_name}/_design/foo/_nouveau_info/bar" + resp = Couch.get(url) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + info = Map.get(resp.body, "search_index") + assert Map.get(info, "disk_size") > 0 + assert Map.get(info, "num_docs") > 0 + assert Map.get(info, "update_seq") > 0 + end + + @tag :with_db + test "search returns all items for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.get(url, query: %{q: "*:*", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + # nouveau sorts by _id as tie-breaker + assert ids == ["doc1", "doc2", "doc3", "doc4"] + end + + @tag :with_db + test "search returns all items for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc1", "doc2", "doc3", "doc4"] + end + + @tag :with_db + test "search returns all items (paginated)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + + # page 1 + resp = Couch.post(url, body: %{q: "*:*", limit: 2, include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc1", "doc2"] + + # page 2 + resp = Couch.post(url, body: %{q: "*:*", limit: 2, bookmark: get_bookmark(resp), include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc3", "doc4"] + end + + @tag :with_db + test "search for foo:bar", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "foo:bar", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc3"] + end + + @tag :with_db + test "sort by string field (asc)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", sort: "foo", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc3", "doc1", "doc4", "doc2"] + end + + @tag :with_db + test "sort by string field (desc)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", sort: "-foo", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc2", "doc4", "doc1", "doc3"] + end + + @tag :with_db + test "sort by numeric field (asc)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", 
sort: "bar", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc1", "doc3", "doc4", "doc2"] + end + + @tag :with_db + test "sort by numeric field (desc)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", sort: "-bar", include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + ids = get_ids(resp) + assert ids == ["doc2", "doc4", "doc3", "doc1"] + end + + @tag :with_db + test "counts", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", counts: ["foo"], include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + %{:body => %{"counts" => counts}} = resp + assert counts == %{"foo" => %{"bar" => 1, "baz" => 1, "foo" => 1, "foobar" => 1}} + end + + @tag :with_db + test "ranges", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", ranges: %{bar: [ + %{label: "cheap", min: 0, max: 42}, + %{label: "expensive", min: 42, min_inclusive: false, max: 1000}]}, + include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + %{:body => %{"ranges" => ranges}} = resp + assert ranges == %{"bar" => %{"cheap" => 3, "expensive" => 1}} + end + + @tag :with_db + test "ranges (open)", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/foo/_nouveau/bar" + resp = Couch.post(url, body: %{q: "*:*", ranges: %{bar: [ + %{label: "cheap", max: 42}, + %{label: "expensive", min: 42, min_inclusive: false}]}, + include_docs: true}) + assert resp.status_code == 200, "error #{resp.status_code} #{:jiffy.encode(resp.body)}" + %{:body => %{"ranges" => ranges}} = resp + assert ranges == %{"bar" => %{"cheap" => 3, "expensive" => 1}} + end + +end -- cgit v1.2.1