summaryrefslogtreecommitdiff
path: root/tests/test_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_search.py')
-rw-r--r--tests/test_search.py1219
1 files changed, 1219 insertions, 0 deletions
diff --git a/tests/test_search.py b/tests/test_search.py
new file mode 100644
index 0000000..926b5ff
--- /dev/null
+++ b/tests/test_search.py
@@ -0,0 +1,1219 @@
+import pytest
+import redis
+import bz2
+import csv
+import time
+import os
+
+from io import TextIOWrapper
+from .conftest import skip_ifmodversion_lt, default_redismod_url
+from redis import Redis
+
+import redis.commands.search
+from redis.commands.json.path import Path
+from redis.commands.search import Search
+from redis.commands.search.field import (
+ GeoField,
+ NumericField,
+ TagField,
+ TextField
+)
+from redis.commands.search.query import (
+ GeoFilter,
+ NumericFilter,
+ Query
+)
+from redis.commands.search.result import Result
+from redis.commands.search.indexDefinition import IndexDefinition, IndexType
+from redis.commands.search.suggestion import Suggestion
+import redis.commands.search.aggregation as aggregations
+import redis.commands.search.reducers as reducers
+
# Directory holding the data files used by these tests; it sits next to
# this module.
_TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "testdata")

# bz2-compressed, ';'-delimited CSV read by createIndex().
WILL_PLAY_TEXT = os.path.abspath(
    os.path.join(_TESTDATA_DIR, "will_play_text.csv.bz2")
)

# CSV of (term, score) rows read by test_auto_complete().
TITLES_CSV = os.path.abspath(os.path.join(_TESTDATA_DIR, "titles.csv"))
+
+
def waitForIndex(env, idx, timeout=None):
    """
    Poll ``ft.info`` for ``idx`` until the reply no longer reports indexing.

    Returns as soon as the reply has no "indexing" entry, or the value that
    follows "indexing" converts to 0. Otherwise sleeps 0.1s and polls again.
    When ``timeout`` (seconds) is given, gives up once that much sleep time
    has been spent; with ``timeout=None`` it polls indefinitely.
    """
    poll_interval = 0.1
    remaining = timeout
    while True:
        info = env.execute_command("ft.info", idx)
        try:
            position = info.index("indexing")
        except ValueError:
            # No "indexing" entry in the reply: nothing to wait for.
            return

        if int(info[position + 1]) == 0:
            return

        time.sleep(poll_interval)
        if remaining is None:
            continue
        remaining -= poll_interval
        if remaining <= 0:
            return
+
+
def getClient():
    """
    Return a new Redis client created from ``default_redismod_url`` with
    ``decode_responses=True``
    """
    return Redis.from_url(default_redismod_url, decode_responses=True)
+
+
def createIndex(client, num_docs=100, definition=None):
    """
    Create the play/txt/chapter index on ``client`` and fill it with documents
    built from the WILL_PLAY_TEXT data file.

    :param client: search client; must provide ``create_index``,
        ``dropindex`` and ``batch_indexer``.
    :param num_docs: reading of the data file stops once this many distinct
        "play:chapter" documents have been collected.
    :param definition: optional index definition forwarded to
        ``create_index``.

    If ``create_index`` raises ``redis.ResponseError`` the index is dropped
    together with its documents and creation is retried.
    """
    try:
        client.create_index(
            (TextField("play", weight=5.0),
             TextField("txt"),
             NumericField("chapter")),
            definition=definition,
        )
    except redis.ResponseError:
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # Closing the text wrapper also closes the underlying BZ2File, so the
    # data file handle is released even if parsing fails.
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, text = line[1], line[2], line[5]

            # Rows sharing a play and chapter are merged into one document
            # whose "txt" accumulates every row's text.
            key = "{}:{}".format(play, chapter).lower()
            d = chapters.setdefault(key, {})
            d["play"] = play
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
+
+
# override the default module client, search requires both db=0, and text
@pytest.fixture
def modclient():
    """Redis client on db 0 with decoded (text) responses."""
    connection = Redis.from_url(
        default_redismod_url,
        db=0,
        decode_responses=True,
    )
    return connection
+
+
@pytest.fixture
def client(modclient):
    """Hand out the module client after flushing its current database."""
    flushed = modclient
    flushed.flushdb()
    return flushed
+
+
@pytest.mark.redismod
def test_client(client):
    """
    Index 500 documents and exercise info, search options, document
    loading, key limiting, slop/in-order and document deletion.
    """
    num_docs = 500
    ft = client.ft()
    createIndex(ft, num_docs=num_docs)
    waitForIndex(client, "idx")

    # verify info
    info = ft.info()
    expected_info_keys = (
        "index_name",
        "index_options",
        "attributes",
        "num_docs",
        "max_doc_id",
        "num_terms",
        "num_records",
        "inverted_sz_mb",
        "offset_vectors_sz_mb",
        "doc_table_size_mb",
        "key_table_size_mb",
        "records_per_doc_avg",
        "bytes_per_record_avg",
        "offsets_per_term_avg",
        "offset_bits_per_record_avg",
    )
    for key in expected_info_keys:
        assert key in info

    assert ft.index_name == info["index_name"]
    assert int(info["num_docs"]) == num_docs

    res = ft.search("henry iv")
    assert isinstance(res, Result)
    assert res.total == 225
    assert len(res.docs) == 10
    assert res.duration > 0

    for hit in res.docs:
        assert hit.id
        assert hit.play == "Henry IV"
        assert len(hit.txt) > 0

    # test no content
    res = ft.search(Query("king").no_content())
    assert res.total == 194
    assert len(res.docs) == 10
    for hit in res.docs:
        assert "txt" not in hit.__dict__
        assert "play" not in hit.__dict__

    # test verbatim vs no verbatim
    total = ft.search(Query("kings").no_content()).total
    vtotal = ft.search(Query("kings").no_content().verbatim()).total
    assert total > vtotal

    # test in fields
    def henry_total(*fields):
        query = Query("henry").no_content().limit_fields(*fields)
        return ft.search(query).total

    txt_total = henry_total("txt")
    play_total = henry_total("play")
    both_total = henry_total("play", "txt")
    assert txt_total == 129
    assert play_total == 494
    assert both_total == 494

    # test load_document
    loaded = ft.load_document("henry vi part 3:62")
    assert loaded is not None
    assert loaded.id == "henry vi part 3:62"
    assert loaded.play == "Henry VI Part 3"
    assert len(loaded.txt) > 0

    # test in-keys
    ids = [hit.id for hit in ft.search(Query("henry")).docs]
    assert len(ids) == 10
    subset = ids[:5]
    limited = ft.search(Query("henry").limit_ids(*subset))
    assert limited.total == len(subset)
    ids = [hit.id for hit in limited.docs]
    assert set(ids) == set(subset)

    # test slop and in order
    assert ft.search(Query("henry king")).total == 193
    assert ft.search(Query("henry king").slop(0).in_order()).total == 3
    assert ft.search(Query("king henry").slop(0).in_order()).total == 52
    assert ft.search(Query("henry king").slop(0)).total == 53
    assert ft.search(Query("henry king").slop(100)).total == 167

    # test delete document
    salesman = Query("death of a salesman")
    ft.add_document("doc-5ghs2", play="Death of a Salesman")
    res = ft.search(salesman)
    assert res.total == 1

    assert ft.delete_document("doc-5ghs2") == 1
    res = ft.search(Query("death of a salesman"))
    assert res.total == 0
    # deleting an already-deleted document is expected to report 0
    assert ft.delete_document("doc-5ghs2") == 0

    ft.add_document("doc-5ghs2", play="Death of a Salesman")
    res = ft.search(Query("death of a salesman"))
    assert res.total == 1
    ft.delete_document("doc-5ghs2")
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_payloads(client):
    """A payload is returned for the document that has one, None otherwise."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    ft.add_document("doc1", payload="foo baz", txt="foo bar")
    ft.add_document("doc2", txt="foo bar")

    res = ft.search(Query("foo bar").with_payloads())
    first, second = res.docs[0], res.docs[1]
    assert res.total == 2
    assert first.id == "doc1"
    assert second.id == "doc2"
    assert first.payload == "foo baz"
    assert second.payload is None
+
+
@pytest.mark.redismod
def test_scores(client):
    """Search with scores: the document matching the optional term is first."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    ft.add_document("doc1", txt="foo baz")
    ft.add_document("doc2", txt="foo bar")

    scored_query = Query("foo ~bar").with_scores()
    res = ft.search(scored_query)
    assert res.total == 2
    best, other = res.docs[0], res.docs[1]
    assert best.id == "doc2"
    assert best.score == 3.0
    assert other.id == "doc1"
    # TODO: enable once new RS version is tagged
    # assert 0.2 == res.docs[1].score
+
+
@pytest.mark.redismod
def test_replace(client):
    """Re-adding a document with replace=True swaps its indexed content."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, txt="foo bar")
    waitForIndex(client, "idx")

    res = ft.search("foo bar")
    assert res.total == 2

    ft.add_document("doc1", replace=True, txt="this is a replaced doc")

    # the old text now matches only the untouched document
    res = ft.search("foo bar")
    assert res.total == 1
    assert res.docs[0].id == "doc2"

    # the new text matches only the replaced document
    res = ft.search("replaced doc")
    assert res.total == 1
    assert res.docs[0].id == "doc1"
+
+
@pytest.mark.redismod
def test_stopwords(client):
    """Index created with custom stopwords: those words match nothing."""
    ft = client.ft()
    custom_stopwords = ["foo", "bar", "baz"]
    ft.create_index((TextField("txt"),), stopwords=custom_stopwords)
    ft.add_document("doc1", txt="foo bar")
    ft.add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    only_stopwords = Query("foo bar").no_content()
    with_real_words = Query("foo bar hello world").no_content()
    res1 = ft.search(only_stopwords)
    res2 = ft.search(with_real_words)
    assert res1.total == 0
    assert res2.total == 1
+
+
@pytest.mark.redismod
def test_filters(client):
    """Numeric and geo filters narrow a text query to the expected docs."""
    ft = client.ft()
    schema = (TextField("txt"), NumericField("num"), GeoField("loc"))
    ft.create_index(schema)
    ft.add_document("doc1", txt="foo bar", num=3.141, loc="-0.441,51.458")
    ft.add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")

    waitForIndex(client, "idx")

    # Test numerical filter
    up_to_two = NumericFilter("num", 0, 2)
    above_two = NumericFilter("num", 2, NumericFilter.INF, minExclusive=True)
    q1 = Query("foo").add_filter(up_to_two).no_content()
    q2 = Query("foo").add_filter(above_two).no_content()
    res1 = ft.search(q1)
    res2 = ft.search(q2)

    assert res1.total == 1
    assert res2.total == 1
    assert res1.docs[0].id == "doc2"
    assert res2.docs[0].id == "doc1"

    # Test geo filter
    small_radius = GeoFilter("loc", -0.44, 51.45, 10)
    large_radius = GeoFilter("loc", -0.44, 51.45, 100)
    q1 = Query("foo").add_filter(small_radius).no_content()
    q2 = Query("foo").add_filter(large_radius).no_content()
    res1 = ft.search(q1)
    res2 = ft.search(q2)

    assert res1.total == 1
    assert res2.total == 2
    assert res1.docs[0].id == "doc1"

    # Sort results, after RDB reload order may change
    found_ids = sorted([res2.docs[0].id, res2.docs[1].id])
    assert found_ids == ["doc1", "doc2"]
+
+
@pytest.mark.redismod
def test_payloads_with_no_content(client):
    """with_payloads() combined with no_content() still yields both docs."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))
    ft.add_document("doc1", payload="foo baz", txt="foo bar")
    ft.add_document("doc2", payload="foo baz2", txt="foo bar")

    query = Query("foo bar").with_payloads().no_content()
    assert len(ft.search(query).docs) == 2
+
+
@pytest.mark.redismod
def test_sort_by(client):
    """Sorting on a sortable numeric field, ascending and descending."""
    ft = client.ft()
    ft.create_index((TextField("txt"), NumericField("num", sortable=True)))
    ft.add_document("doc1", txt="foo bar", num=1)
    ft.add_document("doc2", txt="foo baz", num=2)
    ft.add_document("doc3", txt="foo qux", num=3)

    # Test sort
    ascending = ft.search(Query("foo").sort_by("num", asc=True).no_content())
    descending = ft.search(Query("foo").sort_by("num", asc=False).no_content())

    assert ascending.total == 3
    assert ascending.docs[0].id == "doc1"
    assert ascending.docs[1].id == "doc2"
    assert ascending.docs[2].id == "doc3"
    assert descending.total == 3
    assert descending.docs[2].id == "doc1"
    assert descending.docs[1].id == "doc2"
    assert descending.docs[0].id == "doc3"
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_drop_index():
    """
    Ensure the indexed hash is removed when the index is dropped with
    delete_documents=True and is left in place with delete_documents=False
    """
    # (delete_documents flag, expected content of the hash after the drop)
    cases = ((True, {}), (False, {"name": "haveit"}))
    for _ in range(20):
        for delete_documents, expected_hash in cases:
            idx = "HaveIt"
            conn = getClient()
            conn.hset("index:haveit", mapping={"name": "haveit"})
            idef = IndexDefinition(prefix=["index:"])
            conn.ft(idx).create_index((TextField("name"),), definition=idef)
            waitForIndex(conn, idx)
            conn.ft(idx).dropindex(delete_documents=delete_documents)
            remaining = conn.hgetall("index:haveit")
            assert remaining == expected_hash
+
+
@pytest.mark.redismod
def test_example(client):
    """Smoke test: create an index, add one document, run a paged query."""
    ft = client.ft()

    # Creating the index definition and schema
    schema = (TextField("title", weight=5.0), TextField("body"))
    ft.create_index(schema)

    # Indexing a document
    ft.add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Searching with complex parameters:
    query = Query("search engine").verbatim().no_content().paging(0, 5)

    assert ft.search(query) is not None
+
+
@pytest.mark.redismod
def test_auto_complete(client):
    """Suggestion dictionary: add, length, get (plain/fuzzy), delete, payloads."""
    ft = client.ft()

    # Load every (term, score) row; sugadd must report the running count.
    n = 0
    with open(TITLES_CSV) as f:
        for n, row in enumerate(csv.reader(f), start=1):
            term, score = row[0], float(row[1])
            assert n == ft.sugadd("ac", Suggestion(term, score=score))

    assert ft.suglen("ac") == n
    ret = ft.sugget("ac", "bad", with_scores=True)
    assert len(ret) == 2
    expected_strings = ("badger", "badalte rishtey")
    for suggestion, expected in zip(ret, expected_strings):
        assert expected == suggestion.string
        assert isinstance(suggestion.score, float)
        assert 1.0 != suggestion.score

    ret = ft.sugget("ac", "bad", fuzzy=True, num=10)
    assert len(ret) == 10
    assert ret[0].score == 1.0
    strs = {x.string for x in ret}

    for sug in strs:
        assert ft.sugdel("ac", sug) == 1
    # make sure a second delete returns 0
    for sug in strs:
        assert ft.sugdel("ac", sug) == 0

    # make sure they were actually deleted
    ret2 = ft.sugget("ac", "bad", fuzzy=True, num=10)
    for sug in ret2:
        assert sug.string not in strs

    # Test with payload
    ft.sugadd("ac", Suggestion("pay1", payload="pl1"))
    ft.sugadd("ac", Suggestion("pay2", payload="pl2"))
    ft.sugadd("ac", Suggestion("pay3", payload="pl3"))

    sugs = ft.sugget("ac", "pay", with_payloads=True, with_scores=True)
    assert len(sugs) == 3
    for sug in sugs:
        assert sug.payload
        assert sug.payload.startswith("pl")
+
+
@pytest.mark.redismod
def test_no_index(client):
    """Fields declared no_index are not searchable but can still sort."""
    ft = client.ft()
    ft.create_index(
        (
            TextField("field"),
            TextField("text", no_index=True, sortable=True),
            NumericField("numeric", no_index=True, sortable=True),
            GeoField("geo", no_index=True, sortable=True),
            TagField("tag", no_index=True, sortable=True),
        )
    )

    ft.add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    ft.add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")

    res = ft.search(Query("@text:aa*"))
    assert res.total == 0

    res = ft.search(Query("@field:aa*"))
    assert res.total == 2

    res = ft.search(Query("*").sort_by("text", asc=False))
    assert res.total == 2
    assert res.docs[0].id == "doc2"

    # ascending order on every sortable field puts doc1 first
    for sort_field in ("text", "numeric", "geo", "tag"):
        res = ft.search(Query("*").sort_by(sort_field, asc=True))
        assert res.docs[0].id == "doc1"

    # Ensure exception is raised for non-indexable, non-sortable fields
    for field_cls in (TextField, NumericField, GeoField, TagField):
        with pytest.raises(Exception):
            field_cls("name", no_index=True, sortable=False)
+
+
@pytest.mark.redismod
def test_partial(client):
    """partial=True keeps a document's other fields; replace=True does not."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, f1="f1_val", f2="f2_val")
    ft.add_document("doc1", f3="f3_val", partial=True)
    ft.add_document("doc2", f3="f3_val", replace=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    with_f3 = ft.search("@f3:f3_val")
    assert with_f3.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    with_all = ft.search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert with_all.total == 1
+
+
@pytest.mark.redismod
def test_no_create(client):
    """no_create=True updates existing docs and errors for a missing one."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, f1="f1_val", f2="f2_val")
    ft.add_document("doc1", f3="f3_val", no_create=True)
    ft.add_document("doc2", f3="f3_val", no_create=True, partial=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    with_f3 = ft.search("@f3:f3_val")
    assert with_f3.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    with_all = ft.search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert with_all.total == 1

    # doc3 was never added, so a no_create update must be rejected
    with pytest.raises(redis.ResponseError):
        ft.add_document("doc3", f2="f2_val", f3="f3_val", no_create=True)
+
+
@pytest.mark.redismod
def test_explain(client):
    """explain() returns a truthy result for a multi-field query."""
    ft = client.ft()
    schema = (TextField("f1"), TextField("f2"), TextField("f3"))
    ft.create_index(schema)
    explanation = ft.explain("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert explanation
+
+
@pytest.mark.redismod
def test_summarize(client):
    """Highlight and summarize, with explicit and with default arguments."""
    ft = client.ft()
    createIndex(ft)
    waitForIndex(client, "idx")

    expected_txt = "ACT I SCENE I. London. The palace. Enter <b>KING</b> <b>HENRY</b>, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa

    # explicit fields and tags
    query = Query("king henry").paging(0, 1)
    query.highlight(fields=("play", "txt"), tags=("<b>", "</b>"))
    query.summarize("txt")

    hit = sorted(ft.search(query).docs)[0]
    assert hit.play == "<b>Henry</b> IV"
    assert hit.txt == expected_txt

    # defaults for both summarize() and highlight()
    query = Query("king henry").paging(0, 1).summarize().highlight()

    hit = sorted(ft.search(query).docs)[0]
    assert hit.play == "<b>Henry</b> ... "
    assert hit.txt == expected_txt
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_alias():
    """Alias add/update/delete across two prefix-scoped indexes."""
    index1 = getClient()
    index2 = getClient()

    index1.hset("index1:lonestar", mapping={"name": "lonestar"})
    index2.hset("index2:yogurt", mapping={"name": "yogurt"})

    # shorter pause when running under a GitHub workflow
    time.sleep(2 if "GITHUB_WORKFLOW" in os.environ else 5)

    def1 = IndexDefinition(prefix=["index1:"], score_field="name")
    def2 = IndexDefinition(prefix=["index2:"], score_field="name")

    ftindex1 = index1.ft("testAlias")
    ftindex2 = index1.ft("testAlias2")
    ftindex1.create_index((TextField("name"),), definition=def1)
    ftindex2.create_index((TextField("name"),), definition=def2)

    # CI is slower
    try:
        first_hit = ftindex1.search("*").docs[0]
    except IndexError:
        time.sleep(5)
        first_hit = ftindex1.search("*").docs[0]
    assert first_hit.id == "index1:lonestar"

    # create alias and check for results
    ftindex1.aliasadd("spaceballs")
    alias_client = getClient().ft("spaceballs")
    first_hit = alias_client.search("*").docs[0]
    assert first_hit.id == "index1:lonestar"

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        ftindex2.aliasadd("spaceballs")

    # update alias and ensure new results
    ftindex2.aliasupdate("spaceballs")
    alias_client2 = getClient().ft("spaceballs")
    first_hit = alias_client2.search("*").docs[0]
    assert first_hit.id == "index2:yogurt"

    # once the alias is deleted, querying through it must fail
    ftindex2.aliasdel("spaceballs")
    with pytest.raises(Exception):
        alias_client2.search("*").docs[0]
+
+
@pytest.mark.redismod
def test_alias_basic():
    """Alias add/update/delete on two indexes created without definitions."""
    # Creating a client with one index
    getClient().flushdb()
    index1 = getClient().ft("testAlias")

    index1.create_index((TextField("txt"),))
    index1.add_document("doc1", txt="text goes here")

    index2 = getClient().ft("testAlias2")
    index2.create_index((TextField("txt"),))
    index2.add_document("doc2", txt="text goes here")

    def by_id(document):
        return document.id

    # add the actual alias and check
    index1.aliasadd("myalias")
    alias_client = getClient().ft("myalias")
    hits = sorted(alias_client.search("*").docs, key=by_id)
    assert hits[0].id == "doc1"

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        index2.aliasadd("myalias")

    # update the alias to point at index2; the first id in sorted order is
    # still expected to be doc1
    # NOTE(review): presumably because neither index restricts its key
    # prefix, so both see doc1 and doc2 - confirm
    index2.aliasupdate("myalias")
    alias_client2 = getClient().ft("myalias")
    hits = sorted(alias_client2.search("*").docs, key=by_id)
    assert hits[0].id == "doc1"

    # delete the alias and expect an error if we try to query again
    index2.aliasdel("myalias")
    with pytest.raises(Exception):
        _ = alias_client2.search("*").docs[0]
+
+
@pytest.mark.redismod
def test_tags(client):
    """
    Tag field queries (plain, with spaces, escaped) and tagvals().

    Each of the four tag queries is expected to match exactly one document,
    and tagvals("tags") is expected to return every comma-separated value
    that was indexed.
    """
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    q = Query("@tags:{foo}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo\\ bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{hello\\;world}")
    res = client.ft().search(q)
    assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # list.sort() returns None, so comparing two .sort() results is always
    # true; compare sorted copies so the returned values are really checked.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)
+
+
@pytest.mark.redismod
def test_textfield_sortable_nostem(client):
    """SORTABLE and NOSTEM show up in the index info for the text field."""
    ft = client.ft()

    # Creating the index definition with sortable and no_stem
    field = TextField("txt", sortable=True, no_stem=True)
    ft.create_index((field,))

    # Now get the index info to confirm its contents
    first_attribute = ft.info()["attributes"][0]
    for flag in ("SORTABLE", "NOSTEM"):
        assert flag in first_attribute
+
+
@pytest.mark.redismod
def test_alter_schema_add(client):
    """A field added with alter_schema_add is searchable afterwards."""
    ft = client.ft()

    # Creating the index definition and schema
    ft.create_index(TextField("title"))

    # Using alter to add a field
    ft.alter_schema_add(TextField("body"))

    # Indexing a document
    ft.add_document(
        "doc1",
        title="MyTitle",
        body="Some content only in the body",
    )

    # Searching with parameter only in the body (the added field)
    body_only = Query("only in the body")

    # Ensure we find the result searching on the added body field
    assert ft.search(body_only).total == 1
+
+
@pytest.mark.redismod
def test_spell_check(client):
    """spellcheck(): plain, with distance, and with include/exclude dicts."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    ft.add_document("doc1", f1="some valid content", f2="this is sample text")
    ft.add_document("doc2", f1="very important", f2="lorem ipsum")
    waitForIndex(client, "idx")

    # test spellcheck
    res = ft.spellcheck("impornant")
    assert res["impornant"][0]["suggestion"] == "important"

    res = ft.spellcheck("contnt")
    assert res["contnt"][0]["suggestion"] == "content"

    # test spellcheck with Levenshtein distance
    res = ft.spellcheck("vlis")
    assert res == {}
    res = ft.spellcheck("vlis", distance=2)
    assert res["vlis"][0]["suggestion"] == "valid"

    # test spellcheck include
    ft.dict_add("dict", "lore", "lorem", "lorm")
    res = ft.spellcheck("lorm", include="dict")
    candidates = res["lorm"]
    assert len(candidates) == 3
    suggestions = (
        candidates[0]["suggestion"],
        candidates[1]["suggestion"],
        candidates[2]["suggestion"],
    )
    assert suggestions == ("lorem", "lore", "lorm")
    scores = (candidates[0]["score"], candidates[1]["score"])
    assert scores == ("0.5", "0")

    # test spellcheck exclude
    res = ft.spellcheck("lorm", exclude="dict")
    assert res == {}
+
+
@pytest.mark.redismod
def test_dict_operations(client):
    """dict_add / dict_del / dict_dump on a custom dictionary."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    # Add three items
    added = ft.dict_add("custom_dict", "item1", "item2", "item3")
    assert added == 3

    # Remove one item
    removed = ft.dict_del("custom_dict", "item2")
    assert removed == 1

    # Dump dict and inspect content
    remaining = ft.dict_dump("custom_dict")
    assert remaining == ["item1", "item3"]

    # Remove rest of the items before reload
    ft.dict_del("custom_dict", *remaining)
+
+
@pytest.mark.redismod
def test_phonetic_matcher(client):
    """With a phonetic matcher, "Jon" also finds "John"."""
    ft = client.ft()

    def add_names():
        ft.add_document("doc1", name="Jon")
        ft.add_document("doc2", name="John")

    ft.create_index((TextField("name"),))
    add_names()

    res = ft.search(Query("Jon"))
    assert len(res.docs) == 1
    assert res.docs[0].name == "Jon"

    # Drop and create index with phonetic matcher
    client.flushdb()

    ft.create_index((TextField("name", phonetic_matcher="dm:en"),))
    add_names()

    res = ft.search(Query("Jon"))
    assert len(res.docs) == 2
    assert sorted([d.name for d in res.docs]) == ["John", "Jon"]
+
+
@pytest.mark.redismod
def test_scorer(client):
    """Top score for "quick" under the default and each named scorer."""
    ft = client.ft()
    ft.create_index((TextField("description"),))

    ft.add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    ft.add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    )

    # default scorer is TFIDF
    res = ft.search(Query("quick").with_scores())
    assert res.docs[0].score == 1.0

    expected_top_scores = (
        ("TFIDF", 1.0),
        ("TFIDF.DOCNORM", 0.1111111111111111),
        ("BM25", 0.17699114465425977),
        ("DISMAX", 2.0),
        ("DOCSCORE", 1.0),
        ("HAMMING", 0.0),
    )
    for scorer_name, expected in expected_top_scores:
        res = ft.search(Query("quick").scorer(scorer_name).with_scores())
        assert res.docs[0].score == expected
+
+
@pytest.mark.redismod
def test_get(client):
    """get() yields None per missing id and a flat field list per found id."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    # nothing has been added yet
    assert ft.get("doc1") == [None]
    assert ft.get("doc2", "doc1") == [None, None]

    ft.add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    ft.add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    doc1_fields = ["f1", "some valid content dd1", "f2", "this is sample text ff1"]  # noqa
    doc2_fields = ["f1", "some valid content dd2", "f2", "this is sample text ff2"]  # noqa

    assert ft.get("doc2") == [doc2_fields]
    assert ft.get("doc1", "doc2") == [doc1_fields, doc2_fields]
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_config(client):
    """config_set accepts a valid TIMEOUT, rejects "null"; config_get reads it."""
    ft = client.ft()
    assert ft.config_set("TIMEOUT", "100")
    with pytest.raises(redis.ResponseError):
        ft.config_set("TIMEOUT", "null")

    # the rejected value must not have replaced the accepted one
    for option in ("*", "TIMEOUT"):
        settings = ft.config_get(option)
        assert settings["TIMEOUT"] == "100"
+
+
@pytest.mark.redismod
def test_aggregations(client):
    """Group three documents by @parent and check every reducer's output."""
    ft = client.ft()

    # Creating the index definition and schema
    ft.create_index(
        (
            NumericField("random_num"),
            TextField("title"),
            TextField("body"),
            TextField("parent"),
        )
    )

    # Indexing a document
    ft.add_document(
        "search",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
        parent="redis",
        random_num=10,
    )
    ft.add_document(
        "ai",
        title="RedisAI",
        body="RedisAI executes Deep Learning/Machine Learning models and managing their data.",  # noqa
        parent="redis",
        random_num=3,
    )
    ft.add_document(
        "json",
        title="RedisJson",
        body="RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.",  # noqa
        parent="redis",
        random_num=8,
    )

    group_reducers = (
        reducers.count(),
        reducers.count_distinct("@title"),
        reducers.count_distinctish("@title"),
        reducers.sum("@random_num"),
        reducers.min("@random_num"),
        reducers.max("@random_num"),
        reducers.avg("@random_num"),
        reducers.stddev("random_num"),
        reducers.quantile("@random_num", 0.5),
        reducers.tolist("@title"),
        reducers.first_value("@title"),
        reducers.random_sample("@title", 2),
    )
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent", *group_reducers
    )

    row = ft.aggregate(req).rows[0]
    assert len(row) == 26

    # The assertions below read the values at the odd positions of the row.
    expected_at = {
        1: "redis",
        3: "3",
        5: "3",
        7: "3",
        9: "21",
        11: "3",
        13: "10",
        15: "7",
        17: "3.60555127546",
        19: "10",
        21: ["RediSearch", "RedisAI", "RedisJson"],
        23: "RediSearch",
    }
    for position, expected in expected_at.items():
        assert expected == row[position]
    assert len(row[25]) == 2
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_index_definition(client):
    """
    Build an IndexDefinition with every option and check its args
    """
    prefixes = ["hset:", "henry"]

    # a plain string is not accepted as index_type
    with pytest.raises(RuntimeError):
        IndexDefinition(prefix=["hset:", "henry"], index_type="json")

    definition = IndexDefinition(
        prefix=prefixes,
        filter="@f1==32",
        language="English",
        language_field="play",
        score_field="chapter",
        score=0.5,
        payload_field="txt",
        index_type=IndexType.JSON,
    )

    expected_args = ["ON", "JSON"]
    expected_args += ["PREFIX", 2, "hset:", "henry"]
    expected_args += ["FILTER", "@f1==32"]
    expected_args += ["LANGUAGE_FIELD", "play"]
    expected_args += ["LANGUAGE", "English"]
    expected_args += ["SCORE_FIELD", "chapter"]
    expected_args += ["SCORE", 0.5]
    expected_args += ["PAYLOAD_FIELD", "txt"]
    assert expected_args == definition.args

    createIndex(client.ft(), num_docs=500, definition=definition)
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition(client):
    """
    Definition without an explicit index type (the default is HASH):
    a hash written with hset under an indexed prefix raises num_docs.
    """
    ft = client.ft()
    definition = IndexDefinition(prefix=["hset:", "henry"])
    createIndex(ft, num_docs=500, definition=definition)

    docs_before = int(ft.info()["num_docs"])
    assert docs_before == 494

    ft.client.hset("hset:1", "f1", "v1")
    docs_after = int(ft.info()["num_docs"])
    assert docs_after == 495
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition_hash(client):
    """
    Definition with IndexType.HASH (ON HASH): a hash written with hset
    under an indexed prefix raises num_docs.
    """
    ft = client.ft()
    definition = IndexDefinition(
        prefix=["hset:", "henry"],
        index_type=IndexType.HASH,
    )
    createIndex(ft, num_docs=500, definition=definition)

    docs_before = int(ft.info()["num_docs"])
    assert docs_before == 494

    ft.client.hset("hset:1", "f1", "v1")
    docs_after = int(ft.info()["num_docs"])
    assert docs_after == 495
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_client_definition_json(client):
    """
    Definition with IndexType.JSON (ON JSON): documents stored through the
    json client are found by search.
    """
    ft = client.ft()
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    ft.create_index((TextField("$.name"),), definition=definition)

    kings = (("king:1", "henry"), ("king:2", "james"))
    for key, name in kings:
        client.json().set(key, Path.rootPath(), {"name": name})

    res = ft.search("henry")
    top = res.docs[0]
    assert top.id == "king:1"
    assert top.payload is None
    assert top.json == '{"name":"henry"}'
    assert res.total == 1
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_fields_as_name(client):
    """JSON fields declared with as_name are returned under that name."""
    ft = client.ft()

    # create index
    SCHEMA = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    ft.create_index(
        SCHEMA,
        definition=IndexDefinition(index_type=IndexType.JSON),
    )

    # insert json data
    stored = client.json().set(
        "doc:1", Path.rootPath(), {"name": "Jon", "age": 25}
    )
    assert stored

    query = Query("Jon").return_fields("name", "just_a_number")
    found = ft.search(query).docs
    assert len(found) == 1
    hit = found[0]
    assert hit.id == "doc:1"
    assert hit.name == "Jon"
    assert hit.just_a_number == "25"
+
+
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_search_return_fields(client):
    """return_field() with as_field, for a schema path and a non-schema path."""
    ft = client.ft()
    stored = client.json().set(
        "doc:1",
        Path.rootPath(),
        {"t": "riceratops", "t2": "telmatosaurus", "n": 9072, "flt": 97.2},
    )
    assert stored

    # create index on
    definition = IndexDefinition(index_type=IndexType.JSON)
    SCHEMA = (TextField("$.t"), NumericField("$.flt"))
    ft.create_index(SCHEMA, definition=definition)
    waitForIndex(client, "idx")

    # "$.t" is in the schema, "$.t2" is not
    expectations = (("$.t", "riceratops"), ("$.t2", "telmatosaurus"))
    for json_path, expected_text in expectations:
        query = Query("*").return_field(json_path, as_field="txt")
        found = ft.search(query).docs
        assert len(found) == 1
        assert found[0].id == "doc:1"
        assert found[0].txt == expected_text
+
+
@pytest.mark.redismod
def test_synupdate(client):
    """A term added to a synonym group is found via the SYNONYM expander."""
    ft = client.ft()
    schema = (TextField("title"), TextField("body"))
    ft.create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    ft.synupdate("id1", True, "boy", "child", "offspring")
    ft.add_document("doc1", title="he is a baby", body="this is a test")

    # "baby" joins group id1 only after doc1 has been added
    ft.synupdate("id1", True, "baby")
    ft.add_document("doc2", title="he is another baby", body="another test")

    res = ft.search(Query("child").expander("SYNONYM"))
    top = res.docs[0]
    assert top.id == "doc2"
    assert top.title == "he is another baby"
    assert top.body == "another test"
+
+
@pytest.mark.redismod
def test_syndump(client):
    """syndump() maps every term to the ids of the groups that contain it."""
    ft = client.ft()
    schema = (TextField("title"), TextField("body"))
    ft.create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    synonym_groups = (
        ("id1", ("boy", "child", "offspring")),
        ("id2", ("baby", "child")),
        ("id3", ("tree", "wood")),
    )
    for group_id, terms in synonym_groups:
        ft.synupdate(group_id, False, *terms)

    dumped = ft.syndump()
    expected = {
        "boy": ["id1"],
        "tree": ["id3"],
        "wood": ["id3"],
        "child": ["id1", "id2"],
        "baby": ["id2"],
        "offspring": ["id1"],
    }
    assert dumped == expected