import bz2
import csv
import os
import time
from io import TextIOWrapper

import pytest

import redis

# NOTE(review): in the extracted text the dotted ``redis.commands.search``
# module paths below, and attribute accesses such as ``res.total`` /
# ``res.docs`` / ``doc.id`` throughout this file, were missing.  They have
# been restored from context -- confirm against the upstream file.
import redis.commands.search
import redis.commands.search.aggregation as aggregations
import redis.commands.search.reducers as reducers
from redis import Redis
from redis.commands.json.path import Path
from redis.commands.search import Search
from redis.commands.search.field import GeoField, NumericField, TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import GeoFilter, NumericFilter, Query
from redis.commands.search.result import Result
from redis.commands.search.suggestion import Suggestion

from .conftest import default_redismod_url, skip_ifmodversion_lt

WILL_PLAY_TEXT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "testdata", "will_play_text.csv.bz2")
)

TITLES_CSV = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "testdata", "titles.csv")
)


def waitForIndex(env, idx, timeout=None):
    """
    Poll the index info reply until the index reports it is done indexing.

    :param env: client exposing ``execute_command``
    :param idx: index name passed to the info command
    :param timeout: optional budget in seconds; ``None`` polls indefinitely

    Returns as soon as the reply has no ``"indexing"`` entry, that entry is
    0, or the timeout budget is used up.
    """
    delay = 0.1
    while True:
        # NOTE(review): the command name was empty in the extracted text;
        # "ft.info" is restored because its flat reply is what is parsed below.
        res = env.execute_command("ft.info", idx)
        try:
            pos = res.index("indexing")
        except ValueError:
            # The reply carries no "indexing" entry: nothing to wait for.
            break

        # The reply is a flat [key, value, ...] list; the value follows the key.
        if int(res[pos + 1]) == 0:
            break

        time.sleep(delay)
        if timeout is not None:
            timeout -= delay
            if timeout <= 0:
                break


def getClient():
    """
    Gets a client attached to the redismod URL, with decoded (text) responses
    """
    rc = Redis.from_url(default_redismod_url, decode_responses=True)
    return rc


def createIndex(client, num_docs=100, definition=None):
    """
    Create the play/txt/chapter index and load ``num_docs`` chapters into it.

    :param client: search client (``Redis.ft()``) used to create and fill the
        index
    :param num_docs: number of distinct ``play:chapter`` documents to load
    :param definition: optional IndexDefinition forwarded to ``create_index``

    If ``create_index`` raises ``redis.ResponseError`` the index is dropped
    (with its documents) and creation is retried.
    """
    try:
        client.create_index(
            (TextField("play", weight=5.0), TextField("txt"), NumericField("chapter")),
            definition=definition,
        )
    except redis.ResponseError:
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # Close the compressed stream deterministically once enough rows are read.
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, _, text = line[1], line[2], line[4], line[5]

            key = f"{play}:{chapter}".lower()
            d = chapters.setdefault(key, {})
            d["play"] = play
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            # Stop right after the num_docs-th key appears; the hard-coded
            # totals asserted by the tests depend on this exact cut-off.
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()


# override the default module client, search requires both db=0, and text
@pytest.fixture
def modclient():
    return Redis.from_url(default_redismod_url, db=0, decode_responses=True)


@pytest.fixture
def client(modclient):
    """Return the module client after flushing its database."""
    modclient.flushdb()
    return modclient


@pytest.mark.redismod
def test_client(client):
    """Index 500 chapters and exercise info, search, load and delete."""
    num_docs = 500
    createIndex(client.ft(), num_docs=num_docs)
    waitForIndex(client, "idx")
    # verify info
    info = client.ft().info()
    for k in [
        "index_name",
        "index_options",
        "attributes",
        "num_docs",
        "max_doc_id",
        "num_terms",
        "num_records",
        "inverted_sz_mb",
        "offset_vectors_sz_mb",
        "doc_table_size_mb",
        "key_table_size_mb",
        "records_per_doc_avg",
        "bytes_per_record_avg",
        "offsets_per_term_avg",
        "offset_bits_per_record_avg",
    ]:
        assert k in info

    assert client.ft().index_name == info["index_name"]
    assert num_docs == int(info["num_docs"])

    res = client.ft().search("henry iv")
    assert isinstance(res, Result)
    assert 225 == res.total
    assert 10 == len(res.docs)
    assert res.duration > 0

    for doc in res.docs:
        assert doc.id
        assert doc.play == "Henry IV"
        assert len(doc.txt) > 0

    # test no content
    res = client.ft().search(Query("king").no_content())
    assert 194 == res.total
    assert 10 == len(res.docs)
    for doc in res.docs:
        assert "txt" not in doc.__dict__
        assert "play" not in doc.__dict__

    # test verbatim vs no verbatim
    total = client.ft().search(Query("kings").no_content()).total
    vtotal = client.ft().search(Query("kings").no_content().verbatim()).total
    assert total > vtotal

    # test in fields
    txt_total = (
        client.ft().search(Query("henry").no_content().limit_fields("txt")).total
    )
    play_total = (
        client.ft().search(Query("henry").no_content().limit_fields("play")).total
    )
    both_total = (
        client.ft()
        .search(Query("henry").no_content().limit_fields("play", "txt"))
        .total
    )
    assert 129 == txt_total
    assert 494 == play_total
    assert 494 == both_total

    # test load_document
    doc = client.ft().load_document("henry vi part 3:62")
    assert doc is not None
    assert "henry vi part 3:62" == doc.id
    assert doc.play == "Henry VI Part 3"
    assert len(doc.txt) > 0

    # test in-keys
    ids = [x.id for x in client.ft().search(Query("henry")).docs]
    assert 10 == len(ids)
    subset = ids[:5]
    docs = client.ft().search(Query("henry").limit_ids(*subset))
    assert len(subset) == docs.total
    ids = [x.id for x in docs.docs]
    assert set(ids) == set(subset)

    # test slop and in order
    assert 193 == client.ft().search(Query("henry king")).total
    assert 3 == client.ft().search(Query("henry king").slop(0).in_order()).total
    assert 52 == client.ft().search(Query("king henry").slop(0).in_order()).total
    assert 53 == client.ft().search(Query("henry king").slop(0)).total
    assert 167 == client.ft().search(Query("henry king").slop(100)).total

    # test delete document
    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = client.ft().search(Query("death of a salesman"))
    assert 1 == res.total

    assert 1 == client.ft().delete_document("doc-5ghs2")
    res = client.ft().search(Query("death of a salesman"))
    assert 0 == res.total
    # deleting an already-deleted document reports 0
    assert 0 == client.ft().delete_document("doc-5ghs2")

    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = client.ft().search(Query("death of a salesman"))
    assert 1 == res.total
    client.ft().delete_document("doc-5ghs2")


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_payloads(client):
    """with_payloads() returns the stored payload, or None when absent."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")

    q = Query("foo bar").with_payloads()
    res = client.ft().search(q)
    assert 2 == res.total
    assert "doc1" == res.docs[0].id
    assert "doc2" == res.docs[1].id
    assert "foo baz" == res.docs[0].payload
    assert res.docs[1].payload is None


@pytest.mark.redismod
def test_scores(client):
    """with_scores() exposes a score on each returned document."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", txt="foo baz")
    client.ft().add_document("doc2", txt="foo bar")

    q = Query("foo ~bar").with_scores()
    res = client.ft().search(q)
    assert 2 == res.total
    assert "doc2" == res.docs[0].id
    assert 3.0 == res.docs[0].score
    assert "doc1" == res.docs[1].id
    # todo: enable once new RS version is tagged
    # self.assertEqual(0.2, res.docs[1].score)


@pytest.mark.redismod
def test_replace(client):
    """add_document(replace=True) swaps the indexed content of a document."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")
    waitForIndex(client, "idx")

    res = client.ft().search("foo bar")
    assert 2 == res.total
    client.ft().add_document("doc1", replace=True, txt="this is a replaced doc")

    res = client.ft().search("foo bar")
    assert 1 == res.total
    assert "doc2" == res.docs[0].id

    res = client.ft().search("replaced doc")
    assert 1 == res.total
    assert "doc1" == res.docs[0].id


@pytest.mark.redismod
def test_stopwords(client):
    """Terms listed as index stopwords do not match on their own."""
    client.ft().create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"])
    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    q1 = Query("foo bar").no_content()
    q2 = Query("foo bar hello world").no_content()
    res1, res2 = client.ft().search(q1), client.ft().search(q2)
    assert 0 == res1.total
    assert 1 == res2.total


@pytest.mark.redismod
def test_filters(client):
    """NumericFilter and GeoFilter narrow the result set."""
    client.ft().create_index((TextField("txt"), NumericField("num"), GeoField("loc")))
    client.ft().add_document("doc1", txt="foo bar", num=3.141, loc="-0.441,51.458")
    client.ft().add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")

    waitForIndex(client, "idx")
    # Test numerical filter
    q1 = Query("foo").add_filter(NumericFilter("num", 0, 2)).no_content()
    q2 = (
        Query("foo")
        .add_filter(NumericFilter("num", 2, NumericFilter.INF, minExclusive=True))
        .no_content()
    )
    res1, res2 = client.ft().search(q1), client.ft().search(q2)

    assert 1 == res1.total
    assert 1 == res2.total
    assert "doc2" == res1.docs[0].id
    assert "doc1" == res2.docs[0].id

    # Test geo filter
    q1 = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 10)).no_content()
    q2 = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 100)).no_content()
    res1, res2 = client.ft().search(q1), client.ft().search(q2)

    assert 1 == res1.total
    assert 2 == res2.total
    assert "doc1" == res1.docs[0].id

    # Sort results, after RDB reload order may change
    res = [res2.docs[0].id, res2.docs[1].id]
    res.sort()
    assert ["doc1", "doc2"] == res
# NOTE(review): attribute accesses such as ``res.total`` / ``res.docs`` /
# ``res.id`` and calls such as ``alias_client.search(`` were missing from the
# extracted text of the tests below and have been restored from context --
# confirm against the upstream file.
@pytest.mark.redismod
def test_payloads_with_no_content(client):
    """with_payloads() combined with no_content() still returns every doc."""
    client.ft().create_index((TextField("txt"),))
    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", payload="foo baz2", txt="foo bar")

    q = Query("foo bar").with_payloads().no_content()
    res = client.ft().search(q)
    assert 2 == len(res.docs)


@pytest.mark.redismod
def test_sort_by(client):
    """sort_by() orders results ascending or descending on a sortable field."""
    client.ft().create_index((TextField("txt"), NumericField("num", sortable=True)))
    client.ft().add_document("doc1", txt="foo bar", num=1)
    client.ft().add_document("doc2", txt="foo baz", num=2)
    client.ft().add_document("doc3", txt="foo qux", num=3)

    # Test sort
    q1 = Query("foo").sort_by("num", asc=True).no_content()
    q2 = Query("foo").sort_by("num", asc=False).no_content()
    res1, res2 = client.ft().search(q1), client.ft().search(q2)

    assert 3 == res1.total
    assert "doc1" == res1.docs[0].id
    assert "doc2" == res1.docs[1].id
    assert "doc3" == res1.docs[2].id
    assert 3 == res2.total
    assert "doc1" == res2.docs[2].id
    assert "doc2" == res2.docs[1].id
    assert "doc3" == res2.docs[0].id


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_drop_index():
    """
    Ensure the index gets dropped and the data is removed only when
    delete_documents is set
    """
    # (delete_documents flag, expected content of the hash afterwards)
    cases = ((True, {}), (False, {"name": "haveit"}))
    for x in range(20):
        for delete_documents, expected in cases:
            idx = "HaveIt"
            index = getClient()
            index.hset("index:haveit", mapping={"name": "haveit"})
            idef = IndexDefinition(prefix=["index:"])
            index.ft(idx).create_index((TextField("name"),), definition=idef)
            waitForIndex(index, idx)
            index.ft(idx).dropindex(delete_documents=delete_documents)
            i = index.hgetall("index:haveit")
            assert i == expected


@pytest.mark.redismod
def test_example(client):
    """Smoke test: create an index, add one document, run one query."""
    # Creating the index definition and schema
    client.ft().create_index((TextField("title", weight=5.0), TextField("body")))

    # Indexing a document
    client.ft().add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)

    res = client.ft().search(q)
    assert res is not None


@pytest.mark.redismod
def test_auto_complete(client):
    """Exercise sugadd / suglen / sugget / sugdel, including payloads."""
    n = 0
    with open(TITLES_CSV) as f:
        cr = csv.reader(f)

        for row in cr:
            n += 1
            term, score = row[0], float(row[1])
            # sugadd is expected to return the running number of suggestions
            assert n == client.ft().sugadd("ac", Suggestion(term, score=score))

    assert n == client.ft().suglen("ac")
    ret = client.ft().sugget("ac", "bad", with_scores=True)
    assert 2 == len(ret)
    assert "badger" == ret[0].string
    assert isinstance(ret[0].score, float)
    assert 1.0 != ret[0].score
    assert "badalte rishtey" == ret[1].string
    assert isinstance(ret[1].score, float)
    assert 1.0 != ret[1].score

    ret = client.ft().sugget("ac", "bad", fuzzy=True, num=10)
    assert 10 == len(ret)
    assert 1.0 == ret[0].score
    strs = {x.string for x in ret}

    for sug in strs:
        assert 1 == client.ft().sugdel("ac", sug)
    # make sure a second delete returns 0
    for sug in strs:
        assert 0 == client.ft().sugdel("ac", sug)

    # make sure they were actually deleted
    ret2 = client.ft().sugget("ac", "bad", fuzzy=True, num=10)
    for sug in ret2:
        assert sug.string not in strs

    # Test with payload
    client.ft().sugadd("ac", Suggestion("pay1", payload="pl1"))
    client.ft().sugadd("ac", Suggestion("pay2", payload="pl2"))
    client.ft().sugadd("ac", Suggestion("pay3", payload="pl3"))

    sugs = client.ft().sugget("ac", "pay", with_payloads=True, with_scores=True)
    assert 3 == len(sugs)
    for sug in sugs:
        assert sug.payload
        assert sug.payload.startswith("pl")


@pytest.mark.redismod
def test_no_index(client):
    """no_index fields are not searchable but can still be sorted on."""
    client.ft().create_index(
        (
            TextField("field"),
            TextField("text", no_index=True, sortable=True),
            NumericField("numeric", no_index=True, sortable=True),
            GeoField("geo", no_index=True, sortable=True),
            TagField("tag", no_index=True, sortable=True),
        )
    )

    client.ft().add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    client.ft().add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")

    res = client.ft().search(Query("@text:aa*"))
    assert 0 == res.total

    res = client.ft().search(Query("@field:aa*"))
    assert 2 == res.total

    res = client.ft().search(Query("*").sort_by("text", asc=False))
    assert 2 == res.total
    assert "doc2" == res.docs[0].id

    res = client.ft().search(Query("*").sort_by("text", asc=True))
    assert "doc1" == res.docs[0].id

    res = client.ft().search(Query("*").sort_by("numeric", asc=True))
    assert "doc1" == res.docs[0].id

    res = client.ft().search(Query("*").sort_by("geo", asc=True))
    assert "doc1" == res.docs[0].id

    res = client.ft().search(Query("*").sort_by("tag", asc=True))
    assert "doc1" == res.docs[0].id

    # Ensure exception is raised for non-indexable, non-sortable fields
    with pytest.raises(Exception):
        TextField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        NumericField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        GeoField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        TagField("name", no_index=True, sortable=False)


@pytest.mark.redismod
def test_partial(client):
    """partial=True keeps existing fields; replace=True drops them."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    client.ft().add_document("doc1", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc2", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc1", f3="f3_val", partial=True)
    client.ft().add_document("doc2", f3="f3_val", replace=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    res = client.ft().search("@f3:f3_val")
    assert 2 == res.total

    # Only the document updated with PARTIAL should still have f1 and f2 values
    res = client.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert 1 == res.total


@pytest.mark.redismod
def test_no_create(client):
    """no_create=True updates existing documents and rejects new ones."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    client.ft().add_document("doc1", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc2", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc1", f3="f3_val", no_create=True)
    client.ft().add_document("doc2", f3="f3_val", no_create=True, partial=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    res = client.ft().search("@f3:f3_val")
    assert 2 == res.total

    # Only the document updated with PARTIAL should still have f1 and f2 values
    res = client.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert 1 == res.total

    with pytest.raises(redis.ResponseError):
        client.ft().add_document("doc3", f2="f2_val", f3="f3_val", no_create=True)


@pytest.mark.redismod
def test_explain(client):
    """explain() returns a truthy execution plan for a query."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    res = client.ft().explain("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert res


@pytest.mark.redismod
def test_explaincli(client):
    """explain_cli() is expected to raise NotImplementedError."""
    with pytest.raises(NotImplementedError):
        client.ft().explain_cli("foo")


@pytest.mark.redismod
def test_summarize(client):
    """Check summarize() / highlight() output on the loaded corpus."""
    createIndex(client.ft())
    waitForIndex(client, "idx")

    # NOTE(review): the highlight tags and the expected strings in this test
    # look like they lost markup during extraction (RediSearch's default
    # HIGHLIGHT tags are <b>...</b>, which the second query relies on).  The
    # literals are kept exactly as found -- confirm against upstream.
    q = Query("king henry").paging(0, 1)
    q.highlight(fields=("play", "txt"), tags=("", ""))
    q.summarize("txt")

    doc = sorted(client.ft().search(q).docs)[0]
    assert "Henry IV" == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
        == doc.txt
    )

    q = Query("king henry").paging(0, 1).summarize().highlight()

    doc = sorted(client.ft().search(q).docs)[0]
    assert "Henry ... " == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
        == doc.txt
    )


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_alias():
    """Alias add / update / delete across two prefix-scoped indexes."""
    index1 = getClient()
    index2 = getClient()

    def1 = IndexDefinition(prefix=["index1:"])
    def2 = IndexDefinition(prefix=["index2:"])

    ftindex1 = index1.ft("testAlias")
    ftindex2 = index2.ft("testAlias2")
    ftindex1.create_index((TextField("name"),), definition=def1)
    ftindex2.create_index((TextField("name"),), definition=def2)

    index1.hset("index1:lonestar", mapping={"name": "lonestar"})
    index2.hset("index2:yogurt", mapping={"name": "yogurt"})

    res = ftindex1.search("*").docs[0]
    assert "index1:lonestar" == res.id

    # create alias and check for results
    ftindex1.aliasadd("spaceballs")
    alias_client = getClient().ft("spaceballs")
    res = alias_client.search("*").docs[0]
    assert "index1:lonestar" == res.id

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        ftindex2.aliasadd("spaceballs")

    # update alias and ensure new results
    ftindex2.aliasupdate("spaceballs")
    alias_client2 = getClient().ft("spaceballs")

    res = alias_client2.search("*").docs[0]
    assert "index2:yogurt" == res.id

    ftindex2.aliasdel("spaceballs")
    with pytest.raises(Exception):
        alias_client2.search("*").docs[0]


@pytest.mark.redismod
def test_alias_basic():
    """Alias add / update / delete on two indexes without prefixes."""
    # Creating a client with one index
    getClient().flushdb()
    index1 = getClient().ft("testAlias")

    index1.create_index((TextField("txt"),))
    index1.add_document("doc1", txt="text goes here")

    index2 = getClient().ft("testAlias2")
    index2.create_index((TextField("txt"),))
    index2.add_document("doc2", txt="text goes here")

    # add the actual alias and check
    index1.aliasadd("myalias")
    alias_client = getClient().ft("myalias")
    res = sorted(alias_client.search("*").docs, key=lambda x: x.id)
    assert "doc1" == res[0].id

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        index2.aliasadd("myalias")

    # update the alias and ensure we get doc2
    index2.aliasupdate("myalias")
    alias_client2 = getClient().ft("myalias")
    res = sorted(alias_client2.search("*").docs, key=lambda x: x.id)
    assert "doc1" == res[0].id

    # delete the alias and expect an error if we try to query again
    index2.aliasdel("myalias")
    with pytest.raises(Exception):
        _ = alias_client2.search("*").docs[0]


@pytest.mark.redismod
def test_tags(client):
    """Tag queries (with escaping) and tagvals() on a TagField."""
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    q = Query("@tags:{foo}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo\\ bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{hello\\;world}")
    res = client.ft().search(q)
    assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # list.sort() returns None, so comparing two .sort() results is always
    # True; compare sorted copies so the tag values are actually checked.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)


@pytest.mark.redismod
def test_textfield_sortable_nostem(client):
    """SORTABLE and NOSTEM show up in the index attributes."""
    # Creating the index definition with sortable and no_stem
    client.ft().create_index((TextField("txt", sortable=True, no_stem=True),))

    # Now get the index info to confirm its contents
    response = client.ft().info()
    assert "SORTABLE" in response["attributes"][0]
    assert "NOSTEM" in response["attributes"][0]


@pytest.mark.redismod
def test_alter_schema_add(client):
    """A field added with alter_schema_add() becomes searchable."""
    # Creating the index definition and schema
    client.ft().create_index(TextField("title"))

    # Using alter to add a field
    client.ft().alter_schema_add(TextField("body"))

    # Indexing a document
    client.ft().add_document(
        "doc1", title="MyTitle", body="Some content only in the body"
    )

    # Searching with parameter only in the body (the added field)
    q = Query("only in the body")

    # Ensure we find the result searching on the added body field
    res = client.ft().search(q)
    assert 1 == res.total


@pytest.mark.redismod
def test_spell_check(client):
    """spellcheck() suggestions, distance, and include/exclude dictionaries."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    client.ft().add_document("doc1", f1="some valid content", f2="this is sample text")
    client.ft().add_document("doc2", f1="very important", f2="lorem ipsum")
    waitForIndex(client, "idx")

    # test spellcheck
    res = client.ft().spellcheck("impornant")
    assert "important" == res["impornant"][0]["suggestion"]

    res = client.ft().spellcheck("contnt")
    assert "content" == res["contnt"][0]["suggestion"]

    # test spellcheck with Levenshtein distance
    res = client.ft().spellcheck("vlis")
    assert res == {}
    res = client.ft().spellcheck("vlis", distance=2)
    assert "valid" == res["vlis"][0]["suggestion"]

    # test spellcheck include
    client.ft().dict_add("dict", "lore", "lorem", "lorm")
    res = client.ft().spellcheck("lorm", include="dict")
    assert len(res["lorm"]) == 3
    assert (
        res["lorm"][0]["suggestion"],
        res["lorm"][1]["suggestion"],
        res["lorm"][2]["suggestion"],
    ) == ("lorem", "lore", "lorm")
    assert (res["lorm"][0]["score"], res["lorm"][1]["score"]) == ("0.5", "0")

    # test spellcheck exclude
    res = client.ft().spellcheck("lorm", exclude="dict")
    assert res == {}


@pytest.mark.redismod
def test_dict_operations(client):
    """dict_add / dict_del / dict_dump round trip."""
    client.ft().create_index((TextField("f1"), TextField("f2")))
    # Add three items
    res = client.ft().dict_add("custom_dict", "item1", "item2", "item3")
    assert 3 == res

    # Remove one item
    res = client.ft().dict_del("custom_dict", "item2")
    assert 1 == res

    # Dump dict and inspect content
    res = client.ft().dict_dump("custom_dict")
    assert ["item1", "item3"] == res

    # Remove rest of the items before reload
    client.ft().dict_del("custom_dict", *res)


@pytest.mark.redismod
def test_phonetic_matcher(client):
    """A phonetic matcher makes "Jon" match "John" as well."""
    client.ft().create_index((TextField("name"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    res = client.ft().search(Query("Jon"))
    assert 1 == len(res.docs)
    assert "Jon" == res.docs[0].name

    # Drop and create index with phonetic matcher
    client.flushdb()

    client.ft().create_index((TextField("name", phonetic_matcher="dm:en"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    res = client.ft().search(Query("Jon"))
    assert 2 == len(res.docs)
    assert ["John", "Jon"] == sorted(d.name for d in res.docs)


@pytest.mark.redismod
def test_scorer(client):
    """Each named scorer yields the expected top score for "quick"."""
    client.ft().create_index((TextField("description"),))

    client.ft().add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    client.ft().add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    )

    # default scorer is TFIDF
    res = client.ft().search(Query("quick").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("TFIDF").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores())
    assert 0.1111111111111111 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("BM25").with_scores())
    assert 0.17699114465425977 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("DISMAX").with_scores())
    assert 2.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("HAMMING").with_scores())
    assert 0.0 == res.docs[0].score


@pytest.mark.redismod
def test_get(client):
    """get() returns None per missing id and flat field lists otherwise."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    assert [None] == client.ft().get("doc1")
    assert [None, None] == client.ft().get("doc2", "doc1")

    client.ft().add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    client.ft().add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    assert [
        ["f1", "some valid content dd2", "f2", "this is sample text ff2"]
    ] == client.ft().get("doc2")
    assert [
        ["f1", "some valid content dd1", "f2", "this is sample text ff1"],
        ["f1", "some valid content dd2", "f2", "this is sample text ff2"],
    ] == client.ft().get("doc1", "doc2")


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_config(client):
    """config_set() / config_get() round trip, and rejection of a bad value."""
    assert client.ft().config_set("TIMEOUT", "100")
    with pytest.raises(redis.ResponseError):
        client.ft().config_set("TIMEOUT", "null")
    res = client.ft().config_get("*")
    assert "100" == res["TIMEOUT"]
    res = client.ft().config_get("TIMEOUT")
    assert "100" == res["TIMEOUT"]


@pytest.mark.redismod
def test_aggregations_groupby(client):
    """group_by() with each reducer over three documents sharing a parent."""
    # Creating the index definition and schema
    client.ft().create_index(
        (
            NumericField("random_num"),
            TextField("title"),
            TextField("body"),
            TextField("parent"),
        )
    )

    # Indexing a document
    client.ft().add_document(
        "search",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
        parent="redis",
        random_num=10,
    )
    client.ft().add_document(
        "ai",
        title="RedisAI",
        body="RedisAI executes Deep Learning/Machine Learning models and managing their data.",  # noqa
        parent="redis",
        random_num=3,
    )
    client.ft().add_document(
        "json",
        title="RedisJson",
        body="RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.",  # noqa
        parent="redis",
        random_num=8,
    )

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count(),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count_distinct("@title"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count_distinctish("@title"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.sum("@random_num"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "21"  # 10+8+3

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.min("@random_num"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"  # min(10,8,3)

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.max("@random_num"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "10"  # max(10,8,3)

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.avg("@random_num"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "7"  # (10+3+8)/3

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.stddev("random_num"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3.60555127546"

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.quantile("@random_num", 0.5),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "10"

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.tolist("@title"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == ["RediSearch", "RedisAI", "RedisJson"]

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.first_value("@title").alias("first"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res == ["parent", "redis", "first", "RediSearch"]

    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.random_sample("@title", 2).alias("random"),
    )

    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[2] == "random"
    assert len(res[3]) == 2
    assert res[3][0] in ["RediSearch", "RedisAI", "RedisJson"]


@pytest.mark.redismod
def test_aggregations_sort_by_and_limit(client):
    """Aggregate sort_by() (with and without direction, max) and limit()."""
    client.ft().create_index(
        (
            TextField("t1"),
            TextField("t2"),
        )
    )

    client.ft().client.hset("doc1", mapping={"t1": "a", "t2": "b"})
    client.ft().client.hset("doc2", mapping={"t1": "b", "t2": "a"})

    # test sort_by using SortDirection
    req = aggregations.AggregateRequest("*").sort_by(
        aggregations.Asc("@t2"), aggregations.Desc("@t1")
    )
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t2", "a", "t1", "b"]
    assert res.rows[1] == ["t2", "b", "t1", "a"]

    # test sort_by without SortDirection
    req = aggregations.AggregateRequest("*").sort_by("@t1")
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t1", "a"]
    assert res.rows[1] == ["t1", "b"]

    # test sort_by with max
    req = aggregations.AggregateRequest("*").sort_by("@t1", max=1)
    res = client.ft().aggregate(req)
    assert len(res.rows) == 1

    # test limit
    req = aggregations.AggregateRequest("*").sort_by("@t1").limit(1, 1)
    res = client.ft().aggregate(req)
    assert len(res.rows) == 1
    assert res.rows[0] == ["t1", "b"]


@pytest.mark.redismod
def test_aggregations_load(client):
    """Aggregate load() of one named field, or of every field."""
    client.ft().create_index(
        (
            TextField("t1"),
            TextField("t2"),
        )
    )

    client.ft().client.hset("doc1", mapping={"t1": "hello", "t2": "world"})

    # load t1
    req = aggregations.AggregateRequest("*").load("t1")
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t1", "hello"]

    # load t2
    req = aggregations.AggregateRequest("*").load("t2")
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t2", "world"]

    # load all
    req = aggregations.AggregateRequest("*").load()
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t1", "hello", "t2", "world"]


@pytest.mark.redismod
def test_aggregations_apply(client):
    """Aggregate apply() evaluates an expression over a numeric field."""
    client.ft().create_index(
        (
            TextField("PrimaryKey", sortable=True),
            NumericField("CreatedDateTimeUTC", sortable=True),
        )
    )

    client.ft().client.hset(
        "doc1",
        mapping={"PrimaryKey": "9::362330", "CreatedDateTimeUTC": "637387878524969984"},
    )
    client.ft().client.hset(
        "doc2",
        mapping={"PrimaryKey": "9::362329", "CreatedDateTimeUTC": "637387875859270016"},
    )

    req = aggregations.AggregateRequest("*").apply(
        CreatedDateTimeUTC="@CreatedDateTimeUTC * 10"
    )
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["CreatedDateTimeUTC", "6373878785249699840"]
    assert res.rows[1] == ["CreatedDateTimeUTC", "6373878758592700416"]


@pytest.mark.redismod
def test_aggregations_filter(client):
    """Aggregate filter() with a compound and a simple expression."""
    client.ft().create_index(
        (
            TextField("name", sortable=True),
            NumericField("age", sortable=True),
        )
    )

    client.ft().client.hset("doc1", mapping={"name": "bar", "age": "25"})
    client.ft().client.hset("doc2", mapping={"name": "foo", "age": "19"})

    req = aggregations.AggregateRequest("*").filter("@name=='foo' && @age < 20")
    res = client.ft().aggregate(req)
    assert len(res.rows) == 1
    assert res.rows[0] == ["name", "foo", "age", "19"]

    req = aggregations.AggregateRequest("*").filter("@age > 15").sort_by("@age")
    res = client.ft().aggregate(req)
    assert len(res.rows) == 2
    assert res.rows[0] == ["age", "19"]
    assert res.rows[1] == ["age", "25"]


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_index_definition(client):
    """
    Create definition and test its args
    """
    with pytest.raises(RuntimeError):
        IndexDefinition(prefix=["hset:", "henry"], index_type="json")

    definition = IndexDefinition(
        prefix=["hset:", "henry"],
        filter="@f1==32",
        language="English",
        language_field="play",
        score_field="chapter",
        score=0.5,
        payload_field="txt",
        index_type=IndexType.JSON,
    )

    assert [
        "ON",
        "JSON",
        "PREFIX",
        2,
        "hset:",
        "henry",
        "FILTER",
        "@f1==32",
        "LANGUAGE_FIELD",
        "play",
        "LANGUAGE",
        "English",
        "SCORE_FIELD",
        "chapter",
        "SCORE",
        0.5,
        "PAYLOAD_FIELD",
        "txt",
    ] == definition.args

    createIndex(client.ft(), num_docs=500, definition=definition)


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition(client):
    """
    Create definition with no index type provided,
    and use hset to test the client definition (the default is HASH).
    """
    definition = IndexDefinition(prefix=["hset:", "henry"])
    createIndex(client.ft(), num_docs=500, definition=definition)

    info = client.ft().info()
    assert 494 == int(info["num_docs"])

    client.ft().client.hset("hset:1", "f1", "v1")
    info = client.ft().info()
    assert 495 == int(info["num_docs"])


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition_hash(client):
    """
    Create definition with IndexType.HASH as index type (ON HASH),
    and use hset to test the client definition.
    """
    definition = IndexDefinition(prefix=["hset:", "henry"], index_type=IndexType.HASH)
    createIndex(client.ft(), num_docs=500, definition=definition)

    info = client.ft().info()
    assert 494 == int(info["num_docs"])

    client.ft().client.hset("hset:1", "f1", "v1")
    info = client.ft().info()
    assert 495 == int(info["num_docs"])


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_client_definition_json(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON),
    and use json client to test it.
    """
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index((TextField("$.name"),), definition=definition)

    client.json().set("king:1", Path.rootPath(), {"name": "henry"})
    client.json().set("king:2", Path.rootPath(), {"name": "james"})

    res = client.ft().search("henry")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].payload is None
    assert res.docs[0].json == '{"name":"henry"}'
    assert res.total == 1


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_fields_as_name(client):
    """JSON fields declared with as_name are returned under their alias."""
    # create index
    SCHEMA = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    definition = IndexDefinition(index_type=IndexType.JSON)
    client.ft().create_index(SCHEMA, definition=definition)

    # insert json data
    res = client.json().set("doc:1", Path.rootPath(), {"name": "Jon", "age": 25})
    assert res

    total = client.ft().search(Query("Jon").return_fields("name", "just_a_number")).docs
    assert 1 == len(total)
    assert "doc:1" == total[0].id
    assert "Jon" == total[0].name
    assert "25" == total[0].just_a_number


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_search_return_fields(client):
    """return_field(..., as_field=...) renames a JSON path in the result."""
    res = client.json().set(
        "doc:1",
        Path.rootPath(),
        {"t": "riceratops", "t2": "telmatosaurus", "n": 9072, "flt": 97.2},
    )
    assert res

    # create index on
    definition = IndexDefinition(index_type=IndexType.JSON)
    SCHEMA = (
        TextField("$.t"),
        NumericField("$.flt"),
    )
    client.ft().create_index(SCHEMA, definition=definition)
    waitForIndex(client, "idx")

    total = client.ft().search(Query("*").return_field("$.t", as_field="txt")).docs
    assert 1 == len(total)
    assert "doc:1" == total[0].id
    assert "riceratops" == total[0].txt

    total = client.ft().search(Query("*").return_field("$.t2", as_field="txt")).docs
    assert 1 == len(total)
    assert "doc:1" == total[0].id
    assert "telmatosaurus" == total[0].txt


@pytest.mark.redismod
def test_synupdate(client):
    """synupdate() groups make a synonym query match a later document."""
    definition = IndexDefinition(index_type=IndexType.HASH)
    client.ft().create_index(
        (
            TextField("title"),
            TextField("body"),
        ),
        definition=definition,
    )

    client.ft().synupdate("id1", True, "boy", "child", "offspring")
    client.ft().add_document("doc1", title="he is a baby", body="this is a test")

    client.ft().synupdate("id1", True, "baby")
    client.ft().add_document("doc2", title="he is another baby", body="another test")

    res = client.ft().search(Query("child").expander("SYNONYM"))
    assert res.docs[0].id == "doc2"
    assert res.docs[0].title == "he is another baby"
    assert res.docs[0].body == "another test"


@pytest.mark.redismod
def test_syndump(client):
    """syndump() maps each term to the synonym group ids it belongs to."""
    definition = IndexDefinition(index_type=IndexType.HASH)
    client.ft().create_index(
        (
            TextField("title"),
            TextField("body"),
        ),
        definition=definition,
    )

    client.ft().synupdate("id1", False, "boy", "child", "offspring")
    client.ft().synupdate("id2", False, "baby", "child")
    client.ft().synupdate("id3", False, "tree", "wood")
    res = client.ft().syndump()
    assert res == {
        "boy": ["id1"],
        "tree": ["id3"],
        "wood": ["id3"],
        "child": ["id1", "id2"],
        "baby": ["id2"],
        "offspring": ["id1"],
    }


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_json_with_alias(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON) with two
    fields with aliases, and use json client to test it.
    """
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index(
        (TextField("$.name", as_name="name"), NumericField("$.num", as_name="num")),
        definition=definition,
    )

    client.json().set("king:1", Path.rootPath(), {"name": "henry", "num": 42})
    client.json().set("king:2", Path.rootPath(), {"name": "james", "num": 3.14})

    res = client.ft().search("@name:henry")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","num":42}'
    assert res.total == 1

    res = client.ft().search("@num:[0 10]")
    assert res.docs[0].id == "king:2"
    assert res.docs[0].json == '{"name":"james","num":3.14}'
    assert res.total == 1

    # Tests returns an error if path contain special characters (user should
    # use an alias)
    with pytest.raises(Exception):
        client.ft().search("@$.name:henry")


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_multipath(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON),
    and use json client to test it.
    """
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    # NOTE(review): the path "$" looks truncated by extraction; the queries
    # below match both the top-level and the nested "name", which suggests a
    # multi-value path.  Kept exactly as found -- confirm against upstream.
    client.ft().create_index(
        (TagField("$", as_name="name")), definition=definition
    )

    client.json().set(
        "king:1", Path.rootPath(), {"name": "henry", "country": {"name": "england"}}
    )

    res = client.ft().search("@name:{henry}")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","country":{"name":"england"}}'
    assert res.total == 1

    res = client.ft().search("@name:{england}")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","country":{"name":"england"}}'
    assert res.total == 1


@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_jsonpath(client):
    definition = IndexDefinition(index_type=IndexType.JSON)
    client.ft().create_index(
        (
            TextField('$["prod:name"]', as_name="name"),
            TextField("$.prod:name", as_name="name_unsupported"),
        ),
        definition=definition,
    )

    client.json().set("doc:1", Path.rootPath(), {"prod:name": "RediSearch"})

    # query for a supported field succeeds
    res = client.ft().search(Query("@name:RediSearch"))
    assert res.total == 1
assert[0].id == "doc:1" assert[0].json == '{"prod:name":"RediSearch"}' # query for an unsupported field fails res = client.ft().search("@name_unsupported:RediSearch") assert == 0 # return of a supported field succeeds res = client.ft().search(Query("@name:RediSearch").return_field("name")) assert == 1 assert[0].id == "doc:1" assert[0].name == "RediSearch" # return of an unsupported field fails res = client.ft().search(Query("@name:RediSearch").return_field("name_unsupported")) assert == 1 assert[0].id == "doc:1" with pytest.raises(Exception):[0].name_unsupported @pytest.mark.redismod def test_profile(client): client.ft().create_index((TextField("t"),)) client.ft().client.hset("1", "t", "hello") client.ft().client.hset("2", "t", "world") # check using Query q = Query("hello|world").no_content() res, det = client.ft().profile(q) assert det["Iterators profile"]["Counter"] == 2.0 assert len(det["Iterators profile"]["Child iterators"]) == 2 assert det["Iterators profile"]["Type"] == "UNION" assert det["Parsing time"] < 0.5 assert len( == 2 # check also the search result # check using AggregateRequest req = ( aggregations.AggregateRequest("*") .load("t") .apply(prefix="startswith(@t, 'hel')") ) res, det = client.ft().profile(req) assert det["Iterators profile"]["Counter"] == 2.0 assert det["Iterators profile"]["Type"] == "WILDCARD" assert det["Parsing time"] < 0.5 assert len(res.rows) == 2 # check also the search result @pytest.mark.redismod def test_profile_limited(client): client.ft().create_index((TextField("t"),)) client.ft().client.hset("1", "t", "hello") client.ft().client.hset("2", "t", "hell") client.ft().client.hset("3", "t", "help") client.ft().client.hset("4", "t", "helowa") q = Query("%hell% hel*") res, det = client.ft().profile(q, limited=True) assert ( det["Iterators profile"]["Child iterators"][0]["Child iterators"] == "The number of iterators in the union is 3" ) assert ( det["Iterators profile"]["Child iterators"][1]["Child iterators"] == "The number of 
iterators in the union is 4" ) assert det["Iterators profile"]["Type"] == "INTERSECT" assert len( == 3 # check also the search result