import bz2
import csv
import os
import time
from io import TextIOWrapper
import pytest
import redis
import redis.commands.search
import redis.commands.search.aggregation as aggregations
import redis.commands.search.reducers as reducers
from redis import Redis
from redis.commands.json.path import Path
from redis.commands.search import Search
from redis.commands.search.field import GeoField, NumericField, TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import GeoFilter, NumericFilter, Query
from redis.commands.search.result import Result
from redis.commands.search.suggestion import Suggestion
from .conftest import default_redismod_url, skip_ifmodversion_lt
# Absolute paths to the fixture data files shipped next to this test module.
_TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "testdata")

WILL_PLAY_TEXT = os.path.abspath(os.path.join(_TESTDATA_DIR, "will_play_text.csv.bz2"))

TITLES_CSV = os.path.abspath(os.path.join(_TESTDATA_DIR, "titles.csv"))
def waitForIndex(env, idx, timeout=None):
    """Poll FT.INFO on index *idx* until its "indexing" counter hits zero.

    Sleeps 0.1s between polls.  If *timeout* (seconds) is supplied, give up
    once it has been consumed; otherwise poll indefinitely.  Also returns
    immediately if the FT.INFO reply carries no "indexing" entry at all.
    """
    delay = 0.1
    while True:
        info = env.execute_command("ft.info", idx)
        try:
            pos = info.index("indexing")
        except ValueError:
            # Server does not report the field -- nothing to wait for.
            break
        if int(info[pos + 1]) == 0:
            break
        time.sleep(delay)
        if timeout is not None:
            timeout -= delay
            if timeout <= 0:
                break
def getClient():
    """
    Gets a client attached to an index name which is ready to be
    created
    """
    rc = Redis.from_url(default_redismod_url, decode_responses=True)
    return rc
def createIndex(client, num_docs=100, definition=None):
    """Create the canonical test index and load up to *num_docs* chapters.

    Creates an index with ``play`` (weighted text), ``txt`` (text) and
    ``chapter`` (numeric) fields.  If the index already exists it is
    dropped (documents included) and creation is retried once.  Documents
    are built from the bundled, bz2-compressed Shakespeare CSV, merging
    all lines of a (play, chapter) pair into one document, and ingested
    through the client's batch indexer.

    :param client: a ``Search`` client attached to the target index name.
    :param num_docs: stop after collecting this many distinct chapters.
    :param definition: optional ``IndexDefinition`` forwarded to FT.CREATE.
    """
    try:
        client.create_index(
            (TextField("play", weight=5.0), TextField("txt"), NumericField("chapter")),
            definition=definition,
        )
    except redis.ResponseError:
        # Index already exists: drop it together with its documents and retry.
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # Fix: close the decompression stream deterministically instead of
    # leaking the file handle (the original never closed it).
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, _, text = line[1], line[2], line[4], line[5]

            key = f"{play}:{chapter}".lower()
            # Accumulate all text lines belonging to the same chapter.
            d = chapters.setdefault(key, {})
            d["play"] = play
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
# override the default module client, search requires both db=0, and text
@pytest.fixture
def modclient():
    """Redis client pinned to logical db 0 with responses decoded to str."""
    return Redis.from_url(default_redismod_url, db=0, decode_responses=True)
@pytest.fixture
def client(modclient):
    """Module client with a freshly flushed database, for test isolation."""
    modclient.flushdb()
    return modclient
@pytest.mark.redismod
def test_client(client):
    """End-to-end smoke test of the core FT search client.

    Indexes ~500 Shakespeare chapters and exercises FT.INFO, basic and
    NOCONTENT/VERBATIM searches, field-limited search, document loading,
    INKEYS subsetting, SLOP/INORDER semantics and document deletion.
    The exact hit counts asserted below are properties of the fixture data.
    """
    num_docs = 500
    createIndex(client.ft(), num_docs=num_docs)
    waitForIndex(client, "idx")
    # verify info
    info = client.ft().info()
    for k in [
        "index_name",
        "index_options",
        "attributes",
        "num_docs",
        "max_doc_id",
        "num_terms",
        "num_records",
        "inverted_sz_mb",
        "offset_vectors_sz_mb",
        "doc_table_size_mb",
        "key_table_size_mb",
        "records_per_doc_avg",
        "bytes_per_record_avg",
        "offsets_per_term_avg",
        "offset_bits_per_record_avg",
    ]:
        assert k in info
    assert client.ft().index_name == info["index_name"]
    assert num_docs == int(info["num_docs"])
    # basic search; results are paged, default page size is 10
    res = client.ft().search("henry iv")
    assert isinstance(res, Result)
    assert 225 == res.total
    assert 10 == len(res.docs)
    assert res.duration > 0
    for doc in res.docs:
        assert doc.id
        assert doc.play == "Henry IV"
        assert len(doc.txt) > 0
    # test no content
    res = client.ft().search(Query("king").no_content())
    assert 194 == res.total
    assert 10 == len(res.docs)
    for doc in res.docs:
        assert "txt" not in doc.__dict__
        assert "play" not in doc.__dict__
    # test verbatim vs no verbatim (stemming widens the match set)
    total = client.ft().search(Query("kings").no_content()).total
    vtotal = client.ft().search(Query("kings").no_content().verbatim()).total
    assert total > vtotal
    # test in fields
    txt_total = (
        client.ft().search(Query("henry").no_content().limit_fields("txt")).total
    )
    play_total = (
        client.ft().search(Query("henry").no_content().limit_fields("play")).total
    )
    both_total = (
        client.ft()
        .search(Query("henry").no_content().limit_fields("play", "txt"))
        .total
    )
    assert 129 == txt_total
    assert 494 == play_total
    assert 494 == both_total
    # test load_document
    doc = client.ft().load_document("henry vi part 3:62")
    assert doc is not None
    assert "henry vi part 3:62" == doc.id
    assert doc.play == "Henry VI Part 3"
    assert len(doc.txt) > 0
    # test in-keys (restrict the search to an explicit key subset)
    ids = [x.id for x in client.ft().search(Query("henry")).docs]
    assert 10 == len(ids)
    subset = ids[:5]
    docs = client.ft().search(Query("henry").limit_ids(*subset))
    assert len(subset) == docs.total
    ids = [x.id for x in docs.docs]
    assert set(ids) == set(subset)
    # test slop and in order
    assert 193 == client.ft().search(Query("henry king")).total
    assert 3 == client.ft().search(Query("henry king").slop(0).in_order()).total
    assert 52 == client.ft().search(Query("king henry").slop(0).in_order()).total
    assert 53 == client.ft().search(Query("henry king").slop(0)).total
    assert 167 == client.ft().search(Query("henry king").slop(100)).total
    # test delete document
    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = client.ft().search(Query("death of a salesman"))
    assert 1 == res.total
    # first delete removes the doc, a second delete reports 0
    assert 1 == client.ft().delete_document("doc-5ghs2")
    res = client.ft().search(Query("death of a salesman"))
    assert 0 == res.total
    assert 0 == client.ft().delete_document("doc-5ghs2")
    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = client.ft().search(Query("death of a salesman"))
    assert 1 == res.total
    client.ft().delete_document("doc-5ghs2")
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_payloads(client):
    """Documents indexed with a payload return it under WITHPAYLOADS."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")

    query = Query("foo bar").with_payloads()
    result = client.ft().search(query)
    assert result.total == 2
    assert result.docs[0].id == "doc1"
    assert result.docs[1].id == "doc2"
    # doc2 was added without a payload, so it comes back as None
    assert result.docs[0].payload == "foo baz"
    assert result.docs[1].payload is None
@pytest.mark.redismod
def test_scores(client):
    """WITHSCORES attaches a relevance score to each result document."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", txt="foo baz")
    client.ft().add_document("doc2", txt="foo bar")

    result = client.ft().search(Query("foo ~bar").with_scores())
    assert result.total == 2
    # doc2 matches both terms and therefore ranks first
    assert result.docs[0].id == "doc2"
    assert result.docs[0].score == 3.0
    assert result.docs[1].id == "doc1"
    # todo: enable once new RS version is tagged
    # self.assertEqual(0.2, res.docs[1].score)
@pytest.mark.redismod
def test_replace(client):
    """add_document(replace=True) fully overwrites an existing document."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")
    waitForIndex(client, "idx")

    result = client.ft().search("foo bar")
    assert result.total == 2

    # Replacing doc1 drops its old "foo bar" content from the index.
    client.ft().add_document("doc1", replace=True, txt="this is a replaced doc")

    result = client.ft().search("foo bar")
    assert result.total == 1
    assert result.docs[0].id == "doc2"

    result = client.ft().search("replaced doc")
    assert result.total == 1
    assert result.docs[0].id == "doc1"
@pytest.mark.redismod
def test_stopwords(client):
    """Custom stopwords are ignored both at index and at query time."""
    client.ft().create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"])

    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    # A query made up entirely of stopwords matches nothing...
    only_stopwords = Query("foo bar").no_content()
    # ...while the remaining non-stopword terms still match doc2.
    mixed = Query("foo bar hello world").no_content()
    res1, res2 = client.ft().search(only_stopwords), client.ft().search(mixed)
    assert res1.total == 0
    assert res2.total == 1
@pytest.mark.redismod
def test_filters(client):
    """Numeric and geo filters narrow a text search to matching documents."""
    client.ft().create_index((TextField("txt"), NumericField("num"), GeoField("loc")))

    client.ft().add_document("doc1", txt="foo bar", num=3.141, loc="-0.441,51.458")
    client.ft().add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")
    waitForIndex(client, "idx")

    # Numeric filter: [0, 2] catches doc2, (2, +inf) catches doc1.
    low_range = Query("foo").add_filter(NumericFilter("num", 0, 2)).no_content()
    high_range = (
        Query("foo")
        .add_filter(NumericFilter("num", 2, NumericFilter.INF, minExclusive=True))
        .no_content()
    )
    res_low, res_high = client.ft().search(low_range), client.ft().search(high_range)
    assert res_low.total == 1
    assert res_high.total == 1
    assert res_low.docs[0].id == "doc2"
    assert res_high.docs[0].id == "doc1"

    # Geo filter: a 10km radius reaches only doc1, 100km reaches both.
    near = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 10)).no_content()
    far = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 100)).no_content()
    res_near, res_far = client.ft().search(near), client.ft().search(far)
    assert res_near.total == 1
    assert res_far.total == 2
    assert res_near.docs[0].id == "doc1"

    # Sort results, after RDB reload order may change
    wide_ids = [res_far.docs[0].id, res_far.docs[1].id]
    wide_ids.sort()
    assert wide_ids == ["doc1", "doc2"]
@pytest.mark.redismod
def test_payloads_with_no_content(client):
    """WITHPAYLOADS combined with NOCONTENT still yields one doc per hit."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", payload="foo baz2", txt="foo bar")

    result = client.ft().search(Query("foo bar").with_payloads().no_content())
    assert len(result.docs) == 2
@pytest.mark.redismod
def test_sort_by(client):
    """sort_by orders results ascending or descending on a sortable field."""
    client.ft().create_index((TextField("txt"), NumericField("num", sortable=True)))

    client.ft().add_document("doc1", txt="foo bar", num=1)
    client.ft().add_document("doc2", txt="foo baz", num=2)
    client.ft().add_document("doc3", txt="foo qux", num=3)

    # Test sort
    ascending = Query("foo").sort_by("num", asc=True).no_content()
    descending = Query("foo").sort_by("num", asc=False).no_content()
    res_asc, res_desc = client.ft().search(ascending), client.ft().search(descending)

    assert res_asc.total == 3
    assert [d.id for d in res_asc.docs] == ["doc1", "doc2", "doc3"]

    assert res_desc.total == 3
    assert [d.id for d in res_desc.docs] == ["doc3", "doc2", "doc1"]
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_drop_index():
    """
    Ensure the index gets dropped but data remains by default.

    Runs the cycle repeatedly with both values of ``delete_documents``:
    when True the backing hash is removed along with the index, when
    False the hash survives the FT.DROPINDEX.
    """
    for x in range(20):
        # (delete flag passed to dropindex, expected remaining hash content)
        for delete_docs, expected in [(True, {}), (False, {"name": "haveit"})]:
            idx = "HaveIt"
            index = getClient()
            index.hset("index:haveit", mapping={"name": "haveit"})
            idef = IndexDefinition(prefix=["index:"])
            index.ft(idx).create_index((TextField("name"),), definition=idef)
            waitForIndex(index, idx)
            index.ft(idx).dropindex(delete_documents=delete_docs)
            i = index.hgetall("index:haveit")
            assert i == expected
@pytest.mark.redismod
def test_example(client):
    """The documentation example: create, index one doc, run a paged query."""
    # Creating the index definition and schema
    client.ft().create_index((TextField("title", weight=5.0), TextField("body")))

    # Indexing a document
    client.ft().add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Searching with complex parameters:
    query = Query("search engine").verbatim().no_content().paging(0, 5)
    assert client.ft().search(query) is not None
@pytest.mark.redismod
def test_auto_complete(client):
    """Exercise the suggestion (auto-complete) API.

    Covers sugadd/suglen/sugget (exact and fuzzy, with scores), sugdel
    idempotency, and payload round-tripping.
    """
    n = 0
    # every row of titles.csv is a (term, score) suggestion pair
    with open(TITLES_CSV) as f:
        cr = csv.reader(f)
        for row in cr:
            n += 1
            term, score = row[0], float(row[1])
            # sugadd returns the running size of the suggestion dictionary
            assert n == client.ft().sugadd("ac", Suggestion(term, score=score))
    assert n == client.ft().suglen("ac")
    ret = client.ft().sugget("ac", "bad", with_scores=True)
    assert 2 == len(ret)
    assert "badger" == ret[0].string
    assert isinstance(ret[0].score, float)
    assert 1.0 != ret[0].score
    assert "badalte rishtey" == ret[1].string
    assert isinstance(ret[1].score, float)
    assert 1.0 != ret[1].score
    # fuzzy matching widens the result set
    ret = client.ft().sugget("ac", "bad", fuzzy=True, num=10)
    assert 10 == len(ret)
    assert 1.0 == ret[0].score
    strs = {x.string for x in ret}
    for sug in strs:
        assert 1 == client.ft().sugdel("ac", sug)
    # make sure a second delete returns 0
    for sug in strs:
        assert 0 == client.ft().sugdel("ac", sug)
    # make sure they were actually deleted
    ret2 = client.ft().sugget("ac", "bad", fuzzy=True, num=10)
    for sug in ret2:
        assert sug.string not in strs
    # Test with payload
    client.ft().sugadd("ac", Suggestion("pay1", payload="pl1"))
    client.ft().sugadd("ac", Suggestion("pay2", payload="pl2"))
    client.ft().sugadd("ac", Suggestion("pay3", payload="pl3"))
    sugs = client.ft().sugget("ac", "pay", with_payloads=True, with_scores=True)
    assert 3 == len(sugs)
    for sug in sugs:
        assert sug.payload
        assert sug.payload.startswith("pl")
@pytest.mark.redismod
def test_no_index(client):
    """no_index fields are sortable but not searchable.

    Also verifies that constructing a field with both ``no_index=True``
    and ``sortable=False`` raises, since such a field would be unusable.
    """
    client.ft().create_index(
        (
            TextField("field"),
            TextField("text", no_index=True, sortable=True),
            NumericField("numeric", no_index=True, sortable=True),
            GeoField("geo", no_index=True, sortable=True),
            TagField("tag", no_index=True, sortable=True),
        )
    )
    client.ft().add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    client.ft().add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")
    # a no_index field cannot be searched...
    res = client.ft().search(Query("@text:aa*"))
    assert 0 == res.total
    # ...but a regular field can
    res = client.ft().search(Query("@field:aa*"))
    assert 2 == res.total
    # every no_index field can still be used for sorting
    res = client.ft().search(Query("*").sort_by("text", asc=False))
    assert 2 == res.total
    assert "doc2" == res.docs[0].id
    res = client.ft().search(Query("*").sort_by("text", asc=True))
    assert "doc1" == res.docs[0].id
    res = client.ft().search(Query("*").sort_by("numeric", asc=True))
    assert "doc1" == res.docs[0].id
    res = client.ft().search(Query("*").sort_by("geo", asc=True))
    assert "doc1" == res.docs[0].id
    res = client.ft().search(Query("*").sort_by("tag", asc=True))
    assert "doc1" == res.docs[0].id
    # Ensure exception is raised for non-indexable, non-sortable fields
    with pytest.raises(Exception):
        TextField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        NumericField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        GeoField("name", no_index=True, sortable=False)
    with pytest.raises(Exception):
        TagField("name", no_index=True, sortable=False)
@pytest.mark.redismod
def test_partial(client):
    """partial=True merges new fields; replace=True discards the old ones."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))

    client.ft().add_document("doc1", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc2", f1="f1_val", f2="f2_val")
    # doc1 is updated in place, doc2 is fully replaced
    client.ft().add_document("doc1", f3="f3_val", partial=True)
    client.ft().add_document("doc2", f3="f3_val", replace=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    result = client.ft().search("@f3:f3_val")
    assert result.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    result = client.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert result.total == 1
@pytest.mark.redismod
def test_no_create(client):
    """no_create=True updates existing docs but refuses to create new ones."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))

    client.ft().add_document("doc1", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc2", f1="f1_val", f2="f2_val")
    client.ft().add_document("doc1", f3="f3_val", no_create=True)
    client.ft().add_document("doc2", f3="f3_val", no_create=True, partial=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    result = client.ft().search("@f3:f3_val")
    assert result.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    result = client.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert result.total == 1

    # Creating a brand-new document with no_create must fail.
    with pytest.raises(redis.ResponseError):
        client.ft().add_document("doc3", f2="f2_val", f3="f3_val", no_create=True)
@pytest.mark.redismod
def test_explain(client):
    """FT.EXPLAIN returns a non-empty execution plan for a valid query."""
    client.ft().create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    plan = client.ft().explain("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert plan
@pytest.mark.redismod
def test_explaincli(client):
    """FT.EXPLAINCLI is deliberately unsupported by the Python client."""
    with pytest.raises(NotImplementedError):
        client.ft().explain_cli("foo")
@pytest.mark.redismod
def test_summarize(client):
    """SUMMARIZE/HIGHLIGHT return trimmed snippets of the matching text."""
    createIndex(client.ft())
    waitForIndex(client, "idx")
    q = Query("king henry").paging(0, 1)
    # empty tags: matched terms are not wrapped in any markup
    q.highlight(fields=("play", "txt"), tags=("", ""))
    q.summarize("txt")
    doc = sorted(client.ft().search(q).docs)[0]
    assert "Henry IV" == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... " # noqa
        == doc.txt
    )
    # defaults: summarize/highlight with no explicit fields or tags
    q = Query("king henry").paging(0, 1).summarize().highlight()
    doc = sorted(client.ft().search(q).docs)[0]
    assert "Henry ... " == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... " # noqa
        == doc.txt
    )
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_alias():
    """Aliases can be added, reassigned between indices, and deleted.

    Two prefix-scoped indices are created; the "spaceballs" alias is first
    attached to index1, then moved to index2, then removed entirely.
    """
    index1 = getClient()
    index2 = getClient()
    def1 = IndexDefinition(prefix=["index1:"])
    def2 = IndexDefinition(prefix=["index2:"])
    ftindex1 = index1.ft("testAlias")
    ftindex2 = index2.ft("testAlias2")
    ftindex1.create_index((TextField("name"),), definition=def1)
    ftindex2.create_index((TextField("name"),), definition=def2)
    index1.hset("index1:lonestar", mapping={"name": "lonestar"})
    index2.hset("index2:yogurt", mapping={"name": "yogurt"})
    res = ftindex1.search("*").docs[0]
    assert "index1:lonestar" == res.id
    # create alias and check for results
    ftindex1.aliasadd("spaceballs")
    alias_client = getClient().ft("spaceballs")
    res = alias_client.search("*").docs[0]
    assert "index1:lonestar" == res.id
    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        ftindex2.aliasadd("spaceballs")
    # update alias and ensure new results
    ftindex2.aliasupdate("spaceballs")
    alias_client2 = getClient().ft("spaceballs")
    res = alias_client2.search("*").docs[0]
    assert "index2:yogurt" == res.id
    # once the alias is deleted, searching through it fails
    ftindex2.aliasdel("spaceballs")
    with pytest.raises(Exception):
        alias_client2.search("*").docs[0]
@pytest.mark.redismod
def test_alias_basic():
    """Basic alias lifecycle against two single-document indices."""
    # Creating a client with one index
    getClient().flushdb()
    index1 = getClient().ft("testAlias")
    index1.create_index((TextField("txt"),))
    index1.add_document("doc1", txt="text goes here")
    index2 = getClient().ft("testAlias2")
    index2.create_index((TextField("txt"),))
    index2.add_document("doc2", txt="text goes here")
    # add the actual alias and check
    index1.aliasadd("myalias")
    alias_client = getClient().ft("myalias")
    res = sorted(alias_client.search("*").docs, key=lambda x: x.id)
    assert "doc1" == res[0].id
    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        index2.aliasadd("myalias")
    # update the alias and ensure we get doc2
    index2.aliasupdate("myalias")
    alias_client2 = getClient().ft("myalias")
    res = sorted(alias_client2.search("*").docs, key=lambda x: x.id)
    assert "doc1" == res[0].id
    # delete the alias and expect an error if we try to query again
    index2.aliasdel("myalias")
    with pytest.raises(Exception):
        _ = alias_client2.search("*").docs[0]
@pytest.mark.redismod
def test_tags(client):
    """Tag fields match exact values, including ones containing spaces
    and the default separator, and FT.TAGVALS enumerates all tags."""
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    q = Query("@tags:{foo}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo\\ bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{hello\\;world}")
    res = client.ft().search(q)
    assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # Bug fix: list.sort() returns None, so the old assertion
    # `(... ).sort() == q2.sort()` compared None == None and always
    # passed.  Compare the actual sorted contents instead.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)
@pytest.mark.redismod
def test_textfield_sortable_nostem(client):
    """SORTABLE and NOSTEM flags are reflected in the index attributes."""
    # Creating the index definition with sortable and no_stem
    client.ft().create_index((TextField("txt", sortable=True, no_stem=True),))

    # Now get the index info to confirm its contents
    attributes = client.ft().info()["attributes"]
    assert "SORTABLE" in attributes[0]
    assert "NOSTEM" in attributes[0]
@pytest.mark.redismod
def test_alter_schema_add(client):
    """alter_schema_add makes a newly added field searchable."""
    # Creating the index definition and schema
    client.ft().create_index(TextField("title"))

    # Using alter to add a field
    client.ft().alter_schema_add(TextField("body"))

    # Indexing a document
    client.ft().add_document(
        "doc1", title="MyTitle", body="Some content only in the body"
    )

    # Ensure we find the result searching on the added body field
    result = client.ft().search(Query("only in the body"))
    assert result.total == 1
@pytest.mark.redismod
def test_spell_check(client):
    """FT.SPELLCHECK suggests corrections for misspelled query terms.

    Covers the default distance, an explicit Levenshtein distance, and
    the include/exclude custom-dictionary options.
    """
    client.ft().create_index((TextField("f1"), TextField("f2")))
    client.ft().add_document("doc1", f1="some valid content", f2="this is sample text")
    client.ft().add_document("doc2", f1="very important", f2="lorem ipsum")
    waitForIndex(client, "idx")
    # test spellcheck
    res = client.ft().spellcheck("impornant")
    assert "important" == res["impornant"][0]["suggestion"]
    res = client.ft().spellcheck("contnt")
    assert "content" == res["contnt"][0]["suggestion"]
    # test spellcheck with Levenshtein distance
    res = client.ft().spellcheck("vlis")
    assert res == {}
    res = client.ft().spellcheck("vlis", distance=2)
    assert "valid" == res["vlis"][0]["suggestion"]
    # test spellcheck include
    client.ft().dict_add("dict", "lore", "lorem", "lorm")
    res = client.ft().spellcheck("lorm", include="dict")
    assert len(res["lorm"]) == 3
    assert (
        res["lorm"][0]["suggestion"],
        res["lorm"][1]["suggestion"],
        res["lorm"][2]["suggestion"],
    ) == ("lorem", "lore", "lorm")
    # scores are strings; only indexed terms score above zero
    assert (res["lorm"][0]["score"], res["lorm"][1]["score"]) == ("0.5", "0")
    # test spellcheck exclude
    res = client.ft().spellcheck("lorm", exclude="dict")
    assert res == {}
@pytest.mark.redismod
def test_dict_operations(client):
    """Round-trip a custom dictionary through dict_add/dict_del/dict_dump."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    # Add three items
    assert client.ft().dict_add("custom_dict", "item1", "item2", "item3") == 3

    # Remove one item
    assert client.ft().dict_del("custom_dict", "item2") == 1

    # Dump dict and inspect content
    remaining = client.ft().dict_dump("custom_dict")
    assert remaining == ["item1", "item3"]

    # Remove rest of the items before reload
    client.ft().dict_del("custom_dict", *remaining)
@pytest.mark.redismod
def test_phonetic_matcher(client):
    """With a phonetic matcher, 'Jon' also matches the similar 'John'."""
    client.ft().create_index((TextField("name"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    result = client.ft().search(Query("Jon"))
    assert len(result.docs) == 1
    assert result.docs[0].name == "Jon"

    # Drop and create index with phonetic matcher
    client.flushdb()

    client.ft().create_index((TextField("name", phonetic_matcher="dm:en"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    result = client.ft().search(Query("Jon"))
    assert len(result.docs) == 2
    assert sorted(d.name for d in result.docs) == ["John", "Jon"]
@pytest.mark.redismod
def test_scorer(client):
    """Each built-in scorer produces its expected score for the same query.

    The exact values asserted here depend on the two fixture documents
    below (term frequency of "quick" and document lengths).
    """
    client.ft().create_index((TextField("description"),))
    client.ft().add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    client.ft().add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.", # noqa
    )
    # default scorer is TFIDF
    res = client.ft().search(Query("quick").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("TFIDF").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores())
    assert 0.1111111111111111 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("BM25").with_scores())
    assert 0.17699114465425977 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("DISMAX").with_scores())
    assert 2.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores())
    assert 1.0 == res.docs[0].score
    res = client.ft().search(Query("quick").scorer("HAMMING").with_scores())
    assert 0.0 == res.docs[0].score
@pytest.mark.redismod
def test_get(client):
    """FT.GET returns None placeholders for missing docs, field lists otherwise."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    # Unknown documents come back as None entries.
    assert client.ft().get("doc1") == [None]
    assert client.ft().get("doc2", "doc1") == [None, None]

    client.ft().add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    client.ft().add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    assert client.ft().get("doc2") == [
        ["f1", "some valid content dd2", "f2", "this is sample text ff2"]
    ]
    assert client.ft().get("doc1", "doc2") == [
        ["f1", "some valid content dd1", "f2", "this is sample text ff1"],
        ["f1", "some valid content dd2", "f2", "this is sample text ff2"],
    ]
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_config(client):
    """FT.CONFIG SET/GET round-trips a value and rejects invalid input."""
    assert client.ft().config_set("TIMEOUT", "100")
    with pytest.raises(redis.ResponseError):
        client.ft().config_set("TIMEOUT", "null")
    # the value is visible both via wildcard and via direct lookup
    assert client.ft().config_get("*")["TIMEOUT"] == "100"
    assert client.ft().config_get("TIMEOUT")["TIMEOUT"] == "100"
@pytest.mark.redismod
def test_aggregations_groupby(client):
    """GROUPBY with every reducer type over three documents sharing one parent.

    Each aggregation groups by ``@parent`` (always "redis") and asserts the
    reducer output; reply rows are flat [name, value, ...] lists, with the
    reducer result at index 3.
    """
    # Creating the index definition and schema
    client.ft().create_index(
        (
            NumericField("random_num"),
            TextField("title"),
            TextField("body"),
            TextField("parent"),
        )
    )
    # Indexing a document
    client.ft().add_document(
        "search",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
        parent="redis",
        random_num=10,
    )
    client.ft().add_document(
        "ai",
        title="RedisAI",
        body="RedisAI executes Deep Learning/Machine Learning models and managing their data.", # noqa
        parent="redis",
        random_num=3,
    )
    client.ft().add_document(
        "json",
        title="RedisJson",
        body="RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.", # noqa
        parent="redis",
        random_num=8,
    )
    # COUNT
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count(),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"
    # COUNT_DISTINCT
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count_distinct("@title"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"
    # COUNT_DISTINCTISH (approximate distinct count)
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.count_distinctish("@title"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"
    # SUM
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.sum("@random_num"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "21"  # 10+8+3
    # MIN
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.min("@random_num"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3"  # min(10,8,3)
    # MAX
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.max("@random_num"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "10"  # max(10,8,3)
    # AVG
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.avg("@random_num"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "7"  # (10+3+8)/3
    # STDDEV
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.stddev("random_num"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "3.60555127546"
    # QUANTILE
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.quantile("@random_num", 0.5),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == "10"
    # TOLIST
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.tolist("@title"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[3] == ["RediSearch", "RedisAI", "RedisJson"]
    # FIRST_VALUE with an alias
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.first_value("@title").alias("first"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res == ["parent", "redis", "first", "RediSearch"]
    # RANDOM_SAMPLE (membership only -- the sample itself is random)
    req = aggregations.AggregateRequest("redis").group_by(
        "@parent",
        reducers.random_sample("@title", 2).alias("random"),
    )
    res = client.ft().aggregate(req).rows[0]
    assert res[1] == "redis"
    assert res[2] == "random"
    assert len(res[3]) == 2
    assert res[3][0] in ["RediSearch", "RedisAI", "RedisJson"]
@pytest.mark.redismod
def test_aggregations_sort_by_and_limit(client):
    """SORTBY (with and without direction / MAX) and LIMIT on aggregations."""
    client.ft().create_index(
        (
            TextField("t1"),
            TextField("t2"),
        )
    )
    client.ft().client.hset("doc1", mapping={"t1": "a", "t2": "b"})
    client.ft().client.hset("doc2", mapping={"t1": "b", "t2": "a"})
    # test sort_by using SortDirection
    req = aggregations.AggregateRequest("*").sort_by(
        aggregations.Asc("@t2"), aggregations.Desc("@t1")
    )
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t2", "a", "t1", "b"]
    assert res.rows[1] == ["t2", "b", "t1", "a"]
    # test sort_by without SortDirection (defaults to ascending)
    req = aggregations.AggregateRequest("*").sort_by("@t1")
    res = client.ft().aggregate(req)
    assert res.rows[0] == ["t1", "a"]
    assert res.rows[1] == ["t1", "b"]
    # test sort_by with max (caps the number of returned rows)
    req = aggregations.AggregateRequest("*").sort_by("@t1", max=1)
    res = client.ft().aggregate(req)
    assert len(res.rows) == 1
    # test limit (offset 1, count 1 -> only the second row)
    req = aggregations.AggregateRequest("*").sort_by("@t1").limit(1, 1)
    res = client.ft().aggregate(req)
    assert len(res.rows) == 1
    assert res.rows[0] == ["t1", "b"]
@pytest.mark.redismod
def test_aggregations_load(client):
    """LOAD pulls the requested source fields into the aggregation rows."""
    client.ft().create_index(
        (
            TextField("t1"),
            TextField("t2"),
        )
    )
    client.ft().client.hset("doc1", mapping={"t1": "hello", "t2": "world"})

    # load t1
    result = client.ft().aggregate(aggregations.AggregateRequest("*").load("t1"))
    assert result.rows[0] == ["t1", "hello"]

    # load t2
    result = client.ft().aggregate(aggregations.AggregateRequest("*").load("t2"))
    assert result.rows[0] == ["t2", "world"]

    # load all
    result = client.ft().aggregate(aggregations.AggregateRequest("*").load())
    assert result.rows[0] == ["t1", "hello", "t2", "world"]
@pytest.mark.redismod
def test_aggregations_apply(client):
    """APPLY evaluates a derived expression for every aggregation row."""
    client.ft().create_index(
        (
            TextField("PrimaryKey", sortable=True),
            NumericField("CreatedDateTimeUTC", sortable=True),
        )
    )
    client.ft().client.hset(
        "doc1",
        mapping={"PrimaryKey": "9::362330", "CreatedDateTimeUTC": "637387878524969984"},
    )
    client.ft().client.hset(
        "doc2",
        mapping={"PrimaryKey": "9::362329", "CreatedDateTimeUTC": "637387875859270016"},
    )

    request = aggregations.AggregateRequest("*").apply(
        CreatedDateTimeUTC="@CreatedDateTimeUTC * 10"
    )
    result = client.ft().aggregate(request)
    # each row carries the scaled timestamp
    assert result.rows[0] == ["CreatedDateTimeUTC", "6373878785249699840"]
    assert result.rows[1] == ["CreatedDateTimeUTC", "6373878758592700416"]
@pytest.mark.redismod
def test_aggregations_filter(client):
    """FILTER keeps only the aggregation rows matching an expression."""
    client.ft().create_index(
        (
            TextField("name", sortable=True),
            NumericField("age", sortable=True),
        )
    )
    client.ft().client.hset("doc1", mapping={"name": "bar", "age": "25"})
    client.ft().client.hset("doc2", mapping={"name": "foo", "age": "19"})

    # A compound filter over both fields selects exactly one row.
    result = client.ft().aggregate(
        aggregations.AggregateRequest("*").filter("@name=='foo' && @age < 20")
    )
    assert len(result.rows) == 1
    assert result.rows[0] == ["name", "foo", "age", "19"]

    # A numeric-only filter keeps both rows; sort for a stable order.
    result = client.ft().aggregate(
        aggregations.AggregateRequest("*").filter("@age > 15").sort_by("@age")
    )
    assert len(result.rows) == 2
    assert result.rows[0] == ["age", "19"]
    assert result.rows[1] == ["age", "25"]
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_index_definition(client):
    """
    Create definition and test its args
    """
    # index_type must be an IndexType enum member, not a raw string
    with pytest.raises(RuntimeError):
        IndexDefinition(prefix=["hset:", "henry"], index_type="json")
    definition = IndexDefinition(
        prefix=["hset:", "henry"],
        filter="@f1==32",
        language="English",
        language_field="play",
        score_field="chapter",
        score=0.5,
        payload_field="txt",
        index_type=IndexType.JSON,
    )
    # the definition serializes to the FT.CREATE argument list
    assert [
        "ON",
        "JSON",
        "PREFIX",
        2,
        "hset:",
        "henry",
        "FILTER",
        "@f1==32",
        "LANGUAGE_FIELD",
        "play",
        "LANGUAGE",
        "English",
        "SCORE_FIELD",
        "chapter",
        "SCORE",
        0.5,
        "PAYLOAD_FIELD",
        "txt",
    ] == definition.args
    createIndex(client.ft(), num_docs=500, definition=definition)
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition(client):
    """
    Create definition with no index type provided,
    and use hset to test the client definition (the default is HASH).
    """
    definition = IndexDefinition(prefix=["hset:", "henry"])
    createIndex(client.ft(), num_docs=500, definition=definition)

    # only keys matching one of the prefixes were indexed
    assert int(client.ft().info()["num_docs"]) == 494

    # a new hash with a matching prefix is indexed automatically
    client.ft().client.hset("hset:1", "f1", "v1")
    assert int(client.ft().info()["num_docs"]) == 495
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition_hash(client):
    """
    Create definition with IndexType.HASH as index type (ON HASH),
    and use hset to test the client definition.
    """
    definition = IndexDefinition(prefix=["hset:", "henry"], index_type=IndexType.HASH)
    createIndex(client.ft(), num_docs=500, definition=definition)

    # only keys matching one of the prefixes were indexed
    assert int(client.ft().info()["num_docs"]) == 494

    # a new hash with a matching prefix is indexed automatically
    client.ft().client.hset("hset:1", "f1", "v1")
    assert int(client.ft().info()["num_docs"]) == 495
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_client_definition_json(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON),
    and use json client to test it.
    """
    json_def = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index((TextField("$.name"),), definition=json_def)

    client.json().set("king:1", Path.rootPath(), {"name": "henry"})
    client.json().set("king:2", Path.rootPath(), {"name": "james"})

    res = client.ft().search("henry")
    assert res.total == 1
    doc = res.docs[0]
    assert doc.id == "king:1"
    assert doc.payload is None
    assert doc.json == '{"name":"henry"}'
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_fields_as_name(client):
    """JSON paths can be indexed under alias names via ``as_name``."""
    schema = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    client.ft().create_index(
        schema, definition=IndexDefinition(index_type=IndexType.JSON)
    )

    # insert json data
    assert client.json().set("doc:1", Path.rootPath(), {"name": "Jon", "age": 25})

    # Fields come back under their aliases, not the raw JSON paths.
    docs = client.ft().search(Query("Jon").return_fields("name", "just_a_number")).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].name == "Jon"
    assert docs[0].just_a_number == "25"
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_search_return_fields(client):
    """RETURN with ``as_field`` aliases works for indexed and unindexed paths."""
    assert client.json().set(
        "doc:1",
        Path.rootPath(),
        {"t": "riceratops", "t2": "telmatosaurus", "n": 9072, "flt": 97.2},
    )

    # create index on
    schema = (TextField("$.t"), NumericField("$.flt"))
    client.ft().create_index(
        schema, definition=IndexDefinition(index_type=IndexType.JSON)
    )
    waitForIndex(client, "idx")

    # Aliased return of an indexed path.
    docs = client.ft().search(Query("*").return_field("$.t", as_field="txt")).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "riceratops"

    # Aliased return also works for a path outside the schema.
    docs = client.ft().search(Query("*").return_field("$.t2", as_field="txt")).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "telmatosaurus"
@pytest.mark.redismod
def test_synupdate(client):
    """SYNUPDATE with skip-initial-scan only affects later-indexed documents."""
    client.ft().create_index(
        (TextField("title"), TextField("body")),
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    # doc1 is added before "baby" joins the group; with the scan skipped the
    # updated group is not re-applied to it, so only doc2 matches below.
    client.ft().synupdate("id1", True, "boy", "child", "offspring")
    client.ft().add_document("doc1", title="he is a baby", body="this is a test")
    client.ft().synupdate("id1", True, "baby")
    client.ft().add_document("doc2", title="he is another baby", body="another test")

    res = client.ft().search(Query("child").expander("SYNONYM"))
    doc = res.docs[0]
    assert doc.id == "doc2"
    assert doc.title == "he is another baby"
    assert doc.body == "another test"
@pytest.mark.redismod
def test_syndump(client):
    """SYNDUMP maps each term to the synonym-group ids that contain it."""
    client.ft().create_index(
        (TextField("title"), TextField("body")),
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    client.ft().synupdate("id1", False, "boy", "child", "offspring")
    client.ft().synupdate("id2", False, "baby", "child")
    client.ft().synupdate("id3", False, "tree", "wood")

    assert client.ft().syndump() == {
        "boy": ["id1"],
        "tree": ["id3"],
        "wood": ["id3"],
        "child": ["id1", "id2"],
        "baby": ["id2"],
        "offspring": ["id1"],
    }
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_json_with_alias(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON) with two
    fields with aliases, and use json client to test it.
    """
    json_def = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index(
        (TextField("$.name", as_name="name"), NumericField("$.num", as_name="num")),
        definition=json_def,
    )

    client.json().set("king:1", Path.rootPath(), {"name": "henry", "num": 42})
    client.json().set("king:2", Path.rootPath(), {"name": "james", "num": 3.14})

    res = client.ft().search("@name:henry")
    assert res.total == 1
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","num":42}'

    res = client.ft().search("@num:[0 10]")
    assert res.total == 1
    assert res.docs[0].id == "king:2"
    assert res.docs[0].json == '{"name":"james","num":3.14}'

    # Tests returns an error if path contain special characters (user should
    # use an alias)
    with pytest.raises(Exception):
        client.ft().search("@$.name:henry")
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_multipath(client):
    """
    Create definition with IndexType.JSON as index type (ON JSON),
    and use json client to test it.

    The recursive JSONPath ``$..name`` indexes every "name" value in the
    document under a single tag field, so both the top-level and nested
    names are searchable tags.
    """
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    # Fix: the schema must be a one-element tuple of fields; the original
    # passed a bare TagField in grouping parentheses (missing trailing comma),
    # inconsistent with every other create_index call in this file.
    client.ft().create_index(
        (TagField("$..name", as_name="name"),), definition=definition
    )
    client.json().set(
        "king:1", Path.rootPath(), {"name": "henry", "country": {"name": "england"}}
    )
    res = client.ft().search("@name:{henry}")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","country":{"name":"england"}}'
    assert res.total == 1
    res = client.ft().search("@name:{england}")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","country":{"name":"england"}}'
    assert res.total == 1
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_jsonpath(client):
    """JSON keys with special characters need bracket notation to be indexable."""
    client.ft().create_index(
        (
            TextField('$["prod:name"]', as_name="name"),
            TextField("$.prod:name", as_name="name_unsupported"),
        ),
        definition=IndexDefinition(index_type=IndexType.JSON),
    )
    client.json().set("doc:1", Path.rootPath(), {"prod:name": "RediSearch"})

    # query for a supported field succeeds
    res = client.ft().search(Query("@name:RediSearch"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].json == '{"prod:name":"RediSearch"}'

    # query for an unsupported field fails
    assert client.ft().search("@name_unsupported:RediSearch").total == 0

    # return of a supported field succeeds
    res = client.ft().search(Query("@name:RediSearch").return_field("name"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].name == "RediSearch"

    # return of an unsupported field fails
    res = client.ft().search(Query("@name:RediSearch").return_field("name_unsupported"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    with pytest.raises(Exception):
        res.docs[0].name_unsupported
@pytest.mark.redismod
def test_profile(client):
    """FT.PROFILE returns iterator statistics for both SEARCH and AGGREGATE."""
    client.ft().create_index((TextField("t"),))
    client.ft().client.hset("1", "t", "hello")
    client.ft().client.hset("2", "t", "world")

    # check using Query
    query = Query("hello|world").no_content()
    res, details = client.ft().profile(query)
    iterators = details["Iterators profile"]
    assert iterators["Counter"] == 2.0
    assert len(iterators["Child iterators"]) == 2
    assert iterators["Type"] == "UNION"
    assert details["Parsing time"] < 0.5
    assert len(res.docs) == 2  # check also the search result

    # check using AggregateRequest
    request = (
        aggregations.AggregateRequest("*")
        .load("t")
        .apply(prefix="startswith(@t, 'hel')")
    )
    res, details = client.ft().profile(request)
    iterators = details["Iterators profile"]
    assert iterators["Counter"] == 2.0
    assert iterators["Type"] == "WILDCARD"
    assert details["Parsing time"] < 0.5
    assert len(res.rows) == 2  # check also the search result
@pytest.mark.redismod
def test_profile_limited(client):
    """FT.PROFILE LIMITED collapses union children into a summary string."""
    client.ft().create_index((TextField("t"),))
    for key, value in (("1", "hello"), ("2", "hell"), ("3", "help"), ("4", "helowa")):
        client.ft().client.hset(key, "t", value)

    query = Query("%hell% hel*")
    res, details = client.ft().profile(query, limited=True)

    children = details["Iterators profile"]["Child iterators"]
    # %hell% (fuzzy) expands to 3 terms; hel* (prefix) expands to 4.
    assert children[0]["Child iterators"] == "The number of iterators in the union is 3"
    assert children[1]["Child iterators"] == "The number of iterators in the union is 4"
    assert details["Iterators profile"]["Type"] == "INTERSECT"
    assert len(res.docs) == 3  # check also the search result