Skip to content

FT.CREATE - support MAXTEXTFIELDS, TEMPORARY, NOHL, NOFREQS, SKIPINITIALSCAN #1847

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 58 additions & 20 deletions redis/commands/search/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@

NOOFFSETS = "NOOFFSETS"
NOFIELDS = "NOFIELDS"
NOHL = "NOHL"
NOFREQS = "NOFREQS"
MAXTEXTFIELDS = "MAXTEXTFIELDS"
TEMPORARY = "TEMPORARY"
STOPWORDS = "STOPWORDS"
SKIPINITIALSCAN = "SKIPINITIALSCAN"
WITHSCORES = "WITHSCORES"
FUZZY = "FUZZY"
WITHPAYLOADS = "WITHPAYLOADS"
Expand All @@ -66,27 +71,57 @@ def create_index(
no_field_flags=False,
stopwords=None,
definition=None,
max_text_fields=False,
temporary=None,
no_highlight=False,
no_term_frequencies=False,
skip_initial_scan=False,
):
"""
Create the search index. The index must not already exist.

### Parameters:

- **fields**: a list of TextField or NumericField objects
- **no_term_offsets**: If true, we will not save term offsets in the index
- **no_field_flags**: If true, we will not save field flags that allow searching in specific fields
- **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty
- **no_term_offsets**: If true, we will not save term offsets in
the index
- **no_field_flags**: If true, we will not save field flags that
allow searching in specific fields
- **stopwords**: If not None, we create the index with this custom
stopword list. The list can be empty
- **max_text_fields**: If true, we will encode indexes as if there
were more than 32 text fields which allows you to add additional
fields (beyond 32).
- **temporary**: Create a lightweight temporary index which will
expire after the specified period of inactivity (in seconds). The
internal idle timer is reset whenever the index is searched or added to.
- **no_highlight**: If true, disable highlighting support.
Also implied by no_term_offsets.
- **no_term_frequencies**: If true, we avoid saving the term frequencies
in the index.
- **skip_initial_scan**: If true, we do not scan and index existing documents when the index is created.

For more information: https://oss.redis.com/redisearch/Commands/#ftcreate
""" # noqa

args = [CREATE_CMD, self.index_name]
if definition is not None:
args += definition.args
if max_text_fields:
args.append(MAXTEXTFIELDS)
if temporary is not None and isinstance(temporary, int):
args.append(TEMPORARY)
args.append(temporary)
if no_term_offsets:
args.append(NOOFFSETS)
if no_highlight:
args.append(NOHL)
if no_field_flags:
args.append(NOFIELDS)
if no_term_frequencies:
args.append(NOFREQS)
if skip_initial_scan:
args.append(SKIPINITIALSCAN)
if stopwords is not None and isinstance(stopwords, (list, tuple, set)):
args += [STOPWORDS, len(stopwords)]
if len(stopwords) > 0:
Expand Down Expand Up @@ -129,7 +164,6 @@ def dropindex(self, delete_documents=False):
### Parameters:

- **delete_documents**: If `True`, all documents will be deleted.

For more information: https://oss.redis.com/redisearch/Commands/#ftdropindex
""" # noqa
keep_str = "" if delete_documents else "KEEPDOCS"
Expand Down Expand Up @@ -217,23 +251,27 @@ def add_document(
### Parameters

- **doc_id**: the id of the saved document.
- **nosave**: if set to true, we just index the document, and don't \
save a copy of it. This means that searches will just return ids.
- **score**: the document ranking, between 0.0 and 1.0.
- **payload**: optional inner-index payload we can save for fast access in scoring functions
- **replace**: if True, and the document already is in the index, \
- **nosave**: if set to true, we just index the document, and don't
save a copy of it. This means that searches will just
return ids.
- **score**: the document ranking, between 0.0 and 1.0
- **payload**: optional inner-index payload we can save for fast
access in scoring functions
- **replace**: if True, and the document already is in the index,
we perform an update and reindex the document
- **partial**: if True, the fields specified will be added to the \
existing document. \
This has the added benefit that any fields specified \
with `no_index` will not be reindexed again. Implies `replace`
- **partial**: if True, the fields specified will be added to the
existing document.
This has the added benefit that any fields specified
with `no_index`
will not be reindexed again. Implies `replace`
- **language**: Specify the language used for document tokenization.
- **no_create**: if True, the document is only updated and reindexed \
if it already exists. If the document does not exist, an error will be \
returned. Implies `replace`
- **fields** kwargs dictionary of the document fields to be saved and/or indexed.

NOTE: Geo points should be encoded as strings of "lon,lat"
- **no_create**: if True, the document is only updated and reindexed
if it already exists.
If the document does not exist, an error will be
returned. Implies `replace`
- **fields** kwargs dictionary of the document fields to be saved
and/or indexed.
NOTE: Geo points should be encoded as strings of "lon,lat"

For more information: https://oss.redis.com/redisearch/Commands/#ftadd
""" # noqa
Expand Down Expand Up @@ -481,7 +519,7 @@ def spellcheck(self, query, distance=None, include=None, exclude=None):

**query**: search query.
**distance**: the maximal Levenshtein distance for spelling
suggestions (default: 1, max: 4).
suggestions (default: 1, max: 4).
**include**: specifies an inclusion custom dictionary.
**exclude**: specifies an exclusion custom dictionary.

Expand Down
66 changes: 66 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,72 @@ def test_index_definition(client):
createIndex(client.ft(), num_docs=500, definition=definition)


@pytest.mark.redismod
def testExpire(client):
    """Check that a temporary index counts down and that a write resets it."""
    # Create the index with a 4-second inactivity timeout.
    client.ft().create_index((TextField("txt", sortable=True),), temporary=4)
    # "idx" is presumably the default index name used by client.ft()
    # -- TODO confirm against the fixture.
    ttl = client.execute_command("ft.debug", "TTL", "idx")
    assert ttl > 2

    # Poll until the reported TTL has dropped to 2 or below.
    while ttl > 2:
        ttl = client.execute_command("ft.debug", "TTL", "idx")
        time.sleep(0.01)

    # add document - should reset the ttl
    client.ft().add_document("doc", txt="foo bar", text="this is a simple test")
    ttl = client.execute_command("ft.debug", "TTL", "idx")
    assert ttl > 2
    # Keep polling with no upper bound: this loop only ends when the TTL query
    # raises ResponseError (presumably once the index has expired and been
    # removed -- TODO confirm). The last TTL read before that must be 0.
    try:
        while True:
            ttl = client.execute_command("ft.debug", "TTL", "idx")
            time.sleep(0.5)
    except redis.exceptions.ResponseError:
        assert ttl == 0


@pytest.mark.redismod
def testSkipInitialScan(client):
    """A hash stored before index creation is not found when the scan is skipped."""
    # Store the document first, so only an initial scan could pick it up.
    client.hset("doc1", "foo", "bar")

    search_index = client.ft()
    search_index.create_index((TextField("foo"),), skip_initial_scan=True)

    result = search_index.search(Query("@foo:bar"))
    assert result.total == 0


@pytest.mark.redismod
def testSummarizeDisabled_nooffset(client):
    """Summarizing must be rejected on an index created without term offsets."""
    client.ft().create_index((TextField("txt"),), no_term_offsets=True)
    client.ft().add_document("doc1", txt="foo bar")
    # Expect the server's error reply specifically: a bare ``Exception`` would
    # also pass on unrelated client-side bugs (TypeError, AttributeError, ...).
    with pytest.raises(redis.ResponseError):
        client.ft().search(Query("foo").summarize(fields=["txt"]))


@pytest.mark.redismod
def testSummarizeDisabled_nohl(client):
    """Summarizing must be rejected on an index created with highlighting off."""
    client.ft().create_index((TextField("txt"),), no_highlight=True)
    client.ft().add_document("doc1", txt="foo bar")
    # Expect the server's error reply specifically: a bare ``Exception`` would
    # also pass on unrelated client-side bugs (TypeError, AttributeError, ...).
    with pytest.raises(redis.ResponseError):
        client.ft().search(Query("foo").summarize(fields=["txt"]))


@pytest.mark.redismod
def testMaxTextFields(client):
    """Check the 32 text-field limit and that max_text_fields lifts it."""
    # Default encoding: grow the schema to 32 text fields (f0 .. f31).
    client.ft().create_index((TextField("f0"),))
    for x in range(1, 32):
        client.ft().alter_schema_add((TextField(f"f{x}"),))

    # A 33rd field must be rejected. Use a name that is not in the schema yet:
    # reusing the leaked loop variable would re-add "f31", and the error could
    # then come from the duplicate name rather than from the field limit.
    with pytest.raises(redis.ResponseError):
        client.ft().alter_schema_add((TextField("f32"),))

    # dropindex() takes a delete_documents flag, not an index name; pass the
    # flag explicitly instead of relying on a truthy string.
    client.ft().dropindex(delete_documents=True)

    # With max_text_fields the same index can grow beyond 32 text fields.
    client.ft().create_index((TextField("f0"),), max_text_fields=True)
    for x in range(1, 50):
        client.ft().alter_schema_add((TextField(f"f{x}"),))


@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition(client):
Expand Down