Skip to content

Commit 196b3a0

Browse files
fix: edge case where WDOC_MAX_CHUNK_SIZE would be ignored when the model's max_tokens lookup fails
Signed-off-by: thiswillbeyourgithub <[email protected]>
1 parent dac6802 commit 196b3a0

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

wdoc/utils/misc.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -726,14 +726,19 @@ def get_splitter(
726726
else:
727727
max_tokens = get_model_max_tokens(modelname)
728728

729-
# don't use overly large chunks anyway
730-
max_tokens = min(max_tokens, env.WDOC_MAX_CHUNK_SIZE)
731729
except Exception as err:
732730
max_tokens = 4096
733731
logger.warning(
734732
f"Failed to get max_tokens limit for model {modelname.original}: '{err}'"
735733
)
736734

735+
# don't use overly large chunks anyway
736+
if max_tokens > env.WDOC_MAX_CHUNK_SIZE:
737+
logger.debug(
738+
f"Setting max_tokens for model {modelname} to the WDOC_MAX_CHUNK_SIZE value ({env.WDOC_MAX_CHUNK_SIZE} instead of {max_tokens})."
739+
)
740+
max_tokens = min(max_tokens, env.WDOC_MAX_CHUNK_SIZE)
741+
737742
model_tkn_length = partial(get_tkn_length, modelname=modelname.original)
738743

739744
if task in ["query", "search"]:

0 commit comments

Comments
 (0)