Skip to content

Commit d7589cc

Browse files
enh: language detector reduce debug logs
Signed-off-by: thiswillbeyourgithub <[email protected]>
1 parent c0e2ce7 commit d7589cc

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

wdoc/utils/misc.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -827,6 +827,11 @@ def check_docs_tkn_length(
827827
except Exception as err:
828828
if str(err).startswith("Low language probability"):
829829
raise
830+
if "no features in text" in str(err).lower():
831+
logger.exception(
832+
f"language_detector couldn't find text features of text '{identifier}'. Treating it as valid document."
833+
)
834+
return 1.0
830835
else:
831836
logger.exception(
832837
f"Error when using language_detector on '{identifier}': {err}. Treating it as valid document."

0 commit comments

Comments
 (0)