Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions medcat-v2/medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ def _mp_one_batch_per_process(
executor.submit(self._mp_worker_func, batch))
except StopIteration:
break
if not futures:
# NOTE: if there wasn't any data, we didn't process anything
return
# Main process works on next batch while workers are busy
main_batch: Optional[list[tuple[str, str, bool]]]
try:
Expand All @@ -282,6 +285,10 @@ def _mp_one_batch_per_process(
# so we're going to wait for them to finish, yield their results,
# and subsequently submit the next batch to keep them busy
for _ in range(external_processes):
if not futures:
# NOTE: if there are no futures then there can't be
# anything to batch
break
# Wait for any future to complete
done_future = next(as_completed(futures))
futures.remove(done_future)
Expand Down
16 changes: 16 additions & 0 deletions medcat-v2/tests/test_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,29 @@ def test_can_get_multiple_entities(self):
"The dog is sitting outside the house."
]
ents = list(self.cat.get_entities_multi_texts(texts))
self.assert_ents(ents, texts)

def assert_ents(self, ents: list[tuple], texts: list[str]):
    # Shared check for the multi-text entity tests: ``ents`` must hold
    # exactly one ``(text ID, entities)`` pair per input text.
    #
    # Args:
    #     ents: the collected ``(text ID, entities)`` pairs.
    #     texts: the input texts the pairs were produced from.
    #
    # Raises:
    #     AssertionError: on a count mismatch, an unexpected ID, or a
    #         missing/repeated ID.
    self.assertEqual(len(ents), len(texts))
    # NOTE: text IDs are the stringified 0-based positions of the texts
    exp_ids = {str(i) for i in range(len(texts))}
    for ent_id_str, ent in ents:
        with self.subTest(f"Entity: {ent_id_str} [{ent}]"):
            self.assertIn(ent_id_str, exp_ids)
    # Count + membership alone would let a repeated ID stand in for a
    # missing one, so also require every expected ID to be present.
    self.assertEqual({ent_id for ent_id, _ in ents}, exp_ids)

def test_can_multiprocess_empty(self):
    # An empty input with n_process=3 must come back as an empty result
    # rather than raising.
    no_texts = []
    results = [
        *self.cat.get_entities_multi_texts(no_texts, n_process=3)
    ]
    self.assert_ents(results, no_texts)

def test_can_get_multiprocess(self):
    # Two sample sentences requested with n_process=3; every input text
    # must be matched by one result carrying an expected ID.
    sample_texts = [
        "The fittest most fit of chronic kidney failure",
        "The dog is sitting outside the house."
    ]
    results = [
        *self.cat.get_entities_multi_texts(sample_texts, n_process=3)
    ]
    self.assert_ents(results, sample_texts)


class CATWithDocAddonTests(CATIncludingTests):
EXAMPLE_TEXT = "Example text to tokenize"
Expand Down