Skip to content

Commit b251ead

Browse files
authored
CU-8699pv8uw: Add multiprocessing method to DeID (#33)
1 parent 8f142a5 commit b251ead

File tree

1 file changed

+24
-1
lines changed
  • medcat-v2/medcat/components/ner/trf

1 file changed

+24
-1
lines changed

medcat-v2/medcat/components/ner/trf/deid.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
- config
3535
- cdb
3636
"""
37-
from typing import Union, Any, Optional
37+
from typing import Union, Any, Optional, Iterable
3838
import re
3939
import logging
4040

@@ -92,6 +92,29 @@ def deid_text(self, text: str, redact: bool = False) -> str:
9292
return replace_entities_in_text(text, entities, self.cat.cdb.get_name,
9393
redact=redact)
9494

95+
def deid_multi_text(self, texts: Iterable[str], redact: bool = False,
                    n_process: Optional[int] = None) -> list[str]:
    """De-identify several texts in one call.

    Entities are extracted for all texts through
    `self.cat.get_entities_multi_texts` and then replaced in each
    text with `replace_entities_in_text`.

    Args:
        texts (Iterable[str]): The texts to de-identify. Any iterable
            is accepted, including one-shot iterators/generators.
            An item may also be a tuple, in which case its second
            element is used as the text.
        redact (bool): Forwarded to `replace_entities_in_text`.
            Defaults to False.
        n_process (Optional[int]): Number of processes forwarded to
            `get_entities_multi_texts`. `None` is treated as 1.

    Raises:
        ValueError: If an item is neither a `str` nor a `tuple`.

    Returns:
        list[str]: One processed text per (text, result) pair, in the
            order the pairs are produced.
    """
    if n_process is None:
        n_process = 1

    # The input is needed twice: once for entity extraction and once to
    # pair every raw text with its result. A generic Iterable may only
    # be consumable once (e.g. a generator), so snapshot it up front to
    # keep both passes complete and aligned.
    text_list = list(texts)

    entities = self.cat.get_entities_multi_texts(
        text_list, n_process=n_process)
    out: list[str] = []
    for raw_text, (_, _ents) in zip(text_list, entities):
        ents = _ents['entities']
        text: str
        if isinstance(raw_text, tuple):
            # Tuple input: the text is taken from the second element.
            text = raw_text[1]
        elif isinstance(raw_text, str):
            text = raw_text
        else:
            raise ValueError("Unknown raw text: "
                             f"{type(raw_text)}: {raw_text}")
        new_text = replace_entities_in_text(
            text, ents, get_cui_name=self.cat.cdb.get_name, redact=redact)
        out.append(new_text)
    return out
117+
95118
@classmethod
96119
def load_model_pack(cls, model_pack_path: str,
97120
config: Optional[dict] = None) -> 'DeIdModel':

0 commit comments

Comments
 (0)