Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 3 additions & 0 deletions ctakesclient/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Public API"""

from . import typesystem
from .typesystem import CtakesJSON, MatchText, Polarity, Span
from .typesystem import UmlsTypeMention, UmlsConcept
Comment on lines +4 to +5
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Convenience imports, so consumers don't have to use ctakesclient.typesystem? I kind of like the idea of having one canonical import location though -- if we do this, how do you feel about dropping the from . import typesystem then?


from . import filesystem
from . import client
from . import transformer
19 changes: 18 additions & 1 deletion ctakesclient/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,29 @@ def get_url_ctakes_rest() -> str:
'URL_CTAKES_REST',
'http://localhost:8080/ctakes-web-rest/service/analyze')

def ascii(sentence:str) -> bytes:
    """
    Encode text as ASCII, silently dropping every non-ASCII character.

    See https://pythonguides.com/remove-unicode-characters-in-python/

    NOTE: inside this module the name shadows the ``ascii`` builtin.

    :param sentence: text to encode
    :return: ASCII bytes; characters that cannot be encoded are removed,
             so the result may be SHORTER than the input text
    """
    ascii_only = sentence.encode(encoding="ascii", errors="ignore")
    return ascii_only

def utf8(sentence: str) -> bytes:
    """
    Encode text as UTF-8 bytes.

    See https://stackoverflow.com/questions/34618149/post-unicode-string-to-web-service-using-python-requests-library

    :param sentence: text to encode
    :return: UTF-8 encoded bytes; non-ASCII characters occupy more than one
             byte, so the result may be LONGER than the input text
    """
    return sentence.encode("utf-8")

def post(sentence: str, url=get_url_ctakes_rest()) -> dict:
"""
:param sentence: clinical text to send to cTAKES
:param url: cTAKES REST server fully qualified path
:return:
:return: dict JSON response from cTAKES
"""
logging.debug(url)
# TODO: consider exposing a pass-through timeout parameter
Expand Down
144 changes: 111 additions & 33 deletions ctakesclient/typesystem.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
"""UMLS (Unified Medical Language System)"""
import json
import logging
from collections import Counter, OrderedDict
from enum import Enum
from typing import List

###############################################################################
#
# UMLS Unified Medical Language System
#
###############################################################################

###############################################################################
# UMLS Semantic Types
###############################################################################
class UmlsTypeMention(Enum):
"""
Semantic Types in the UMLS (Unified Medical Language System)
Expand All @@ -19,7 +28,9 @@ class UmlsTypeMention(Enum):
Procedure = 'ProcedureMention'
CustomDict = 'IdentifiedAnnotation'


###############################################################################
# UMLS Concepts
###############################################################################
class UmlsConcept:
"""Concept in the UMLS (Unified Medical Language System)"""

Expand Down Expand Up @@ -78,6 +89,10 @@ def __str__(self):
###############################################################################


###############################################################################
# Match: Text, Span, Polarity
###############################################################################

class Polarity(Enum):
""""
Polarity means "negation" like "patient denies cough".
Expand Down Expand Up @@ -130,45 +145,18 @@ def __init__(self, source=None):
def span(self) -> Span:
return Span(self.begin, self.end)

@staticmethod
def parse_polarity(polarity) -> Polarity:
if isinstance(polarity, Polarity):
return polarity
elif polarity == -1:
return Polarity.neg
elif polarity == 0:
return Polarity.pos
else:
raise Exception(f'polarity unknown: {polarity}')

@staticmethod
def parse_mention(mention: str) -> UmlsTypeMention:
if mention == 'IdentifiedAnnotation':
return UmlsTypeMention.CustomDict
else:
return UmlsTypeMention[mention.replace('Mention', '')]

@staticmethod
def sort_concepts(unsorted: List[UmlsConcept]) -> List[UmlsConcept]:
"""
:param unsorted: guarantees responses from ctakes server are
identically ordered
:return: sorted list of concepts.
"""
return sorted(unsorted, key=UmlsConcept.as_string)

def from_json(self, source: dict):
self.begin = source.get('begin')
self.end = source.get('end')
self.text = source.get('text')
self.type = self.parse_mention(source.get('type'))
self.polarity = self.parse_polarity(source.get('polarity'))
self.type = parse_mention(source.get('type'))
self.polarity = parse_polarity(source.get('polarity'))
self.conceptAttributes = []

# sort list of concepts ensuring same ordering
unsorted = list(UmlsConcept(c) for c in source.get('conceptAttributes'))

for c in MatchText.sort_concepts(unsorted):
for c in sort_concepts(unsorted):
self.conceptAttributes.append(c)

def as_json(self):
Expand All @@ -183,6 +171,9 @@ def as_json(self):
'type': self.type.value
}

###############################################################################
# Ctakes JSON: MatchText with list of UmlsConcept
###############################################################################

class CtakesJSON:
"""Ctakes JSON contain MatchText with list of UmlsConcept"""
Expand All @@ -204,6 +195,9 @@ def list_concept_cui(self, polarity=None) -> List[str]:
def list_concept_tui(self, polarity=None) -> List[str]:
    """
    :param polarity: optional polarity filter, forwarded to list_concept
    :return: the ``tui`` attribute of each concept from list_concept
    """
    concepts = self.list_concept(polarity)
    return [concept.tui for concept in concepts]

def list_concept_vocab(self, polarity=None) -> List[str]:
    """
    :param polarity: optional polarity filter, forwarded to list_concept
    :return: the ``codingScheme`` attribute of each concept from
             list_concept
    """
    concepts = self.list_concept(polarity)
    return [concept.codingScheme for concept in concepts]

def list_concept_code(self, polarity=None) -> List[str]:
    """
    :param polarity: optional polarity filter, forwarded to list_concept
    :return: the ``code`` attribute of each concept from list_concept
    """
    concepts = self.list_concept(polarity)
    return [concept.code for concept in concepts]

Expand All @@ -220,7 +214,7 @@ def list_match(self,
filter_umls_type)

if polarity is not None:
polarity = MatchText.parse_polarity(polarity)
polarity = parse_polarity(polarity)

concat = []
for semtype, matches in self.mentions.items():
Expand All @@ -238,6 +232,11 @@ def list_match_text(self, polarity=None) -> List[str]:
m.text
for m in self.list_match(polarity=polarity, filter_umls_type=None))

def list_match_type(self, polarity=None) -> List[UmlsTypeMention]:
    """
    :param polarity: optional polarity filter, forwarded to list_match
    :return: the ``type`` of every match from list_match (called with
             filter_umls_type=None); these are UmlsTypeMention members,
             not strings
    """
    matches = self.list_match(polarity=polarity, filter_umls_type=None)
    return [match.type for match in matches]

def list_sign_symptom(self, polarity=None) -> List[MatchText]:
    """
    :param polarity: optional polarity filter, forwarded to list_match
    :return: result of list_match for UmlsTypeMention.SignSymptom
    """
    semtype = UmlsTypeMention.SignSymptom
    return self.list_match(polarity, semtype)

Expand All @@ -256,9 +255,27 @@ def list_anatomical_site(self, polarity=None) -> List[MatchText]:
def list_identified_annotation(self, polarity=None) -> List[MatchText]:
    """
    :param polarity: optional polarity filter, forwarded to list_match
    :return: result of list_match for UmlsTypeMention.CustomDict
    """
    semtype = UmlsTypeMention.CustomDict
    return self.list_match(polarity, semtype)

def term_freq_summary(self) -> dict:
    """
    Summarize how often each kind of term occurs.

    :return: dict with keys 'text', 'cui', 'code', 'vocab', 'type' and
             'polarity'; each value is the term_freq() result for the
             corresponding list of terms
    """
    terms_cui = self.list_concept_cui()
    terms_code = self.list_concept_code()
    terms_vocab = self.list_concept_vocab()
    terms_text = self.list_match_text()
    # Enum members are reduced to their names so the keys are plain strings
    terms_polarity = [p.name for p in self.list_polarity(self.list_match())]
    terms_type = [t.name for t in self.list_match_type()]

    # term_freq is the helper defined at module level in this file
    return {
        'text': term_freq(terms_text),
        'cui': term_freq(terms_cui),
        'code': term_freq(terms_code),
        'vocab': term_freq(terms_vocab),
        'type': term_freq(terms_type),
        'polarity': term_freq(terms_polarity),
    }

def from_json(self, source: dict) -> None:
for mention, match_list in source.items():
semtype = MatchText.parse_mention(mention)
semtype = parse_mention(mention)

if semtype not in self.mentions:
self.mentions[semtype] = []
Expand All @@ -273,3 +290,64 @@ def as_json(self):

res[mention.value] = match_json
return res


###############################################################################
#
# Helper Functions
#
###############################################################################

###############################################################################
# Parsing Types
###############################################################################

def parse_polarity(polarity) -> Polarity:
    """
    Convert a raw polarity value into a Polarity member.

    :param polarity: a Polarity member (returned unchanged), or a value
                     equal to -1 (Polarity.neg) or 0 (Polarity.pos)
    :return: the matching Polarity member
    :raises Exception: when polarity is none of the accepted values
    """
    if isinstance(polarity, Polarity):
        return polarity
    if polarity == -1:
        return Polarity.neg
    if polarity == 0:
        return Polarity.pos
    raise Exception(f'polarity unknown: {polarity}')

def parse_mention(mention: str) -> UmlsTypeMention:
    """
    Map a mention type name onto its UmlsTypeMention member.

    :param mention: mention type name such as 'ProcedureMention', or the
                    special name 'IdentifiedAnnotation'
    :return: UmlsTypeMention.CustomDict for 'IdentifiedAnnotation';
             otherwise the member whose name equals ``mention`` with
             every 'Mention' substring removed
    :raises KeyError: when no member has the resulting name
    """
    if mention != 'IdentifiedAnnotation':
        member_name = mention.replace('Mention', '')
        return UmlsTypeMention[member_name]
    return UmlsTypeMention.CustomDict

###############################################################################
# Sorting
###############################################################################

def sort_concepts(unsorted: List[UmlsConcept]) -> List[UmlsConcept]:
    """
    Return the concepts in a deterministic order, so that responses from
    the ctakes server are identically ordered.

    :param unsorted: concepts in arbitrary order (not modified)
    :return: new list sorted by UmlsConcept.as_string
    """
    ordered = list(unsorted)
    ordered.sort(key=UmlsConcept.as_string)
    return ordered

###############################################################################
# Term Frequency
###############################################################################

def term_freq(term_list: list) -> dict:
    """
    Term Frequency calculation

    :param term_list: list of hashable terms: String, CUI, CODE, or other
                      "term"
    :return: OrderedDict mapping each term to its count, sorted by count
             descending; terms with equal counts keep first-seen order
    """
    # most_common() sorts stably by count, so ties stay in first-seen order
    return OrderedDict(Counter(term_list).most_common())
Empty file added test-integration/__init__.py
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import unittest
import ctakesclient
from .test_resources import LoadResource
from test.test_resources import LoadResource


def pretty(result: dict):
Expand All @@ -26,8 +26,8 @@ def test_covid_symptoms_medical_synonyms(self):
'Myalgias': 'Muscle aches and pain',
'Chills': 'Fever or chills',
'Post-tussive': 'after Coughing',
'tussive':'related to Coughing',
'Pharyngitis':'sore throat',
'tussive': 'related to Coughing',
'Pharyngitis': 'sore throat',
'Odynophagia': 'sore throat',
'Loss of taste': 'Anosmia',
'Loss of smell': 'Anosmia',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import unittest
import ctakesclient
from .test_resources import LoadResource
from test.test_resources import LoadResource

class TestClientCtakesRestServer(unittest.TestCase):
"""Test case for REST requests"""
Expand Down
Loading