diff --git a/language/google/cloud/language/syntax.py b/language/google/cloud/language/syntax.py index 9bad2116b332..037a6a74f298 100644 --- a/language/google/cloud/language/syntax.py +++ b/language/google/cloud/language/syntax.py @@ -20,7 +20,62 @@ class PartOfSpeech(object): - """Part of speech of a :class:`Token`.""" + """A Google Cloud Natural Language API Part of speech object. + + These are the grammatical categories of the matched token in + the sentence. https://cloud.google.com/natural-language/docs\ + /reference/rest/v1/Token#PartOfSpeech + + :type aspect: str + :param aspect: The grammatical aspect. https://cloud.google\ + .com/natural-language/docs/reference/rest/v1/\ + Token#Aspect + + :type reciprocity: str + :param reciprocity: The grammatical reciprocity. https://\ + cloud.google.com/natural-language/docs/reference\ + /rest/v1/Token#Reciprocity + + :type case: str + :param case: The grammatical case. https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Case + + :type mood: str + :param mood: The grammatical mood. https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Mood + + :type tag: str + :param tag: The part of speech tag. https://cloud.google.com/natural\ + -language/docs/reference/rest/v1/Token#Tag + + :type person: str + :param person: The grammatical person. https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Person + + :type number: str + :param number: The grammatical number. https://cloud.google.com/natural\ + -language/docs/reference/rest/v1/Token#Number + + :type tense: str + :param tense: The grammatical tense. https://cloud.google.com/natural\ + -language/docs/reference/rest/v1/Token#Tense + + :type form: str + :param form: The grammatical form. https://cloud.google.com/natural\ + -language/docs/reference/rest/v1/Token#Form + + :type proper: str + :param proper: The grammatical properness. 
https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Proper + + :type voice: str + :param voice: The grammatical voice. https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Voice + + :type gender: str + :param gender: The grammatical gender. https://cloud.google.com/\ + natural-language/docs/reference/rest/v1/Token#Gender + """ UNKNOWN = 'UNKNOWN' """Unknown part of speech.""" @@ -81,6 +136,36 @@ class PartOfSpeech(object): 'AFFIX': 'AFFIX', } + def __init__(self, aspect, reciprocity, case, mood, tag, person, + number, tense, form, proper, voice, gender): + self.aspect = aspect + self.reciprocity = reciprocity + self.case = case + self.mood = mood + self.tag = tag + self.person = person + self.number = number + self.tense = tense + self.form = form + self.proper = proper + self.voice = voice + self.gender = gender + + @classmethod + def from_api_repr(cls, payload): + return PartOfSpeech(aspect=payload['aspect'], + reciprocity=payload['reciprocity'], + case=payload['case'], + mood=payload['mood'], + tag=payload['tag'], + person=payload['person'], + number=payload['number'], + tense=payload['tense'], + form=payload['form'], + proper=payload['proper'], + voice=payload['voice'], + gender=payload['gender']) + @classmethod def reverse(cls, tag): """Reverses the API's enum name for the one on this class. @@ -118,9 +203,9 @@ class Token(object): document according to the encoding type specified in the API request. - :type part_of_speech: str - :param part_of_speech: The part of speech of the token. See - :class:`PartOfSpeech` for possible values. + :type part_of_speech: PartOfSpeech + :param part_of_speech: An object representing the Part of Speech of the + token with its properties. :type edge_index: int :param edge_index: The head of this token in the dependency tree. 
This is @@ -159,7 +244,7 @@ def from_api_repr(cls, payload): text_span = payload['text'] text_content = text_span['content'] text_begin = text_span['beginOffset'] - part_of_speech = payload['partOfSpeech']['tag'] + part_of_speech = PartOfSpeech.from_api_repr(payload['partOfSpeech']) edge = payload['dependencyEdge'] edge_index = edge['headTokenIndex'] edge_label = edge['label'] diff --git a/language/tests/unit/test_api_responses.py b/language/tests/unit/test_api_responses.py index bc04522acb06..4b79ec923fee 100644 --- a/language/tests/unit/test_api_responses.py +++ b/language/tests/unit/test_api_responses.py @@ -115,6 +115,18 @@ def _verify_sentiment_response(self, sentiment_response): class TestSyntaxResponse(unittest.TestCase): SENTENCE_DICT = copy(TestSentimentResponse.SENTENCE_DICT) + aspect = 'ASPECT_UNKNOWN' + reciprocity = 'RECIPROCITY_UNKNOWN' + case = 'NOMINATIVE' + mood = 'MOOD_UNKNOWN' + tag = 'PRON' + person = 'FIRST' + number = 'SINGULAR' + tense = 'TENSE_UNKNOWN' + form = 'FORM_UNKNOWN' + proper = 'PROPER_UNKNOWN' + voice = 'VOICE_UNKNOWN' + gender = 'GENDER_UNKNOWN' TOKEN_DICT = { 'dependencyEdge': { 'headTokenIndex': 0, @@ -122,7 +134,18 @@ class TestSyntaxResponse(unittest.TestCase): }, 'lemma': 'it', 'partOfSpeech': { - 'tag': 'PRON', + 'aspect': aspect, + 'reciprocity': reciprocity, + 'case': case, + 'mood': mood, + 'tag': tag, + 'person': person, + 'number': number, + 'tense': tense, + 'form': form, + 'proper': proper, + 'voice': voice, + 'gender': gender }, 'text': { 'beginOffset': 0, @@ -156,7 +179,6 @@ def test_api_repr_factory(self): def _verify_syntax_response(self, syntax_response): from google.cloud.language.sentiment import Sentiment - from google.cloud.language.syntax import PartOfSpeech self.assertEqual(syntax_response.language, 'en') @@ -169,7 +191,18 @@ def _verify_syntax_response(self, syntax_response): token = syntax_response.tokens[0] self.assertEqual(token.text_content, 'It') self.assertEqual(token.text_begin, 0) - 
self.assertEqual(token.part_of_speech, PartOfSpeech.PRONOUN) + self.assertEqual(token.part_of_speech.aspect, 'ASPECT_UNKNOWN') + self.assertEqual(token.part_of_speech.reciprocity, 'RECIPROCITY_UNKNOWN') + self.assertEqual(token.part_of_speech.case, 'NOMINATIVE') + self.assertEqual(token.part_of_speech.mood, 'MOOD_UNKNOWN') + self.assertEqual(token.part_of_speech.tag, 'PRON') + self.assertEqual(token.part_of_speech.person, 'FIRST') + self.assertEqual(token.part_of_speech.number, 'SINGULAR') + self.assertEqual(token.part_of_speech.tense, 'TENSE_UNKNOWN') + self.assertEqual(token.part_of_speech.form, 'FORM_UNKNOWN') + self.assertEqual(token.part_of_speech.proper, 'PROPER_UNKNOWN') + self.assertEqual(token.part_of_speech.voice, 'VOICE_UNKNOWN') + self.assertEqual(token.part_of_speech.gender, 'GENDER_UNKNOWN') self.assertEqual(token.edge_index, 0) self.assertEqual(token.edge_label, 'NSUBJ') self.assertEqual(token.lemma, 'it') diff --git a/language/tests/unit/test_document.py b/language/tests/unit/test_document.py index c30d13b6f15e..0b12d0955309 100644 --- a/language/tests/unit/test_document.py +++ b/language/tests/unit/test_document.py @@ -14,7 +14,6 @@ import unittest - ANNOTATE_NAME = 'Moon' ANNOTATE_CONTENT = 'A cow jumped over the %s.' 
% (ANNOTATE_NAME,) ANNOTATE_SCORE = 1 @@ -29,7 +28,20 @@ def _make_token_json(name, part_of_speech, head, edge_label): 'content': name, 'beginOffset': -1, }, - 'partOfSpeech': {'tag': part_of_speech}, + 'partOfSpeech': { + 'aspect': 'ASPECT_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'case': 'NOMINATIVE', + 'mood': 'MOOD_UNKNOWN', + 'tag': part_of_speech, + 'person': 'FIRST', + 'number': 'SINGULAR', + 'tense': 'TENSE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'proper': 'PROPER_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + 'gender': 'GENDER_UNKNOWN', + }, 'dependencyEdge': { 'headTokenIndex': head, 'label': edge_label, @@ -120,7 +132,6 @@ def test_default_low_maxunicode(self): class TestDocument(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.language.document import Document @@ -424,12 +435,12 @@ def test_analyze_sentiment(self): client._connection.api_request.assert_called_once_with( path='analyzeSentiment', method='POST', data=expected) - def _verify_token(self, token, text_content, part_of_speech, lemma): + def _verify_token(self, token, text_content, part_of_speech_tag, lemma): from google.cloud.language.syntax import Token self.assertIsInstance(token, Token) self.assertEqual(token.text_content, text_content) - self.assertEqual(token.part_of_speech, part_of_speech) + self.assertEqual(token.part_of_speech.tag, part_of_speech_tag) self.assertEqual(token.lemma, lemma) def test_analyze_syntax(self): @@ -457,7 +468,18 @@ def test_analyze_syntax(self): 'beginOffset': -1, }, 'partOfSpeech': { + 'aspect': 'ASPECT_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', 'tag': 'NOUN', + 'person': 'PERSON_UNKNOWN', + 'number': 'SINGULAR', + 'tense': 'TENSE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'proper': 'PROPER', + 'voice': 'VOICE_UNKNOWN', + 'gender': 'GENDER_UNKNOWN' }, 'dependencyEdge': { 'headTokenIndex': 0, @@ -471,7 +493,18 @@ def test_analyze_syntax(self): 'beginOffset': -1, }, 
'partOfSpeech': { + 'aspect': 'ASPECT_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', 'tag': 'ADP', + 'person': 'PERSON_UNKNOWN', + 'number': 'NUMBER_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'proper': 'PROPER_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + 'gender': 'GENDER_UNKNOWN' }, 'dependencyEdge': { 'headTokenIndex': 0, @@ -485,7 +518,18 @@ def test_analyze_syntax(self): 'beginOffset': -1, }, 'partOfSpeech': { + 'aspect': 'ASPECT_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', 'tag': 'DET', + 'person': 'PERSON_UNKNOWN', + 'number': 'NUMBER_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'proper': 'PROPER_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + 'gender': 'GENDER_UNKNOWN' }, 'dependencyEdge': { 'headTokenIndex': 3, @@ -499,7 +543,18 @@ def test_analyze_syntax(self): 'beginOffset': -1, }, 'partOfSpeech': { + 'aspect': 'ASPECT_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', 'tag': 'NOUN', + 'person': 'PERSON_UNKNOWN', + 'number': 'SINGULAR', + 'tense': 'TENSE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'proper': 'PROPER', + 'voice': 'VOICE_UNKNOWN', + 'gender': 'GENDER_UNKNOWN' }, 'dependencyEdge': { 'headTokenIndex': 1, @@ -553,7 +608,7 @@ def _verify_tokens(self, annotations, token_info): self.assertIsInstance(token, Token) self.assertEqual(token.text_content, info[0]) self.assertEqual(token.text_begin, -1) - self.assertEqual(token.part_of_speech, info[1]) + self.assertEqual(token.part_of_speech.tag, info[1]) self.assertEqual(token.edge_index, info[2]) self.assertEqual(token.edge_label, info[3]) self.assertEqual(token.lemma, info[0]) diff --git a/language/tests/unit/test_syntax.py b/language/tests/unit/test_syntax.py index 8c1f994da5ae..387257353ccb 100644 --- a/language/tests/unit/test_syntax.py +++ b/language/tests/unit/test_syntax.py @@ -16,7 +16,6 @@ class 
TestPartOfSpeech(unittest.TestCase): - @staticmethod def _get_target_class(): from google.cloud.language.syntax import PartOfSpeech @@ -34,9 +33,83 @@ def test_reverse(self): result = klass.reverse(value) self.assertEqual(result, attr) + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) -class TestToken(unittest.TestCase): + def test_constructor(self): + + aspect = 'ASPECT_UNKNOWN' + reciprocity = 'RECIPROCITY_UNKNOWN' + case = 'NOMINATIVE' + mood = 'MOOD_UNKNOWN' + tag = 'PRON' + person = 'FIRST' + number = 'SINGULAR' + tense = 'TENSE_UNKNOWN' + form = 'FORM_UNKNOWN' + proper = 'PROPER_UNKNOWN' + voice = 'VOICE_UNKNOWN' + gender = 'GENDER_UNKNOWN' + + pos = self._make_one(aspect, reciprocity, case, mood, tag, person, + number, tense, form, proper, voice, gender) + self.assertEqual(pos.aspect, aspect) + self.assertEqual(pos.reciprocity, reciprocity) + self.assertEqual(pos.case, case) + self.assertEqual(pos.mood, mood) + self.assertEqual(pos.tag, tag) + self.assertEqual(pos.person, person) + self.assertEqual(pos.number, number) + self.assertEqual(pos.tense, tense) + self.assertEqual(pos.form, form) + self.assertEqual(pos.proper, proper) + self.assertEqual(pos.voice, voice) + self.assertEqual(pos.gender, gender) + + def test_from_api_repr(self): + klass = self._get_target_class() + aspect = 'ASPECT_UNKNOWN' + reciprocity = 'RECIPROCITY_UNKNOWN' + case = 'NOMINATIVE' + mood = 'MOOD_UNKNOWN' + tag = 'PRON' + person = 'FIRST' + number = 'SINGULAR' + tense = 'TENSE_UNKNOWN' + form = 'FORM_UNKNOWN' + proper = 'PROPER_UNKNOWN' + voice = 'VOICE_UNKNOWN' + gender = 'GENDER_UNKNOWN' + payload = { + 'aspect': aspect, + 'reciprocity': reciprocity, + 'case': case, + 'mood': mood, + 'tag': tag, + 'person': person, + 'number': number, + 'tense': tense, + 'form': form, + 'proper': proper, + 'voice': voice, + 'gender': gender + } + pos = klass.from_api_repr(payload) + self.assertEqual(pos.aspect, aspect) + self.assertEqual(pos.reciprocity, reciprocity) 
+ self.assertEqual(pos.case, case) + self.assertEqual(pos.mood, mood) + self.assertEqual(pos.tag, tag) + self.assertEqual(pos.person, person) + self.assertEqual(pos.number, number) + self.assertEqual(pos.tense, tense) + self.assertEqual(pos.form, form) + self.assertEqual(pos.proper, proper) + self.assertEqual(pos.voice, voice) + self.assertEqual(pos.gender, gender) + +class TestToken(unittest.TestCase): @staticmethod def _get_target_class(): from google.cloud.language.syntax import Token @@ -51,7 +124,20 @@ def test_constructor(self): text_content = 'All' text_begin = -1 - part_of_speech = PartOfSpeech.DETERMINER + aspect = 'ASPECT_UNKNOWN' + reciprocity = 'RECIPROCITY_UNKNOWN' + case = 'NOMINATIVE' + mood = 'MOOD_UNKNOWN' + tag = 'PRON' + person = 'FIRST' + number = 'SINGULAR' + tense = 'TENSE_UNKNOWN' + form = 'FORM_UNKNOWN' + proper = 'PROPER_UNKNOWN' + voice = 'VOICE_UNKNOWN' + gender = 'GENDER_UNKNOWN' + part_of_speech = PartOfSpeech(aspect, reciprocity, case, mood, tag, person, + number, tense, form, proper, voice, gender) edge_index = 3 edge_label = 'PREDET' lemma = text_content @@ -59,18 +145,52 @@ def test_constructor(self): edge_index, edge_label, lemma) self.assertEqual(token.text_content, text_content) self.assertEqual(token.text_begin, text_begin) - self.assertEqual(token.part_of_speech, part_of_speech) + self.assertEqual(token.part_of_speech.aspect, part_of_speech.aspect) + self.assertEqual(token.part_of_speech.reciprocity, part_of_speech.reciprocity) + self.assertEqual(token.part_of_speech.case, part_of_speech.case) + self.assertEqual(token.part_of_speech.mood, part_of_speech.mood) + self.assertEqual(token.part_of_speech.tag, part_of_speech.tag) + self.assertEqual(token.part_of_speech.person, part_of_speech.person) + self.assertEqual(token.part_of_speech.number, part_of_speech.number) + self.assertEqual(token.part_of_speech.tense, part_of_speech.tense) + self.assertEqual(token.part_of_speech.form, part_of_speech.form) + 
self.assertEqual(token.part_of_speech.proper, part_of_speech.proper) + self.assertEqual(token.part_of_speech.voice, part_of_speech.voice) + self.assertEqual(token.part_of_speech.gender, part_of_speech.gender) self.assertEqual(token.edge_index, edge_index) self.assertEqual(token.edge_label, edge_label) self.assertEqual(token.lemma, lemma) def test_from_api_repr(self): - from google.cloud.language.syntax import PartOfSpeech - klass = self._get_target_class() text_content = 'pretty' text_begin = -1 - part_of_speech = PartOfSpeech.ADJECTIVE + aspect = 'ASPECT_UNKNOWN' + reciprocity = 'RECIPROCITY_UNKNOWN' + case = 'NOMINATIVE' + mood = 'MOOD_UNKNOWN' + tag = 'PRON' + person = 'FIRST' + number = 'SINGULAR' + tense = 'TENSE_UNKNOWN' + form = 'FORM_UNKNOWN' + proper = 'PROPER_UNKNOWN' + voice = 'VOICE_UNKNOWN' + gender = 'GENDER_UNKNOWN' + part_of_speech = { + 'aspect': aspect, + 'reciprocity': reciprocity, + 'case': case, + 'mood': mood, + 'tag': tag, + 'person': person, + 'number': number, + 'tense': tense, + 'form': form, + 'proper': proper, + 'voice': voice, + 'gender': gender + } edge_index = 3 edge_label = 'AMOD' lemma = text_content @@ -79,9 +199,7 @@ def test_from_api_repr(self): 'content': text_content, 'beginOffset': text_begin, }, - 'partOfSpeech': { - 'tag': part_of_speech, - }, + 'partOfSpeech': part_of_speech, 'dependencyEdge': { 'headTokenIndex': edge_index, 'label': edge_label, @@ -91,7 +209,18 @@ def test_from_api_repr(self): token = klass.from_api_repr(payload) self.assertEqual(token.text_content, text_content) self.assertEqual(token.text_begin, text_begin) - self.assertEqual(token.part_of_speech, part_of_speech) + self.assertEqual(token.part_of_speech.aspect, part_of_speech['aspect']) + self.assertEqual(token.part_of_speech.reciprocity, part_of_speech['reciprocity']) + self.assertEqual(token.part_of_speech.case, part_of_speech['case']) + self.assertEqual(token.part_of_speech.mood, part_of_speech['mood']) + self.assertEqual(token.part_of_speech.tag, 
part_of_speech['tag']) + self.assertEqual(token.part_of_speech.person, part_of_speech['person']) + self.assertEqual(token.part_of_speech.number, part_of_speech['number']) + self.assertEqual(token.part_of_speech.tense, part_of_speech['tense']) + self.assertEqual(token.part_of_speech.form, part_of_speech['form']) + self.assertEqual(token.part_of_speech.proper, part_of_speech['proper']) + self.assertEqual(token.part_of_speech.voice, part_of_speech['voice']) + self.assertEqual(token.part_of_speech.gender, part_of_speech['gender']) self.assertEqual(token.edge_index, edge_index) self.assertEqual(token.edge_label, edge_label) self.assertEqual(token.lemma, lemma)