Summary of this patch (free text before the first "diff --git" header):
  * document.py: add Encoding.get_default(), which returns UTF16 when
    sys.maxunicode == 65535 and UTF32 otherwise, and use it (instead of
    Encoding.UTF8) as the default `encoding` argument of Document.__init__.
  * unit_tests/test_document.py: add TestEncoding and make the existing
    expectations compare against Encoding.get_default().
  * system_tests/language.py: read the Wikipedia URL from
    entity.metadata['wikipedia_url'] rather than entity.wikipedia_url.

diff --git a/language/google/cloud/language/document.py b/language/google/cloud/language/document.py
index 3165f3c17597..df6bfeb5cfc7 100644
--- a/language/google/cloud/language/document.py
+++ b/language/google/cloud/language/document.py
@@ -18,6 +18,7 @@
 """
 
 import collections
+import sys
 
 from google.cloud.language import api_responses
 from google.cloud.language.entity import Entity
@@ -64,6 +65,17 @@ class Encoding(object):
     UTF32 = 'UTF32'
     """UTF-32 encoding type."""
 
+    @classmethod
+    def get_default(cls):
+        """Return the appropriate default encoding on this system.
+
+        :rtype: str
+        :returns: The correct default encoding on this system.
+        """
+        if sys.maxunicode == 65535:
+            return cls.UTF16
+        return cls.UTF32
+
 
 class Document(object):
     """Document to send to Google Cloud Natural Language API.
@@ -115,7 +127,7 @@ class Document(object):
     """HTML document type."""
 
     def __init__(self, client, content=None, gcs_url=None, doc_type=PLAIN_TEXT,
-                 language=None, encoding=Encoding.UTF8):
+                 language=None, encoding=Encoding.get_default()):
         if content is not None and gcs_url is not None:
             raise ValueError('A Document cannot contain both local text and '
                              'a link to text in a Google Cloud Storage object')
diff --git a/language/unit_tests/test_document.py b/language/unit_tests/test_document.py
index a96eac8d24e5..d41c4ad062a4 100644
--- a/language/unit_tests/test_document.py
+++ b/language/unit_tests/test_document.py
@@ -105,6 +105,19 @@ def make_mock_client(response):
     return mock.Mock(_connection=connection, spec=Client)
 
 
+class TestEncoding(unittest.TestCase):
+    def test_default_low_maxunicode(self):
+        import sys
+        import mock
+
+        from google.cloud.language.document import Encoding
+
+        with mock.patch.dict(sys.__dict__, maxunicode=65535):
+            self.assertEqual(Encoding.get_default(), Encoding.UTF16)
+        with mock.patch.dict(sys.__dict__, maxunicode=1114111):
+            self.assertEqual(Encoding.get_default(), Encoding.UTF32)
+
+
 class TestDocument(unittest.TestCase):
 
     @staticmethod
@@ -127,7 +140,7 @@ def test_constructor_defaults(self):
         self.assertIsNone(document.gcs_url)
         self.assertIsNone(document.language)
         self.assertEqual(document.doc_type, MUT.Document.PLAIN_TEXT)
-        self.assertEqual(document.encoding, MUT.Encoding.UTF8)
+        self.assertEqual(document.encoding, MUT.Encoding.get_default())
 
     def test_constructor_explicit(self):
         import google.cloud.language.document as MUT
@@ -287,7 +300,7 @@ def test_analyze_entities(self):
 
         # Verify the request.
         expected = self._expected_data(
-            content, encoding_type=Encoding.UTF8)
+            content, encoding_type=Encoding.get_default())
         client._connection.api_request.assert_called_once_with(
             path='analyzeEntities', method='POST', data=expected)
 
@@ -428,7 +441,7 @@ def test_analyze_syntax(self):
 
         # Verify the request.
         expected = self._expected_data(
-            content, encoding_type=Encoding.UTF8)
+            content, encoding_type=Encoding.get_default())
         client._connection.api_request.assert_called_once_with(
             path='analyzeSyntax', method='POST', data=expected)
 
@@ -506,7 +519,7 @@ def _annotate_text_helper(self, include_sentiment,
 
         # Verify the request.
         expected = self._expected_data(
-            ANNOTATE_CONTENT, encoding_type=Encoding.UTF8,
+            ANNOTATE_CONTENT, encoding_type=Encoding.get_default(),
             extract_sentiment=include_sentiment,
             extract_entities=include_entities,
             extract_syntax=include_syntax)
diff --git a/system_tests/language.py b/system_tests/language.py
index f3869fbe8fb9..23b76da2cf32 100644
--- a/system_tests/language.py
+++ b/system_tests/language.py
@@ -76,7 +76,7 @@ def _check_analyze_entities_result(self, entities):
         self.assertGreater(entity1.salience, 0.0)
         # Other mentions may occur, e.g. "painter".
         self.assertIn(entity1.name, entity1.mentions)
-        self.assertEqual(entity1.wikipedia_url,
+        self.assertEqual(entity1.metadata['wikipedia_url'],
                          'http://en.wikipedia.org/wiki/Caravaggio')
         self.assertIsInstance(entity1.metadata, dict)
         # Verify entity 2.
@@ -84,7 +84,7 @@ def _check_analyze_entities_result(self, entities):
         self.assertEqual(entity2.entity_type, EntityType.LOCATION)
         self.assertGreater(entity2.salience, 0.0)
         self.assertEqual(entity2.mentions, [entity2.name])
-        self.assertEqual(entity2.wikipedia_url,
+        self.assertEqual(entity2.metadata['wikipedia_url'],
                          'http://en.wikipedia.org/wiki/Italy')
         self.assertIsInstance(entity2.metadata, dict)
         # Verify entity 3.
@@ -95,7 +95,7 @@ def _check_analyze_entities_result(self, entities):
         self.assertEqual(entity3.mentions, [entity3.name])
         wiki_url = ('http://en.wikipedia.org/wiki/'
                     'The_Calling_of_St_Matthew_(Caravaggio)')
-        self.assertEqual(entity3.wikipedia_url, wiki_url)
+        self.assertEqual(entity3.metadata['wikipedia_url'], wiki_url)
         self.assertIsInstance(entity3.metadata, dict)
 
     def test_analyze_entities(self):