Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion language/google/cloud/language/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"""

import collections
import sys

from google.cloud.language import api_responses
from google.cloud.language.entity import Entity
Expand Down Expand Up @@ -64,6 +65,17 @@ class Encoding(object):
UTF32 = 'UTF32'
"""UTF-32 encoding type."""

@classmethod
def get_default(cls):
    """Pick the default encoding type for the running interpreter.

    The decision is keyed on ``sys.maxunicode``: when it equals 65535
    the UTF-16 encoding type is chosen; for any other value the UTF-32
    encoding type is chosen.

    :rtype: str
    :returns: ``cls.UTF16`` or ``cls.UTF32``, depending on
              ``sys.maxunicode``.
    """
    # 65535 (0xFFFF) means the largest code point the interpreter
    # reports fits in 16 bits.
    sixteen_bit_limit = sys.maxunicode == 65535
    return cls.UTF16 if sixteen_bit_limit else cls.UTF32


class Document(object):
"""Document to send to Google Cloud Natural Language API.
Expand Down Expand Up @@ -115,7 +127,7 @@ class Document(object):
"""HTML document type."""

def __init__(self, client, content=None, gcs_url=None, doc_type=PLAIN_TEXT,
language=None, encoding=Encoding.UTF8):
language=None, encoding=Encoding.get_default()):
if content is not None and gcs_url is not None:
raise ValueError('A Document cannot contain both local text and '
'a link to text in a Google Cloud Storage object')
Expand Down
21 changes: 17 additions & 4 deletions language/unit_tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,19 @@ def make_mock_client(response):
return mock.Mock(_connection=connection, spec=Client)


class TestEncoding(unittest.TestCase):
    """Unit tests for ``Encoding.get_default``."""

    def _check_default(self, maxunicode, expected_name):
        """Assert the default encoding for a patched ``sys.maxunicode``.

        :type maxunicode: int
        :param maxunicode: Value substituted for ``sys.maxunicode`` while
                           ``Encoding.get_default()`` is called.

        :type expected_name: str
        :param expected_name: Name of the ``Encoding`` attribute that
                              ``get_default()`` is expected to return.
        """
        import sys
        import mock

        from google.cloud.language.document import Encoding

        # ``patch.dict`` puts the real ``sys.maxunicode`` back when the
        # ``with`` block exits, so other tests are unaffected.
        with mock.patch.dict(sys.__dict__, maxunicode=maxunicode):
            default = Encoding.get_default()
        self.assertEqual(default, getattr(Encoding, expected_name))

    def test_default_low_maxunicode(self):
        self._check_default(65535, 'UTF16')

    def test_default_high_maxunicode(self):
        # Previously asserted inside ``test_default_low_maxunicode``; kept
        # as its own test so it still runs (and is reported by name) when
        # the low-maxunicode case fails.
        self._check_default(1114111, 'UTF32')


class TestDocument(unittest.TestCase):

@staticmethod
Expand All @@ -127,7 +140,7 @@ def test_constructor_defaults(self):
self.assertIsNone(document.gcs_url)
self.assertIsNone(document.language)
self.assertEqual(document.doc_type, MUT.Document.PLAIN_TEXT)
self.assertEqual(document.encoding, MUT.Encoding.UTF8)
self.assertEqual(document.encoding, MUT.Encoding.get_default())

def test_constructor_explicit(self):
import google.cloud.language.document as MUT
Expand Down Expand Up @@ -287,7 +300,7 @@ def test_analyze_entities(self):

# Verify the request.
expected = self._expected_data(
content, encoding_type=Encoding.UTF8)
content, encoding_type=Encoding.get_default())
client._connection.api_request.assert_called_once_with(
path='analyzeEntities', method='POST', data=expected)

Expand Down Expand Up @@ -428,7 +441,7 @@ def test_analyze_syntax(self):

# Verify the request.
expected = self._expected_data(
content, encoding_type=Encoding.UTF8)
content, encoding_type=Encoding.get_default())
client._connection.api_request.assert_called_once_with(
path='analyzeSyntax', method='POST', data=expected)

Expand Down Expand Up @@ -506,7 +519,7 @@ def _annotate_text_helper(self, include_sentiment,

# Verify the request.
expected = self._expected_data(
ANNOTATE_CONTENT, encoding_type=Encoding.UTF8,
ANNOTATE_CONTENT, encoding_type=Encoding.get_default(),
extract_sentiment=include_sentiment,
extract_entities=include_entities,
extract_syntax=include_syntax)
Expand Down
6 changes: 3 additions & 3 deletions system_tests/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ def _check_analyze_entities_result(self, entities):
self.assertGreater(entity1.salience, 0.0)
# Other mentions may occur, e.g. "painter".
self.assertIn(entity1.name, entity1.mentions)
self.assertEqual(entity1.wikipedia_url,
self.assertEqual(entity1.metadata['wikipedia_url'],

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

'http://en.wikipedia.org/wiki/Caravaggio')
self.assertIsInstance(entity1.metadata, dict)
# Verify entity 2.
self.assertEqual(entity2.name, self.NAME2)
self.assertEqual(entity2.entity_type, EntityType.LOCATION)
self.assertGreater(entity2.salience, 0.0)
self.assertEqual(entity2.mentions, [entity2.name])
self.assertEqual(entity2.wikipedia_url,
self.assertEqual(entity2.metadata['wikipedia_url'],
'http://en.wikipedia.org/wiki/Italy')
self.assertIsInstance(entity2.metadata, dict)
# Verify entity 3.
Expand All @@ -95,7 +95,7 @@ def _check_analyze_entities_result(self, entities):
self.assertEqual(entity3.mentions, [entity3.name])
wiki_url = ('http://en.wikipedia.org/wiki/'
'The_Calling_of_St_Matthew_(Caravaggio)')
self.assertEqual(entity3.wikipedia_url, wiki_url)
self.assertEqual(entity3.metadata['wikipedia_url'], wiki_url)
self.assertIsInstance(entity3.metadata, dict)

def test_analyze_entities(self):
Expand Down