@@ -68,7 +68,7 @@ class Document(proto.Message):
6868 Optional. UTF-8 encoded text in reading order
6969 from the document.
7070 text_styles (Sequence[google.cloud.documentai_v1beta3.types.Document.Style]):
71- Styles for the
71+ Placeholder. Styles for the
7272 [Document.text][google.cloud.documentai.v1beta3.Document.text].
7373 pages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page]):
7474 Visual page layout for the
@@ -79,13 +79,13 @@ class Document(proto.Message):
7979 For document shards, entities in this list may cross shard
8080 boundaries.
8181 entity_relations (Sequence[google.cloud.documentai_v1beta3.types.Document.EntityRelation]):
82- Relationship among
82+ Placeholder. Relationship among
8383 [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
8484 text_changes (Sequence[google.cloud.documentai_v1beta3.types.Document.TextChange]):
85- A list of text corrections made to [Document.text]. This is
86- usually used for annotating corrections to OCR mistakes.
87- Text changes for a given revision may not overlap with each
88- other.
85+ Placeholder. A list of text corrections made to
86+ [Document.text]. This is usually used for annotating
87+ corrections to OCR mistakes. Text changes for a given
88+ revision may not overlap with each other.
8989 shard_info (google.cloud.documentai_v1beta3.types.Document.ShardInfo):
9090 Information about the sharding if this
9191 document is a sharded part of a larger document.
@@ -95,7 +95,8 @@ class Document(proto.Message):
9595 Any error that occurred while processing this
9696 document.
9797 revisions (Sequence[google.cloud.documentai_v1beta3.types.Document.Revision]):
98- Revision history of this document.
98+ Placeholder. Revision history of this
99+ document.
99100 """
100101
101102 class ShardInfo (proto .Message ):
@@ -224,6 +225,9 @@ class Page(proto.Message):
224225 form_fields (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.FormField]):
225226 A list of visually detected form fields on
226227 the page.
228+ symbols (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Symbol]):
229+ A list of visually detected symbols on the
230+ page.
227231 provenance (google.cloud.documentai_v1beta3.types.Document.Provenance):
228232 The history of this page.
229233 """
@@ -457,6 +461,26 @@ class Type(proto.Enum):
457461 proto .MESSAGE , number = 4 , message = "Document.Provenance" ,
458462 )
459463
464+ class Symbol (proto .Message ):
465+ r"""A detected symbol.
466+
467+ Attributes:
468+ layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout):
469+ [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]
470+ for
471+ [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
472+ detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]):
473+ A list of detected languages together with
474+ confidence.
475+ """
476+
477+ layout = proto .Field (
478+ proto .MESSAGE , number = 1 , message = "Document.Page.Layout" ,
479+ )
480+ detected_languages = proto .RepeatedField (
481+ proto .MESSAGE , number = 2 , message = "Document.Page.DetectedLanguage" ,
482+ )
483+
460484 class VisualElement (proto .Message ):
461485 r"""Detected non-text visual elements e.g. checkbox, signature
462486 etc. on the page.
@@ -606,7 +630,7 @@ class DetectedLanguage(proto.Message):
606630 language_code (str):
607631 The BCP-47 language code, such as "en-US" or "sr-Latn". For
608632 more information, see
609- http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
633+ https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
610634 confidence (float):
611635 Confidence of detected language. Range [0, 1].
612636 """
@@ -647,13 +671,17 @@ class DetectedLanguage(proto.Message):
647671 form_fields = proto .RepeatedField (
648672 proto .MESSAGE , number = 11 , message = "Document.Page.FormField" ,
649673 )
674+ symbols = proto .RepeatedField (
675+ proto .MESSAGE , number = 12 , message = "Document.Page.Symbol" ,
676+ )
650677 provenance = proto .Field (
651678 proto .MESSAGE , number = 16 , message = "Document.Provenance" ,
652679 )
653680
654681 class Entity (proto .Message ):
655- r"""A phrase in the text that is a known entity type, such as a
656- person, an organization, or location.
682+ r"""An entity that could be a phrase in the text or a property
683+ that belongs to the document. It is a known entity type, such as
684+ a person, an organization, or location.
657685
658686 Attributes:
659687 text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor):
@@ -664,7 +692,8 @@ class Entity(proto.Message):
664692 Entity type from a schema e.g. ``Address``.
665693 mention_text (str):
666694 Optional. Text value in the document e.g.
667- ``1600 Amphitheatre Pkwy``.
695+ ``1600 Amphitheatre Pkwy``. If the entity is not present in
696+ the document, this field will be empty.
668697 mention_id (str):
669698 Optional. Deprecated. Use ``id`` field instead.
670699 confidence (float):
@@ -733,10 +762,14 @@ class NormalizedValue(proto.Message):
733762
734763 This field is a member of `oneof`_ ``structured_value``.
735764 text (str):
736- Required. Normalized entity value stored as a string. This
737- field is populated for supported document type (e.g.
738- Invoice). For some entity types, one of respective
739- 'structured_value' fields may also be populated.
765+ Optional. An optional field to store a normalized string.
766+ For some entity types, one of respective
767+ ``structured_value`` fields may also be populated. Also not
768+ all the types of ``structured_value`` will be normalized.
769+ For example, some processors may not generate float or int
770+ normalized text by default.
771+
772+ Below are sample formats mapped to structured values.
740773
741774 - Money/Currency type (``money_value``) is in the ISO 4217
742775 text format.
@@ -822,7 +855,8 @@ class TextAnchor(proto.Message):
822855 [Document.text][google.cloud.documentai.v1beta3.Document.text].
823856 content (str):
824857 Contains the content of the text span so that users do not
825- have to look it up in the text_segments.
858+ have to look it up in the text_segments. It is always
859+ populated for formFields.
826860 """
827861
828862 class TextSegment (proto .Message ):
@@ -946,18 +980,18 @@ class OperationType(proto.Enum):
946980 EVAL_SKIPPED = 6
947981
948982 class Parent (proto .Message ):
949- r"""Structure for referencing parent provenances. When an
950- element replaces one of more other elements parent references
951- identify the elements that are replaced.
983+ r"""The parent element the current element is based on. Used for
984+ referencing/aligning, removal and replacement operations.
952985
953986 Attributes:
954987 revision (int):
955- The index of the [Document.revisions] identifying the parent
956- revision .
988+ The index into the current revision's
989+ parent_ids list.
957990 index (int):
958- The index of the parent revisions
959- corresponding collection of items (eg. list of
960- entities, properties within entities, etc.)
991+ The index of the parent item in the
992+ corresponding item list (e.g. list of entities,
993+ properties within entities, etc.) in the parent
994+ revision.
961995 id (int):
962996 The id of the parent provenance.
963997 """
0 commit comments