diff --git a/README.md b/README.md index 62e076bd..69242c0a 100644 --- a/README.md +++ b/README.md @@ -108,22 +108,6 @@ use this Document AI Client Library. -## Samples - -Samples are in the [`samples/`](https://github.com/googleapis/java-document-ai/tree/main/samples) directory. - -| Sample | Source Code | Try it | -| --------------------------- | --------------------------------- | ------ | -| Batch Process Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1/BatchProcessDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1/BatchProcessDocument.java) | -| Process Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1/ProcessDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1/ProcessDocument.java) | -| Quick Start | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1/QuickStart.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1/QuickStart.java) | -| Process Form Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1beta3/ProcessFormDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1beta3/ProcessFormDocument.java) | -| Process Ocr Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1beta3/ProcessOcrDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1beta3/ProcessOcrDocument.java) | -| Process Quality Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1beta3/ProcessQualityDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1beta3/ProcessQualityDocument.java) | -| Process Specialized Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1beta3/ProcessSpecializedDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1beta3/ProcessSpecializedDocument.java) | -| Process Splitter Document | [source code](https://github.com/googleapis/java-document-ai/blob/main/samples/snippets/src/main/java/documentai/v1beta3/ProcessSplitterDocument.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-document-ai&page=editor&open_in_editor=samples/snippets/src/main/java/documentai/v1beta3/ProcessSplitterDocument.java) | - - ## Troubleshooting diff --git a/samples/snippets/resources/document_quality_poor.pdf b/samples/snippets/resources/document_quality_poor.pdf deleted file mode 100644 index 3a34a925..00000000 Binary files a/samples/snippets/resources/document_quality_poor.pdf and /dev/null differ diff --git a/samples/snippets/resources/handwritten_form.pdf b/samples/snippets/resources/handwritten_form.pdf deleted file mode 100644 index 2189ffff..00000000 Binary files a/samples/snippets/resources/handwritten_form.pdf and /dev/null differ diff --git a/samples/snippets/resources/invoice.pdf b/samples/snippets/resources/invoice.pdf deleted file mode 100644 index 7722734a..00000000 Binary files a/samples/snippets/resources/invoice.pdf and /dev/null differ diff --git a/samples/snippets/resources/multi_document.pdf b/samples/snippets/resources/multi_document.pdf deleted file mode 100644 index 7ea62eb8..00000000 Binary files a/samples/snippets/resources/multi_document.pdf and /dev/null differ diff --git a/samples/snippets/resources/us_driver_license.pdf b/samples/snippets/resources/us_driver_license.pdf deleted file mode 100644 index f8f62d90..00000000 Binary files a/samples/snippets/resources/us_driver_license.pdf and /dev/null differ diff --git a/samples/snippets/src/main/java/documentai/v1/BatchProcessDocument.java b/samples/snippets/src/main/java/documentai/v1/BatchProcessDocument.java deleted file mode 100644 index efee05e6..00000000 --- a/samples/snippets/src/main/java/documentai/v1/BatchProcessDocument.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -// [START documentai_batch_process_document] - -import com.google.api.gax.longrunning.OperationFuture; -import com.google.api.gax.paging.Page; -import com.google.cloud.documentai.v1.BatchDocumentsInputConfig; -import com.google.cloud.documentai.v1.BatchProcessMetadata; -import com.google.cloud.documentai.v1.BatchProcessRequest; -import com.google.cloud.documentai.v1.BatchProcessResponse; -import com.google.cloud.documentai.v1.Document; -import com.google.cloud.documentai.v1.DocumentOutputConfig; -import com.google.cloud.documentai.v1.DocumentOutputConfig.GcsOutputConfig; -import com.google.cloud.documentai.v1.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1.GcsDocument; -import com.google.cloud.documentai.v1.GcsDocuments; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.BlobId; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageOptions; -import com.google.protobuf.util.JsonFormat; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -public class BatchProcessDocument { - public static void batchProcessDocument() - throws IOException, InterruptedException, TimeoutException, ExecutionException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String outputGcsBucketName = "your-gcs-bucket-name"; - String outputGcsPrefix = "PREFIX"; - String inputGcsUri = "gs://your-gcs-bucket/path/to/input/file.pdf"; - batchProcessDocument( - projectId, location, processerId, inputGcsUri, outputGcsBucketName, outputGcsPrefix); - } - - public static void batchProcessDocument( - String projectId, - String location, - String processorId, - String gcsInputUri, - String gcsOutputBucketName, - String gcsOutputUriPrefix) - throws IOException, InterruptedException, TimeoutException, ExecutionException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - GcsDocument gcsDocument = - GcsDocument.newBuilder().setGcsUri(gcsInputUri).setMimeType("application/pdf").build(); - - GcsDocuments gcsDocuments = GcsDocuments.newBuilder().addDocuments(gcsDocument).build(); - - BatchDocumentsInputConfig inputConfig = - BatchDocumentsInputConfig.newBuilder().setGcsDocuments(gcsDocuments).build(); - - String fullGcsPath = String.format("gs://%s/%s/", gcsOutputBucketName, gcsOutputUriPrefix); - GcsOutputConfig gcsOutputConfig = GcsOutputConfig.newBuilder().setGcsUri(fullGcsPath).build(); - - DocumentOutputConfig documentOutputConfig = - DocumentOutputConfig.newBuilder().setGcsOutputConfig(gcsOutputConfig).build(); - - // Configure the batch process request. - BatchProcessRequest request = - BatchProcessRequest.newBuilder() - .setName(name) - .setInputDocuments(inputConfig) - .setDocumentOutputConfig(documentOutputConfig) - .build(); - - OperationFuture future = - client.batchProcessDocumentsAsync(request); - - // Batch process document using a long-running operation. - // You can wait for now, or get results later. - // Note: first request to the service takes longer than subsequent - // requests. - System.out.println("Waiting for operation to complete..."); - future.get(240, TimeUnit.SECONDS); - - System.out.println("Document processing complete."); - - Storage storage = StorageOptions.newBuilder().setProjectId(projectId).build().getService(); - Bucket bucket = storage.get(gcsOutputBucketName); - - // List all of the files in the Storage bucket. - Page blobs = bucket.list(Storage.BlobListOption.prefix(gcsOutputUriPrefix + "/")); - int idx = 0; - for (Blob blob : blobs.iterateAll()) { - if (!blob.isDirectory()) { - System.out.printf("Fetched file #%d\n", ++idx); - // Read the results - - // Download and store json data in a temp file. - File tempFile = File.createTempFile("file", ".json"); - Blob fileInfo = storage.get(BlobId.of(gcsOutputBucketName, blob.getName())); - fileInfo.downloadTo(tempFile.toPath()); - - // Parse json file into Document. - FileReader reader = new FileReader(tempFile); - Document.Builder builder = Document.newBuilder(); - JsonFormat.parser().merge(reader, builder); - - Document document = builder.build(); - - // Get all of the document text as one big string. - String text = document.getText(); - - // Read the text recognition output from the processor - System.out.println("The document contains the following paragraphs:"); - Document.Page page1 = document.getPages(0); - List paragraphList = page1.getParagraphsList(); - for (Document.Page.Paragraph paragraph : paragraphList) { - String paragraphText = getText(paragraph.getLayout().getTextAnchor(), text); - System.out.printf("Paragraph text:%s\n", paragraphText); - } - - // Form parsing provides additional output about - // form-formatted PDFs. You must create a form - // processor in the Cloud Console to see full field details. - System.out.println("The following form key/value pairs were detected:"); - - for (Document.Page.FormField field : page1.getFormFieldsList()) { - String fieldName = getText(field.getFieldName().getTextAnchor(), text); - String fieldValue = getText(field.getFieldValue().getTextAnchor(), text); - - System.out.println("Extracted form fields pair:"); - System.out.printf("\t(%s, %s))", fieldName, fieldValue); - } - - // Clean up temp file. - tempFile.deleteOnExit(); - } - } - } - } - - // Extract shards from the text field - private static String getText(Document.TextAnchor textAnchor, String text) { - if (textAnchor.getTextSegmentsList().size() > 0) { - int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex(); - int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex(); - return text.substring(startIdx, endIdx); - } - return "[NO TEXT]"; - } -} -// [END documentai_batch_process_document] diff --git a/samples/snippets/src/main/java/documentai/v1/ProcessDocument.java b/samples/snippets/src/main/java/documentai/v1/ProcessDocument.java deleted file mode 100644 index 75a5c639..00000000 --- a/samples/snippets/src/main/java/documentai/v1/ProcessDocument.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -// [START documentai_process_document] - -import com.google.cloud.documentai.v1.Document; -import com.google.cloud.documentai.v1.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1.ProcessRequest; -import com.google.cloud.documentai.v1.ProcessResponse; -import com.google.cloud.documentai.v1.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessDocument { - public static void processDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processDocument(projectId, location, processerId, filePath); - } - - public static void processDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - // Get all of the document text as one big string - String text = documentResponse.getText(); - - // Read the text recognition output from the processor - System.out.println("The document contains the following paragraphs:"); - Document.Page firstPage = documentResponse.getPages(0); - List paragraphs = firstPage.getParagraphsList(); - - for (Document.Page.Paragraph paragraph : paragraphs) { - String paragraphText = getText(paragraph.getLayout().getTextAnchor(), text); - System.out.printf("Paragraph text:\n%s\n", paragraphText); - } - - // Form parsing provides additional output about - // form-formatted PDFs. You must create a form - // processor in the Cloud Console to see full field details. - System.out.println("The following form key/value pairs were detected:"); - - for (Document.Page.FormField field : firstPage.getFormFieldsList()) { - String fieldName = getText(field.getFieldName().getTextAnchor(), text); - String fieldValue = getText(field.getFieldValue().getTextAnchor(), text); - - System.out.println("Extracted form fields pair:"); - System.out.printf("\t(%s, %s))\n", fieldName, fieldValue); - } - } - } - - // Extract shards from the text field - private static String getText(Document.TextAnchor textAnchor, String text) { - if (textAnchor.getTextSegmentsList().size() > 0) { - int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex(); - int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex(); - return text.substring(startIdx, endIdx); - } - return "[NO TEXT]"; - } -} -// [END documentai_process_document] diff --git a/samples/snippets/src/main/java/documentai/v1/QuickStart.java b/samples/snippets/src/main/java/documentai/v1/QuickStart.java deleted file mode 100644 index 88f22136..00000000 --- a/samples/snippets/src/main/java/documentai/v1/QuickStart.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -// [START documentai_quickstart] -import com.google.cloud.documentai.v1.Document; -import com.google.cloud.documentai.v1.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1.ProcessRequest; -import com.google.cloud.documentai.v1.ProcessResponse; -import com.google.cloud.documentai.v1.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class QuickStart { - public static void main(String[] args) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processorId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - quickStart(projectId, location, processorId, filePath); - } - - public static void quickStart( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - // Get all of the document text as one big string - String text = documentResponse.getText(); - - // Read the text recognition output from the processor - System.out.println("The document contains the following paragraphs:"); - Document.Page firstPage = documentResponse.getPages(0); - List paragraphs = firstPage.getParagraphsList(); - - for (Document.Page.Paragraph paragraph : paragraphs) { - String paragraphText = getText(paragraph.getLayout().getTextAnchor(), text); - System.out.printf("Paragraph text:\n%s\n", paragraphText); - } - } - } - - // Extract shards from the text field - private static String getText(Document.TextAnchor textAnchor, String text) { - if (textAnchor.getTextSegmentsList().size() > 0) { - int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex(); - int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex(); - return text.substring(startIdx, endIdx); - } - return "[NO TEXT]"; - } -} -// [END documentai_quickstart] diff --git a/samples/snippets/src/main/java/documentai/v1beta3/ProcessFormDocument.java b/samples/snippets/src/main/java/documentai/v1beta3/ProcessFormDocument.java deleted file mode 100644 index 8a50d853..00000000 --- a/samples/snippets/src/main/java/documentai/v1beta3/ProcessFormDocument.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -// [START documentai_process_form_document] - -import com.google.cloud.documentai.v1beta3.Document; -import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1beta3.ProcessRequest; -import com.google.cloud.documentai.v1beta3.ProcessResponse; -import com.google.cloud.documentai.v1beta3.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessFormDocument { - public static void processFormDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processFormDocument(projectId, location, processerId, filePath); - } - - public static void processFormDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - System.out.println("Document processing complete."); - - // Read the text recognition output from the processor - // For a full list of Document object attributes, - // please reference this page: - // https://googleapis.dev/java/google-cloud-document-ai/latest/index.html - - // Get all of the document text as one big string - String text = documentResponse.getText(); - System.out.printf("Full document text: '%s'\n", removeNewlines(text)); - - // Read the text recognition output from the processor - List pages = documentResponse.getPagesList(); - System.out.printf("There are %s page(s) in this document.\n", pages.size()); - - for (Document.Page page : pages) { - System.out.printf("\n\n**** Page %d ****\n", page.getPageNumber()); - - List tables = page.getTablesList(); - System.out.printf("Found %d table(s):\n", tables.size()); - for (Document.Page.Table table : tables) { - printTableInfo(table, text); - } - - List formFields = page.getFormFieldsList(); - System.out.printf("Found %d form fields:\n", formFields.size()); - for (Document.Page.FormField formField : formFields) { - String fieldName = getLayoutText(formField.getFieldName().getTextAnchor(), text); - String fieldValue = getLayoutText(formField.getFieldValue().getTextAnchor(), text); - System.out.printf( - " * '%s': '%s'\n", removeNewlines(fieldName), removeNewlines(fieldValue)); - } - } - } - } - - private static void printTableInfo(Document.Page.Table table, String text) { - Document.Page.Table.TableRow firstBodyRow = table.getBodyRows(0); - int columnCount = firstBodyRow.getCellsCount(); - System.out.printf( - " Table with %d columns and %d rows:\n", columnCount, table.getBodyRowsCount()); - - Document.Page.Table.TableRow headerRow = table.getHeaderRows(0); - StringBuilder headerRowText = new StringBuilder(); - for (Document.Page.Table.TableCell cell : headerRow.getCellsList()) { - String columnName = getLayoutText(cell.getLayout().getTextAnchor(), text); - headerRowText.append(String.format("%s | ", removeNewlines(columnName))); - } - headerRowText.setLength(headerRowText.length() - 3); - System.out.printf(" Collumns: %s\n", headerRowText.toString()); - - StringBuilder firstRowText = new StringBuilder(); - for (Document.Page.Table.TableCell cell : firstBodyRow.getCellsList()) { - String cellText = getLayoutText(cell.getLayout().getTextAnchor(), text); - firstRowText.append(String.format("%s | ", removeNewlines(cellText))); - } - firstRowText.setLength(firstRowText.length() - 3); - System.out.printf(" First row data: %s\n", firstRowText.toString()); - } - - // Extract shards from the text field - private static String getLayoutText(Document.TextAnchor textAnchor, String text) { - if (textAnchor.getTextSegmentsList().size() > 0) { - int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex(); - int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex(); - return text.substring(startIdx, endIdx); - } - return "[NO TEXT]"; - } - - private static String removeNewlines(String s) { - return s.replace("\n", "").replace("\r", ""); - } -} -// [END documentai_process_form_document] diff --git a/samples/snippets/src/main/java/documentai/v1beta3/ProcessOcrDocument.java b/samples/snippets/src/main/java/documentai/v1beta3/ProcessOcrDocument.java deleted file mode 100644 index f483929a..00000000 --- a/samples/snippets/src/main/java/documentai/v1beta3/ProcessOcrDocument.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -// [START documentai_process_ocr_document] - -import com.google.cloud.documentai.v1beta3.Document; -import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1beta3.ProcessRequest; -import com.google.cloud.documentai.v1beta3.ProcessResponse; -import com.google.cloud.documentai.v1beta3.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessOcrDocument { - public static void processOcrDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processOcrDocument(projectId, location, processerId, filePath); - } - - public static void processOcrDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - System.out.println("Document processing complete."); - - // Read the text recognition output from the processor - // For a full list of Document object attributes, - // please reference this page: - // https://googleapis.dev/java/google-cloud-document-ai/latest/index.html - - // Get all of the document text as one big string - String text = documentResponse.getText(); - System.out.printf("Full document text: '%s'\n", escapeNewlines(text)); - - // Read the text recognition output from the processor - List pages = documentResponse.getPagesList(); - System.out.printf("There are %s page(s) in this document.\n", pages.size()); - - for (Document.Page page : pages) { - System.out.printf("Page %d:\n", page.getPageNumber()); - printPageDimensions(page.getDimension()); - printDetectedLanguages(page.getDetectedLanguagesList()); - printParagraphs(page.getParagraphsList(), text); - printBlocks(page.getBlocksList(), text); - printLines(page.getLinesList(), text); - printTokens(page.getTokensList(), text); - } - } - } - - private static void printPageDimensions(Document.Page.Dimension dimension) { - String unit = dimension.getUnit(); - System.out.printf(" Width: %.1f %s\n", dimension.getWidth(), unit); - System.out.printf(" Height: %.1f %s\n", dimension.getHeight(), unit); - } - - private static void printDetectedLanguages( - List detectedLangauges) { - System.out.println(" Detected languages:"); - for (Document.Page.DetectedLanguage detectedLanguage : detectedLangauges) { - String languageCode = detectedLanguage.getLanguageCode(); - float confidence = detectedLanguage.getConfidence(); - System.out.printf(" %s (%.2f%%)\n", languageCode, confidence * 100.0); - } - } - - private static void printParagraphs(List paragraphs, String text) { - System.out.printf(" %d paragraphs detected:\n", paragraphs.size()); - Document.Page.Paragraph firstParagraph = paragraphs.get(0); - String firstParagraphText = getLayoutText(firstParagraph.getLayout().getTextAnchor(), text); - System.out.printf(" First paragraph text: %s\n", escapeNewlines(firstParagraphText)); - Document.Page.Paragraph lastParagraph = paragraphs.get(paragraphs.size() - 1); - String lastParagraphText = getLayoutText(lastParagraph.getLayout().getTextAnchor(), text); - System.out.printf(" Last paragraph text: %s\n", escapeNewlines(lastParagraphText)); - } - - private static void printBlocks(List blocks, String text) { - System.out.printf(" %d blocks detected:\n", blocks.size()); - Document.Page.Block firstBlock = blocks.get(0); - String firstBlockText = getLayoutText(firstBlock.getLayout().getTextAnchor(), text); - System.out.printf(" First block text: %s\n", escapeNewlines(firstBlockText)); - Document.Page.Block lastBlock = blocks.get(blocks.size() - 1); - String lastBlockText = getLayoutText(lastBlock.getLayout().getTextAnchor(), text); - System.out.printf(" Last block text: %s\n", escapeNewlines(lastBlockText)); - } - - private static void printLines(List lines, String text) { - System.out.printf(" %d lines detected:\n", lines.size()); - Document.Page.Line firstLine = lines.get(0); - String firstLineText = getLayoutText(firstLine.getLayout().getTextAnchor(), text); - System.out.printf(" First line text: %s\n", escapeNewlines(firstLineText)); - Document.Page.Line lastLine = lines.get(lines.size() - 1); - String lastLineText = getLayoutText(lastLine.getLayout().getTextAnchor(), text); - System.out.printf(" Last line text: %s\n", escapeNewlines(lastLineText)); - } - - private static void printTokens(List tokens, String text) { - System.out.printf(" %d tokens detected:\n", tokens.size()); - Document.Page.Token firstToken = tokens.get(0); - String firstTokenText = getLayoutText(firstToken.getLayout().getTextAnchor(), text); - System.out.printf(" First token text: %s\n", escapeNewlines(firstTokenText)); - Document.Page.Token lastToken = tokens.get(tokens.size() - 1); - String lastTokenText = getLayoutText(lastToken.getLayout().getTextAnchor(), text); - System.out.printf(" Last token text: %s\n", escapeNewlines(lastTokenText)); - } - - // Extract shards from the text field - private static String getLayoutText(Document.TextAnchor textAnchor, String text) { - if (textAnchor.getTextSegmentsList().size() > 0) { - int startIdx = (int) textAnchor.getTextSegments(0).getStartIndex(); - int endIdx = (int) textAnchor.getTextSegments(0).getEndIndex(); - return text.substring(startIdx, endIdx); - } - return "[NO TEXT]"; - } - - private static String escapeNewlines(String s) { - return s.replace("\n", "\\n").replace("\r", "\\r"); - } -} -// [END documentai_process_ocr_document] diff --git a/samples/snippets/src/main/java/documentai/v1beta3/ProcessQualityDocument.java b/samples/snippets/src/main/java/documentai/v1beta3/ProcessQualityDocument.java deleted file mode 100644 index 3e80a574..00000000 --- a/samples/snippets/src/main/java/documentai/v1beta3/ProcessQualityDocument.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -// [START documentai_process_quality_document] - -import com.google.cloud.documentai.v1beta3.Document; -import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1beta3.ProcessRequest; -import com.google.cloud.documentai.v1beta3.ProcessResponse; -import com.google.cloud.documentai.v1beta3.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessQualityDocument { - public static void processQualityDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processQualityDocument(projectId, location, processerId, filePath); - } - - public static void processQualityDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - System.out.println("Document processing complete."); - - // Read the quality-specific information from the output from the - // Intelligent Document Quality Processor: - // https://cloud.google.com/document-ai/docs/processors-list#processor_doc-quality-processor - // OCR and other data is also present in the quality processor's response. - // Please see the OCR and other samples for how to parse other data in the - // response. - List entities = documentResponse.getEntitiesList(); - for (Document.Entity entity : entities) { - float entityConfidence = entity.getConfidence(); - long pageNumber = entity.getPageAnchor().getPageRefs(0).getPage() + 1; - System.out.printf( - "Page %d has a quality score of (%.2f%%):\n", pageNumber, entityConfidence * 100.0); - for (Document.Entity property : entity.getPropertiesList()) { - float propertyConfidence = property.getConfidence(); - String propertyType = property.getType(); - System.out.printf(" * %s score of %.2f%%\n", propertyType, propertyConfidence * 100.0); - } - } - } - } -} -// [END documentai_process_quality_document] diff --git a/samples/snippets/src/main/java/documentai/v1beta3/ProcessSpecializedDocument.java b/samples/snippets/src/main/java/documentai/v1beta3/ProcessSpecializedDocument.java deleted file mode 100644 index 5cbb1af1..00000000 --- a/samples/snippets/src/main/java/documentai/v1beta3/ProcessSpecializedDocument.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -// [START documentai_process_specialized_document] - -import com.google.cloud.documentai.v1beta3.Document; -import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1beta3.ProcessRequest; -import com.google.cloud.documentai.v1beta3.ProcessResponse; -import com.google.cloud.documentai.v1beta3.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessSpecializedDocument { - public static void processSpecializedDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processSpecializedDocument(projectId, location, processerId, filePath); - } - - public static void processSpecializedDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - System.out.println("Document processing complete."); - - // Read fields specificly from the specalized US drivers license processor: - // https://cloud.google.com/document-ai/docs/processors-list#processor_us-driver-license-parser - // retriving data from other specalized processors follow a similar pattern. - // For a complete list of processors see: - // https://cloud.google.com/document-ai/docs/processors-list - // - // OCR and other data is also present in the quality processor's response. - // Please see the OCR and other samples for how to parse other data in the - // response. - for (Document.Entity entity : documentResponse.getEntitiesList()) { - // Fields detected. For a full list of fields for each processor see - // the processor documentation: - // https://cloud.google.com/document-ai/docs/processors-list - String entityType = entity.getType(); - // some other value formats in addition to text are availible - // e.g. dates: `entity.getNormalizedValue().getDateValue().getYear()` - // check for normilized value with `entity.hasNormalizedValue()` - String entityTextValue = escapeNewlines(entity.getTextAnchor().getContent()); - float entityConfidence = entity.getConfidence(); - System.out.printf( - " * %s: %s (%.2f%% confident)\n", - entityType, entityTextValue, entityConfidence * 100.0); - } - } - } - - private static String escapeNewlines(String s) { - return s.replace("\n", "\\n").replace("\r", "\\r"); - } -} -// [END documentai_process_specialized_document] diff --git a/samples/snippets/src/main/java/documentai/v1beta3/ProcessSplitterDocument.java b/samples/snippets/src/main/java/documentai/v1beta3/ProcessSplitterDocument.java deleted file mode 100644 index e63e2f8e..00000000 --- a/samples/snippets/src/main/java/documentai/v1beta3/ProcessSplitterDocument.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -// [START documentai_process_splitter_document] - -import com.google.cloud.documentai.v1beta3.Document; -import com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient; -import com.google.cloud.documentai.v1beta3.ProcessRequest; -import com.google.cloud.documentai.v1beta3.ProcessResponse; -import com.google.cloud.documentai.v1beta3.RawDocument; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; - -public class ProcessSplitterDocument { - public static void processSplitterDocument() - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String location = "your-project-location"; // Format is "us" or "eu". - String processerId = "your-processor-id"; - String filePath = "path/to/input/file.pdf"; - processSplitterDocument(projectId, location, processerId, filePath); - } - - public static void processSplitterDocument( - String projectId, String location, String processorId, String filePath) - throws IOException, InterruptedException, ExecutionException, TimeoutException { - // Initialize client that will be used to send requests. This client only needs to be created - // once, and can be reused for multiple requests. After completing all of your requests, call - // the "close" method on the client to safely clean up any remaining background resources. - try (DocumentProcessorServiceClient client = DocumentProcessorServiceClient.create()) { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - String name = - String.format("projects/%s/locations/%s/processors/%s", projectId, location, processorId); - - // Read the file. - byte[] imageFileData = Files.readAllBytes(Paths.get(filePath)); - - // Convert the image data to a Buffer and base64 encode it. - ByteString content = ByteString.copyFrom(imageFileData); - - RawDocument document = - RawDocument.newBuilder().setContent(content).setMimeType("application/pdf").build(); - - // Configure the process request. - ProcessRequest request = - ProcessRequest.newBuilder().setName(name).setRawDocument(document).build(); - - // Recognizes text entities in the PDF document - ProcessResponse result = client.processDocument(request); - Document documentResponse = result.getDocument(); - - System.out.println("Document processing complete."); - - // Read the splitter output from the document splitter processor: - // https://cloud.google.com/document-ai/docs/processors-list#processor_doc-splitter - // This processor only provides text for the document and information on how - // to split the document on logical boundaries. To identify and extract text, - // form elements, and entities please see other processors like the OCR, form, - // and specalized processors. - List entities = documentResponse.getEntitiesList(); - System.out.printf("Found %d subdocuments:\n", entities.size()); - for (Document.Entity entity : entities) { - float entityConfidence = entity.getConfidence(); - String pagesRangeText = pageRefsToString(entity.getPageAnchor().getPageRefsList()); - String subdocumentType = entity.getType(); - if (subdocumentType.isEmpty()) { - System.out.printf( - "%.2f%% confident that %s a subdocument.\n", entityConfidence * 100, pagesRangeText); - } else { - System.out.printf( - "%.2f%% confident that %s a '%s' subdocument.\n", - entityConfidence * 100, pagesRangeText, subdocumentType); - } - } - } - } - - // Converts page reference(s) to a string describing the page or page range. - private static String pageRefsToString(List pageRefs) { - if (pageRefs.size() == 1) { - return String.format("page %d is", pageRefs.get(0).getPage() + 1); - } else { - long start = pageRefs.get(0).getPage() + 1; - long end = pageRefs.get(1).getPage() + 1; - return String.format("pages %d to %d are", start, end); - } - } -} -// [END documentai_process_splitter_document] diff --git a/samples/snippets/src/test/java/documentai/v1/BatchProcessDocumentTest.java b/samples/snippets/src/test/java/documentai/v1/BatchProcessDocumentTest.java deleted file mode 100644 index 1024ae71..00000000 --- a/samples/snippets/src/test/java/documentai/v1/BatchProcessDocumentTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import com.google.api.gax.paging.Page; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.BucketInfo; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageOptions; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.UUID; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class BatchProcessDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "88541adc6eeec481"; - private static final String BUCKET_NAME = - String.format("document-ai-output-test-%s", UUID.randomUUID()); - private static final String INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf"; - private static final String OUTPUT_PREFIX = String.format("%s", UUID.randomUUID()); - private static final String OUTPUT_BUCKET_NAME = PROJECT_ID; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - private static void cleanUpBucket() { - Storage storage = StorageOptions.getDefaultInstance().getService(); - Page blobs = - storage.list( - BUCKET_NAME, - Storage.BlobListOption.currentDirectory(), - Storage.BlobListOption.prefix(OUTPUT_PREFIX)); - - deleteDirectory(storage, blobs); - } - - private static void deleteDirectory(Storage storage, Page blobs) { - for (Blob blob : blobs.iterateAll()) { - if (!blob.delete()) { - Page subBlobs = - storage.list( - BUCKET_NAME, - Storage.BlobListOption.currentDirectory(), - Storage.BlobListOption.prefix(blob.getName())); - - deleteDirectory(storage, subBlobs); - } - } - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - - Storage storage = StorageOptions.getDefaultInstance().getService(); - storage.create(BucketInfo.of(BUCKET_NAME)); - } - - @Test - public void testBatchProcessDocument() - throws InterruptedException, ExecutionException, TimeoutException, IOException { - // parse the GCS invoice as a form. - BatchProcessDocument.batchProcessDocument( - PROJECT_ID, "us", PROCESSOR_ID, INPUT_URI, OUTPUT_BUCKET_NAME, OUTPUT_PREFIX); - String got = bout.toString(); - - assertThat(got).contains("Paragraph text:"); - assertThat(got).contains("Extracted"); - } - - @After - public void tearDown() { - cleanUpBucket(); - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1/ProcessDocumentTest.java b/samples/snippets/src/test/java/documentai/v1/ProcessDocumentTest.java deleted file mode 100644 index 6a4a35aa..00000000 --- a/samples/snippets/src/test/java/documentai/v1/ProcessDocumentTest.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "88541adc6eeec481"; - private static final String FILE_PATH = "resources/invoice.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessDocument.processDocument(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Paragraph text:"); - assertThat(got).contains("Extracted"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1/QuickStartTest.java b/samples/snippets/src/test/java/documentai/v1/QuickStartTest.java deleted file mode 100644 index afaa4b1c..00000000 --- a/samples/snippets/src/test/java/documentai/v1/QuickStartTest.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class QuickStartTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "88541adc6eeec481"; - private static final String FILE_PATH = "resources/invoice.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testQuickStart() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - QuickStart.quickStart(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Paragraph text:"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1beta3/ProcessFormDocumentTest.java b/samples/snippets/src/test/java/documentai/v1beta3/ProcessFormDocumentTest.java deleted file mode 100644 index 7491d744..00000000 --- a/samples/snippets/src/test/java/documentai/v1beta3/ProcessFormDocumentTest.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessFormDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "88541adc6eeec481"; - private static final String FILE_PATH = "resources/invoice.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessFormDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessFormDocument.processFormDocument(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("There are 1 page(s) in this document."); - assertThat(got).contains("Table with 4 columns and 6 rows"); - assertThat(got).contains("Found 13 form fields"); - assertThat(got).contains("'BALANCE DUE': '$2140.00'"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1beta3/ProcessOcrDocumentTest.java b/samples/snippets/src/test/java/documentai/v1beta3/ProcessOcrDocumentTest.java deleted file mode 100644 index 0c2da471..00000000 --- a/samples/snippets/src/test/java/documentai/v1beta3/ProcessOcrDocumentTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessOcrDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "f9018d35bc5edc1e"; - private static final String FILE_PATH = "resources/handwritten_form.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessOcrDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessOcrDocument.processOcrDocument(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Page 1"); - assertThat(got).contains("en"); - assertThat(got).contains("FakeDoc"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1beta3/ProcessQualityDocumentTest.java b/samples/snippets/src/test/java/documentai/v1beta3/ProcessQualityDocumentTest.java deleted file mode 100644 index 7379dbf0..00000000 --- a/samples/snippets/src/test/java/documentai/v1beta3/ProcessQualityDocumentTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessQualityDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "f80f55e03d4c20ed"; - private static final String FILE_PATH = "resources/document_quality_poor.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessQualityDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessQualityDocument.processQualityDocument(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Page 1 has a quality score of"); - assertThat(got).contains("defect_blurry score of 9"); - assertThat(got).contains("defect_noisy"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1beta3/ProcessSpecializedDocumentTest.java b/samples/snippets/src/test/java/documentai/v1beta3/ProcessSpecializedDocumentTest.java deleted file mode 100644 index 5f5b21d0..00000000 --- a/samples/snippets/src/test/java/documentai/v1beta3/ProcessSpecializedDocumentTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessSpecializedDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "ae8bc99f01b36b5e"; - private static final String FILE_PATH = "resources/us_driver_license.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessSpecializedDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessSpecializedDocument.processSpecializedDocument( - PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Document Id"); - assertThat(got).contains("97551579"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -} diff --git a/samples/snippets/src/test/java/documentai/v1beta3/ProcessSplitterDocumentTest.java b/samples/snippets/src/test/java/documentai/v1beta3/ProcessSplitterDocumentTest.java deleted file mode 100644 index 8fcf7aaf..00000000 --- a/samples/snippets/src/test/java/documentai/v1beta3/ProcessSplitterDocumentTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2020 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package documentai.v1beta3; - -import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertNotNull; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class ProcessSplitterDocumentTest { - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String PROCESSOR_ID = "7cb010d65184a4d"; - private static final String FILE_PATH = "resources/multi_document.pdf"; - - private ByteArrayOutputStream bout; - private PrintStream out; - private PrintStream originalPrintStream; - - private static void requireEnvVar(String varName) { - assertNotNull( - String.format("Environment variable '%s' must be set to perform these tests.", varName), - System.getenv(varName)); - } - - @Before - public void checkRequirements() { - requireEnvVar("GOOGLE_CLOUD_PROJECT"); - requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS"); - } - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - originalPrintStream = System.out; - System.setOut(out); - } - - @Test - public void testProcessSplitterDocument() - throws InterruptedException, ExecutionException, IOException, TimeoutException { - // parse the GCS invoice as a form. - ProcessSplitterDocument.processSplitterDocument(PROJECT_ID, "us", PROCESSOR_ID, FILE_PATH); - String got = bout.toString(); - - assertThat(got).contains("Found 8 subdocuments"); - assertThat(got).contains("confident that pages 1 to 2 are a subdocument"); - assertThat(got).contains("confident that page 10 is a subdocument"); - } - - @After - public void tearDown() { - System.out.flush(); - System.setOut(originalPrintStream); - } -}