Skip to content

Commit f6055b4

Browse files
authored
add 2nd gen GCF OCR sample (#7357)
* add v2 ocr sample * lint * fix for j17 * lint * add comments
1 parent 6d6f436 commit f6055b4

File tree

18 files changed

+1616
-0
lines changed

18 files changed

+1616
-0
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!--
4+
Copyright 2022 Google LLC
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
19+
<project xmlns="http://maven.apache.org/POM/4.0.0"
20+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
21+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
22+
<modelVersion>4.0.0</modelVersion>
23+
24+
<groupId>com.example.cloud.functions</groupId>
25+
<artifactId>functions-ocr-process-image</artifactId>
26+
27+
<parent>
28+
<groupId>com.google.cloud.samples</groupId>
29+
<artifactId>shared-configuration</artifactId>
30+
<version>1.2.0</version>
31+
</parent>
32+
33+
<properties>
34+
<maven.compiler.target>11</maven.compiler.target>
35+
<maven.compiler.source>11</maven.compiler.source>
36+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
37+
</properties>
38+
39+
<dependencyManagement>
40+
<dependencies>
41+
<dependency>
42+
<groupId>com.google.cloud</groupId>
43+
<artifactId>libraries-bom</artifactId>
44+
<version>25.4.0</version>
45+
<type>pom</type>
46+
<scope>import</scope>
47+
</dependency>
48+
</dependencies>
49+
</dependencyManagement>
50+
51+
<dependencies>
52+
<dependency>
53+
<groupId>com.google.cloud.functions</groupId>
54+
<artifactId>functions-framework-api</artifactId>
55+
<version>1.0.4</version>
56+
<scope>provided</scope>
57+
</dependency>
58+
<dependency>
59+
<groupId>io.cloudevents</groupId>
60+
<artifactId>cloudevents-core</artifactId>
61+
<version>2.3.0</version>
62+
</dependency>
63+
<dependency>
  <!-- Lombok is an annotation processor needed only at compile time, so it is
       scoped "provided" to keep it out of the packaged/deployed artifact. -->
  <groupId>org.projectlombok</groupId>
  <artifactId>lombok</artifactId>
  <version>1.18.24</version>
  <scope>provided</scope>
</dependency>
68+
<dependency>
69+
<groupId>com.google.cloud</groupId>
70+
<artifactId>google-cloud-vision</artifactId>
71+
</dependency>
72+
<dependency>
73+
<groupId>com.google.cloud</groupId>
74+
<artifactId>google-cloud-translate</artifactId>
75+
</dependency>
76+
<dependency>
77+
<groupId>com.google.cloud</groupId>
78+
<artifactId>google-cloud-pubsub</artifactId>
79+
</dependency>
80+
<dependency>
81+
<groupId>com.google.code.gson</groupId>
82+
<artifactId>gson</artifactId>
83+
<version>2.9.1</version>
84+
</dependency>
85+
86+
87+
<!-- The following dependencies are only required for testing -->
88+
<dependency>
89+
<groupId>junit</groupId>
90+
<artifactId>junit</artifactId>
91+
<version>4.13.2</version>
92+
<scope>test</scope>
93+
</dependency>
94+
<dependency>
95+
<groupId>com.google.truth</groupId>
96+
<artifactId>truth</artifactId>
97+
<version>1.1.3</version>
98+
<scope>test</scope>
99+
</dependency>
100+
<dependency>
101+
<groupId>com.google.guava</groupId>
102+
<artifactId>guava-testlib</artifactId>
103+
<version>31.1-jre</version>
104+
<scope>test</scope>
105+
</dependency>
106+
</dependencies>
107+
108+
<build>
109+
<plugins>
110+
<plugin>
111+
<!--
112+
Google Cloud Functions Framework Maven plugin
113+
114+
This plugin allows you to run Cloud Functions Java code
115+
locally. Use the following terminal command to run a
116+
given function locally:
117+
118+
mvn function:run -Drun.functionTarget=your.package.yourFunction
119+
-->
120+
<groupId>com.google.cloud.functions</groupId>
121+
<artifactId>function-maven-plugin</artifactId>
122+
<version>0.10.0</version>
123+
<configuration>
124+
<functionTarget>
125+
functions.OcrProcessImage
126+
</functionTarget>
127+
</configuration>
128+
</plugin>
129+
<plugin>
130+
<groupId>org.apache.maven.plugins</groupId>
131+
<artifactId>maven-surefire-plugin</artifactId>
132+
<version>3.0.0-M7</version>
133+
<configuration>
134+
<skipTests>${skipTests}</skipTests>
135+
<reportNameSuffix>sponge_log</reportNameSuffix>
136+
<trimStackTrace>false</trimStackTrace>
137+
</configuration>
138+
</plugin>
139+
</plugins>
140+
</build>
141+
</project>
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
/*
2+
* Copyright 2022 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package functions;
18+
19+
// [START functions_ocr_process]
20+
21+
import com.google.cloud.functions.CloudEventsFunction;
22+
import com.google.cloud.pubsub.v1.Publisher;
23+
import com.google.cloud.translate.v3.DetectLanguageRequest;
24+
import com.google.cloud.translate.v3.DetectLanguageResponse;
25+
import com.google.cloud.translate.v3.LocationName;
26+
import com.google.cloud.translate.v3.TranslationServiceClient;
27+
import com.google.cloud.vision.v1.AnnotateImageRequest;
28+
import com.google.cloud.vision.v1.AnnotateImageResponse;
29+
import com.google.cloud.vision.v1.Feature;
30+
import com.google.cloud.vision.v1.Image;
31+
import com.google.cloud.vision.v1.ImageAnnotatorClient;
32+
import com.google.cloud.vision.v1.ImageSource;
33+
import com.google.gson.Gson;
34+
import com.google.gson.GsonBuilder;
35+
import com.google.gson.JsonDeserializationContext;
36+
import com.google.gson.JsonDeserializer;
37+
import com.google.gson.JsonElement;
38+
import com.google.gson.JsonParseException;
39+
import com.google.protobuf.ByteString;
40+
import com.google.pubsub.v1.ProjectTopicName;
41+
import com.google.pubsub.v1.PubsubMessage;
42+
import functions.eventpojos.StorageObjectData;
43+
import io.cloudevents.CloudEvent;
44+
import java.io.IOException;
45+
import java.lang.reflect.Type;
46+
import java.nio.charset.StandardCharsets;
47+
import java.time.OffsetDateTime;
48+
import java.util.ArrayList;
49+
import java.util.List;
50+
import java.util.concurrent.ExecutionException;
51+
import java.util.logging.Level;
52+
import java.util.logging.Logger;
53+
54+
// [END functions_ocr_process]
55+
56+
// [START functions_ocr_setup]
57+
public class OcrProcessImage implements CloudEventsFunction {
58+
// TODO<developer> set these environment variables
59+
private static final String PROJECT_ID = System.getenv("GCP_PROJECT");
60+
private static final String TRANSLATE_TOPIC_NAME = System.getenv("TRANSLATE_TOPIC");
61+
private static final String[] TO_LANGS = System.getenv("TO_LANG").split(",");
62+
63+
private static final Logger logger = Logger.getLogger(OcrProcessImage.class.getName());
64+
private static final String LOCATION_NAME = LocationName.of(PROJECT_ID, "global").toString();
65+
private Publisher publisher;
66+
67+
public OcrProcessImage() throws IOException {
68+
publisher = Publisher.newBuilder(ProjectTopicName.of(PROJECT_ID, TRANSLATE_TOPIC_NAME)).build();
69+
}
70+
71+
// Create custom deserializer to handle timestamps in event data
72+
class DateDeserializer implements JsonDeserializer<OffsetDateTime> {
73+
@Override
74+
public OffsetDateTime deserialize(
75+
JsonElement json, Type typeOfT, JsonDeserializationContext context)
76+
throws JsonParseException {
77+
return OffsetDateTime.parse(json.getAsString());
78+
}
79+
}
80+
81+
Gson gson =
82+
new GsonBuilder().registerTypeAdapter(OffsetDateTime.class, new DateDeserializer()).create();
83+
// [END functions_ocr_setup]
84+
85+
// [START functions_ocr_process]
86+
@Override
87+
public void accept(CloudEvent event) {
88+
// Unmarshal data from CloudEvent
89+
StorageObjectData gcsEvent =
90+
gson.fromJson(
91+
new String(event.getData().toBytes(), StandardCharsets.UTF_8), StorageObjectData.class);
92+
String bucket = gcsEvent.getBucket();
93+
if (bucket == null) {
94+
throw new IllegalArgumentException("Missing bucket parameter");
95+
}
96+
String filename = gcsEvent.getName();
97+
if (filename == null) {
98+
throw new IllegalArgumentException("Missing name parameter");
99+
}
100+
101+
detectText(bucket, filename);
102+
}
103+
// [END functions_ocr_process]
104+
105+
// [START functions_ocr_detect]
106+
private void detectText(String bucket, String filename) {
107+
logger.info("Looking for text in image " + filename);
108+
109+
List<AnnotateImageRequest> visionRequests = new ArrayList<>();
110+
String gcsPath = String.format("gs://%s/%s", bucket, filename);
111+
112+
ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
113+
Image img = Image.newBuilder().setSource(imgSource).build();
114+
115+
Feature textFeature = Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION).build();
116+
AnnotateImageRequest visionRequest =
117+
AnnotateImageRequest.newBuilder().addFeatures(textFeature).setImage(img).build();
118+
visionRequests.add(visionRequest);
119+
120+
// Detect text in an image using the Cloud Vision API
121+
AnnotateImageResponse visionResponse;
122+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
123+
visionResponse = client.batchAnnotateImages(visionRequests).getResponses(0);
124+
if (visionResponse == null || !visionResponse.hasFullTextAnnotation()) {
125+
logger.info(String.format("Image %s contains no text", filename));
126+
return;
127+
}
128+
129+
if (visionResponse.hasError()) {
130+
// Log error
131+
logger.log(
132+
Level.SEVERE, "Error in vision API call: " + visionResponse.getError().getMessage());
133+
return;
134+
}
135+
} catch (IOException e) {
136+
// Log error (since IOException cannot be thrown by a Cloud Function)
137+
logger.log(Level.SEVERE, "Error detecting text: " + e.getMessage(), e);
138+
return;
139+
}
140+
141+
String text = visionResponse.getFullTextAnnotation().getText();
142+
logger.info("Extracted text from image: " + text);
143+
144+
// Detect language using the Cloud Translation API
145+
DetectLanguageRequest languageRequest =
146+
DetectLanguageRequest.newBuilder()
147+
.setParent(LOCATION_NAME)
148+
.setMimeType("text/plain")
149+
.setContent(text)
150+
.build();
151+
DetectLanguageResponse languageResponse;
152+
try (TranslationServiceClient client = TranslationServiceClient.create()) {
153+
languageResponse = client.detectLanguage(languageRequest);
154+
} catch (IOException e) {
155+
// Log error (since IOException cannot be thrown by a function)
156+
logger.log(Level.SEVERE, "Error detecting language: " + e.getMessage(), e);
157+
return;
158+
}
159+
160+
if (languageResponse.getLanguagesCount() == 0) {
161+
logger.info("No languages were detected for text: " + text);
162+
return;
163+
}
164+
165+
String languageCode = languageResponse.getLanguages(0).getLanguageCode();
166+
logger.info(String.format("Detected language %s for file %s", languageCode, filename));
167+
168+
// Send a Pub/Sub translation request for every language we're going to translate to
169+
for (String targetLanguage : TO_LANGS) {
170+
logger.info("Sending translation request for language " + targetLanguage);
171+
OcrTranslateApiMessage message = new OcrTranslateApiMessage(text, filename, targetLanguage);
172+
ByteString byteStr = ByteString.copyFrom(message.toPubsubData());
173+
PubsubMessage pubsubApiMessage = PubsubMessage.newBuilder().setData(byteStr).build();
174+
try {
175+
publisher.publish(pubsubApiMessage).get();
176+
} catch (InterruptedException | ExecutionException e) {
177+
// Log error
178+
logger.log(Level.SEVERE, "Error publishing translation request: " + e.getMessage(), e);
179+
return;
180+
}
181+
}
182+
}
183+
// [END functions_ocr_detect]
184+
185+
// [START functions_ocr_process]
186+
// [START functions_ocr_setup]
187+
}
188+
// [END functions_ocr_setup]
189+
// [END functions_ocr_process]

0 commit comments

Comments
 (0)