Add Google Cloud Storage input to the Natural Language analysis sample.

Each Analyze#analyzeX(String) method is renamed to analyzeXText and gains an
analyzeXFile counterpart that builds the Document with setGcsContentUri(path)
instead of setContent(text). main() selects the File variant when the argument
starts with "gs://". The README and AnalyzeIT are updated to match.

The neutral-sentiment test finishes its Truth tolerance check with .of(0.0);
isWithin(0.1) on its own only creates the comparison and verifies nothing.

NOTE(review): the removed/added `java -cp $JAR_FILE $MAIN_CLASS` usage lines in
the first README hunk are textually identical here; presumably an
angle-bracketed argument placeholder was lost from both -- confirm against the
repository before applying.

diff --git a/language/analysis/README.md b/language/analysis/README.md
index b120baf6081..fd00b27c000 100644
--- a/language/analysis/README.md
+++ b/language/analysis/README.md
@@ -30,12 +30,12 @@ mvn clean compile assembly:single
 ```
 
 We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes
-three values `entities`, `sentiment` or `syntax`.
+three values `entities`, `sentiment`, or `syntax`.
 
 ```
 MAIN_CLASS=com.google.cloud.language.samples.Analyze
 JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar
-java -cp $JAR_FILE $MAIN_CLASS
+java -cp $JAR_FILE $MAIN_CLASS
 ```
 
 Example usage:
@@ -47,7 +47,9 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search
 offer products beyond search, but the spirit of what he said remains."
 
 java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE"
+java -cp $JAR_FILE $MAIN_CLASS entities "gs://bucket/file.txt"
 java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE"
+java -cp $JAR_FILE $MAIN_CLASS sentiment "gs://bucket/file.txt"
 java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE"
+java -cp $JAR_FILE $MAIN_CLASS syntax "gs://bucket/file.txt"
 ```
-
diff --git a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java
index be89dfd6895..9b4adfcff96 100644
--- a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java
+++ b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java
@@ -60,11 +60,23 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept
     Analyze app = new Analyze(LanguageServiceClient.create());
 
     if (command.equals("entities")) {
-      printEntities(System.out, app.analyzeEntities(text));
+      if (text.startsWith("gs://")) {
+        printEntities(System.out, app.analyzeEntitiesFile(text));
+      } else {
+        printEntities(System.out, app.analyzeEntitiesText(text));
+      }
     } else if (command.equals("sentiment")) {
-      printSentiment(System.out, app.analyzeSentiment(text));
+      if (text.startsWith("gs://")) {
+        printSentiment(System.out, app.analyzeSentimentFile(text));
+      } else {
+        printSentiment(System.out, app.analyzeSentimentText(text));
+      }
     } else if (command.equals("syntax")) {
-      printSyntax(System.out, app.analyzeSyntax(text));
+      if (text.startsWith("gs://")) {
+        printSyntax(System.out, app.analyzeSyntaxFile(text));
+      } else {
+        printSyntax(System.out, app.analyzeSyntaxText(text));
+      }
     }
   }
 
@@ -111,6 +123,9 @@ public static void printSentiment(PrintStream out, Sentiment sentiment) {
     out.printf("\tScore: %.3f\n", sentiment.getScore());
   }
 
+  /**
+   * Prints the Syntax for the {@code tokens}.
+   */
   public static void printSyntax(PrintStream out, List<Token> tokens) {
     if (tokens == null || tokens.size() == 0) {
       out.println("No syntax found");
@@ -153,7 +168,7 @@ public Analyze(LanguageServiceClient languageApi) {
   /**
    * Gets {@link Entity}s from the string {@code text}.
    */
-  public List<Entity> analyzeEntities(String text) throws IOException {
+  public List<Entity> analyzeEntitiesText(String text) throws IOException {
     Document doc = Document.newBuilder()
         .setContent(text).setType(Type.PLAIN_TEXT).build();
     AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
@@ -163,20 +178,43 @@ public List<Entity> analyzeEntities(String text) throws IOException {
     return response.getEntitiesList();
   }
 
+  /**
+   * Gets {@link Entity}s from the contents of the object at the given GCS {@code path}.
+   */
+  public List<Entity> analyzeEntitiesFile(String path) throws IOException {
+    Document doc = Document.newBuilder()
+        .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
+    AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
+        .setDocument(doc)
+        .setEncodingType(EncodingType.UTF16).build();
+    AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request);
+    return response.getEntitiesList();
+  }
+
   /**
    * Gets {@link Sentiment} from the string {@code text}.
    */
-  public Sentiment analyzeSentiment(String text) throws IOException {
+  public Sentiment analyzeSentimentText(String text) throws IOException {
     Document doc = Document.newBuilder()
         .setContent(text).setType(Type.PLAIN_TEXT).build();
     AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
     return response.getDocumentSentiment();
   }
 
+  /**
+   * Gets {@link Sentiment} from the contents of the object at the given GCS {@code path}.
+   */
+  public Sentiment analyzeSentimentFile(String path) throws IOException {
+    Document doc = Document.newBuilder()
+        .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
+    AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
+    return response.getDocumentSentiment();
+  }
+
   /**
    * Gets {@link Token}s from the string {@code text}.
    */
-  public List<Token> analyzeSyntax(String text) throws IOException {
+  public List<Token> analyzeSyntaxText(String text) throws IOException {
     Document doc = Document.newBuilder()
         .setContent(text).setType(Type.PLAIN_TEXT).build();
     AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
@@ -185,4 +223,17 @@ public List<Token> analyzeSyntax(String text) throws IOException {
     AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
     return response.getTokensList();
   }
+
+  /**
+   * Gets {@link Token}s from the contents of the object at the given GCS {@code path}.
+   */
+  public List<Token> analyzeSyntaxFile(String path) throws IOException {
+    Document doc = Document.newBuilder()
+        .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
+    AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
+        .setDocument(doc)
+        .setEncodingType(EncodingType.UTF16).build();
+    AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
+    return response.getTokensList();
+  }
 }
diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java
index aa1a40fddc6..faa9b0d1f06 100644
--- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java
+++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java
@@ -48,7 +48,7 @@ public class AnalyzeIT {
   @Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception {
     // Act
     List<Entity> entities =
-        analyzeApp.analyzeEntities(
+        analyzeApp.analyzeEntitiesText(
             "Larry Page, Google's co-founder, once described the 'perfect search engine' as"
             + " something that 'understands exactly what you mean and gives you back exactly what"
             + " you want.' Since he spoke those words Google has grown to offer products beyond"
@@ -59,10 +59,20 @@ public class AnalyzeIT {
     assertThat(got).named("entity names").contains("Larry Page");
   }
 
-  @Test public void analyzeSentiment_returnPositive() throws Exception {
+  @Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception {
+    // Act
+    List<Entity> entities =
+        analyzeApp.analyzeEntitiesFile("gs://cloud-samples-tests/natural-language/gettysburg.txt");
+    List<String> got = entities.stream().map(e -> e.getName()).collect(Collectors.toList());
+
+    // Assert
+    assertThat(got).named("entity names").contains("God");
+  }
+
+  @Test public void analyzeSentimentText_returnPositive() throws Exception {
     // Act
     Sentiment sentiment =
-        analyzeApp.analyzeSentiment(
+        analyzeApp.analyzeSentimentText(
             "Tom Cruise is one of the finest actors in hollywood and a great star!");
 
     // Assert
@@ -70,10 +80,21 @@ public class AnalyzeIT {
     assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
   }
 
+  @Test public void analyzeSentimentFile_returnPositiveFile() throws Exception {
+    // Act
+    Sentiment sentiment =
+        analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
+            + "sentiment/bladerunner-pos.txt");
+
+    // Assert
+    assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
+    assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
+  }
+
   @Test public void analyzeSentiment_returnNegative() throws Exception {
     // Act
     Sentiment sentiment =
-        analyzeApp.analyzeSentiment(
+        analyzeApp.analyzeSentimentText(
             "That was the worst performance I've seen in awhile.");
 
     // Assert
@@ -81,10 +102,32 @@ public class AnalyzeIT {
     assertThat((double)sentiment.getScore()).isLessThan(0.0);
   }
 
+  @Test public void analyzeSentiment_returnNegativeFile() throws Exception {
+    // Act
+    Sentiment sentiment =
+        analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
+            + "sentiment/bladerunner-neg.txt");
+
+    // Assert
+    assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
+    assertThat((double)sentiment.getScore()).isLessThan(0.0);
+  }
+
+  @Test public void analyzeSentiment_returnNeutralFile() throws Exception {
+    // Act
+    Sentiment sentiment =
+        analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
+            + "sentiment/bladerunner-neutral.txt");
+
+    // Assert
+    assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0);
+    assertThat((double)sentiment.getScore()).isWithin(0.1).of(0.0);
+  }
+
   @Test public void analyzeSyntax_partOfSpeech() throws Exception {
     // Act
     List<Token> token =
-        analyzeApp.analyzeSyntax(
+        analyzeApp.analyzeSyntaxText(
             "President Obama was elected for the second term");
 
     List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
@@ -94,4 +137,20 @@ public class AnalyzeIT {
     assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB,
         Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder();
   }
+
+  @Test public void analyzeSyntax_partOfSpeechFile() throws Exception {
+    // Act
+    List<Token> token =
+        analyzeApp.analyzeSyntaxFile("gs://cloud-samples-tests/natural-language/"
+            + "sentiment/bladerunner-neutral.txt");
+
+    List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
+        .collect(Collectors.toList());
+
+    // Assert
+    assertThat(got).containsExactly(Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB,
+        Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ,
+        Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET,
+        Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ, Tag.PUNCT).inOrder();
+  }
 }