diff --git a/video/cloud-client/README.md b/video/cloud-client/README.md index 21d95360769..3565429ce1f 100644 --- a/video/cloud-client/README.md +++ b/video/cloud-client/README.md @@ -53,6 +53,11 @@ Detect Shots mvn exec:java -DDetect -Dexec.args="shots gs://demomaker/gbikes_dinosaur.mp4" ``` +Transcribe Speech +``` +mvn exec:java -DDetect -Dexec.args="speech-transcription gs://python-docs-samples-tests/video/googlework_short.mp4" +``` + From Windows, you may need to supply your classpath differently, for example: ``` mvn exec:java -DDetect -Dexec.args="labels gs://demomaker/cat.mp4" diff --git a/video/cloud-client/src/main/java/com/example/video/Detect.java b/video/cloud-client/src/main/java/com/example/video/Detect.java index e31f4414998..ee5dad37194 100644 --- a/video/cloud-client/src/main/java/com/example/video/Detect.java +++ b/video/cloud-client/src/main/java/com/example/video/Detect.java @@ -25,14 +25,21 @@ import com.google.cloud.videointelligence.v1.Feature; import com.google.cloud.videointelligence.v1.LabelAnnotation; import com.google.cloud.videointelligence.v1.LabelSegment; +import com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative; +import com.google.cloud.videointelligence.v1.SpeechTranscription; +import com.google.cloud.videointelligence.v1.SpeechTranscriptionConfig; import com.google.cloud.videointelligence.v1.VideoAnnotationResults; +import com.google.cloud.videointelligence.v1.VideoContext; import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient; import com.google.cloud.videointelligence.v1.VideoSegment; +import com.google.cloud.videointelligence.v1.WordInfo; import com.google.protobuf.ByteString; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + import org.apache.commons.codec.binary.Base64; @@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception { if (command.equals("explicit-content")) { analyzeExplicitContent(path); } + if (command.equals("speech-transcription")) { + speechTranscription(path); + } } /** @@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception { // [END video_analyze_explicit_content] } } -} + + /** + * Transcribe speech from a video stored on GCS. + * + * @param gcsUri the path to the video file to analyze. + */ + public static void speechTranscription(String gcsUri) throws Exception { + // [START video_speech_transcription_gcs] + // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Set the language code + SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder() + .setLanguageCode("en-US") + .setEnableAutomaticPunctuation(true) + .build(); + + // Set the video context with the above configuration + VideoContext context = VideoContext.newBuilder() + .setSpeechTranscriptionConfig(config) + .build(); + + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputUri(gcsUri) + .addFeatures(Feature.SPEECH_TRANSCRIPTION) + .setVideoContext(context) + .build(); + + // asynchronously perform speech transcription on videos + OperationFuture response = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // Display the results + for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS) + .getAnnotationResultsList()) { + for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) { + try { + // Print the transcription + if (speechTranscription.getAlternativesCount() > 0) { + SpeechRecognitionAlternative alternative = speechTranscription.getAlternatives(0); + + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + System.out.printf("Confidence: %.2f\n", alternative.getConfidence()); + + System.out.println("Word level information:"); + for (WordInfo wordInfo : alternative.getWordsList()) { + double startTime = wordInfo.getStartTime().getSeconds() + + wordInfo.getStartTime().getNanos() / 1e9; + double endTime = wordInfo.getEndTime().getSeconds() + + wordInfo.getEndTime().getNanos() / 1e9; + System.out.printf("\t%4.2fs - %4.2fs: %s\n", + startTime, endTime, wordInfo.getWord()); + } + } else { + System.out.println("No transcription found"); + } + } catch (IndexOutOfBoundsException ioe) { + System.out.println("Could not retrieve frame: " + ioe.getMessage()); + } + } + } + } + // [END video_speech_transcription_gcs] + } +} \ No newline at end of file diff --git a/video/cloud-client/src/test/java/com/example/video/DetectIT.java b/video/cloud-client/src/test/java/com/example/video/DetectIT.java index fcc7a50cb85..1404ec66888 100644 --- a/video/cloud-client/src/test/java/com/example/video/DetectIT.java +++ b/video/cloud-client/src/test/java/com/example/video/DetectIT.java @@ -37,6 +37,8 @@ public class DetectIT { static final String LABEL_FILE_LOCATION = "./resources/cat.mp4"; static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4"; static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4"; + static final String SPEECH_GCS_LOCATION = + "gs://java-docs-samples-testing/video/googlework_short.mp4"; @Before public void setUp() { @@ -84,4 +86,13 @@ public void testShots() throws Exception { assertThat(got).contains("Shots:"); assertThat(got).contains("Location: 0"); } + + @Test + public void testSpeechTranscription() throws Exception { + String[] args = {"speech-transcription", SPEECH_GCS_LOCATION}; + Detect.argsHelper(args); + String got = bout.toString(); + + assertThat(got).contains("cultural"); + } }