Skip to content

Commit

Permalink
docs(samples): refactors the export-to-gcs sample (#737)
Browse files Browse the repository at this point in the history
* docs(samples): refactors the export-to-gcs sample

* docs(samples): refactors the export-to-gcs sample

* docs(samples): fixed lint

* docs(samples): updated test file

* docs(samples): added imports

* docs(samples): added dependency to global pom

* docs(samples): added dependency to global pom

* docs(samples): added dependency to global pom

* docs(samples): updated pom

* docs(samples): fixed null error

* docs(samples): refactored test
  • Loading branch information
b-loved-dreamer authored Dec 2, 2021
1 parent 8c6d16a commit 0873c66
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 9 deletions.
Empty file added speech/.bash_profile
Empty file.
5 changes: 5 additions & 0 deletions speech/snippets/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
</dependencyManagement>

<dependencies>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20210307</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,34 @@
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.TranscriptOutputConfig;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import org.json.JSONObject;

public class ExportToStorageBeta {

/**
 * Runs the sample with placeholder values.
 *
 * <p>Replace the {@code YOUR_*} placeholders before running. All seven values are forwarded to
 * {@code exportToStorage}; {@code bucketName} and {@code objectName} identify the Cloud Storage
 * object that is read back after the recognition operation finishes.
 *
 * @param args unused
 * @throws Exception if {@code exportToStorage} fails
 */
public static void main(String[] args) throws Exception {
  String inputUri = "gs://YOUR_BUCKET_ID/path/to/your/audio_file.wav";
  String outputStorageUri = "gs://YOUR_BUCKET_ID/output_dir_prefix/";
  String objectName = "YOUR_OBJECT_NAME";
  String bucketName = "YOUR_BUCKET_ID";
  String encoding = "LINEAR16"; // encoding of the audio
  int sampleRateHertz = 8000;
  String languageCode = "en-US"; // language code BCP-47_LANGUAGE_CODE_OF_AUDIO
  // Single call using the seven-parameter signature; the earlier five-argument call no longer
  // matches exportToStorage and would have started a second, redundant recognition request.
  exportToStorage(
      inputUri,
      outputStorageUri,
      encoding,
      sampleRateHertz,
      languageCode,
      bucketName,
      objectName);
}

// Exports the recognized output to specified GCS destination.
Expand All @@ -48,7 +63,9 @@ public static void exportToStorage(
String outputStorageUri,
String encoding,
int sampleRateHertz,
String languageCode)
String languageCode,
String bucketName,
String objectName)
throws IOException, ExecutionException, InterruptedException {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
Expand All @@ -58,6 +75,9 @@ public static void exportToStorage(

AudioEncoding audioEncoding = AudioEncoding.valueOf(encoding);

// Instantiates a client
Storage storage = StorageOptions.getDefaultInstance().getService();

// Pass in the URI of the Cloud Storage bucket to hold the transcription
TranscriptOutputConfig outputConfig =
TranscriptOutputConfig.newBuilder().setGcsUri(outputStorageUri).build();
Expand All @@ -80,12 +100,39 @@ public static void exportToStorage(
speechClient.longRunningRecognizeAsync(request);

System.out.println("Waiting for operation to complete...");
LongRunningRecognizeResponse response = future.get();
future.get();

// Get blob given bucket and object name
Blob blob = storage.get(BlobId.of(bucketName, objectName));

// Extract byte contents from blob
byte[] bytes = blob.getContent();

// Get decoded representation
String decoded = new String(bytes, "UTF-8");

// Create json object
JSONObject jsonObject = new JSONObject(decoded);

// Get json string
String json = jsonObject.toString();

// Specify the protobuf message type to parse the JSON into
LongRunningRecognizeResponse.Builder builder = LongRunningRecognizeResponse.newBuilder();

// Construct a parser
JsonFormat.Parser parser = JsonFormat.parser().ignoringUnknownFields();

// Parses from JSON into a protobuf message.
parser.merge(json, builder);

// Get the converted values
LongRunningRecognizeResponse storageResponse = builder.build();

System.out.println("Results saved to specified output Cloud Storage bucket.");

String output =
response.getResultsList().stream()
storageResponse.getResultsList().stream()
.map(result -> String.valueOf(result.getAlternatives(0).getTranscript()))
.collect(Collectors.joining("\n"));
System.out.printf("Transcription: %s", output);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BucketInfo;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import java.io.ByteArrayOutputStream;
Expand All @@ -34,25 +35,32 @@ public class ExportToStorageBetaTest {
// Value of the GOOGLE_CLOUD_PROJECT environment variable (null when the variable is unset).
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
// Audio file that the test asks the sample to transcribe.
private static final String AUDIO_STORAGE_URI =
"gs://cloud-samples-data/speech/commercial_mono.wav";
// NOTE(review): PREFIX and BUCKET_PREFIX look like the before/after forms of the same constant
// in this diff view — confirm which one is live in the committed file.
private static final String PREFIX = "EXPORT_TEST_OUTPUTS";
private static final String BUCKET_PREFIX = "EXPORT_TRANSCRIPT_OUTPUT";
// First 8 characters of a random UUID with the dashes removed; used to build BUCKET_NAME.
private static final String UNIQUE_ID =
UUID.randomUUID().toString().replace("-", "").substring(0, 8);
// Name of the bucket created by createBucket(): "speech-" followed by UNIQUE_ID.
private static String BUCKET_NAME = String.format("speech-%s", UNIQUE_ID);
// Destination URI handed to the sample as its output location.
// NOTE(review): two initializer lines follow; they appear to be the removed and the added diff
// lines (PROJECT_ID/PREFIX/UUID form vs. BUCKET_NAME/BUCKET_PREFIX form) — confirm.
private static final String OUTPUT_STORAGE_URI =
String.format("gs://%s/%s/%s/", PROJECT_ID, PREFIX, UUID.randomUUID());
String.format("gs://%s/%s/", BUCKET_NAME, BUCKET_PREFIX);
// Recognition settings passed to the sample.
private static final String ENCODING = "LINEAR16";
private static final String LANGUAGE_CODE = "en-US";
// Storage client built from the default StorageOptions; used by createBucket().
private static Storage storage = StorageOptions.getDefaultInstance().getService();

private static final int SAMPLE_RATE_HERTZ = 8000;

// Buffer and stream set up in setUp(); the test reads bout to inspect the sample's output.
private ByteArrayOutputStream bout;
// Holds the System.out reference captured in setUp().
private PrintStream originalPrintStream;
private PrintStream out;

/** Creates the Cloud Storage bucket named by {@code BUCKET_NAME} via the class-level client. */
private static void createBucket() {
  BucketInfo bucketInfo = BucketInfo.of(BUCKET_NAME);
  storage.create(bucketInfo);
}

private static void cleanUpBucket() {
Storage storage = StorageOptions.getDefaultInstance().getService();
Page<Blob> blobs =
storage.list(
PROJECT_ID,
Storage.BlobListOption.currentDirectory(),
Storage.BlobListOption.prefix(PREFIX));
Storage.BlobListOption.prefix(BUCKET_PREFIX));

deleteDirectory(storage, blobs);
}
Expand All @@ -73,6 +81,7 @@ private static void deleteDirectory(Storage storage, Page<Blob> blobs) {

@Before
public void setUp() {
createBucket();
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
originalPrintStream = System.out;
Expand All @@ -89,7 +98,13 @@ public void tearDown() {
@Test
public void testExportToStorageBeta() throws Exception {
ExportToStorageBeta.exportToStorage(
AUDIO_STORAGE_URI, OUTPUT_STORAGE_URI, ENCODING, SAMPLE_RATE_HERTZ, LANGUAGE_CODE);
AUDIO_STORAGE_URI,
OUTPUT_STORAGE_URI,
ENCODING,
SAMPLE_RATE_HERTZ,
LANGUAGE_CODE,
BUCKET_NAME,
BUCKET_PREFIX);
String got = bout.toString();
assertThat(got).contains("Transcription:");
}
Expand Down

0 comments on commit 0873c66

Please sign in to comment.