diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClient.cs index b72dbf0eefe7..7fe8b224b05a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClient.cs @@ -2,7 +2,6 @@ // Licensed under the MIT License. using System; -using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -60,15 +59,11 @@ protected FormRecognizerClient() [ForwardsClientCalls] public virtual RecognizeContentOperation StartRecognizeContent(Stream formFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeLayoutAsync(ContentType.Pdf, formFileStream, cancellationToken); - //Response response = ServiceClient.RestClient.AnalyzeLayoutAsync(ContentType.Pdf, formFileStream, cancellationToken); - - // TODO: throw Exception if header is not present. - //response.Headers.TryGetValue("Operation-Location", out string operationLocation); - string operationLocation = response.Headers.OperationLocation; - return new RecognizeContentOperation(ServiceClient, operationLocation); + recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? 
DetectContentType(formFileStream, nameof(formFileStream)); + + ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeLayoutAsync(contentType, formFileStream, cancellationToken); + return new RecognizeContentOperation(ServiceClient, response.Headers.OperationLocation); } /// @@ -82,15 +77,11 @@ public virtual RecognizeContentOperation StartRecognizeContent(Stream formFileSt [ForwardsClientCalls] public virtual async Task StartRecognizeContentAsync(Stream formFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeLayoutAsyncAsync(ContentType.Pdf, formFileStream, cancellationToken).ConfigureAwait(false); - //Response response = await ServiceClient.RestClient.AnalyzeLayoutAsyncAsync(ContentType.Pdf, formFileStream, cancellationToken).ConfigureAwait(false); - - // TODO: throw Exception if header is not present. - //response.Headers.TryGetValue("Operation-Location", out string operationLocation); - string operationLocation = response.Headers.OperationLocation; - return new RecognizeContentOperation(ServiceClient, operationLocation); + recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? 
DetectContentType(formFileStream, nameof(formFileStream)); + + ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeLayoutAsyncAsync(contentType, formFileStream, cancellationToken).ConfigureAwait(false); + return new RecognizeContentOperation(ServiceClient, response.Headers.OperationLocation); } /// @@ -106,12 +97,7 @@ public virtual RecognizeContentOperation StartRecognizeContentFromUri(Uri formFi { SourcePath_internal sourcePath = new SourcePath_internal(formFileUri.ToString()); ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeLayoutAsync(sourcePath, cancellationToken); - //Response response = ServiceClient.RestClient.AnalyzeLayoutAsync(sourcePath, cancellationToken); - - // TODO: throw Exception if header is not present. - //response.Headers.TryGetValue("Operation-Location", out string operationLocation); - string operationLocation = response.Headers.OperationLocation; - return new RecognizeContentOperation(ServiceClient, operationLocation); + return new RecognizeContentOperation(ServiceClient, response.Headers.OperationLocation); } /// @@ -127,12 +113,7 @@ public virtual async Task StartRecognizeContentFromUr { SourcePath_internal sourcePath = new SourcePath_internal(formFileUri.ToString()); ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeLayoutAsyncAsync(sourcePath, cancellationToken).ConfigureAwait(false); - //Response response = await ServiceClient.RestClient.AnalyzeLayoutAsyncAsync(sourcePath, cancellationToken).ConfigureAwait(false); - - // TODO: throw Exception if header is not present. 
- //response.Headers.TryGetValue("Operation-Location", out string operationLocation); - string operationLocation = response.Headers.OperationLocation; - return new RecognizeContentOperation(ServiceClient, operationLocation); + return new RecognizeContentOperation(ServiceClient, response.Headers.OperationLocation); } #endregion @@ -143,19 +124,15 @@ public virtual async Task StartRecognizeContentFromUr /// Recognizes values from one or more receipts. /// /// The stream containing the one or more receipts to recognize values from. - /// > - /// /// /// A controlling the request lifetime. /// A to wait on this long-running operation. Its .Value upon successful /// completion will contain the extracted receipt. [ForwardsClientCalls] - public virtual async Task StartRecognizeReceiptsAsync(Stream receiptFileStream, ContentType contentType, string receiptLocale = "en-US", RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) + public virtual async Task StartRecognizeReceiptsAsync(Stream receiptFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? DetectContentType(receiptFileStream, nameof(receiptFileStream)); ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeReceiptAsyncAsync(contentType, receiptFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken).ConfigureAwait(false); return new RecognizeReceiptsOperation(ServiceClient, response.Headers.OperationLocation); @@ -165,19 +142,15 @@ public virtual async Task StartRecognizeReceiptsAsyn /// Recognizes values from one or more receipts. /// /// The stream containing the one or more receipts to recognize values from. 
- /// - /// /// Whether or not to include raw page recognition in addition to layout elements. /// A controlling the request lifetime. /// A to wait on this long-running operation. Its .Value upon successful /// completion will contain the extracted receipt. [ForwardsClientCalls] - public virtual RecognizeReceiptsOperation StartRecognizeReceipts(Stream receiptFileStream, ContentType contentType, string receiptLocale = "en-US", RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) + public virtual RecognizeReceiptsOperation StartRecognizeReceipts(Stream receiptFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? DetectContentType(receiptFileStream, nameof(receiptFileStream)); ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeReceiptAsync(contentType, receiptFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken); return new RecognizeReceiptsOperation(ServiceClient, response.Headers.OperationLocation); @@ -187,13 +160,12 @@ public virtual RecognizeReceiptsOperation StartRecognizeReceipts(Stream receiptF /// Recognizes values from one or more receipts. /// /// The absolute URI of the remote file to recognize values from. - /// /// Whether or not to include raw page recognition in addition to layout elements. /// A controlling the request lifetime. /// A to wait on this long-running operation. Its .Value upon successful /// completion will contain the extracted receipt. 
[ForwardsClientCalls] - public virtual async Task StartRecognizeReceiptsFromUriAsync(Uri receiptFileUri, string receiptLocale = "en-US", RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) + public virtual async Task StartRecognizeReceiptsFromUriAsync(Uri receiptFileUri, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { recognizeOptions ??= new RecognizeOptions(); @@ -206,13 +178,12 @@ public virtual async Task StartRecognizeReceiptsFrom /// Recognizes values from one or more receipts. /// /// The absolute URI of the remote file to recognize values from. - /// /// Whether or not to include raw page recognition in addition to layout elements. /// A controlling the request lifetime. /// A to wait on this long-running operation. Its .Value upon successful /// completion will contain the extracted receipt. [ForwardsClientCalls] - public virtual RecognizeReceiptsOperation StartRecognizeReceiptsFromUri(Uri receiptFileUri, string receiptLocale="en-US", RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) + public virtual RecognizeReceiptsOperation StartRecognizeReceiptsFromUri(Uri receiptFileUri, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { recognizeOptions ??= new RecognizeOptions(); @@ -238,11 +209,9 @@ public virtual RecognizeReceiptsOperation StartRecognizeReceiptsFromUri(Uri rece public virtual RecognizeCustomFormsOperation StartRecognizeCustomForms(string modelId, Stream formFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? 
DetectContentType(formFileStream, nameof(formFileStream)); - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - - ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeWithCustomModel(new Guid(modelId), ContentType.Pdf, formFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken); + ResponseWithHeaders response = ServiceClient.RestClient.AnalyzeWithCustomModel(new Guid(modelId), contentType, formFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken); return new RecognizeCustomFormsOperation(ServiceClient, modelId, response.Headers.OperationLocation); } @@ -278,11 +247,9 @@ public virtual RecognizeCustomFormsOperation StartRecognizeCustomFormsFromUri(st public virtual async Task StartRecognizeCustomFormsAsync(string modelId, Stream formFileStream, RecognizeOptions recognizeOptions = default, CancellationToken cancellationToken = default) { recognizeOptions ??= new RecognizeOptions(); + ContentType contentType = recognizeOptions.ContentType ?? DetectContentType(formFileStream, nameof(formFileStream)); - // TODO: automate content-type detection - // https://github.com/Azure/azure-sdk-for-net/issues/10329 - - ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), ContentType.Pdf, formFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken).ConfigureAwait(false); + ResponseWithHeaders response = await ServiceClient.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), contentType, formFileStream, includeTextDetails: recognizeOptions.IncludeTextContent, cancellationToken).ConfigureAwait(false); return new RecognizeCustomFormsOperation(ServiceClient, modelId, response.Headers.OperationLocation); } @@ -318,5 +285,35 @@ public virtual FormTrainingClient GetFormTrainingClient() } #endregion Training client + + /// + /// Used as part of argument validation. 
Detects the of a stream and + /// throws an in case of failure. + /// + /// The stream to which the content type detection attempt will be performed. + /// The original parameter name of the . Used to create exceptions in case of failure. + /// The detected . + /// Happens when detection fails or cannot be performed. + private static ContentType DetectContentType(Stream stream, string paramName) + { + ContentType contentType; + + if (!stream.CanSeek) + { + throw new ArgumentException($"Content type cannot be detected because stream is not seekable. It can be manually set in the {nameof(RecognizeOptions)}.", paramName); + } + + if (!stream.CanRead) + { + throw new ArgumentException($"Content type cannot be detected because stream is not readable. It can be manually set in the {nameof(RecognizeOptions)}.", paramName); + } + + if (!stream.TryGetContentType(out contentType)) + { + throw new ArgumentException($"Content type of the stream could not be detected. It can be manually set in the {nameof(RecognizeOptions)}.", paramName); + } + + return contentType; + } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClientOptions.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClientOptions.cs index e56a4b4404fb..bb5e13eafa1e 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClientOptions.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerClientOptions.cs @@ -35,7 +35,6 @@ public enum ServiceVersion #pragma warning restore CA1707 // Identifiers should not contain underscores } - /// /// public ServiceVersion Version { get; } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptExtensions.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptExtensions.cs index 3e0b59699db8..0309409bcb28 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptExtensions.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptExtensions.cs @@ -1,10 +1,6 @@ // Copyright 
(c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -using System; -using System.Collections.Generic; -using System.Text; - namespace Azure.AI.FormRecognizer.Models { /// diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RecognizeOptions.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RecognizeOptions.cs index 2339d96fa55f..0ea82486f624 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RecognizeOptions.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RecognizeOptions.cs @@ -17,5 +17,8 @@ public RecognizeOptions() /// public bool IncludeTextContent { get; set; } = false; + /// + /// + public ContentType? ContentType { get; set; } = null; } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/StreamExtensions.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/StreamExtensions.cs new file mode 100644 index 000000000000..1ad5de7f1420 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/StreamExtensions.cs @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.IO; +using Azure.AI.FormRecognizer.Models; + +namespace Azure.AI.FormRecognizer +{ + internal static class StreamExtensions + { + /// The set of bytes expected to be present at the start of PDF files. + private static byte[] PdfHeader = new byte[] { 0x25, 0x50, 0x44, 0x46 }; + + /// The set of bytes expected to be present at the start of PNG files. + private static byte[] PngHeader = new byte[] { 0x89, 0x50, 0x4E, 0x47 }; + + /// The set of bytes expected to be present at the start of JPEG files. + private static byte[] JpegHeader = new byte[] { 0xFF, 0xD8 }; + + /// The set of bytes expected to be present at the start of TIFF (little-endian) files. + private static byte[] TiffLeHeader = new byte[] { 0x49, 0x49, 0x2A, 0x00 }; + + /// The set of bytes expected to be present at the start of TIFF (big-endian) files. 
+ private static byte[] TiffBeHeader = new byte[] { 0x4D, 0x4D, 0x00, 0x2A }; + + /// + /// Attempts to detect the of a stream of bytes. The algorithm searches through + /// the first set of bytes in the stream and compares it to well-known file signatures. + /// + /// The stream to which the content type detection attempt will be performed. + /// If the detection is successful, outputs the detected content type. Otherwise, default. + /// true if the detection was successful. Otherwise, false. + /// Happens when is not seekable or readable. + public static bool TryGetContentType(this Stream stream, out ContentType contentType) + { + if (stream.BeginsWithHeader(PdfHeader)) + { + contentType = ContentType.Pdf; + } + else if (stream.BeginsWithHeader(PngHeader)) + { + contentType = ContentType.Png; + } + else if (stream.BeginsWithHeader(JpegHeader)) + { + contentType = ContentType.Jpeg; + } + else if (stream.BeginsWithHeader(TiffLeHeader) || stream.BeginsWithHeader(TiffBeHeader)) + { + contentType = ContentType.Tiff; + } + else + { + contentType = default; + return false; + } + + return true; + } + + /// + /// Determines whether a stream begins with a specified sequence of bytes. + /// + /// The stream to be verified. + /// The sequence of bytes expected to be at the start of . + /// true if the begins with the specified . Otherwise, false. 
+ private static bool BeginsWithHeader(this Stream stream, byte[] header) + { + var originalPosition = stream.Position; + + if (stream.Length - originalPosition < header.Length) + { + return false; + } + + foreach (var headerByte in header) + { + var streamByte = (byte)stream.ReadByte(); + + if (streamByte != headerByte) + { + stream.Position = originalPosition; + return false; + } + } + + stream.Position = originalPosition; + return true; + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/Invoice_1.tiff b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/Invoice_1.tiff new file mode 100644 index 000000000000..224fb82205b0 Binary files /dev/null and b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/Invoice_1.tiff differ diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/contoso-allinone.png b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/contoso-allinone.png new file mode 100644 index 000000000000..f2ce6e99d925 Binary files /dev/null and b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Assets/contoso-allinone.png differ diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Extensions/StreamExtensionsTests.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Extensions/StreamExtensionsTests.cs new file mode 100644 index 000000000000..39c885d92c47 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Extensions/StreamExtensionsTests.cs @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.IO; +using System.Text; +using Azure.AI.FormRecognizer.Models; +using NUnit.Framework; + +namespace Azure.AI.FormRecognizer.Tests +{ + /// + /// The suite of tests for the class. + /// + public class StreamExtensionsTests + { + /// + /// Verifies functionality of the method. 
+ /// + [Test] + public void TryGetContentTypeDetectsPdf() + { + using var stream = new FileStream(TestEnvironment.RetrieveInvoicePath(1, ContentType.Pdf), FileMode.Open); + + Assert.True(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(ContentType.Pdf, contentType); + } + + /// + /// Verifies functionality of the method. + /// + [Test] + public void TryGetContentTypeDetectsPng() + { + using var stream = new FileStream(TestEnvironment.PngReceiptPath, FileMode.Open); + + Assert.True(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(ContentType.Png, contentType); + } + + /// + /// Verifies functionality of the method. + /// + [Test] + public void TryGetContentTypeDetectsJpeg() + { + using var stream = new FileStream(TestEnvironment.JpgReceiptPath, FileMode.Open); + + Assert.True(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(ContentType.Jpeg, contentType); + } + + /// + /// Verifies functionality of the method. + /// + [Test] + public void TryGetContentTypeDetectsLittleEndianTiff() + { + using var stream = new FileStream(TestEnvironment.RetrieveInvoicePath(1, ContentType.Tiff), FileMode.Open); + + Assert.True(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(ContentType.Tiff, contentType); + } + + /// + /// Verifies functionality of the method. + /// + [Test] + public void TryGetContentTypeDetectsBigEndianTiff() + { + // Currently there are no big-endian TIFF files available in the test assets, so + // we'll simulate one in a MemoryStream. These files start with the "MM\0*" header + // in ASCII encoding. + + using var stream = new MemoryStream(Encoding.ASCII.GetBytes("MM\0*I am a completely normal TIFF file. Trust me.")); + + Assert.True(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(ContentType.Tiff, contentType); + } + + /// + /// Verifies functionality of the method. 
+ /// + [Test] + public void TryGetContentTypeCannotDetectUnknownType() + { + using var stream = new MemoryStream(Encoding.UTF8.GetBytes("I am probably unknown.")); + + Assert.False(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(default(ContentType), contentType); + } + + /// + /// Verifies functionality of the method. + /// + [Test] + public void TryGetContentTypeDoesNotThrowForEmptyStream() + { + using var stream = new MemoryStream(Array.Empty()); + + Assert.False(stream.TryGetContentType(out var contentType)); + Assert.AreEqual(default(ContentType), contentType); + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientLiveTests.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientLiveTests.cs index 2bac4add1afb..47955599d2aa 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientLiveTests.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientLiveTests.cs @@ -45,7 +45,7 @@ public async Task StartRecognizeContentPopulatesFormPage(bool useStream) if (useStream) { - using var stream = new FileStream(TestEnvironment.RetrieveInvoicePath(1), FileMode.Open); + using var stream = new FileStream(TestEnvironment.RetrieveInvoicePath(1, ContentType.Pdf), FileMode.Open); operation = await client.StartRecognizeContentAsync(stream); } else @@ -135,12 +135,12 @@ public async Task StartRecognizeReceiptsPopulatesExtractedReceipt(bool useStream if (useStream) { - using var stream = new FileStream(TestEnvironment.ReceiptPath, FileMode.Open); - operation = await client.StartRecognizeReceiptsAsync(stream, ContentType.Jpeg); + using var stream = new FileStream(TestEnvironment.JpgReceiptPath, FileMode.Open); + operation = await client.StartRecognizeReceiptsAsync(stream); } else { - var uri = new Uri(TestEnvironment.ReceiptUri); + var uri = new Uri(TestEnvironment.JpgReceiptUri); 
operation = await client.StartRecognizeReceiptsFromUriAsync(uri, default); } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientTests.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientTests.cs index 894cfc00d126..cf7cdf666d3b 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientTests.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/FormRecognizerClient/FormRecognizerClientTests.cs @@ -83,12 +83,13 @@ public void StartRecognizeContentRequiresTheFormFileStream() public void StartRecognizeContentRespectsTheCancellationToken() { var client = CreateInstrumentedClient(); + var options = new RecognizeOptions { ContentType = ContentType.Pdf }; using var stream = new MemoryStream(Array.Empty()); using var cancellationSource = new CancellationTokenSource(); cancellationSource.Cancel(); - Assert.ThrowsAsync(async () => await client.StartRecognizeContentAsync(stream, default, cancellationSource.Token)); + Assert.ThrowsAsync(async () => await client.StartRecognizeContentAsync(stream, options, cancellationSource.Token)); } /// @@ -116,7 +117,7 @@ public void StartRecognizeContentFromUriRespectsTheCancellationToken() using var cancellationSource = new CancellationTokenSource(); cancellationSource.Cancel(); - Assert.ThrowsAsync(async () => await client.StartRecognizeContentFromUriAsync(fakeUri, default, cancellationSource.Token)); + Assert.ThrowsAsync(async () => await client.StartRecognizeContentFromUriAsync(fakeUri, cancellationToken: cancellationSource.Token)); } /// @@ -128,7 +129,7 @@ public void StartRecognizeContentFromUriRespectsTheCancellationToken() public void StartRecognizeReceiptsRequiresTheReceiptFileStream() { var client = CreateInstrumentedClient(); - Assert.ThrowsAsync(async () => await client.StartRecognizeReceiptsAsync(null, ContentType.Jpeg)); + Assert.ThrowsAsync(async () => await 
client.StartRecognizeReceiptsAsync(null)); } /// @@ -139,12 +140,13 @@ public void StartRecognizeReceiptsRequiresTheReceiptFileStream() public void StartRecognizeReceiptsRespectsTheCancellationToken() { var client = CreateInstrumentedClient(); + var options = new RecognizeOptions { ContentType = ContentType.Pdf }; using var stream = new MemoryStream(Array.Empty()); using var cancellationSource = new CancellationTokenSource(); cancellationSource.Cancel(); - Assert.ThrowsAsync(async () => await client.StartRecognizeReceiptsAsync(stream, ContentType.Jpeg, cancellationToken: cancellationSource.Token)); + Assert.ThrowsAsync(async () => await client.StartRecognizeReceiptsAsync(stream, recognizeOptions: options, cancellationToken: cancellationSource.Token)); } /// diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Infrastructure/TestEnvironment.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Infrastructure/TestEnvironment.cs index af0c51b21b9e..31b970314742 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Infrastructure/TestEnvironment.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/Infrastructure/TestEnvironment.cs @@ -1,8 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +using System; using System.IO; using System.Reflection; +using Azure.AI.FormRecognizer.Models; namespace Azure.AI.FormRecognizer.Tests { @@ -21,35 +23,58 @@ public static class TestEnvironment private const string AssetsFolderName = "Assets"; /// The name of the JPG file which contains the receipt to be used for tests. - private const string ReceiptFilename = "contoso-receipt.jpg"; + private const string JpgReceiptFilename = "contoso-receipt.jpg"; - /// The format to generate the filenames of the PDF forms to be used for tests. - private const string InvoiceFilenameFormat = "Invoice_{0}.pdf"; + /// The name of the PNG file which contains the receipt to be used for tests. 
+ private const string PngReceiptFilename = "contoso-allinone.png"; + + /// The format to generate the filenames of the forms to be used for tests. + private const string InvoiceFilenameFormat = "Invoice_{0}.{1}"; /// The format to generate the GitHub URIs of the files to be used for tests. private const string FileUriFormat = "https://raw.githubusercontent.com/Azure/azure-sdk-for-net/master/sdk/formrecognizer/Azure.AI.FormRecognizer/tests/{0}/{1}"; + /// + /// The name of the directory where the running assembly is located. + /// + /// The name of the current working directory. + private static string CurrentWorkingDirectory => Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + /// /// The relative path to the JPG file which contains the receipt to be used for tests. /// /// The relative path to the JPG file. - public static string ReceiptPath => Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), AssetsFolderName, ReceiptFilename); + public static string JpgReceiptPath => Path.Combine(CurrentWorkingDirectory, AssetsFolderName, JpgReceiptFilename); + + /// + /// The relative path to the PNG file which contains the receipt to be used for tests. + /// + /// The relative path to the PNG file. + public static string PngReceiptPath => Path.Combine(CurrentWorkingDirectory, AssetsFolderName, PngReceiptFilename); /// /// The URI string to the JPG file which contains the receipt to be used for tests. /// /// The URI string to the JPG file. - public static string ReceiptUri => string.Format(FileUriFormat, AssetsFolderName, ReceiptFilename); + public static string JpgReceiptUri => string.Format(FileUriFormat, AssetsFolderName, JpgReceiptFilename); /// - /// Retrieves the relative path to a PDF form available in the test assets. + /// Retrieves the relative path to a PDF or TIFF form available in the test assets. /// /// The index to specify the form to be retrieved. 
- /// The relative path to the PDF form corresponding to the specified index. - public static string RetrieveInvoicePath(int index) + /// The type of the form to be retrieved. Currently only PDF and TIFF are available. + /// The relative path to the PDF or TIFF form corresponding to the specified index. + public static string RetrieveInvoicePath(int index, ContentType contentType) { - var filename = string.Format(InvoiceFilenameFormat, index); - return Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), AssetsFolderName, filename); + var extension = contentType switch + { + ContentType.Pdf => "pdf", + ContentType.Tiff => "tiff", + _ => throw new ArgumentException("The requested content type is not available.", nameof(contentType)) + }; + + var filename = string.Format(InvoiceFilenameFormat, index, extension); + return Path.Combine(CurrentWorkingDirectory, AssetsFolderName, filename); } /// @@ -59,7 +84,7 @@ public static string RetrieveInvoicePath(int index) /// The URI string to the PDF form corresponding to the specified index. public static string RetrieveInvoiceUri(int index) { - var filename = string.Format(InvoiceFilenameFormat, index); + var filename = string.Format(InvoiceFilenameFormat, index, "pdf"); return string.Format(FileUriFormat, AssetsFolderName, filename); } }