diff --git a/eng/.docsettings.yml b/eng/.docsettings.yml index 8310415b9d3f..73fba2778777 100644 --- a/eng/.docsettings.yml +++ b/eng/.docsettings.yml @@ -140,6 +140,7 @@ known_content_issues: - ['sdk/extensions/Azure.AspNetCore.DataProtection.Keys/README.md','azure-sdk-tools/issues/404'] - ['sdk/extensions/Azure.Extensions.Configuration.Secrets/README.md', 'azure-sdk-tools/issues/404'] - ['sdk/search/README.md','azure-sdk-tools/issues/42'] + - ['sdk/formrecognizer/Azure.AI.FormRecognizer/README.md','#5499'] # .net climbs upwards. placing these to prevent assigning readmes to the wrong project package_indexing_exclusion_list: diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/README.md b/sdk/formrecognizer/Azure.AI.FormRecognizer/README.md new file mode 100644 index 000000000000..766d958e7952 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/README.md @@ -0,0 +1,197 @@ +# Azure Cognitive Services Form Recognizer client library for .NET +Azure Cognitive Services Form Recognizer is a cloud service that uses machine learning to extract text and table data from form documents. It allows you to train custom models using your own forms, to extract field names and values, and table data from them. It also provides prebuilt models you can use to extract values from receipts, or tables from any form. + +[Source code][formreco_client_src] | [Product documentation][formreco_docs] + +## Getting started + +### Prerequisites +* An [Azure subscription][azure_sub]. +* An existing Form Recognizer resource. If you need to create the resource, you can use the [Azure Portal][azure_portal] or [Azure CLI][azure_cli]. + + + +### Authenticate a Form Recognizer client +In order to interact with the Form Recognizer service, you'll need to select either a `ReceiptClient`, `FormLayoutClient`, or `CustomFormClient`, and create an instance of this class. In the following samples, we will use `CustomFormClient` as an example. 
You will need an **endpoint**, and either an **API key** or ``TokenCredential`` to instantiate a client object. For more information regarding authenticating with cognitive services, see [Authenticate requests to Azure Cognitive Services][cognitive_auth]. + +#### Get Subscription Key + +You can obtain the endpoint and subscription key from the resource information in the [Azure Portal][azure_portal]. + +Alternatively, you can use the [Azure CLI][azure_cli] snippet below to get the subscription key from the Form Recognizer resource. + +```PowerShell +az cognitiveservices account keys list --resource-group <your-resource-group-name> --name <your-resource-name> +``` + +#### Create CustomFormClient with Subscription Key Credential +Once you have the value for the subscription key, create a `FormRecognizerApiKeyCredential`. This will allow you to update the subscription key by using the `UpdateCredential` method without creating a new client. + +With the value of the endpoint and a `FormRecognizerApiKeyCredential`, you can create the [CustomFormClient][formreco_custom_client_class]: + +```C# +string endpoint = "<endpoint>"; +string subscriptionKey = "<subscriptionKey>"; +var credential = new FormRecognizerApiKeyCredential(subscriptionKey); +var client = new CustomFormClient(new Uri(endpoint), credential); +``` + + + +## Key concepts + +### ReceiptClient +A `ReceiptClient` is the Form Recognizer interface to use for analyzing receipts. It provides operations to extract receipt field values and locations from receipts from the United States. + +### FormLayoutClient +A `FormLayoutClient` is the Form Recognizer interface to extract layout items from forms. It provides operations to extract table data and geometry. + +### CustomFormClient +A `CustomFormClient` is the Form Recognizer interface to use for creating, using, and managing custom machine-learned models. It provides operations for training models on forms you provide, and extracting field values and locations from your custom forms. 
It also provides operations for viewing and deleting models, as well as understanding how close you are to reaching subscription limits for the number of models you can train. + +### Long-Running Operations +Long-running operations are operations which consist of an initial request sent to the service to start an operation, followed by polling the service at intervals to determine whether the operation has completed or failed, and if it has succeeded, to get the result. + +Methods that train models or extract values from forms are modeled as long-running operations. The client exposes a `Start<operation-name>` method that returns an `Operation<T>`. Callers should wait for the operation to complete by calling `WaitForCompletionAsync()` on the operation returned from the `Start<operation-name>` method. A sample code snippet is provided to illustrate using long-running operations [below](#extracting-receipt-values-with-a-long-running-operation). + +### Training models +Using the `CustomFormClient`, you can train a machine-learned model on your own form type. The resulting model will be able to extract values from the types of forms it was trained on. + +#### Training without labels +A model trained without labels uses unsupervised learning to understand the layout and relationships between field names and values in your forms. The learning algorithm clusters the training forms by type and learns what fields and tables are present in each form type. + +This approach doesn't require manual data labeling or intensive coding and maintenance, and we recommend you try this method first when training custom models. + +#### Training with labels +A model trained with labels uses supervised learning to extract values you specify by adding labels to your training forms. The learning algorithm uses a label file you provide to learn what fields are found at various locations in the form, and learns to extract just those values. 
+ +This approach can result in better-performing models, and those models can work with more complex form structures. + +### Extracting values from forms +Using the `CustomFormClient`, you can use your own trained models to extract field values and locations, as well as table data, from forms of the type you trained your models on. The output of models trained with and without labels differs as described below. + +#### Using models trained without labels +Models trained without labels consider each form page to be a different form type. For example, if you train your model on 3-page forms, it will learn that these are three different types of forms. When you send a form to it for analysis, it will return a collection of three pages, where each page contains the field names, values, and locations, as well as table data, found on that page. + +#### Using models trained with labels +Models trained with labels consider a form as a single unit. For example, if you train your model on 3-page forms with labels, it will learn to extract field values from the locations you've labeled across all pages in the form. If you sent a document containing two forms to it for analysis, it would return a collection of two forms, where each form contains the field names, values, and locations, as well as table data, found in that form. Fields and tables have page numbers to identify the pages where they were found. + +### Managing Custom Models +Using the `CustomFormClient`, you can get, list, and delete the custom models you've trained. You can also view the count of models you've trained and the maximum number of models your subscription will allow you to store. + +## Examples +The following section provides several code snippets illustrating common patterns used in the Form Recognizer .NET API. 
+ +### Extracting receipt values with a long-running operation +```C# +string endpoint = "<endpoint>"; +string subscriptionKey = "<subscriptionKey>"; +var credential = new FormRecognizerApiKeyCredential(subscriptionKey); +var client = new ReceiptClient(new Uri(endpoint), credential); + +using (FileStream stream = new FileStream(@"C:\path\to\receipt.jpg", FileMode.Open)) +{ + var extractReceiptOperation = client.StartExtractReceipts(stream, ContentType.Jpeg); + await extractReceiptOperation.WaitForCompletionAsync(); + if (extractReceiptOperation.HasValue) + { + IReadOnlyList<ExtractedReceipt> result = extractReceiptOperation.Value; + } +} +``` + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla]. + +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments. 
+ +![Impressions](https://azure-sdk-impressions.azurewebsites.net/api/impressions/azure-sdk-for-net%2Fsdk%2Fformrecognizer%2FAzure.AI.FormRecognizer%2FREADME.png) + + + +[formreco_client_src]: https://github.com/Azure/azure-sdk-for-net/tree/master/sdk/formrecognizer/Azure.AI.FormRecognizer/src +[formreco_docs]: https://docs.microsoft.com/en-us/azure/cognitive-services/form-recognizer/ +[formreco_refdocs]: https://aka.ms/azsdk-net-textanalytics-ref-docs + + +[formreco_rest_api]: https://westus2.dev.cognitive.microsoft.com/docs/services/form-recognizer-api-v2-preview +[cognitive_resource]: https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account + + + + +[formreco_custom_client_class]: src/CustomFormClient.cs +[azure_identity]: https://github.com/Azure/azure-sdk-for-net/tree/master/sdk/identity/Azure.Identity +[cognitive_auth]: https://docs.microsoft.com/en-us/azure/cognitive-services/authentication +[register_aad_app]: https://docs.microsoft.com/azure/cognitive-services/authentication#assign-a-role-to-a-service-principal +[aad_grant_access]: https://docs.microsoft.com/azure/cognitive-services/authentication#assign-a-role-to-a-service-principal +[custom_subdomain]: https://docs.microsoft.com/azure/cognitive-services/authentication#create-a-resource-with-a-custom-subdomain +[DefaultAzureCredential]: ../../identity/Azure.Identity/README.md + + + +[azure_cli]: https://docs.microsoft.com/cli/azure +[azure_sub]: https://azure.microsoft.com/free/ +[nuget]: https://www.nuget.org/ +[azure_portal]: https://portal.azure.com + +[cla]: https://cla.microsoft.com +[code_of_conduct]: https://opensource.microsoft.com/codeofconduct/ +[coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/ +[coc_contact]: mailto:opencode@microsoft.com \ No newline at end of file diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/api/Azure.AI.FormRecognizer.netstandard2.0.cs 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/api/Azure.AI.FormRecognizer.netstandard2.0.cs index 3a0be52da49b..77cd2284d10a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/api/Azure.AI.FormRecognizer.netstandard2.0.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/api/Azure.AI.FormRecognizer.netstandard2.0.cs @@ -5,10 +5,10 @@ public partial class FormLayoutClient protected FormLayoutClient() { } public FormLayoutClient(System.Uri endpoint, Azure.AI.FormRecognizer.Models.FormRecognizerApiKeyCredential credential) { } public FormLayoutClient(System.Uri endpoint, Azure.AI.FormRecognizer.Models.FormRecognizerApiKeyCredential credential, Azure.AI.FormRecognizer.FormRecognizerClientOptions options) { } - public virtual Azure.Operation> StartExtractLayout(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual Azure.Operation> StartExtractLayout(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual System.Threading.Tasks.Task>> StartExtractLayoutAsync(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual System.Threading.Tasks.Task>> StartExtractLayoutAsync(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractLayouts(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, System.Threading.CancellationToken cancellationToken = 
default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractLayouts(System.Uri uri, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractLayoutsAsync(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractLayoutsAsync(System.Uri uri, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } } public partial class FormRecognizerClientOptions : Azure.Core.ClientOptions { @@ -24,10 +24,10 @@ public partial class ReceiptClient protected ReceiptClient() { } public ReceiptClient(System.Uri endpoint, Azure.AI.FormRecognizer.Models.FormRecognizerApiKeyCredential credential) { } public ReceiptClient(System.Uri endpoint, Azure.AI.FormRecognizer.Models.FormRecognizerApiKeyCredential credential, Azure.AI.FormRecognizer.FormRecognizerClientOptions options) { } - public virtual Azure.Response ExtractReceipt(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual Azure.Response ExtractReceipt(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual System.Threading.Tasks.Task> ExtractReceiptAsync(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } 
- public virtual System.Threading.Tasks.Task> ExtractReceiptAsync(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractReceipts(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractReceipts(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractReceiptsAsync(System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractReceiptsAsync(System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } } } namespace Azure.AI.FormRecognizer.Custom @@ -43,10 +43,14 @@ public CustomFormClient(System.Uri endpoint, Azure.AI.FormRecognizer.Models.Form public virtual Azure.AsyncPageable GetModelInfosAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual Azure.Response GetSubscriptionProperties(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual System.Threading.Tasks.Task> GetSubscriptionPropertiesAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public 
virtual Azure.Operation StartExtractForm(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual Azure.Operation StartExtractForm(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual System.Threading.Tasks.Task> StartExtractFormAsync(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.FormContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } - public virtual System.Threading.Tasks.Task> StartExtractFormAsync(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractFormPages(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractFormPages(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractFormPagesAsync(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + 
public virtual System.Threading.Tasks.Task>> StartExtractFormPagesAsync(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractLabeledForms(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Operation> StartExtractLabeledForms(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractLabeledFormsAsync(string modelId, System.IO.Stream stream, Azure.AI.FormRecognizer.Models.ContentType contentType, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task>> StartExtractLabeledFormsAsync(string modelId, System.Uri uri, bool includeRawPageExtractions = false, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual Azure.Operation StartTraining(string source, Azure.AI.FormRecognizer.Custom.TrainingFileFilter filter = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual System.Threading.Tasks.Task> StartTrainingAsync(string source, Azure.AI.FormRecognizer.Custom.TrainingFileFilter filter = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual Azure.Operation StartTrainingWithLabels(string source, 
Azure.AI.FormRecognizer.Custom.TrainingFileFilter filter = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } @@ -64,7 +68,7 @@ internal CustomLabeledModel() { } public partial class CustomModel { internal CustomModel() { } - public System.Collections.Generic.IReadOnlyList LearnedForms { get { throw null; } } + public System.Collections.Generic.IReadOnlyList LearnedPages { get { throw null; } } public string ModelId { get { throw null; } } public Azure.AI.FormRecognizer.Custom.CustomModelInfo ModelInfo { get { throw null; } } public Azure.AI.FormRecognizer.Custom.TrainingInfo TrainingInfo { get { throw null; } } @@ -75,20 +79,73 @@ internal CustomModelInfo() { } public System.DateTimeOffset? CreatedOn { get { throw null; } } public System.DateTimeOffset? LastUpdatedOn { get { throw null; } } public string ModelId { get { throw null; } } - public Azure.AI.FormRecognizer.Models.ModelStatus TrainingStatus { get { throw null; } } + public Azure.AI.FormRecognizer.Custom.ModelStatus Status { get { throw null; } } } - public partial class CustomModelLearnedForm + public partial class CustomModelLearnedPage { - public CustomModelLearnedForm() { } + internal CustomModelLearnedPage() { } public string FormTypeId { get { throw null; } } public System.Collections.Generic.IReadOnlyList LearnedFields { get { throw null; } } } + public partial class ExtractedField + { + internal ExtractedField() { } + public float Confidence { get { throw null; } } + public string Name { get { throw null; } } + public Azure.AI.FormRecognizer.Models.BoundingBox NameBoundingBox { get { throw null; } } + public System.Collections.Generic.IReadOnlyList NameRawExtractedItems { get { throw null; } } + public string Value { get { throw null; } } + public Azure.AI.FormRecognizer.Models.BoundingBox ValueBoundingBox { get { throw null; } } + public System.Collections.Generic.IReadOnlyList ValueRawExtractedItems { get { throw null; } } + 
} + public partial class ExtractedLabeledField + { + internal ExtractedLabeledField() { } + public float? Confidence { get { throw null; } } + public string Label { get { throw null; } } + public int? PageNumber { get { throw null; } } + public System.Collections.Generic.IReadOnlyList RawExtractedItems { get { throw null; } } + public string Value { get { throw null; } } + public Azure.AI.FormRecognizer.Models.BoundingBox ValueBoundingBox { get { throw null; } } + } + public partial class ExtractedLabeledForm + { + internal ExtractedLabeledForm() { } + public int EndPageNumber { get { throw null; } } + public System.Collections.Generic.IReadOnlyList Fields { get { throw null; } } + public string FormType { get { throw null; } } + public System.Collections.Generic.IReadOnlyList RawExtractedPages { get { throw null; } } + public int StartPageNumber { get { throw null; } } + public System.Collections.Generic.IReadOnlyList Tables { get { throw null; } } + public string GetFieldValue(string label) { throw null; } + } + public partial class ExtractedLabeledTable : Azure.AI.FormRecognizer.Models.ExtractedTable + { + internal ExtractedLabeledTable() { } + public int PageNumber { get { throw null; } } + } + public partial class ExtractedPage + { + internal ExtractedPage() { } + public System.Collections.Generic.IReadOnlyList Fields { get { throw null; } } + public int? 
FormTypeId { get { throw null; } } + public int PageNumber { get { throw null; } } + public Azure.AI.FormRecognizer.Models.RawExtractedPage RawExtractedPage { get { throw null; } } + public System.Collections.Generic.IReadOnlyList Tables { get { throw null; } } + public string GetFieldValue(string fieldName) { throw null; } + } public partial class FieldPredictionAccuracy { internal FieldPredictionAccuracy() { } public float Accuracy { get { throw null; } } public string Label { get { throw null; } } } + public enum ModelStatus + { + Training = 0, + Ready = 1, + Invalid = 2, + } public partial class SubscriptionProperties { internal SubscriptionProperties() { } @@ -102,7 +159,7 @@ internal TrainingDocumentInfo() { } public string DocumentName { get { throw null; } } public System.Collections.Generic.IList Errors { get { throw null; } } public int PageCount { get { throw null; } set { } } - public Azure.AI.FormRecognizer.Models.TrainStatus Status { get { throw null; } } + public Azure.AI.FormRecognizer.Custom.TrainingStatus Status { get { throw null; } } } public partial class TrainingFileFilter { @@ -116,6 +173,12 @@ internal TrainingInfo() { } public System.Collections.Generic.IReadOnlyList PerDocumentInfo { get { throw null; } } public System.Collections.Generic.IReadOnlyList TrainingErrors { get { throw null; } } } + public enum TrainingStatus + { + Succeeded = 0, + PartiallySucceeded = 1, + Failed = 2, + } } namespace Azure.AI.FormRecognizer.Models { @@ -126,29 +189,10 @@ internal BoundingBox() { } } public enum ContentType { - ApplicationPdf = 0, - ImageJpeg = 1, - ImagePng = 2, - ImageTiff = 3, - } - public partial class ExtractedField - { - internal ExtractedField() { } - public float? 
Confidence { get { throw null; } } - public string Label { get { throw null; } } - public Azure.AI.FormRecognizer.Models.BoundingBox LabelBoundingBox { get { throw null; } } - public System.Collections.Generic.IReadOnlyList LabelRawExtractedItems { get { throw null; } } - public string Value { get { throw null; } } - public Azure.AI.FormRecognizer.Models.BoundingBox ValueBoundingBox { get { throw null; } } - public System.Collections.Generic.IReadOnlyList ValueRawExtractedItems { get { throw null; } } - } - public partial class ExtractedForm - { - internal ExtractedForm() { } - public int EndPageNumber { get { throw null; } } - public string LearnedFormType { get { throw null; } } - public System.Collections.Generic.IReadOnlyList Pages { get { throw null; } } - public int StartPageNumber { get { throw null; } } + Pdf = 0, + Png = 1, + Jpeg = 2, + Tiff = 3, } public partial class ExtractedLayoutPage { @@ -157,14 +201,6 @@ internal ExtractedLayoutPage() { } public Azure.AI.FormRecognizer.Models.RawExtractedPage RawExtractedPage { get { throw null; } } public System.Collections.Generic.IReadOnlyList Tables { get { throw null; } } } - public partial class ExtractedPage - { - internal ExtractedPage() { } - public System.Collections.Generic.IReadOnlyList Fields { get { throw null; } } - public int PageNumber { get { throw null; } } - public Azure.AI.FormRecognizer.Models.RawExtractedPage RawExtractedPage { get { throw null; } } - public System.Collections.Generic.IReadOnlyList Tables { get { throw null; } } - } public partial class ExtractedReceipt { internal ExtractedReceipt() { } @@ -174,7 +210,7 @@ internal ExtractedReceipt() { } public string MerchantAddress { get { throw null; } } public string MerchantName { get { throw null; } } public string MerchantPhoneNumber { get { throw null; } } - public Azure.AI.FormRecognizer.Models.RawExtractedPage RawExtractedPage { get { throw null; } } + public System.Collections.Generic.IReadOnlyList RawExtractedPage { get { throw 
null; } } public Azure.AI.FormRecognizer.Models.ExtractedReceiptType ReceiptType { get { throw null; } } public int StartPageNumber { get { throw null; } } public float? Subtotal { get { throw null; } } @@ -201,8 +237,8 @@ internal ExtractedReceiptItem() { } } public enum ExtractedReceiptType { - Unrecognized = 1, - Itemized = 2, + Unrecognized = 0, + Itemized = 1, } public partial class ExtractedTable { @@ -237,13 +273,6 @@ public enum FieldValueType Array = 6, Object = 7, } - public enum FormContentType - { - Pdf = 1, - Png = 2, - Jpeg = 3, - Tiff = 4, - } public partial class FormRecognizerApiKeyCredential { public FormRecognizerApiKeyCredential(string apiKey) { } @@ -252,27 +281,14 @@ public void UpdateCredential(string apiKey) { } public partial class FormRecognizerError { internal FormRecognizerError() { } - public string Code { get { throw null; } } - public string Message { get { throw null; } } + public string Code { get { throw null; } set { } } + public string Message { get { throw null; } set { } } } public enum LengthUnit { Pixel = 0, Inch = 1, } - public enum ModelStatus - { - Creating = 0, - Ready = 1, - Invalid = 2, - } - public enum OperationStatus - { - NotStarted = 0, - Running = 1, - Succeeded = 2, - Failed = 3, - } public partial class RawExtractedItem { internal RawExtractedItem() { } @@ -282,7 +298,6 @@ internal RawExtractedItem() { } public partial class RawExtractedLine : Azure.AI.FormRecognizer.Models.RawExtractedItem { internal RawExtractedLine() { } - public string Language { get { throw null; } } public System.Collections.Generic.IReadOnlyList Words { get { throw null; } } public static implicit operator string (Azure.AI.FormRecognizer.Models.RawExtractedLine line) { throw null; } } @@ -291,7 +306,6 @@ public partial class RawExtractedPage internal RawExtractedPage() { } public float Angle { get { throw null; } set { } } public float Height { get { throw null; } set { } } - public string Language { get { throw null; } set { } } public 
System.Collections.Generic.ICollection Lines { get { throw null; } set { } } public int Page { get { throw null; } set { } } public Azure.AI.FormRecognizer.Models.LengthUnit Unit { get { throw null; } set { } } @@ -303,10 +317,4 @@ internal RawExtractedWord() { } public float? Confidence { get { throw null; } } public static implicit operator string (Azure.AI.FormRecognizer.Models.RawExtractedWord word) { throw null; } } - public enum TrainStatus - { - Succeeded = 0, - PartiallySucceeded = 1, - Failed = 2, - } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Azure.AI.FormRecognizer.csproj b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Azure.AI.FormRecognizer.csproj index 1e5f5cd2c7da..3210a5fd17c5 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Azure.AI.FormRecognizer.csproj +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Azure.AI.FormRecognizer.csproj @@ -6,7 +6,6 @@ Azure Template $(RequiredTargetFrameworks) false - false $(NoWarn); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/BoundingBox.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/BoundingBox.cs index 11bfa21a6022..9a4c7aa7cc0c 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/BoundingBox.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/BoundingBox.cs @@ -7,6 +7,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class BoundingBox { internal BoundingBox(ICollection boundingBox) @@ -24,6 +26,8 @@ internal BoundingBox(ICollection boundingBox) } } + /// + /// public PointF[] Points { get; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormContentType.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ContentType.cs similarity index 56% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormContentType.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/src/ContentType.cs index a407c8e123bc..939fc566f77c 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormContentType.cs +++ 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ContentType.cs @@ -1,23 +1,30 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +using Azure.Core; + namespace Azure.AI.FormRecognizer.Models { /// /// Form content type for local files. /// - public enum FormContentType + [CodeGenSchema("ContentType")] + public enum ContentType { /// application/pdf - Pdf = 1, + [CodeGenSchemaMember("ApplicationPdf")] + Pdf, /// image/png - Png = 2, + [CodeGenSchemaMember("ImagePng")] + Png, /// image/jpeg - Jpeg = 3, + [CodeGenSchemaMember("ImageJpeg")] + Jpeg, /// image/tiff - Tiff = 4, + [CodeGenSchemaMember("ImageTiff")] + Tiff, } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomFormClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomFormClient.cs index 90a517258d2f..b51f677b9835 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomFormClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomFormClient.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License. using System; +using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -12,7 +13,9 @@ namespace Azure.AI.FormRecognizer.Custom { /// - /// The sample client. + /// The client to use with the Form Recognizer Azure Cognitive Service to train custom models from forms, + /// and to extract values from forms using those custom models. It also supports listing and deleting trained + /// models. /// public class CustomFormClient { @@ -32,14 +35,18 @@ protected CustomFormClient() /// /// Initializes a new instance of the . /// +#pragma warning disable AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service. 
public CustomFormClient(Uri endpoint, FormRecognizerApiKeyCredential credential) : this(endpoint, credential, new FormRecognizerClientOptions()) +#pragma warning restore AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service. { } /// /// Initializes a new instance of the . /// +#pragma warning disable AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service. public CustomFormClient(Uri endpoint, FormRecognizerApiKeyCredential credential, FormRecognizerClientOptions options) +#pragma warning restore AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service. { _diagnostics = new ClientDiagnostics(options); _pipeline = HttpPipelineBuilder.Build(options, new ApiKeyAuthenticationPolicy(credential)); @@ -47,6 +54,15 @@ public CustomFormClient(Uri endpoint, FormRecognizerApiKeyCredential credential, } #region Training + + /// + /// Trains a model from a collection of custom forms in a blob storage container. + /// + /// An externally accessible Azure storage blob container Uri. + /// Filter to apply to the documents in the source path for training. + /// A controlling the request lifetime. + /// A Operation<CustomModel> to wait on this long-running operation. Its Operation < CustomModel > .Value upon successful + /// completion will contain meta-data about the trained model. public virtual Operation StartTraining(string source, TrainingFileFilter filter = default, CancellationToken cancellationToken = default) { TrainRequest_internal trainRequest = new TrainRequest_internal() { Source = source }; @@ -65,6 +81,14 @@ public virtual Operation StartTraining(string source, TrainingFileF return new TrainingOperation(_operations, response.Headers.Location); } + /// + /// Trains a model from a collection of custom forms in a blob storage container. + /// + /// An externally accessible Azure storage blob container Uri. 
+ /// Filter to apply to the documents in the source path for training. + /// A controlling the request lifetime. + /// A Operation<CustomModel> to wait on this long-running operation. Its Operation < CustomModel > .Value upon successful + /// completion will contain meta-data about the trained model. public virtual async Task> StartTrainingAsync(string source, TrainingFileFilter filter = default, CancellationToken cancellationToken = default) { TrainRequest_internal trainRequest = new TrainRequest_internal() { Source = source }; @@ -80,6 +104,13 @@ public virtual async Task> StartTrainingAsync(string sour return new TrainingOperation(_operations, response.Headers.Location); } + /// + /// Trains a model from a collection of custom forms and a label file in a blob storage container. + /// + /// An externally accessible Azure storage blob container Uri. + /// Filter to apply to the documents in the source path for training. + /// A controlling the request lifetime. + /// A to wait on this long-running operation. public virtual Operation StartTrainingWithLabels(string source, TrainingFileFilter filter = default, CancellationToken cancellationToken = default) { TrainRequest_internal trainRequest = new TrainRequest_internal() { Source = source, UseLabelFile = true }; @@ -95,6 +126,13 @@ public virtual Operation StartTrainingWithLabels(string sour return new TrainingWithLabelsOperation(_operations, response.Headers.Location); } + /// + /// Trains a model from a collection of custom forms and a label file in a blob storage container. + /// + /// An externally accessible Azure storage blob container Uri. + /// Filter to apply to the documents in the source path for training. + /// A controlling the request lifetime. + /// A to wait on this long-running operation. 
public virtual async Task> StartTrainingWithLabelsAsync(string source, TrainingFileFilter filter = default, CancellationToken cancellationToken = default) { TrainRequest_internal trainRequest = new TrainRequest_internal() { Source = source, UseLabelFile = true }; @@ -113,61 +151,202 @@ public virtual async Task> StartTrainingWithLabels #endregion Training #region Analyze - public virtual Operation StartExtractForm(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + + #region Unsupervised + + /// + /// Extract pages from one or more forms, using a model trained without labels. + /// + /// The id of the model to use for extracting form values. + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted pages from the input document. 
+ public virtual Operation> StartExtractFormPages(string modelId, Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 ResponseWithHeaders response = _operations.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken); - return new ExtractFormOperation(_operations, modelId, response.Headers.OperationLocation); + return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation); } - public virtual Operation StartExtractForm(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extract pages from one or more forms, using a model trained without labels. + /// + /// The id of the model to use for extracting form values. + /// The absolute URI of the remote file to extract elements from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted pages from the input document. 
+ public virtual Operation> StartExtractFormPages(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; ResponseWithHeaders response = _operations.RestClient.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken); - return new ExtractFormOperation(_operations, modelId, response.Headers.OperationLocation); + return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation); } - public virtual async Task> StartExtractFormAsync(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extract pages from one or more forms, using a model trained without labels. + /// + /// The id of the model to use for extracting form values. + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted pages from the input document. 
+ public virtual async Task>> StartExtractFormPagesAsync(string modelId, Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 ResponseWithHeaders response = await _operations.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken).ConfigureAwait(false); - return new ExtractFormOperation(_operations, modelId, response.Headers.OperationLocation); + return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation); } - public virtual async Task> StartExtractFormAsync(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + + /// + /// Extract pages from one or more forms, using a model trained without labels. + /// + /// The id of the model to use for extracting form values. + /// The absolute URI of the remote file to extract elements from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted pages from the input document. 
+ public virtual async Task>> StartExtractFormPagesAsync(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; ResponseWithHeaders response = await _operations.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken).ConfigureAwait(false); - return new ExtractFormOperation(_operations, modelId, response.Headers.OperationLocation); + return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation); } + #endregion + + #region Supervised + + /// + /// Extract form content from one or more forms, using a model trained with labels. + /// + /// The id of the model to use for extracting form values. + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted forms from the input document. + public virtual Operation> StartExtractLabeledForms(string modelId, Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + { + // TODO: automate content-type detection + // https://github.com/Azure/azure-sdk-for-net/issues/10329 + ResponseWithHeaders response = _operations.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken); + return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation); + } + + /// + /// Extract form content from one or more forms, using a model trained with labels. 
+ /// + /// The id of the model to use for extracting form values. + /// The absolute URI of the remote file to extract elements from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted forms from the input document. + public virtual Operation> StartExtractLabeledForms(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + { + SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; + ResponseWithHeaders response = _operations.RestClient.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken); + return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation); + } + + /// + /// Extract form content from one or more forms, using a model trained with labels. + /// + /// The id of the model to use for extracting form values. + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted forms from the input document. 
+ public virtual async Task>> StartExtractLabeledFormsAsync(string modelId, Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + { + // TODO: automate content-type detection + // https://github.com/Azure/azure-sdk-for-net/issues/10329 + ResponseWithHeaders response = await _operations.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken).ConfigureAwait(false); + return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation); + } + + /// + /// Extract form content from one or more forms, using a model trained with labels. + /// + /// The id of the model to use for extracting form values. + /// The absolute URI of the remote file to extract elements from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A controlling the request lifetime. + /// A Operation<IReadOnlyList<ExtractedPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedPage>>.Value upon successful + /// completion will contain extracted forms from the input document. + public virtual async Task>> StartExtractLabeledFormsAsync(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + { + SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; + ResponseWithHeaders response = await _operations.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken).ConfigureAwait(false); + return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation); + } + #endregion #endregion Analyze #region CRUD Ops + /// + /// Delete the model with the specified model ID. + /// + /// The ID of the model to delete. + /// A controlling the request lifetime. 
+ /// public virtual Response DeleteModel(string modelId, CancellationToken cancellationToken = default) { return _operations.DeleteCustomModel(new Guid(modelId), cancellationToken); } + /// + /// Delete the model with the specified model ID. + /// + /// The ID of the model to delete. + /// A controlling the request lifetime. + /// public virtual async Task DeleteModelAsync(string modelId, CancellationToken cancellationToken = default) { return await _operations.DeleteCustomModelAsync(new Guid(modelId), cancellationToken).ConfigureAwait(false); } + /// + /// Get a collection of items describing the models trained on this subscription + /// and their training status. + /// + /// + /// public virtual Pageable GetModelInfos(CancellationToken cancellationToken = default) { return _operations.GetCustomModelsPageableModelInfo(GetModelOptions.Full, cancellationToken); } + /// + /// Get a collection of items describing the models trained on this subscription + /// and their training status. + /// + /// + /// public virtual AsyncPageable GetModelInfosAsync(CancellationToken cancellationToken = default) { return _operations.GetCustomModelsPageableModelInfoAsync(GetModelOptions.Full, cancellationToken); } /// + /// Get the number of models trained on this subscription and the subscription limits. /// + /// + /// public virtual Response GetSubscriptionProperties(CancellationToken cancellationToken = default) { Response response = _operations.RestClient.GetCustomModels(GetModelOptions.Summary, cancellationToken); @@ -175,7 +354,10 @@ public virtual Response GetSubscriptionProperties(Cancel } /// + /// Get the number of models trained on this subscription and the subscription limits. 
/// + /// + /// public virtual async Task> GetSubscriptionPropertiesAsync(CancellationToken cancellationToken = default) { Response response = await _operations.RestClient.GetCustomModelsAsync(GetModelOptions.Summary, cancellationToken).ConfigureAwait(false); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomLabeledModel.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomLabeledModel.cs index da3772e14013..84ae62d06ef8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomLabeledModel.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomLabeledModel.cs @@ -3,9 +3,13 @@ using System.Collections.Generic; using Azure.AI.FormRecognizer.Models; +using System.Linq; namespace Azure.AI.FormRecognizer.Custom { + /// + /// Description of a custom model that was trained with labels. + /// public class CustomLabeledModel { internal CustomLabeledModel(Model_internal model) @@ -17,10 +21,30 @@ internal CustomLabeledModel(Model_internal model) TrainingInfo = new TrainingInfo(model.TrainResult); } + /// + /// The unique identifier of the model. + /// public string ModelId { get; } + + /// + /// The mean of the prediction accuracies for each field. + /// public float AveragePredictionAccuracy { get; } + + /// + /// A collection of prediction accuracies per field. These indicate the ability of the model + /// to correctly predict the value of a field for a given label. + /// public IReadOnlyList PredictionAccuracies { get; } + + /// + /// Information about documents used to train the model and errors encountered during training. + /// public TrainingInfo TrainingInfo { get; } + + /// + /// Information about the trained model, including model ID and training completion status. 
+ /// public CustomModelInfo ModelInfo { get; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModel.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModel.cs index d27422b5b9f0..b0e112be44a6 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModel.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModel.cs @@ -3,43 +3,51 @@ using System.Collections.Generic; using Azure.AI.FormRecognizer.Models; +using System.Linq; namespace Azure.AI.FormRecognizer.Custom { + /// + /// Description of a custom model that was trained without labels. + /// public class CustomModel { internal CustomModel(Model_internal model) { ModelId = model.ModelInfo.ModelId.ToString(); - LearnedForms = ConvertLearnedForms(model.Keys); + LearnedPages = ConvertLearnedForms(model.Keys); ModelInfo = new CustomModelInfo(model.ModelInfo); TrainingInfo = new TrainingInfo(model.TrainResult); } /// + /// The unique identifier of the model. /// public string ModelId { get; internal set; } /// + /// List of forms the model learned to recognize, including form fields found in each form. /// - public IReadOnlyList LearnedForms { get; internal set; } + public IReadOnlyList LearnedPages { get; internal set; } /// + /// Information about the trained model, including model ID and training completion status. /// public CustomModelInfo ModelInfo { get; internal set; } /// + /// Information about documents used to train the model and errors encountered during training. 
/// public TrainingInfo TrainingInfo { get; internal set; } - private static IReadOnlyList ConvertLearnedForms(KeysResult_internal keys) + private static IReadOnlyList ConvertLearnedForms(KeysResult_internal keys) { - List forms = new List(); + List forms = new List(); foreach (var key in keys.Clusters) { - CustomModelLearnedForm form = new CustomModelLearnedForm() + CustomModelLearnedPage form = new CustomModelLearnedPage() { FormTypeId = key.Key, // TODO: Q3 diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelInfo.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelInfo.cs index fdb3db22daf4..3b38f5f4eb04 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelInfo.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelInfo.cs @@ -6,6 +6,8 @@ namespace Azure.AI.FormRecognizer.Custom { + /// + /// public class CustomModelInfo { internal CustomModelInfo(ModelInfo_internal modelInfo) @@ -13,7 +15,7 @@ internal CustomModelInfo(ModelInfo_internal modelInfo) ModelId = modelInfo.ModelId.ToString(); CreatedOn = modelInfo.CreatedDateTime; LastUpdatedOn = modelInfo.LastUpdatedDateTime; - TrainingStatus = modelInfo.Status; + Status = modelInfo.Status; } /// @@ -22,7 +24,7 @@ internal CustomModelInfo(ModelInfo_internal modelInfo) /// /// - public ModelStatus TrainingStatus { get; } + public ModelStatus Status { get; } /// /// diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelKnownForm.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelLearnedPage.cs similarity index 74% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelKnownForm.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelLearnedPage.cs index a61604324b41..12a99ee58cd8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelKnownForm.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomModelLearnedPage.cs @@ -5,8 +5,14 @@ namespace Azure.AI.FormRecognizer.Custom { - 
public class CustomModelLearnedForm + /// + /// + public class CustomModelLearnedPage { + internal CustomModelLearnedPage() + { + } + /// /// public string FormTypeId { get; internal set; } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLabeledFormOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLabeledFormOperation.cs new file mode 100644 index 000000000000..c5b056f1e66b --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLabeledFormOperation.cs @@ -0,0 +1,106 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Azure.AI.FormRecognizer.Models; +using Azure.Core; +using Azure.Core.Pipeline; + +namespace Azure.AI.FormRecognizer.Custom +{ + /// + /// + internal class ExtractLabeledFormOperation : Operation> + { + private Response _response; + private IReadOnlyList _value; + private bool _hasCompleted; + + private readonly string _modelId; + private readonly ServiceClient _operations; + + public override string Id { get; } + + public override IReadOnlyList Value => OperationHelpers.GetValue(ref _value); + + public override bool HasCompleted => _hasCompleted; + + public override bool HasValue => _value != null; + + /// + public override Response GetRawResponse() => _response; + + /// + public override ValueTask>> WaitForCompletionAsync(CancellationToken cancellationToken = default) => + this.DefaultWaitForCompletionAsync(cancellationToken); + + /// + public override ValueTask>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) => + this.DefaultWaitForCompletionAsync(pollingInterval, cancellationToken); + + /// + /// + /// + /// + /// + internal ExtractLabeledFormOperation(ServiceClient operations, string modelId, string operationLocation) + { + _operations = operations; + _modelId = 
modelId; + + // TODO: Add validation here + // https://github.com/Azure/azure-sdk-for-net/issues/10385 + Id = operationLocation.Split('/').Last(); + } + + /// + public override Response UpdateStatus(CancellationToken cancellationToken = default) => + UpdateStatusAsync(false, cancellationToken).EnsureCompleted(); + + /// + public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => + await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); + + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) + { + if (!_hasCompleted) + { + Response update = async + ? await _operations.GetAnalyzeFormResultAsync(new Guid(_modelId), new Guid(Id), cancellationToken).ConfigureAwait(false) + : _operations.GetAnalyzeFormResult(new Guid(_modelId), new Guid(Id), cancellationToken); + + // TODO: Handle correctly according to returned status code + // https://github.com/Azure/azure-sdk-for-net/issues/10386 + // TODO: Add reasonable null checks. 
+ + if (update.Value.Status == OperationStatus.Succeeded || update.Value.Status == OperationStatus.Failed) + { + _hasCompleted = true; + + // TODO: Consider what we'll do when there are multiple DocumentResults + // https://github.com/Azure/azure-sdk-for-net/issues/10387 + // Supervised + _value = ConvertToExtractedLabeledForms(update.Value.AnalyzeResult.DocumentResults, update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults); + } + + _response = update.GetRawResponse(); + } + + return GetRawResponse(); + } + + private static IReadOnlyList ConvertToExtractedLabeledForms(IList documentResults, IList pageResults, IList readResults) + { + List forms = new List(); + for (int i = 0; i < documentResults.Count; i++) + { + forms.Add(new ExtractedLabeledForm(documentResults[i], pageResults, readResults)); + } + return forms; + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLayoutOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLayoutOperation.cs index d13b8b206377..ba38ea02655b 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLayoutOperation.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractLayoutOperation.cs @@ -56,7 +56,7 @@ public override Response UpdateStatus(CancellationToken cancellationToken = defa public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); - private async Task UpdateStatusAsync(bool async, CancellationToken cancellationToken) + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) { if (!_hasCompleted) { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractFormOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractPagesOperation.cs similarity index 61% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractFormOperation.cs rename to 
sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractPagesOperation.cs index e63374d4230b..e679efb357e0 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractFormOperation.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractPagesOperation.cs @@ -2,20 +2,22 @@ // Licensed under the MIT License. using System; +using System.Collections.Generic; using System.Linq; using System.Threading; using System.Threading.Tasks; +using Azure.AI.FormRecognizer.Models; using Azure.Core; using Azure.Core.Pipeline; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { /// /// - internal class ExtractFormOperation : Operation + internal class ExtractPagesOperation : Operation> { private Response _response; - private ExtractedForm _value; + private IReadOnlyList _value; private bool _hasCompleted; private readonly string _modelId; @@ -23,7 +25,7 @@ internal class ExtractFormOperation : Operation public override string Id { get; } - public override ExtractedForm Value => OperationHelpers.GetValue(ref _value); + public override IReadOnlyList Value => OperationHelpers.GetValue(ref _value); public override bool HasCompleted => _hasCompleted; @@ -33,11 +35,11 @@ internal class ExtractFormOperation : Operation public override Response GetRawResponse() => _response; /// - public override ValueTask> WaitForCompletionAsync(CancellationToken cancellationToken = default) => + public override ValueTask>> WaitForCompletionAsync(CancellationToken cancellationToken = default) => this.DefaultWaitForCompletionAsync(cancellationToken); /// - public override ValueTask> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) => + public override ValueTask>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) => this.DefaultWaitForCompletionAsync(pollingInterval, cancellationToken); /// @@ -45,7 +47,7 @@ public override ValueTask> WaitForCompletionAsync(TimeSp /// /// 
/// - internal ExtractFormOperation(ServiceClient operations, string modelId, string operationLocation) + internal ExtractPagesOperation(ServiceClient operations, string modelId, string operationLocation) { _operations = operations; _modelId = modelId; @@ -63,7 +65,7 @@ public override Response UpdateStatus(CancellationToken cancellationToken = defa public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); - private async Task UpdateStatusAsync(bool async, CancellationToken cancellationToken) + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) { if (!_hasCompleted) { @@ -78,21 +80,7 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can if (update.Value.Status == OperationStatus.Succeeded || update.Value.Status == OperationStatus.Failed) { _hasCompleted = true; - - // TODO: Move this logic into ExtractedForm? It's a bit convoluted right now. 
- // Determine if the model was supervised or unsupervised - if (update.Value.AnalyzeResult.DocumentResults?.Count == 0) - { - // Unsupervised - _value = new ExtractedForm(update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults); - } - else - { - // TODO: Consider what we'll do when there are multiple DocumentResults - // https://github.com/Azure/azure-sdk-for-net/issues/10387 - // Supervised - _value = new ExtractedForm(update.Value.AnalyzeResult.DocumentResults.First(), update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults); - } + _value = ConvertToExtractedPages(update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults); } _response = update.GetRawResponse(); @@ -100,5 +88,15 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can return GetRawResponse(); } + + private static IReadOnlyList ConvertToExtractedPages(IList pageResults, IList readResults) + { + List pages = new List(); + for (int i = 0; i < pageResults.Count; i++) + { + pages.Add(new ExtractedPage(pageResults[i], readResults[i])); + } + return pages; + } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractReceiptOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractReceiptOperation.cs index 5a1600c3bfdf..a6c57bc819b7 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractReceiptOperation.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractReceiptOperation.cs @@ -7,20 +7,21 @@ using Azure.Core; using Azure.Core.Pipeline; using System.Linq; +using System.Collections.Generic; namespace Azure.AI.FormRecognizer.Models { - internal class ExtractReceiptOperation : Operation + internal class ExtractReceiptOperation : Operation> { private Response _response; - private ExtractedReceipt _value; + private IReadOnlyList _value; private bool _hasCompleted; private readonly ServiceClient _operations; public override string Id { get; } - public override ExtractedReceipt 
Value => OperationHelpers.GetValue(ref _value); + public override IReadOnlyList Value => OperationHelpers.GetValue(ref _value); public override bool HasCompleted => _hasCompleted; @@ -30,11 +31,11 @@ internal class ExtractReceiptOperation : Operation public override Response GetRawResponse() => _response; /// - public override ValueTask> WaitForCompletionAsync(CancellationToken cancellationToken = default) => + public override ValueTask>> WaitForCompletionAsync(CancellationToken cancellationToken = default) => this.DefaultWaitForCompletionAsync(cancellationToken); /// - public override ValueTask> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) => + public override ValueTask>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) => this.DefaultWaitForCompletionAsync(pollingInterval, cancellationToken); internal ExtractReceiptOperation(ServiceClient operations, string operationLocation) @@ -54,7 +55,7 @@ public override Response UpdateStatus(CancellationToken cancellationToken = defa public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); - private async Task UpdateStatusAsync(bool async, CancellationToken cancellationToken) + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) { if (!_hasCompleted) { @@ -71,7 +72,8 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can // TODO: When they support extracting more than one receipt, add a pageable method for this. 
// https://github.com/Azure/azure-sdk-for-net/issues/10389 - _value = new ExtractedReceipt(update.Value.AnalyzeResult.DocumentResults.First(), update.Value.AnalyzeResult.ReadResults.First()); + //_value = new ExtractedReceipt(update.Value.AnalyzeResult.DocumentResults.First(), update.Value.AnalyzeResult.ReadResults.First()); + _value = ConvertToExtractedReceipts(update.Value.AnalyzeResult.DocumentResults, update.Value.AnalyzeResult.ReadResults); } _response = update.GetRawResponse(); @@ -79,5 +81,15 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can return GetRawResponse(); } + + private static IReadOnlyList ConvertToExtractedReceipts(IList documentResults, IList readResults) + { + List receipts = new List(); + for (int i = 0; i < documentResults.Count; i++) + { + receipts.Add(new ExtractedReceipt(documentResults[i], readResults)); + } + return receipts; + } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedField.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedField.cs index 096bfb8af5c8..b8d5e4f285e6 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedField.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedField.cs @@ -4,25 +4,30 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; +using Azure.AI.FormRecognizer.Models; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { + // Maps to KeyValuePair + + /// + /// public class ExtractedField { internal ExtractedField(KeyValuePair_internal field, ReadResult_internal readResult) { - // Unsupervised Confidence = field.Confidence; - Label = field.Key.Text; - LabelBoundingBox = field.Key.BoundingBox == null ? null : new BoundingBox(field.Key.BoundingBox); + Name = field.Key.Text; + NameBoundingBox = field.Key.BoundingBox == null ? 
null : new BoundingBox(field.Key.BoundingBox); + if (field.Key.Elements != null) { - LabelRawExtractedItems = ConvertTextReferences(readResult, field.Key.Elements); + NameRawExtractedItems = ConvertTextReferences(readResult, field.Key.Elements); } Value = field.Value.Text; - ValueBoundingBox = new BoundingBox(field.Value.BoundingBox); + ValueBoundingBox = field.Value.BoundingBox == null ? null : new BoundingBox(field.Value.BoundingBox); if (field.Value.Elements != null) { @@ -30,29 +35,32 @@ internal ExtractedField(KeyValuePair_internal field, ReadResult_internal readRes } } - internal ExtractedField(KeyValuePair field) - { - // Supervised - Confidence = field.Value.Confidence; - Label = field.Key; - Value = field.Value.Text; - ValueBoundingBox = new BoundingBox(field.Value.BoundingBox); - } + /// + /// + public float Confidence { get; internal set; } + + /// + /// + public string Name { get; internal set; } - // TODO: Why can this be nullable on FieldValue.Confidence? - // https://github.com/Azure/azure-sdk-for-net/issues/10378 - public float? Confidence { get; internal set; } - public string Label { get; internal set; } + /// + /// + public BoundingBox NameBoundingBox { get; internal set; } - // TODO: Make this nullable to indicate that this is an optional field. - // https://github.com/Azure/azure-sdk-for-net/issues/10361 - // Not currently supported for Track2 libraries. - public BoundingBox LabelBoundingBox { get; internal set; } + /// + /// + public IReadOnlyList NameRawExtractedItems { get; internal set; } + /// + /// public string Value { get; internal set; } + + /// + /// public BoundingBox ValueBoundingBox { get; internal set; } - public IReadOnlyList LabelRawExtractedItems { get; internal set; } + /// + /// public IReadOnlyList ValueRawExtractedItems { get; internal set; } // TODO: Refactor to move OCR code to a common file, rather than it living in this file. 
@@ -66,6 +74,16 @@ internal static IReadOnlyList ConvertTextReferences(ReadResult return extractedTexts; } + internal static IReadOnlyList ConvertTextReferences(IList readResults, ICollection references) + { + List extractedTexts = new List(); + foreach (var reference in references) + { + extractedTexts.Add(ResolveTextReference(readResults, reference)); + } + return extractedTexts; + } + //private const string SegmentReadResults = "readResults"; //private const string SegmentLines = "lines"; //private const string SegmentWords = "words"; @@ -127,5 +145,25 @@ private static RawExtractedItem ResolveTextReference(ReadResult_internal readRes // } //} } + + private static RawExtractedItem ResolveTextReference(IList readResults, string reference) + { + // TODO: Add additional validations here. + // https://github.com/Azure/azure-sdk-for-net/issues/10363 + + // Example: the following should result in LineIndex = 7, WordIndex = 12 + // "#/readResults/3/lines/7/words/12" + string[] segments = reference.Split('/'); + +#pragma warning disable CA1305 // Specify IFormatProvider + var pageIndex = int.Parse(segments[2]); + var lineIndex = int.Parse(segments[4]); + var wordIndex = int.Parse(segments[6]); +#pragma warning restore CA1305 // Specify IFormatProvider + + // TODO: Support case where text reference is lines only, without word segment + // https://github.com/Azure/azure-sdk-for-net/issues/10364 + return new RawExtractedWord(readResults[pageIndex].Lines[lineIndex].Words[wordIndex]); + } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedForm.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedForm.cs deleted file mode 100644 index 36fc9d4544b5..000000000000 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedForm.cs +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -using System.Collections.Generic; -using System.Diagnostics; -using System.Globalization; -using System.Linq; - -namespace Azure.AI.FormRecognizer.Models -{ - public class ExtractedForm - { - internal ExtractedForm(ICollection pageResults, ICollection readResults) - { - // Unsupervised - Pages = SetPages(pageResults, readResults); - - // TODO: Set page range from page numbers in pageResults - // https://github.com/Azure/azure-sdk-for-net/issues/10365 - } - - internal ExtractedForm(DocumentResult_internal documentResult, ICollection pageResults, ICollection readResults) - { - // Supervised - LearnedFormType = documentResult.DocType; - StartPageNumber = documentResult.PageRange.First(); - EndPageNumber = documentResult.PageRange.Last(); - Pages = ConvertPages(documentResult, pageResults, readResults); - } - - public string LearnedFormType { get; internal set; } - - public int StartPageNumber { get; internal set; } - - public int EndPageNumber { get; internal set; } - - public IReadOnlyList Pages { get; } - - private IReadOnlyList SetPages(ICollection pageResults, ICollection readResults) - { - // TODO: Add validation and appropriate exception if these don't match. 
- // https://github.com/Azure/azure-sdk-for-net/issues/10366 - Debug.Assert(pageResults.Count == readResults.Count); - - List pages = new List(); - - for (int i = 0; i < pageResults.Count; i++) - { - PageResult_internal pageResult = pageResults.ElementAt(i); - ReadResult_internal rawExtractedPage = readResults.ElementAt(i); - - SetLearnedFormType(pageResult.ClusterId); - - ExtractedPage page = new ExtractedPage(pageResult, rawExtractedPage); - pages.Add(page); - } - - return pages; - } - - private static IReadOnlyList ConvertPages(DocumentResult_internal documentResult, ICollection pageResults, ICollection readResults) - { - List pages = new List(); - - Dictionary> fieldsByPage = new Dictionary>(); - foreach (var field in documentResult.Fields) - { - // TODO: We are currently setting the field page to 0 if field.Value.Page comes back as null. - // https://github.com/Azure/azure-sdk-for-net/issues/10369 - - // TODO: How should we handle the multiple values per field and the strongly-typed ones? - // https://github.com/Azure/azure-sdk-for-net/issues/10333 - - List list; - if (!fieldsByPage.TryGetValue(field.Value.Page ?? 0, out list)) - { - fieldsByPage[field.Value.Page ?? 0] = new List(); - } - - fieldsByPage[field.Value.Page ?? 0].Add(new ExtractedField(field)); - } - - foreach (var pageFields in fieldsByPage) - { - int pageNumber = pageFields.Key; - var page = new ExtractedPage(pageNumber, pageFields.Value, pageResults.ElementAt(pageNumber - 1), readResults.ElementAt(pageNumber - 1)); - pages.Add(page); - } - - return pages; - } - - private void SetLearnedFormType(int? clusterId) - { - string formId = clusterId?.ToString(CultureInfo.InvariantCulture); - - if (formId != null) - { - // TODO: is it possible that multiple pages in a page result could have different page numbers? 
- // https://github.com/Azure/azure-sdk-for-net/issues/10377 - Debug.Assert(LearnedFormType == formId, "Multiple form types found in ExtractedForm."); - LearnedFormType = formId; - } - } - } -} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledField.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledField.cs new file mode 100644 index 000000000000..32652c452723 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledField.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using Azure.AI.FormRecognizer.Models; + +namespace Azure.AI.FormRecognizer.Custom +{ + + /// + /// + // Maps to FieldValue in swagger. + public class ExtractedLabeledField + { + internal ExtractedLabeledField(KeyValuePair field, IList readResults) + { + // Supervised + Confidence = field.Value.Confidence; + PageNumber = field.Value.Page; + Label = field.Key; + Value = field.Value.Text; + ValueBoundingBox = new BoundingBox(field.Value.BoundingBox); + + if (field.Value.Elements != null) + { + RawExtractedItems = ExtractedField.ConvertTextReferences(readResults, field.Value.Elements); + } + + // TODO: Add strongly-typed value + // https://github.com/Azure/azure-sdk-for-net/issues/10333 + } + + /// + /// + // TODO: Why can this be nullable on FieldValue.Confidence? + // https://github.com/Azure/azure-sdk-for-net/issues/10378 + public float? Confidence { get; internal set; } + + /// + /// + public int? 
PageNumber { get; internal set; } + + /// + /// + public string Label { get; internal set; } + + /// + /// + public string Value { get; internal set; } + + /// + /// + public BoundingBox ValueBoundingBox { get; internal set; } + + /// + /// + public IReadOnlyList RawExtractedItems { get; internal set; } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledForm.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledForm.cs new file mode 100644 index 000000000000..34dbccaac40e --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledForm.cs @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Linq; +using Azure.AI.FormRecognizer.Models; + +namespace Azure.AI.FormRecognizer.Custom +{ + /// + /// + public class ExtractedLabeledForm + { + internal ExtractedLabeledForm(DocumentResult_internal documentResult, IList pageResults, IList readResults) + { + // Supervised + FormType = documentResult.DocType; + + // TODO: validate that PageRange.Length == 2. + // https://github.com/Azure/azure-sdk-for-net/issues/10547 + StartPageNumber = documentResult.PageRange.First(); + EndPageNumber = documentResult.PageRange.Last(); + + Fields = ConvertFields(documentResult.Fields, readResults); + + Tables = ConvertLabeledTables(pageResults, readResults); + + if (readResults != null) + { + RawExtractedPages = ConvertRawPages(readResults); + } + } + + /// + /// + public string FormType { get; internal set; } + + /// + /// + public int StartPageNumber { get; internal set; } + + /// + /// + public int EndPageNumber { get; internal set; } + + /// + /// + public IReadOnlyList Fields { get; } + + /// + /// + public IReadOnlyList Tables { get; } + + /// + /// + public IReadOnlyList RawExtractedPages { get; } + + /// + /// Return the field value text for a given label. 
+ /// + /// + /// + public string GetFieldValue(string label) + { + var field = Fields.Where(f => f.Label == label).FirstOrDefault(); + if (field == default) + { + throw new FieldNotFoundException($"Field '{label}' not found on form."); + } + + return field.Value; + } + + private static IReadOnlyList ConvertFields(IDictionary fields, IList readResults) + { + List list = new List(); + foreach (var field in fields) + { + list.Add(new ExtractedLabeledField(field, readResults)); + } + return list; + } + + private static IReadOnlyList ConvertRawPages(IList readResults) + { + List rawPages = new List(); + foreach (var readResult in readResults) + { + rawPages.Add(new RawExtractedPage(readResult)); + } + return rawPages; + } + + internal static IReadOnlyList ConvertLabeledTables(IList pageResults, IList readResults) + { + List tables = new List(); + + foreach (var pageResult in pageResults) + { + foreach (var table in pageResult.Tables) + { + tables.Add(new ExtractedLabeledTable(table, readResults[pageResult.Page - 1], pageResult.Page)); + } + } + + return tables; + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledTable.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledTable.cs new file mode 100644 index 000000000000..5ff5ac21a881 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLabeledTable.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; +using System.Collections.Generic; +using System.Text; +using Azure.AI.FormRecognizer.Models; + +namespace Azure.AI.FormRecognizer.Custom +{ + /// + /// + public class ExtractedLabeledTable : ExtractedTable + { + internal ExtractedLabeledTable(DataTable_internal table, ReadResult_internal readResult, int pageNumber) + : base(table, readResult) + { + PageNumber = pageNumber; + } + + /// + /// + public int PageNumber { get; } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLayoutPage.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLayoutPage.cs index 7716d5d1c27a..832a938a13bf 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLayoutPage.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedLayoutPage.cs @@ -5,6 +5,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedLayoutPage { internal ExtractedLayoutPage(PageResult_internal pageResult, ReadResult_internal readResult) @@ -18,10 +20,16 @@ internal ExtractedLayoutPage(PageResult_internal pageResult, ReadResult_internal } } + /// + /// public int PageNumber { get; } + /// + /// public IReadOnlyList Tables { get; } + /// + /// public RawExtractedPage RawExtractedPage { get; } internal static IReadOnlyList ConvertTables(ICollection tablesResult, ReadResult_internal readResult) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedPage.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedPage.cs index f6f44f9f6210..8a8bbecd60eb 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedPage.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedPage.cs @@ -2,15 +2,21 @@ // Licensed under the MIT License. 
using System.Collections.Generic; +using Azure.AI.FormRecognizer.Models; +using System.Linq; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { + /// + /// + // Maps to PageResult public class ExtractedPage { // Unsupervised internal ExtractedPage(PageResult_internal pageResult, ReadResult_internal readResult) { PageNumber = pageResult.Page; + FormTypeId = pageResult.ClusterId; Fields = ConvertFields(pageResult.KeyValuePairs, readResult); Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult); @@ -20,26 +26,44 @@ internal ExtractedPage(PageResult_internal pageResult, ReadResult_internal readR } } - // Supervised - internal ExtractedPage(int pageNumber, List fields, PageResult_internal pageResult, ReadResult_internal readResult) - { - PageNumber = pageNumber; - Fields = ConvertFields(fields); - Tables = ExtractedLayoutPage.ConvertTables(pageResult.Tables, readResult); + /// + /// + public int PageNumber { get; } - if (readResult != null) - { - RawExtractedPage = new RawExtractedPage(readResult); - } - } + /// + /// + public int? FormTypeId { get; } - public int PageNumber { get; } + /// + /// public IReadOnlyList Fields { get; } + + /// + /// + public IReadOnlyList Tables { get; } + /// + /// public RawExtractedPage RawExtractedPage { get; } + /// + /// Return the field value text for a given fieldName. 
+ /// + /// + /// + public string GetFieldValue(string fieldName) + { + var field = Fields.Where(f => f.Name == fieldName).FirstOrDefault(); + if (field == default) + { + throw new FieldNotFoundException($"Field '{fieldName}' not found on form."); + } + + return field.Value; + } + private static IReadOnlyList ConvertFields(ICollection keyValuePairs, ReadResult_internal readResult) { List fields = new List(); @@ -50,15 +74,5 @@ private static IReadOnlyList ConvertFields(ICollection ConvertFields(List fields) - { - List list = new List(); - foreach (var field in fields) - { - list.Add(field); - } - return list; - } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceipt.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceipt.cs index ba6564b28825..e3c63a57bb26 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceipt.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceipt.cs @@ -9,45 +9,86 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedReceipt { - internal ExtractedReceipt(DocumentResult_internal documentResult, ReadResult_internal readResult) + internal ExtractedReceipt(DocumentResult_internal documentResult, IList readResults) { StartPageNumber = documentResult.PageRange.First(); EndPageNumber = documentResult.PageRange.Last(); SetReceiptValues(documentResult.Fields); - if (readResult != null) + if (readResults != null) { - RawExtractedPage = new RawExtractedPage(readResult); + RawExtractedPage = ConvertRawPages(StartPageNumber, EndPageNumber, readResults); } } + /// + /// public int StartPageNumber { get; internal set; } + /// + /// public int EndPageNumber { get; internal set; } + /// + /// // TODO: Can we make this nullable in case a value isn't present or // isn't read by the learner? 
// https://github.com/Azure/azure-sdk-for-net/issues/10361 public IReadOnlyList Items { get; internal set; } + + /// + /// public string MerchantAddress { get; internal set; } + + /// + /// public string MerchantName { get; internal set; } + + /// + /// public string MerchantPhoneNumber { get; internal set; } + /// + /// public ExtractedReceiptType ReceiptType { get; internal set; } + + /// + /// public float? Subtotal { get; internal set; } + + /// + /// public float? Tax { get; internal set; } + + /// + /// public float? Tip { get; internal set; } + + /// + /// public float? Total { get; internal set; } + + /// + /// public DateTimeOffset? TransactionDate { get; internal set; } + + /// + /// public DateTimeOffset? TransactionTime { get; internal set; } + /// + /// // TODO: Have this handle Items correctly // https://github.com/Azure/azure-sdk-for-net/issues/10379 public IReadOnlyDictionary ExtractedFields { get; internal set; } - public RawExtractedPage RawExtractedPage { get; } + /// + /// + public IReadOnlyList RawExtractedPage { get; } private void SetReceiptValues(IDictionary fields) { @@ -180,8 +221,8 @@ private static string ConvertStringValue(string fieldName, IDictionary DateTimeOffset.Parse(value.ValueDate, CultureInfo.InvariantCulture), - FieldValueType.Time => DateTimeOffset.Parse(value.ValueTime, CultureInfo.InvariantCulture), + FieldValueType.Date => value.ValueDate == null ? default : DateTimeOffset.Parse(value.ValueDate, CultureInfo.InvariantCulture), + FieldValueType.Time => value.ValueTime == null ? 
default : DateTimeOffset.Parse(value.ValueTime, CultureInfo.InvariantCulture), _ => throw new InvalidOperationException($"The value type {value.Type} was expected to be a Date or Time") }; } @@ -217,5 +258,15 @@ private static IReadOnlyList ConvertReceiptItems(IDictiona return items; } + + private static IReadOnlyList ConvertRawPages(int startPageNumber, int endPageNumber, IList readResults) + { + List rawPages = new List(); + for (int i = startPageNumber - 1; i < endPageNumber - 1; i++) + { + rawPages.Add(new RawExtractedPage(readResults[i])); + } + return rawPages; + } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptField.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptField.cs index 2cbefa968db7..ac54d67bef79 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptField.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptField.cs @@ -3,6 +3,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedReceiptField { internal ExtractedReceiptField(FieldValue_internal field) @@ -16,8 +18,16 @@ internal ExtractedReceiptField(FieldValue_internal field) } } + /// + /// public string Text { get; internal set; } + + /// + /// public BoundingBox BoundingBox { get; internal set; } + + /// + /// public float? Confidence { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptItem.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptItem.cs index 1b0e0390b3bb..45a2d4d71fa9 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptItem.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptItem.cs @@ -3,6 +3,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedReceiptItem { internal ExtractedReceiptItem(string name, int? quantity, float? price, float? totalPrice) @@ -13,9 +15,20 @@ internal ExtractedReceiptItem(string name, int? 
quantity, float? price, float? t TotalPrice = totalPrice; } + /// + /// public string Name { get; } + + /// + /// public int? Quantity { get; } + + /// + /// public float? Price { get; } + + /// + /// public float? TotalPrice { get; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptType.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptType.cs index e867c8fe6c98..8d2a4dc60c43 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptType.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedReceiptType.cs @@ -3,9 +3,16 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public enum ExtractedReceiptType { - Unrecognized = 1, - Itemized = 2, + /// + /// + Unrecognized = 0, + + /// + /// + Itemized = 1, } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTable.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTable.cs index 56b8425985eb..754988b2f768 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTable.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTable.cs @@ -6,17 +6,27 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedTable { - internal ExtractedTable(DataTable_internal result, ReadResult_internal readResult) + internal ExtractedTable(DataTable_internal table, ReadResult_internal readResult) { - ColumnCount = result.Columns; - RowCount = result.Rows; - Cells = ConvertCells(result.Cells, readResult); + ColumnCount = table.Columns; + RowCount = table.Rows; + Cells = ConvertCells(table.Cells, readResult); } + /// + /// public IReadOnlyList Cells { get; } + + /// + /// public int ColumnCount { get; } + + /// + /// public int RowCount { get; } // TODO: implement table indexer @@ -24,6 +34,8 @@ internal ExtractedTable(DataTable_internal result, ReadResult_internal readResul // https://github.com/Azure/azure-sdk-for-net/issues/9975 + /// + /// #pragma warning disable CA1822 // Mark as 
static public ExtractedTableCell this[int row, int column] #pragma warning restore CA1822 // Mark as static diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTableCell.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTableCell.cs index d1dd908a5b1c..87f84c5bb378 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTableCell.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ExtractedTableCell.cs @@ -2,9 +2,12 @@ // Licensed under the MIT License. using System.Collections.Generic; +using Azure.AI.FormRecognizer.Custom; namespace Azure.AI.FormRecognizer.Models { + /// + /// public class ExtractedTableCell { internal ExtractedTableCell(DataTableCell_internal dataTableCell, ReadResult_internal readResult, ICollection references) @@ -25,16 +28,44 @@ internal ExtractedTableCell(DataTableCell_internal dataTableCell, ReadResult_int } } + /// + /// public BoundingBox BoundingBox { get; } + + /// + /// public int ColumnIndex { get; } + + /// + /// public int ColumnSpan { get; } + + /// + /// public float Confidence { get; } + + /// + /// public bool IsFooter { get; } + + /// + /// public bool IsHeader { get; } + + /// + /// public int RowIndex { get; } + + /// + /// public int RowSpan { get; } + + /// + /// public string Text { get; } + /// + /// public IReadOnlyList RawExtractedItems { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldNotFoundException.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldNotFoundException.cs new file mode 100644 index 000000000000..ac91ebe555ca --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldNotFoundException.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +using System; + +namespace Azure.AI.FormRecognizer.Models +{ +#pragma warning disable CA1064 // Exceptions should be public + internal class FieldNotFoundException : Exception +#pragma warning restore CA1064 // Exceptions should be public + { + public FieldNotFoundException(string message) : base(message) + { + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldPredictionAccuracy.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldPredictionAccuracy.cs index b363f10455f7..52c9a8844f27 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldPredictionAccuracy.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FieldPredictionAccuracy.cs @@ -5,11 +5,13 @@ namespace Azure.AI.FormRecognizer.Custom { + [CodeGenSchema("FormFieldsReport")] public partial class FieldPredictionAccuracy { + /// + /// [CodeGenSchemaMember("FieldName")] - public string Label { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormLayoutClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormLayoutClient.cs index 6b029b8f46dc..dc8d6190a78d 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormLayoutClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormLayoutClient.cs @@ -12,6 +12,10 @@ namespace Azure.AI.FormRecognizer { + + /// + /// The client to use with the Form Recognizer Azure Cognitive Service to extract layout elements, such as tables, from forms. + /// public class FormLayoutClient { private readonly ClientDiagnostics _diagnostics; @@ -20,12 +24,14 @@ public class FormLayoutClient internal const string LayoutRoute = "/layout"; + /// + /// protected FormLayoutClient() { } /// - /// Initializes a new instance of the . + /// Initializes a new instance of the . /// #pragma warning disable AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service.
public FormLayoutClient(Uri endpoint, FormRecognizerApiKeyCredential credential) : this(endpoint, credential, new FormRecognizerClientOptions()) @@ -34,7 +40,7 @@ protected FormLayoutClient() } /// - /// Initializes a new instance of the . + /// Initializes a new instance of the . /// #pragma warning disable AZC0007 // DO provide a minimal constructor that takes only the parameters required to connect to the service. public FormLayoutClient(Uri endpoint, FormRecognizerApiKeyCredential credential, FormRecognizerClientOptions options) @@ -45,15 +51,31 @@ public FormLayoutClient(Uri endpoint, FormRecognizerApiKeyCredential credential, _operations = new ServiceClient(_diagnostics, _pipeline, endpoint.ToString()); } - public virtual Operation> StartExtractLayout(Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts layout elements from one or more passed-in forms. + /// + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// A CancellationToken controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain layout elements extracted from the form.
+ public virtual Operation> StartExtractLayouts(Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 - ResponseWithHeaders response = _operations.AnalyzeLayoutAsync(stream, contentType , cancellationToken); + ResponseWithHeaders response = _operations.AnalyzeLayoutAsync(stream, contentType, cancellationToken); return new ExtractLayoutOperation(_operations, response.Headers.OperationLocation); } - public virtual async Task>> StartExtractLayoutAsync(Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts layout elements from one or more passed-in forms. + /// + /// The stream containing one or more forms to extract elements from. + /// The content type of the input file. + /// A CancellationToken controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain layout elements extracted from the form. + public virtual async Task>> StartExtractLayoutsAsync(Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 @@ -61,14 +83,28 @@ public virtual async Task>> StartEx return new ExtractLayoutOperation(_operations, response.Headers.OperationLocation); } - public virtual Operation> StartExtractLayout(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts layout elements from one or more passed-in forms. + /// + /// The absolute URI of the remote file to extract elements from. + /// + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation.
Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain layout elements extracted from the form. + public virtual Operation> StartExtractLayouts(Uri uri, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; ResponseWithHeaders response = _operations.RestClient.AnalyzeLayoutAsync(sourcePath, cancellationToken); return new ExtractLayoutOperation(_operations, response.Headers.OperationLocation); } - public virtual async Task>> StartExtractLayoutAsync(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts layout elements from one or more passed-in forms. + /// + /// The absolute URI of the remote file to extract elements from. + /// + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain layout elements extracted from the form.
+ public virtual async Task>> StartExtractLayoutsAsync(Uri uri, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; ResponseWithHeaders response = await _operations.RestClient.AnalyzeLayoutAsyncAsync(sourcePath, cancellationToken).ConfigureAwait(false); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerError.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerError.cs index 62e7a73163e2..1626e7843efa 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerError.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/FormRecognizerError.cs @@ -5,8 +5,19 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// [CodeGenSchema("ErrorInformation")] public partial class FormRecognizerError { + /// + /// + [CodeGenSchemaMember("Code")] + public string Code { get; set; } + + /// + /// + [CodeGenSchemaMember("Message")] + public string Message { get; set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/AnalyzeOperationResult_internal.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/AnalyzeOperationResult_internal.cs index 91c42a3ffb09..74d476dff347 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/AnalyzeOperationResult_internal.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/AnalyzeOperationResult_internal.cs @@ -6,6 +6,7 @@ #nullable disable using System; +using Azure.AI.FormRecognizer; namespace Azure.AI.FormRecognizer.Models { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.Serialization.cs index 65e338838919..4845ce94ddf8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.Serialization.cs +++ 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.Serialization.cs @@ -13,19 +13,19 @@ internal static class ContentTypeExtensions { public static string ToSerialString(this ContentType value) => value switch { - ContentType.ApplicationPdf => "application/pdf", - ContentType.ImageJpeg => "image/jpeg", - ContentType.ImagePng => "image/png", - ContentType.ImageTiff => "image/tiff", + ContentType.Pdf => "application/pdf", + ContentType.Jpeg => "image/jpeg", + ContentType.Png => "image/png", + ContentType.Tiff => "image/tiff", _ => throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown ContentType value.") }; public static ContentType ToContentType(this string value) { - if (string.Equals(value, "application/pdf", StringComparison.InvariantCultureIgnoreCase)) return ContentType.ApplicationPdf; - if (string.Equals(value, "image/jpeg", StringComparison.InvariantCultureIgnoreCase)) return ContentType.ImageJpeg; - if (string.Equals(value, "image/png", StringComparison.InvariantCultureIgnoreCase)) return ContentType.ImagePng; - if (string.Equals(value, "image/tiff", StringComparison.InvariantCultureIgnoreCase)) return ContentType.ImageTiff; + if (string.Equals(value, "application/pdf", StringComparison.InvariantCultureIgnoreCase)) return ContentType.Pdf; + if (string.Equals(value, "image/jpeg", StringComparison.InvariantCultureIgnoreCase)) return ContentType.Jpeg; + if (string.Equals(value, "image/png", StringComparison.InvariantCultureIgnoreCase)) return ContentType.Png; + if (string.Equals(value, "image/tiff", StringComparison.InvariantCultureIgnoreCase)) return ContentType.Tiff; throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown ContentType value."); } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.cs deleted file mode 100644 index 9d49e26511a4..000000000000 --- 
a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ContentType.cs +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -// - -#nullable disable - -namespace Azure.AI.FormRecognizer.Models -{ - /// Content type for upload. - public enum ContentType - { - /// Content Type 'application/pdf'. - ApplicationPdf, - /// Content Type 'image/jpeg'. - ImageJpeg, - /// Content Type 'image/png'. - ImagePng, - /// Content Type 'image/tiff'. - ImageTiff - } -} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/FormRecognizerError.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/FormRecognizerError.cs index 75696bc7a001..5a5a29ea5b58 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/FormRecognizerError.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/FormRecognizerError.cs @@ -23,8 +23,5 @@ internal FormRecognizerError(string code, string message) Code = code; Message = message; } - - public string Code { get; internal set; } - public string Message { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.Serialization.cs index fceeea90f75f..b5ebe4052e0f 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.Serialization.cs @@ -8,7 +8,7 @@ using System.Text.Json; using Azure.Core; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { internal partial class ModelInfo_internal { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.cs 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.cs index f0577d3b3c27..388128147b02 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelInfo_internal.cs @@ -7,7 +7,7 @@ using System; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { /// Basic custom model information. internal partial class ModelInfo_internal diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.Serialization.cs index ca409f3bd7c1..c8eff7f04e18 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.Serialization.cs @@ -7,13 +7,13 @@ using System; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { internal static class ModelStatusExtensions { public static string ToSerialString(this ModelStatus value) => value switch { - ModelStatus.Creating => "creating", + ModelStatus.Training => "creating", ModelStatus.Ready => "ready", ModelStatus.Invalid => "invalid", _ => throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown ModelStatus value.") @@ -21,7 +21,7 @@ internal static class ModelStatusExtensions public static ModelStatus ToModelStatus(this string value) { - if (string.Equals(value, "creating", StringComparison.InvariantCultureIgnoreCase)) return ModelStatus.Creating; + if (string.Equals(value, "creating", StringComparison.InvariantCultureIgnoreCase)) return ModelStatus.Training; if (string.Equals(value, "ready", StringComparison.InvariantCultureIgnoreCase)) return ModelStatus.Ready; if (string.Equals(value, "invalid", StringComparison.InvariantCultureIgnoreCase)) return ModelStatus.Invalid; throw new 
ArgumentOutOfRangeException(nameof(value), value, "Unknown ModelStatus value."); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.cs deleted file mode 100644 index f1388b9560dc..000000000000 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/ModelStatus.cs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -// - -#nullable disable - -namespace Azure.AI.FormRecognizer.Models -{ - /// Status of the model. - public enum ModelStatus - { - /// creating. - Creating, - /// ready. - Ready, - /// invalid. - Invalid - } -} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.Serialization.cs index 39a8410c7aa8..29f1c5bf2e0c 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.Serialization.cs @@ -7,6 +7,7 @@ using System.Text.Json; using Azure.AI.FormRecognizer; +using Azure.AI.FormRecognizer.Custom; using Azure.Core; namespace Azure.AI.FormRecognizer.Models diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.cs index aa15a49fa377..d764d45c6ec0 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Model_internal.cs @@ -6,6 +6,7 @@ #nullable disable using Azure.AI.FormRecognizer; +using Azure.AI.FormRecognizer.Custom; namespace Azure.AI.FormRecognizer.Models { diff --git 
a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.Serialization.cs index f6cd38448255..65829b316ec8 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.Serialization.cs @@ -8,7 +8,6 @@ using System.Collections.Generic; using System.Text.Json; using Azure.AI.FormRecognizer; -using Azure.AI.FormRecognizer.Models; using Azure.Core; namespace Azure.AI.FormRecognizer.Custom diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.cs index 45581471fbde..0da183cd0973 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/Models_internal.cs @@ -7,7 +7,6 @@ using System.Collections.Generic; using Azure.AI.FormRecognizer; -using Azure.AI.FormRecognizer.Models; namespace Azure.AI.FormRecognizer.Custom { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.Serialization.cs index c97b388d8bd6..a6cc2500560a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.Serialization.cs @@ -7,7 +7,7 @@ using System; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer { internal static class OperationStatusExtensions { diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.Serialization.cs 
b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.Serialization.cs deleted file mode 100644 index 1cd3accb1361..000000000000 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.Serialization.cs +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -// - -#nullable disable - -using System; - -namespace Azure.AI.FormRecognizer.Models -{ - internal static class TrainStatusExtensions - { - public static string ToSerialString(this TrainStatus value) => value switch - { - TrainStatus.Succeeded => "succeeded", - TrainStatus.PartiallySucceeded => "partiallySucceeded", - TrainStatus.Failed => "failed", - _ => throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown TrainStatus value.") - }; - - public static TrainStatus ToTrainStatus(this string value) - { - if (string.Equals(value, "succeeded", StringComparison.InvariantCultureIgnoreCase)) return TrainStatus.Succeeded; - if (string.Equals(value, "partiallySucceeded", StringComparison.InvariantCultureIgnoreCase)) return TrainStatus.PartiallySucceeded; - if (string.Equals(value, "failed", StringComparison.InvariantCultureIgnoreCase)) return TrainStatus.Failed; - throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown TrainStatus value."); - } - } -} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.cs deleted file mode 100644 index fbf75e098a14..000000000000 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainStatus.cs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -// - -#nullable disable - -namespace Azure.AI.FormRecognizer.Models -{ - /// Status of the training operation. - public enum TrainStatus - { - /// succeeded. 
- Succeeded, - /// partiallySucceeded. - PartiallySucceeded, - /// failed. - Failed - } -} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.Serialization.cs index 73d1bdba6873..16e0532320ae 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.Serialization.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.Serialization.cs @@ -38,7 +38,7 @@ internal static TrainingDocumentInfo DeserializeTrainingDocumentInfo(JsonElement } if (property.NameEquals("status")) { - result.Status = property.Value.GetString().ToTrainStatus(); + result.Status = property.Value.GetString().ToTrainingStatus(); continue; } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.cs index 6bc1347e3c9c..8a2136048fde 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingDocumentInfo.cs @@ -13,17 +13,12 @@ namespace Azure.AI.FormRecognizer.Custom /// Report for a custom model training document. public partial class TrainingDocumentInfo { - /// Initializes a new instance of TrainingDocumentInfo. - internal TrainingDocumentInfo() - { - } - /// Initializes a new instance of TrainingDocumentInfo. /// Training document name. /// Total number of pages trained. /// List of errors. /// Status of the training operation. 
- internal TrainingDocumentInfo(string documentName, int pageCount, IList errors, TrainStatus status) + internal TrainingDocumentInfo(string documentName, int pageCount, IList errors, TrainingStatus status) { DocumentName = documentName; PageCount = pageCount; @@ -36,6 +31,6 @@ internal TrainingDocumentInfo(string documentName, int pageCount, IList List of errors. public IList Errors { get; internal set; } = new List(); /// Status of the training operation. - public TrainStatus Status { get; internal set; } + public TrainingStatus Status { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingStatus.Serialization.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingStatus.Serialization.cs new file mode 100644 index 000000000000..f6434033b9df --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/TrainingStatus.Serialization.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// + +#nullable disable + +using System; + +namespace Azure.AI.FormRecognizer.Custom +{ + internal static class TrainingStatusExtensions + { + public static string ToSerialString(this TrainingStatus value) => value switch + { + TrainingStatus.Succeeded => "succeeded", + TrainingStatus.PartiallySucceeded => "partiallySucceeded", + TrainingStatus.Failed => "failed", + _ => throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown TrainingStatus value.") + }; + + public static TrainingStatus ToTrainingStatus(this string value) + { + if (string.Equals(value, "succeeded", StringComparison.InvariantCultureIgnoreCase)) return TrainingStatus.Succeeded; + if (string.Equals(value, "partiallySucceeded", StringComparison.InvariantCultureIgnoreCase)) return TrainingStatus.PartiallySucceeded; + if (string.Equals(value, "failed", StringComparison.InvariantCultureIgnoreCase)) return TrainingStatus.Failed; + throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown TrainingStatus value."); + } + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Operations/ServiceClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Operations/ServiceClient.cs index c26489e7bc73..de2c9d4765ac 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Operations/ServiceClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Operations/ServiceClient.cs @@ -10,6 +10,7 @@ using System.Threading; using System.Threading.Tasks; using Azure; +using Azure.AI.FormRecognizer.Custom; using Azure.AI.FormRecognizer.Models; using Azure.Core; using Azure.Core.Pipeline; diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelInfo_internal.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelInfo_internal.cs index 15c8bd44311c..373a5e5b3a0a 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelInfo_internal.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelInfo_internal.cs @@ -3,7 +3,7 @@ 
using Azure.Core; -namespace Azure.AI.FormRecognizer.Models +namespace Azure.AI.FormRecognizer.Custom { [CodeGenSchema("ModelInfo")] internal partial class ModelInfo_internal diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelStatus.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelStatus.cs new file mode 100644 index 000000000000..9a7fdc3b22d0 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ModelStatus.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + + +using System; +using System.Collections.Generic; +using System.Text; +using Azure.Core; + +namespace Azure.AI.FormRecognizer.Custom +{ + + /// + /// + [CodeGenSchema("ModelStatus")] +#pragma warning disable CA1717 // Only FlagsAttribute enums should have plural names + public enum ModelStatus +#pragma warning restore CA1717 // Only FlagsAttribute enums should have plural names + { + /// + /// + [CodeGenSchemaMember("creating")] + Training, + + /// + /// + Ready, + + /// + /// + Invalid + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/OperationStatus.cs similarity index 53% rename from sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.cs rename to sdk/formrecognizer/Azure.AI.FormRecognizer/src/OperationStatus.cs index 089e980475ff..5d2321d10379 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/Generated/Models/OperationStatus.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/OperationStatus.cs @@ -1,14 +1,15 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-// -#nullable disable - -namespace Azure.AI.FormRecognizer.Models +using System; +using System.Collections.Generic; +using System.Text; +using Azure.Core; +namespace Azure.AI.FormRecognizer { - /// Status of the queued operation. - public enum OperationStatus + [CodeGenSchema("OperationStatus")] + internal enum OperationStatus { /// notStarted. NotStarted, diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedLine.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedLine.cs index 469a7be3666c..7f3de05a3757 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedLine.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedLine.cs @@ -5,22 +5,22 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class RawExtractedLine : RawExtractedItem { internal RawExtractedLine(TextLine_internal textLine) { Text = textLine.Text; BoundingBox = new BoundingBox(textLine.BoundingBox); - Language = textLine.Language.ToString(); Words = ConvertWords(textLine.Words); } - /// Language code. - //public Language_internal? Language { get; internal set; } - public string Language { get; internal set; } /// List of words in the text line. 
public IReadOnlyList Words { get; internal set; } + /// + /// public static implicit operator string(RawExtractedLine line) => line.Text; private static IReadOnlyList ConvertWords(ICollection textWords) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedPage.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedPage.cs index 818487294f52..ad3768051e6f 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedPage.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedPage.cs @@ -5,6 +5,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class RawExtractedPage { internal RawExtractedPage(ReadResult_internal readResult) @@ -14,7 +16,6 @@ internal RawExtractedPage(ReadResult_internal readResult) Width = readResult.Width; Height = readResult.Height; Unit = readResult.Unit; - Language = readResult.Language.ToString(); if (readResult.Lines != null) { @@ -33,11 +34,6 @@ internal RawExtractedPage(ReadResult_internal readResult) /// The unit used by the width, height and boundingBox properties. For images, the unit is "pixel". For PDF, the unit is "inch". public LengthUnit Unit { get; set; } - // TODO: Make language nullable? - // https://github.com/Azure/azure-sdk-for-net/issues/10361 - - /// Language code. - public string Language { get; set; } /// When includeTextDetails is set to true, a list of recognized text lines. The maximum number of lines returned is 300 per page. The lines are sorted top to bottom, left to right, although in certain cases proximity is treated with higher priority. As the sorting order depends on the detected text, it may change across images and OCR version updates. Thus, business logic should be built upon the actual line location instead of order. 
public ICollection Lines { get; set; } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedText.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedText.cs index 9408202ddf81..f95c1c044a32 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedText.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedText.cs @@ -3,11 +3,19 @@ namespace Azure.AI.FormRecognizer.Models { + + /// + /// public class RawExtractedItem { internal RawExtractedItem() { } + /// + /// public BoundingBox BoundingBox { get; internal set; } + + /// + /// public string Text { get; internal set; } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedWord.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedWord.cs index b8d55d52f6bb..1f53df644309 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedWord.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/RawExtractedWord.cs @@ -3,6 +3,8 @@ namespace Azure.AI.FormRecognizer.Models { + /// + /// public class RawExtractedWord : RawExtractedItem { internal RawExtractedWord(TextWord_internal textWord) @@ -11,9 +13,14 @@ internal RawExtractedWord(TextWord_internal textWord) Confidence = textWord.Confidence; Text = textWord.Text; } + /// + /// public float? Confidence { get; } + /// + /// + public static implicit operator string(RawExtractedWord word) => word.Text; } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptClient.cs index 88e1ae27c991..a7d5017c7c43 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ReceiptClient.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License. 
using System; +using System.Collections.Generic; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -11,6 +12,9 @@ namespace Azure.AI.FormRecognizer { + /// + /// The client to use to connect with the Form Recognizer Azure Cognitive Service, to extract values from receipts. + /// public class ReceiptClient { private readonly ClientDiagnostics _diagnostics; @@ -19,6 +23,8 @@ public class ReceiptClient internal const string ReceiptsRoute = "/prebuilt/receipt"; + /// + /// protected ReceiptClient() { } @@ -44,86 +50,69 @@ public ReceiptClient(Uri endpoint, FormRecognizerApiKeyCredential credential, Fo _operations = new ServiceClient(_diagnostics, _pipeline, endpoint.ToString()); } - public virtual Response ExtractReceipt(Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts values from one or more receipts. + /// + /// The stream containing one or more receipts to extract values from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A <see cref="CancellationToken"/> controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain the extracted receipt.
+ public virtual Operation> StartExtractReceipts(Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 ResponseWithHeaders response = _operations.AnalyzeReceiptAsync(includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken); - var operation = new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); - - ValueTask> task = operation.WaitForCompletionAsync(); - - // TODO: this feels very bad. Better way? - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - task.AsTask().Wait(); - - if (!operation.HasValue) - { - throw new RequestFailedException("Failed to retrieve response from ExtractReceipt Long-Running Operation"); - } - - // TODO: this is also a mess. Reconcile these together. - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - return Response.FromValue(operation.Value, task.AsTask().Result.GetRawResponse()); + return new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); } - public virtual Response ExtractReceipt(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts values from one or more receipts. + /// + /// The absolute URI of the remote file to extract values from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A <see cref="CancellationToken"/> controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain the extracted receipt.
+ public virtual Operation> StartExtractReceipts(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; ResponseWithHeaders response = _operations.RestClient.AnalyzeReceiptAsync(includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken); - var operation = new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); - - ValueTask> task = operation.WaitForCompletionAsync(); - - // TODO: this feels very bad. Better way? - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - task.AsTask().Wait(); - - if (!operation.HasValue) - { - throw new RequestFailedException("Failed to retrieve response from ExtractReceipt Long-Running Operation"); - } - - // TODO: this is also a mess. Reconcile these together. - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - return Response.FromValue(operation.Value, task.AsTask().Result.GetRawResponse()); + return new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); } - public virtual async Task> ExtractReceiptAsync(Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + + /// + /// Extracts values from one or more receipts. + /// + /// The stream containing one or more receipts to extract values from. + /// The content type of the input file. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A <see cref="CancellationToken"/> controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain the extracted receipt.
+ public virtual async Task>> StartExtractReceiptsAsync(Stream stream, ContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { // TODO: automate content-type detection // https://github.com/Azure/azure-sdk-for-net/issues/10329 - ResponseWithHeaders response = _operations.AnalyzeReceiptAsync(includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken); - var operation = new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); - - var operationResponse = await operation.WaitForCompletionAsync().ConfigureAwait(false); - - if (!operation.HasValue) - { - throw new RequestFailedException("Failed to retrieve response from ExtractReceipt Long-Running Operation"); - } - - // TODO: Is this the best way? - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - return Response.FromValue(operation.Value, operationResponse.GetRawResponse()); + ResponseWithHeaders response = await _operations.RestClient.AnalyzeReceiptAsyncAsync(includeTextDetails: includeRawPageExtractions, contentType, stream, cancellationToken).ConfigureAwait(false); + return new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); } - public virtual async Task> ExtractReceiptAsync(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) + /// + /// Extracts values from one or more receipts. + /// + /// The absolute URI of the remote file to extract values from. + /// Whether or not to include raw page extractions in addition to layout elements. + /// A <see cref="CancellationToken"/> controlling the request lifetime. + /// An Operation<IReadOnlyList<ExtractedLayoutPage>> to wait on this long-running operation. Its Operation<IReadOnlyList<ExtractedLayoutPage>>.Value upon successful + /// completion will contain the extracted receipt.
+ public virtual async Task>> StartExtractReceiptsAsync(Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default) { SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.ToString() }; - ResponseWithHeaders response = _operations.RestClient.AnalyzeReceiptAsync(includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken); - var operation = new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); - - var operationResponse = await operation.WaitForCompletionAsync().ConfigureAwait(false); - - if (!operation.HasValue) - { - throw new RequestFailedException("Failed to retrieve response from ExtractReceipt Long-Running Operation"); - } - - // TODO: Is this the best way? - // https://github.com/Azure/azure-sdk-for-net/issues/10391 - return Response.FromValue(operation.Value, operationResponse.GetRawResponse()); + ResponseWithHeaders response = await _operations.RestClient.AnalyzeReceiptAsyncAsync(includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken).ConfigureAwait(false); + return new ExtractReceiptOperation(_operations, response.Headers.OperationLocation); } } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceClient.cs index 7ba35c46e344..59c9f00cfc44 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceClient.cs @@ -17,7 +17,7 @@ internal partial class ServiceClient { #region Custom - internal ResponseWithHeaders AnalyzeWithCustomModel(Guid modelId, bool? includeTextDetails, Stream stream, FormContentType contentType, CancellationToken cancellationToken = default) + internal ResponseWithHeaders AnalyzeWithCustomModel(Guid modelId, bool? 
includeTextDetails, Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { using var scope = clientDiagnostics.CreateScope("AllOperations.AnalyzeWithCustomModel"); scope.Start(); @@ -42,7 +42,7 @@ internal ResponseWithHeaders AnalyzeWithCustomMod } } - internal async ValueTask> AnalyzeWithCustomModelAsync(Guid modelId, bool? includeTextDetails, Stream stream, FormContentType contentType, CancellationToken cancellationToken = default) + internal async ValueTask> AnalyzeWithCustomModelAsync(Guid modelId, bool? includeTextDetails, Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { using var scope = clientDiagnostics.CreateScope("AllOperations.AnalyzeWithCustomModel"); scope.Start(); @@ -107,7 +107,7 @@ async Task> NextPageFunc(string nextLink, int? pageSizeHin #region Receipt - public ResponseWithHeaders AnalyzeReceiptAsync(bool? includeTextDetails, Stream stream, FormContentType contentType, CancellationToken cancellationToken = default) + public ResponseWithHeaders AnalyzeReceiptAsync(bool? includeTextDetails, Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { using var scope = clientDiagnostics.CreateScope("AllOperations.AnalyzeReceiptAsync"); @@ -138,7 +138,7 @@ public ResponseWithHeaders AnalyzeReceiptAsync(bool? // TODO: Is it ok that includeTextDetails is missing here? Or is it an issue with the Swagger? // This is missing from the swagger -- following up with service team. 
- public async ValueTask> AnalyzeLayoutAsyncAsync(Stream stream, FormContentType contentType, CancellationToken cancellationToken = default) + public async ValueTask> AnalyzeLayoutAsyncAsync(Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { using var scope = clientDiagnostics.CreateScope("AllOperations.AnalyzeLayoutAsync"); scope.Start(); @@ -162,7 +162,7 @@ public async ValueTask> AnalyzeLa } } - public ResponseWithHeaders AnalyzeLayoutAsync(Stream stream, FormContentType contentType, CancellationToken cancellationToken = default) + public ResponseWithHeaders AnalyzeLayoutAsync(Stream stream, ContentType contentType, CancellationToken cancellationToken = default) { using var scope = clientDiagnostics.CreateScope("AllOperations.AnalyzeLayoutAsync"); scope.Start(); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceRestClient.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceRestClient.cs index 09f2fcfd3198..88efddd43063 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceRestClient.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/ServiceRestClient.cs @@ -12,19 +12,19 @@ namespace Azure.AI.FormRecognizer { internal partial class ServiceRestClient { - internal static string GetContentTypeString(FormContentType contentType) + internal static string GetContentTypeString(ContentType contentType) { return contentType switch { - FormContentType.Pdf => "application/pdf", - FormContentType.Png => "image/png", - FormContentType.Jpeg => "image/jpeg", - FormContentType.Tiff => "image/tiff", + ContentType.Pdf => "application/pdf", + ContentType.Png => "image/png", + ContentType.Jpeg => "image/jpeg", + ContentType.Tiff => "image/tiff", _ => throw new NotSupportedException($"The content type {contentType} is not supported."), }; } - internal HttpMessage CreateAnalyzeWithCustomModelRequest(Guid modelId, bool? 
includeTextDetails, Stream stream, FormContentType contentType) + internal HttpMessage CreateAnalyzeWithCustomModelRequest(Guid modelId, bool? includeTextDetails, Stream stream, ContentType contentType) { var message = pipeline.CreateMessage(); var request = message.Request; @@ -48,7 +48,7 @@ internal HttpMessage CreateAnalyzeWithCustomModelRequest(Guid modelId, bool? inc return message; } - internal HttpMessage CreateAnalyzeReceiptAsyncRequest(bool? includeTextDetails, Stream stream, FormContentType contentType) + internal HttpMessage CreateAnalyzeReceiptAsyncRequest(bool? includeTextDetails, Stream stream, ContentType contentType) { var message = pipeline.CreateMessage(); var request = message.Request; @@ -73,7 +73,7 @@ internal HttpMessage CreateAnalyzeReceiptAsyncRequest(bool? includeTextDetails, // TODO: Is it ok that includeTextDetails is missing here? Or is it an issue with the Swagger? // This is missing from the swagger -- following up with service team. - internal HttpMessage CreateAnalyzeLayoutAsyncRequest(Stream stream, FormContentType contentType) + internal HttpMessage CreateAnalyzeLayoutAsyncRequest(Stream stream, ContentType contentType) { var message = pipeline.CreateMessage(); var request = message.Request; diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/SubscriptionProperties.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/SubscriptionProperties.cs index 83bfda5ea0d4..f5e55c6a96ed 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/SubscriptionProperties.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/SubscriptionProperties.cs @@ -5,6 +5,8 @@ namespace Azure.AI.FormRecognizer.Custom { + /// + /// public class SubscriptionProperties { internal SubscriptionProperties(ModelsSummary_internal summary) diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingDocumentInfo.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingDocumentInfo.cs index 0fef809b9186..cad1dda5ca20 100644 --- 
a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingDocumentInfo.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingDocumentInfo.cs @@ -5,9 +5,17 @@ namespace Azure.AI.FormRecognizer.Custom { + /// + /// [CodeGenSchema("TrainingDocumentInfo")] public partial class TrainingDocumentInfo { + internal TrainingDocumentInfo() + { + } + + /// + /// [CodeGenSchemaMember("pages")] public int PageCount { get; set; } } diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingInfo.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingInfo.cs index 0369dec9d27c..756a38c19b73 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingInfo.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingInfo.cs @@ -6,6 +6,8 @@ namespace Azure.AI.FormRecognizer.Custom { + /// + /// public class TrainingInfo { private TrainResult_internal trainResult; diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingOperation.cs index d756aee62c67..909b23276026 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingOperation.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingOperation.cs @@ -79,7 +79,7 @@ public override Response UpdateStatus(CancellationToken cancellationToken = defa public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); - private async Task UpdateStatusAsync(bool async, CancellationToken cancellationToken) + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) { if (!_hasCompleted) { @@ -90,7 +90,7 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can // TODO: Handle correctly according to returned status code // https://github.com/Azure/azure-sdk-for-net/issues/10386 - if (update.Value.ModelInfo.Status != ModelStatus.Creating) + if 
(update.Value.ModelInfo.Status != ModelStatus.Training) { _hasCompleted = true; _value = new CustomModel(update.Value); diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingStatus.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingStatus.cs new file mode 100644 index 000000000000..6edd1ccd2583 --- /dev/null +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingStatus.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Azure.Core; + +namespace Azure.AI.FormRecognizer.Custom +{ + /// + /// + [CodeGenSchema("TrainStatus")] +#pragma warning disable CA1717 // Only FlagsAttribute enums should have plural names + public enum TrainingStatus +#pragma warning restore CA1717 // Only FlagsAttribute enums should have plural names + { + /// + /// + Succeeded, + + /// + /// + PartiallySucceeded, + + /// + /// + Failed, + } +} diff --git a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingWithLabelsOperation.cs b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingWithLabelsOperation.cs index fb2f480a0747..061f54a5fe8e 100644 --- a/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingWithLabelsOperation.cs +++ b/sdk/formrecognizer/Azure.AI.FormRecognizer/src/TrainingWithLabelsOperation.cs @@ -76,7 +76,7 @@ public override Response UpdateStatus(CancellationToken cancellationToken = defa public override async ValueTask UpdateStatusAsync(CancellationToken cancellationToken = default) => await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false); - private async Task UpdateStatusAsync(bool async, CancellationToken cancellationToken) + private async ValueTask UpdateStatusAsync(bool async, CancellationToken cancellationToken) { if (!_hasCompleted) { @@ -87,7 +87,7 @@ private async Task UpdateStatusAsync(bool async, CancellationToken can // TODO: Handle correctly according to returned status code // https://github.com/Azure/azure-sdk-for-net/issues/10386 - if 
(update.Value.ModelInfo.Status != ModelStatus.Creating) + if (update.Value.ModelInfo.Status != ModelStatus.Training) { _hasCompleted = true; _value = new CustomLabeledModel(update.Value);