Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
cebfd1e
work in progress
annelo-msft Mar 12, 2020
fe1ee4f
interim commit
annelo-msft Mar 12, 2020
8f2bef2
update api
annelo-msft Mar 12, 2020
fc8e7ea
interim split
annelo-msft Mar 12, 2020
0d75a39
handle multiple receipts
annelo-msft Mar 12, 2020
948520c
api update and bug fix
annelo-msft Mar 12, 2020
0978cbf
api tweaks
annelo-msft Mar 12, 2020
b85cce7
update api
annelo-msft Mar 12, 2020
59c3a1e
Merge remote-tracking branch 'upstream/master' into fr-split-supervis…
annelo-msft Mar 12, 2020
09ccbf5
Merge new CodeGen updates.
annelo-msft Mar 12, 2020
d66c034
Merge remote-tracking branch 'upstream/master' into fr-split-supervis…
annelo-msft Mar 12, 2020
87bf81d
regen
annelo-msft Mar 12, 2020
9f587c8
api update
annelo-msft Mar 13, 2020
66c81fd
api tweaks
annelo-msft Mar 13, 2020
8e116fa
move to custom namespace
annelo-msft Mar 13, 2020
167e715
client docstrings
annelo-msft Mar 13, 2020
a3d105b
README
annelo-msft Mar 15, 2020
1f8e764
README nits
annelo-msft Mar 15, 2020
805b55f
README nits
annelo-msft Mar 15, 2020
432ea3c
README nit
annelo-msft Mar 16, 2020
fcdbe1d
README nit
annelo-msft Mar 16, 2020
a9f228d
some docstrings
annelo-msft Mar 16, 2020
ee7533f
enable docstrings
annelo-msft Mar 16, 2020
aca397a
docstrings
annelo-msft Mar 16, 2020
63645db
fix readme
annelo-msft Mar 16, 2020
0cea404
another README fix
annelo-msft Mar 16, 2020
2bc47bf
docstring updates
annelo-msft Mar 16, 2020
875d4d1
UX study updates
annelo-msft Mar 17, 2020
46e08ed
updates from ux study
annelo-msft Mar 18, 2020
1831700
api update
annelo-msft Mar 18, 2020
2fbf2a5
learned form-> page
annelo-msft Mar 18, 2020
70429dd
api updates
annelo-msft Mar 19, 2020
dae7f5a
Merge remote-tracking branch 'upstream/master' into fr-split-supervis…
annelo-msft Mar 19, 2020
2998efc
updates for rebase
annelo-msft Mar 19, 2020
f099fbf
Merge remote-tracking branch 'upstream/master' into fr-split-supervis…
annelo-msft Mar 19, 2020
6408aa1
pr feedback
annelo-msft Mar 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 47 additions & 8 deletions sdk/formrecognizer/Azure.AI.FormRecognizer/src/CustomFormClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Licensed under the MIT License.

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -117,35 +119,72 @@ public virtual async Task<Operation<CustomLabeledModel>> StartTrainingWithLabels
#endregion Training

#region Analyze
public virtual Operation<ExtractedForm> StartExtractForm(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)

#region Unsupervised
/// <summary>
/// Submits the form contained in <paramref name="stream"/> for analysis by the custom model
/// <paramref name="modelId"/> and returns an operation that tracks the request.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="stream">Stream holding the form to analyze.</param>
/// <param name="contentType">Content type of the data in <paramref name="stream"/>.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
public virtual Operation<IReadOnlyList<ExtractedPage>> StartExtractPages(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    Guid modelGuid = new Guid(modelId);

    // The caller states the content type until detection is automated:
    // https://github.com/Azure/azure-sdk-for-net/issues/10329
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> analyzeResponse = _operations.AnalyzeWithCustomModel(modelGuid, includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken);

    return new ExtractPagesOperation(_operations, modelId, analyzeResponse.Headers.OperationLocation);
}

/// <summary>
/// Submits the form located at <paramref name="uri"/> for analysis by the custom model
/// <paramref name="modelId"/> and returns an operation that tracks the request.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="uri">Absolute location of the form to analyze.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="uri"/> is a relative URI.</exception>
public virtual Operation<IReadOnlyList<ExtractedPage>> StartExtractPages(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // AbsoluteUri keeps percent-encoding intact; Uri.ToString() returns the unescaped form,
    // which can corrupt encoded query strings before they reach the service.
    SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.AbsoluteUri };
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = _operations.RestClient.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken);
    return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation);
}

/// <summary>
/// Asynchronously submits the form contained in <paramref name="stream"/> for analysis by the
/// custom model <paramref name="modelId"/> and returns an operation that tracks the request.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="stream">Stream holding the form to analyze.</param>
/// <param name="contentType">Content type of the data in <paramref name="stream"/>.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
public virtual async Task<Operation<IReadOnlyList<ExtractedPage>>> StartExtractPagesAsync(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    Guid modelGuid = new Guid(modelId);

    // The caller states the content type until detection is automated:
    // https://github.com/Azure/azure-sdk-for-net/issues/10329
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> analyzeResponse = await _operations
        .AnalyzeWithCustomModelAsync(modelGuid, includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken)
        .ConfigureAwait(false);

    return new ExtractPagesOperation(_operations, modelId, analyzeResponse.Headers.OperationLocation);
}

/// <summary>
/// Asynchronously submits the form located at <paramref name="uri"/> for analysis by the custom
/// model <paramref name="modelId"/> and returns an operation that tracks the request.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="uri">Absolute location of the form to analyze.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="uri"/> is a relative URI.</exception>
public virtual async Task<Operation<IReadOnlyList<ExtractedPage>>> StartExtractPagesAsync(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // AbsoluteUri keeps percent-encoding intact; Uri.ToString() returns the unescaped form,
    // which can corrupt encoded query strings before they reach the service.
    SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.AbsoluteUri };
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = await _operations.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken).ConfigureAwait(false);
    return new ExtractPagesOperation(_operations, modelId, response.Headers.OperationLocation);
}
#endregion

#region Supervised
/// <summary>
/// Submits the form contained in <paramref name="stream"/> for analysis by the custom model
/// <paramref name="modelId"/> and returns an operation that yields labeled forms.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="stream">Stream holding the form to analyze.</param>
/// <param name="contentType">Content type of the data in <paramref name="stream"/>.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
public virtual Operation<IReadOnlyList<ExtractedLabeledForm>> StartExtractLabeledForms(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    // TODO: automate content-type detection
    // https://github.com/Azure/azure-sdk-for-net/issues/10329
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = _operations.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken);

    // Only the labeled-form operation matches this method's return type.
    return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation);
}

/// <summary>
/// Submits the form located at <paramref name="uri"/> for analysis by the custom model
/// <paramref name="modelId"/> and returns an operation that yields labeled forms.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="uri">Absolute location of the form to analyze.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="uri"/> is a relative URI.</exception>
public virtual Operation<IReadOnlyList<ExtractedLabeledForm>> StartExtractLabeledForms(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // AbsoluteUri keeps percent-encoding intact; Uri.ToString() returns the unescaped form,
    // which can corrupt encoded query strings before they reach the service.
    SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.AbsoluteUri };
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = _operations.RestClient.AnalyzeWithCustomModel(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken);
    return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation);
}

/// <summary>
/// Asynchronously submits the form contained in <paramref name="stream"/> for analysis by the
/// custom model <paramref name="modelId"/> and returns an operation that yields labeled forms.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="stream">Stream holding the form to analyze.</param>
/// <param name="contentType">Content type of the data in <paramref name="stream"/>.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
public virtual async Task<Operation<IReadOnlyList<ExtractedLabeledForm>>> StartExtractLabeledFormsAsync(string modelId, Stream stream, FormContentType contentType, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    // TODO: automate content-type detection
    // https://github.com/Azure/azure-sdk-for-net/issues/10329
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = await _operations.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, stream, contentType, cancellationToken).ConfigureAwait(false);

    // Only the labeled-form operation matches this method's return type.
    return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation);
}

/// <summary>
/// Asynchronously submits the form located at <paramref name="uri"/> for analysis by the custom
/// model <paramref name="modelId"/> and returns an operation that yields labeled forms.
/// </summary>
/// <param name="modelId">Identifier of the custom model; it is parsed with <see cref="Guid(string)"/>.</param>
/// <param name="uri">Absolute location of the form to analyze.</param>
/// <param name="includeRawPageExtractions">Forwarded to the service call as <c>includeTextDetails</c>.</param>
/// <param name="cancellationToken">Token forwarded to the service call.</param>
/// <returns>An operation built from the Operation-Location header of the service response.</returns>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="uri"/> is a relative URI.</exception>
public virtual async Task<Operation<IReadOnlyList<ExtractedLabeledForm>>> StartExtractLabeledFormsAsync(string modelId, Uri uri, bool includeRawPageExtractions = false, CancellationToken cancellationToken = default)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // AbsoluteUri keeps percent-encoding intact; Uri.ToString() returns the unescaped form,
    // which can corrupt encoded query strings before they reach the service.
    SourcePath_internal sourcePath = new SourcePath_internal() { Source = uri.AbsoluteUri };
    ResponseWithHeaders<AnalyzeWithCustomModelHeaders> response = await _operations.RestClient.AnalyzeWithCustomModelAsync(new Guid(modelId), includeTextDetails: includeRawPageExtractions, sourcePath, cancellationToken).ConfigureAwait(false);
    return new ExtractLabeledFormOperation(_operations, modelId, response.Headers.OperationLocation);
}
#endregion

// TODO: Add methods for labeled models

#endregion Analyze

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Azure.Core;
using Azure.Core.Pipeline;

namespace Azure.AI.FormRecognizer.Models
{
/// <summary>
/// Long-running operation that polls the analyze result of a custom model and, once the service
/// reports a terminal status, exposes the result as a list of <see cref="ExtractedLabeledForm"/>.
/// </summary>
internal class ExtractLabeledFormOperation : Operation<IReadOnlyList<ExtractedLabeledForm>>
{
    private Response _response;
    private IReadOnlyList<ExtractedLabeledForm> _value;
    private bool _hasCompleted;

    private readonly string _modelId;
    private readonly ServiceClient _operations;

    /// <inheritdoc/>
    public override string Id { get; }

    /// <inheritdoc/>
    public override IReadOnlyList<ExtractedLabeledForm> Value => OperationHelpers.GetValue(ref _value);

    /// <inheritdoc/>
    public override bool HasCompleted => _hasCompleted;

    /// <inheritdoc/>
    public override bool HasValue => _value != null;

    /// <inheritdoc/>
    public override Response GetRawResponse() => _response;

    /// <inheritdoc/>
    public override ValueTask<Response<IReadOnlyList<ExtractedLabeledForm>>> WaitForCompletionAsync(CancellationToken cancellationToken = default) =>
        this.DefaultWaitForCompletionAsync(cancellationToken);

    /// <inheritdoc/>
    public override ValueTask<Response<IReadOnlyList<ExtractedLabeledForm>>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) =>
        this.DefaultWaitForCompletionAsync(pollingInterval, cancellationToken);

    /// <summary>
    /// Creates an operation for an analyze request that has already been submitted.
    /// </summary>
    /// <param name="operations">Client used to poll for the analyze result.</param>
    /// <param name="modelId">Identifier of the model the request was submitted to; parsed as a <see cref="Guid"/> on each poll.</param>
    /// <param name="operationLocation">Operation-Location value returned by the service; its last '/'-separated segment becomes <see cref="Id"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="operationLocation"/> is null.</exception>
    internal ExtractLabeledFormOperation(ServiceClient operations, string modelId, string operationLocation)
    {
        if (operationLocation == null)
        {
            throw new ArgumentNullException(nameof(operationLocation));
        }

        _operations = operations;
        _modelId = modelId;

        // TODO: Add validation here
        // https://github.com/Azure/azure-sdk-for-net/issues/10385
        Id = operationLocation.Split('/').Last();
    }

    /// <inheritdoc/>
    public override Response UpdateStatus(CancellationToken cancellationToken = default) =>
        UpdateStatusAsync(false, cancellationToken).EnsureCompleted();

    /// <inheritdoc/>
    public override async ValueTask<Response> UpdateStatusAsync(CancellationToken cancellationToken = default) =>
        await UpdateStatusAsync(true, cancellationToken).ConfigureAwait(false);

    /// <summary>
    /// Polls the service once unless the operation has already completed, and returns the most
    /// recent raw response.
    /// </summary>
    /// <param name="async">True to call the asynchronous service method, false to call the synchronous one.</param>
    /// <param name="cancellationToken">Token forwarded to the service call.</param>
    private async Task<Response> UpdateStatusAsync(bool async, CancellationToken cancellationToken)
    {
        if (!_hasCompleted)
        {
            Response<AnalyzeOperationResult_internal> update = async
                ? await _operations.GetAnalyzeFormResultAsync(new Guid(_modelId), new Guid(Id), cancellationToken).ConfigureAwait(false)
                : _operations.GetAnalyzeFormResult(new Guid(_modelId), new Guid(Id), cancellationToken);

            // Store the raw response before any conversion so GetRawResponse() reflects this poll
            // even if building the value throws.
            _response = update.GetRawResponse();

            // TODO: Handle correctly according to returned status code
            // https://github.com/Azure/azure-sdk-for-net/issues/10386

            if (update.Value.Status == OperationStatus.Succeeded || update.Value.Status == OperationStatus.Failed)
            {
                _hasCompleted = true;

                // NOTE(review): a Failed status presumably carries no analyze result - confirm against
                // the service contract. Without a result the operation completes with HasValue == false
                // instead of throwing NullReferenceException here.
                var analyzeResult = update.Value.AnalyzeResult;
                if (analyzeResult != null)
                {
                    // TODO: Consider what we'll do when there are multiple DocumentResults
                    // https://github.com/Azure/azure-sdk-for-net/issues/10387
                    _value = ConvertToExtractedLabeledForms(analyzeResult.DocumentResults, analyzeResult.PageResults, analyzeResult.ReadResults);
                }
            }
        }

        return GetRawResponse();
    }

    /// <summary>
    /// Builds one <see cref="ExtractedLabeledForm"/> per document result, pairing document
    /// <c>i</c> with page result <c>i</c> and read result <c>i</c>.
    /// </summary>
    /// <param name="documentResults">Document results from the service; null or empty yields an empty list.</param>
    /// <param name="pageResults">Page results, indexed in step with <paramref name="documentResults"/>.</param>
    /// <param name="readResults">Read results, indexed in step with <paramref name="documentResults"/>.</param>
    /// <returns>The converted forms, in document order.</returns>
    private static IReadOnlyList<ExtractedLabeledForm> ConvertToExtractedLabeledForms(IList<DocumentResult_internal> documentResults, IList<PageResult_internal> pageResults, IList<ReadResult_internal> readResults)
    {
        if (documentResults == null || documentResults.Count == 0)
        {
            return Array.Empty<ExtractedLabeledForm>();
        }

        List<ExtractedLabeledForm> forms = new List<ExtractedLabeledForm>(documentResults.Count);
        for (int i = 0; i < documentResults.Count; i++)
        {
            // TODO: Determine how the pages in pageResults/readResults map to each document.
            // Pairing by document index is presumably only right when every document is a single
            // page, and throws if there are fewer page/read results than documents - confirm.
            forms.Add(new ExtractedLabeledForm(documentResults[i], pageResults[i], readResults[i]));
        }
        return forms;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
Expand All @@ -12,18 +13,18 @@ namespace Azure.AI.FormRecognizer.Models
{
/// <summary>
/// </summary>
internal class ExtractFormOperation : Operation<ExtractedForm>
internal class ExtractPagesOperation : Operation<IReadOnlyList<ExtractedPage>>
{
private Response _response;
private ExtractedForm _value;
private IReadOnlyList<ExtractedPage> _value;
private bool _hasCompleted;

private readonly string _modelId;
private readonly ServiceClient _operations;

public override string Id { get; }

public override ExtractedForm Value => OperationHelpers.GetValue(ref _value);
public override IReadOnlyList<ExtractedPage> Value => OperationHelpers.GetValue(ref _value);

public override bool HasCompleted => _hasCompleted;

Expand All @@ -33,19 +34,19 @@ internal class ExtractFormOperation : Operation<ExtractedForm>
public override Response GetRawResponse() => _response;

/// <inheritdoc/>
public override ValueTask<Response<ExtractedForm>> WaitForCompletionAsync(CancellationToken cancellationToken = default) =>
public override ValueTask<Response<IReadOnlyList<ExtractedPage>>> WaitForCompletionAsync(CancellationToken cancellationToken = default) =>
this.DefaultWaitForCompletionAsync(cancellationToken);

/// <inheritdoc/>
public override ValueTask<Response<ExtractedForm>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) =>
public override ValueTask<Response<IReadOnlyList<ExtractedPage>>> WaitForCompletionAsync(TimeSpan pollingInterval, CancellationToken cancellationToken = default) =>
this.DefaultWaitForCompletionAsync(pollingInterval, cancellationToken);

/// <summary>
/// </summary>
/// <param name="operations"></param>
/// <param name="modelId"></param>
/// <param name="operationLocation"></param>
internal ExtractFormOperation(ServiceClient operations, string modelId, string operationLocation)
internal ExtractPagesOperation(ServiceClient operations, string modelId, string operationLocation)
{
_operations = operations;
_modelId = modelId;
Expand Down Expand Up @@ -78,27 +79,23 @@ private async Task<Response> UpdateStatusAsync(bool async, CancellationToken can
if (update.Value.Status == OperationStatus.Succeeded || update.Value.Status == OperationStatus.Failed)
{
_hasCompleted = true;

// TODO: Move this logic into ExtractedForm? It's a bit convoluted right now.
// Determine if the model was supervised or unsupervised
if (update.Value.AnalyzeResult.DocumentResults?.Count == 0)
{
// Unsupervised
_value = new ExtractedForm(update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults);
}
else
{
// TODO: Consider what we'll do when there are multiple DocumentResults
// https://github.com/Azure/azure-sdk-for-net/issues/10387
// Supervised
_value = new ExtractedForm(update.Value.AnalyzeResult.DocumentResults.First(), update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults);
}
_value = ConvertToExtractedPages(update.Value.AnalyzeResult.PageResults, update.Value.AnalyzeResult.ReadResults);
}

_response = update.GetRawResponse();
}

return GetRawResponse();
}

/// <summary>
/// Builds one <see cref="ExtractedPage"/> per page result, pairing page result <c>i</c> with
/// read result <c>i</c>.
/// </summary>
/// <param name="pageResults">Page results; their count determines how many pages are produced.</param>
/// <param name="readResults">Read results, indexed in step with <paramref name="pageResults"/>.</param>
/// <returns>The converted pages, in page-result order.</returns>
private static IReadOnlyList<ExtractedPage> ConvertToExtractedPages(IList<PageResult_internal> pageResults, IList<ReadResult_internal> readResults)
{
    return Enumerable
        .Range(0, pageResults.Count)
        .Select(pageIndex => new ExtractedPage(pageResults[pageIndex], readResults[pageIndex]))
        .ToList();
}
}
}
Loading