Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This version of the SDK defaults to the latest supported API version, which curr

### Bugs Fixed
- Restarting a long-running operation from a saved state is now supported for the `begin_analyze_actions` and `begin_recognize_healthcare_entities` methods.
- In the event of an action-level error, available partial results are now returned for any successful actions in `begin_analyze_actions`.

### Other Changes
- Package requires [azure-core](https://pypi.org/project/azure-core/) version 1.16.0 or greater
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1751,6 +1751,18 @@ class _AnalyzeActionsType(str, Enum):
MULTI_CATEGORY_CLASSIFY = "multi_category_classify"


class ActionPointerKind(str, Enum):
    """Task collection names that may appear in an action error pointer.

    Every member's value is a camelCase task collection name; members are
    also plain strings because the enum mixes in ``str``.
    """

    # Built-in text analysis actions.
    RECOGNIZE_ENTITIES = "entityRecognitionTasks"
    RECOGNIZE_PII_ENTITIES = "entityRecognitionPiiTasks"
    EXTRACT_KEY_PHRASES = "keyPhraseExtractionTasks"
    RECOGNIZE_LINKED_ENTITIES = "entityLinkingTasks"
    ANALYZE_SENTIMENT = "sentimentAnalysisTasks"
    EXTRACT_SUMMARY = "extractiveSummarizationTasks"

    # Custom model actions.
    RECOGNIZE_CUSTOM_ENTITIES = "customEntityRecognitionTasks"
    SINGLE_CATEGORY_CLASSIFY = "customSingleClassificationTasks"
    MULTI_CATEGORY_CLASSIFY = "customMultiClassificationTasks"


class RecognizeEntitiesAction(DictMixin):
"""RecognizeEntitiesAction encapsulates the parameters for starting a long-running Entities Recognition operation.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
RecognizeCustomEntitiesResult,
SingleCategoryClassifyResult,
MultiCategoryClassifyResult,
ActionPointerKind
)


Expand Down Expand Up @@ -340,27 +341,89 @@ def _get_property_name_from_task_type(task_type): # pylint: disable=too-many-re
return "key_phrase_extraction_tasks"


def _get_good_result(task, doc_id_order, response_headers, returned_tasks_object):
def get_task_from_pointer(task_type):
    """Translate an action-pointer task kind into a task property name.

    :param task_type: The task collection name to translate; it is compared
        for equality against ``ActionPointerKind`` members.
    :return: The snake_case property name paired with the first matching
        kind. A value that matches none of the listed kinds yields
        ``"key_phrase_extraction_tasks"``.
    :rtype: str
    """
    # Ordered pairs, checked top to bottom with ``==``.
    kind_to_property = (
        (ActionPointerKind.RECOGNIZE_ENTITIES, "entity_recognition_tasks"),
        (ActionPointerKind.RECOGNIZE_PII_ENTITIES, "entity_recognition_pii_tasks"),
        (ActionPointerKind.RECOGNIZE_LINKED_ENTITIES, "entity_linking_tasks"),
        (ActionPointerKind.ANALYZE_SENTIMENT, "sentiment_analysis_tasks"),
        (ActionPointerKind.EXTRACT_SUMMARY, "extractive_summarization_tasks"),
        (ActionPointerKind.RECOGNIZE_CUSTOM_ENTITIES, "custom_entity_recognition_tasks"),
        (ActionPointerKind.SINGLE_CATEGORY_CLASSIFY, "custom_single_classification_tasks"),
        (ActionPointerKind.MULTI_CATEGORY_CLASSIFY, "custom_multi_classification_tasks"),
    )
    for kind, property_name in kind_to_property:
        if task_type == kind:
            return property_name
    # Key phrase extraction has no explicit entry; it is the fall-through result.
    return "key_phrase_extraction_tasks"


def resolve_action_pointer(pointer):
    """Resolve an action error pointer into a task property name and index.

    :param str pointer: A pointer containing ``#/tasks/<collection>/<index>``,
        where ``<collection>`` is one of the ``ActionPointerKind`` values and
        ``<index>`` is a non-negative integer.
    :return: A ``(property_name, index)`` tuple, where ``property_name`` comes
        from ``get_task_from_pointer`` and ``index`` is the integer position
        parsed from the pointer.
    :rtype: tuple[str, int]
    :raises ValueError: If the pointer does not contain a recognized action
        pointer.
    """
    import re

    pointer_union = "|".join(re.escape(kind.value) for kind in ActionPointerKind)
    found = re.search(r"#/tasks/({})/(\d+)".format(pointer_union), pointer)
    if not found:
        raise ValueError(
            "Unexpected response from service - action pointer '{}' is not a valid action pointer.".format(pointer)
        )

    # Read both parts from the match itself. Taking only the last character
    # of the pointer would truncate multi-digit indices (".../12" -> 2).
    task = found.group(1)
    index = int(found.group(2))
    return get_task_from_pointer(task), index


def get_ordered_errors(tasks_obj, task_name, doc_id_order):
    """Build one error per input document for an action that has no results.

    :param tasks_obj: Object exposing ``errors`` (each with ``code``,
        ``message`` and ``target``) and ``tasks`` (the task collections).
    :param task_name: Name of the action whose error is being looked up.
    :param doc_id_order: Document ids, in the order results should be returned.
    :return: A list with one ``DocumentError`` per id in ``doc_id_order``,
        each carrying the code and message of the error that targets the
        action named ``task_name``.
    :raises HttpResponseError: If any error has no target; the message joins
        every error as ``(code) message``.
    :raises ValueError: If no error targets the requested action, or if an
        error's target is not a valid action pointer.
    """
    # Treat a missing error collection as empty so the lookup below ends in
    # the intended ValueError instead of a TypeError on iteration.
    job_errors = tasks_obj.errors or []

    # throw exception if error missing a target
    if any(err.target is None for err in job_errors):
        message = "".join(["({}) {}".format(err.code, err.message) for err in job_errors])
        raise HttpResponseError(message=message)

    # create a DocumentError per input doc with the action error details
    for err in job_errors:
        property_name, index = resolve_action_pointer(err.target)
        action = getattr(tasks_obj.tasks, property_name)[index]
        if action.task_name == task_name:
            return [
                DocumentError(
                    id=doc_id,
                    error=TextAnalyticsError(code=err.code, message=err.message)
                ) for doc_id in doc_id_order
            ]
    raise ValueError("Unexpected response from service - no errors for missing action results.")


def _get_doc_results(task, doc_id_order, response_headers, returned_tasks_object):
    """Return the per-document results, or errors, for a single action.

    :param task: A ``(task_type, task_name)`` pair identifying the action.
    :param doc_id_order: Document ids, in the order results should be returned.
    :param response_headers: Response headers forwarded to the deserializer.
    :param returned_tasks_object: The job state; its ``tasks`` attribute holds
        the task collections, and the whole object is handed to
        ``get_ordered_errors`` when the action has no results.
    :return: The deserialized results for the action, or the errors produced
        by ``get_ordered_errors`` when the action's ``results`` is ``None``.
    :raises ValueError: If no returned task has the requested name.
    """
    returned_tasks = returned_tasks_object.tasks
    current_task_type, task_name = task
    deserialization_callback = _get_deserialization_callback_from_task_type(
        current_task_type
    )
    property_name = _get_property_name_from_task_type(current_task_type)
    try:
        # The task collections live on ``returned_tasks``, not on the job
        # state object itself, so that is where the property is looked up.
        response_task_to_deserialize = next(
            candidate for candidate in getattr(returned_tasks, property_name)
            if candidate.task_name == task_name
        )
    except StopIteration:
        raise ValueError("Unexpected response from service - unable to deserialize result.")

    # if no results present, check for action errors
    if response_task_to_deserialize.results is None:
        return get_ordered_errors(returned_tasks_object, task_name, doc_id_order)
    return deserialization_callback(
        doc_id_order, response_task_to_deserialize.results, response_headers, lro=True
    )


def get_iter_items(doc_id_order, task_order, response_headers, analyze_job_state):
iter_items = defaultdict(list) # map doc id to action results
returned_tasks_object = analyze_job_state.tasks
returned_tasks_object = analyze_job_state
for task in task_order:
results = _get_good_result(
results = _get_doc_results(
task,
doc_id_order,
response_headers,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
"lastUpdateDateTime": "2021-10-21T23:02:34Z",
"createdDateTime": "2021-10-21T23:02:27Z",
"expirationDateTime": "2021-10-22T23:02:27Z",
"status": "partiallyCompleted",
"errors": [
{
"code": "InternalServerError",
"message": "1 out of 3 job tasks failed. Failed job tasks : v3.2-preview.2/custom/entities/general."
}
],
"tasks": {
"completed": 2,
"failed": 1,
"inProgress": 0,
"total": 3,
"customEntityRecognitionTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
"taskName": "2",
"state": "failed"
}
],
"customSingleClassificationTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
"taskName": "0",
"state": "succeeded"
}
],
"customMultiClassificationTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:28.7184297Z",
"taskName": "1",
"state": "succeeded"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
{
"jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
"lastUpdateDateTime": "2021-10-21T23:02:34Z",
"createdDateTime": "2021-10-21T23:02:27Z",
"expirationDateTime": "2021-10-22T23:02:27Z",
"status": "partiallyCompleted",
"errors": [
{
"code": "InvalidRequest",
"message": "Some error2",
"target": "#/tasks/entityRecognitionPiiTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error6",
"target": "#/tasks/entityRecognitionPiiTasks/1"
},
{
"code": "InvalidRequest",
"message": "Some error0",
"target": "#/tasks/entityRecognitionTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error1",
"target": "#/tasks/keyPhraseExtractionTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error3",
"target": "#/tasks/entityLinkingTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error4",
"target": "#/tasks/sentimentAnalysisTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error5",
"target": "#/tasks/extractiveSummarizationTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error9",
"target": "#/tasks/customEntityRecognitionTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error7",
"target": "#/tasks/customSingleClassificationTasks/0"
},
{
"code": "InvalidRequest",
"message": "Some error8",
"target": "#/tasks/customMultiClassificationTasks/0"
}
],
"tasks": {
"completed": 1,
"failed": 10,
"inProgress": 0,
"total": 11,
"entityRecognitionTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "0",
"state": "failed"
}
],
"entityRecognitionPiiTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "2",
"state": "failed"
},
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "6",
"state": "failed"
}
],
"keyPhraseExtractionTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "1",
"state": "failed"
}
],
"entityLinkingTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "3",
"state": "failed"
}
],
"sentimentAnalysisTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "4",
"state": "failed"
}
],
"extractiveSummarizationTasks": [
{
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
"taskName": "5",
"state": "failed"
}
],
"customEntityRecognitionTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
"taskName": "9",
"state": "failed"
}
],
"customSingleClassificationTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
"taskName": "7",
"state": "failed"
},
{
"lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
"taskName": "10",
"state": "succeeded",
"results": {
"statistics": {
"documentsCount": 2,
"validDocumentsCount": 1,
"erroneousDocumentsCount": 1,
"transactionsCount": 1
},
"documents": [
{
"id": "1",
"classification": {
"category": "RateBook",
"confidenceScore": 0.76
},
"statistics": {
"charactersCount": 295,
"transactionsCount": 1
},
"warnings": []
}
],
"errors": [
{
"id": "2",
"error": {
"code": "InvalidArgument",
"message": "Invalid document in request.",
"innererror": {
"code": "InvalidDocument",
"message": "Document text is empty."
}
}
}
],
"projectName": "single_category_classify_project_name",
"deploymentName": "single_category_classify_project_name"
}
}
],
"customMultiClassificationTasks": [
{
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
"taskName": "8",
"state": "failed"
}
]
}
}
Loading