Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 1.0.0b5 (Unreleased)

**New features**
- Added a `warnings` property to each document-level response object returned from the endpoints. It is a list of `TextAnalyticsWarning` objects.
- Added `text` property to `SentenceSentiment`

**Breaking changes**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
RecognizeEntitiesResult,
DetectLanguageResult,
TextAnalyticsError,
TextAnalyticsWarning,
ExtractKeyPhrasesResult,
RecognizeLinkedEntitiesResult,
TextDocumentStatistics,
Expand All @@ -35,6 +36,7 @@
'DetectLanguageResult',
'CategorizedEntity',
'TextAnalyticsError',
'TextAnalyticsWarning',
'ExtractKeyPhrasesResult',
'RecognizeLinkedEntitiesResult',
'AnalyzeSentimentResult',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ class RecognizeEntitiesResult(DictMixin):
:param entities: Recognized entities in the document.
:type entities:
list[~azure.ai.textanalytics.CategorizedEntity]
:param warnings: Warnings encountered while processing document. Results will still be returned
if there are warnings, but they may not be fully accurate.
:type warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
:param statistics: If show_stats=true was specified in the request this
field will contain information about the document payload.
:type statistics:
Expand All @@ -112,12 +115,13 @@ class RecognizeEntitiesResult(DictMixin):
def __init__(self, **kwargs):
self.id = kwargs.get("id", None)
self.entities = kwargs.get("entities", None)
self.warnings = kwargs.get("warnings", [])
self.statistics = kwargs.get("statistics", None)
self.is_error = False

def __repr__(self):
return "RecognizeEntitiesResult(id={}, entities={}, statistics={}, is_error={})" \
.format(self.id, repr(self.entities), repr(self.statistics), self.is_error)[:1024]
return "RecognizeEntitiesResult(id={}, entities={}, warnings={}, statistics={}, is_error={})" \
.format(self.id, repr(self.entities), repr(self.warnings), repr(self.statistics), self.is_error)[:1024]


class DetectLanguageResult(DictMixin):
Expand All @@ -130,6 +134,9 @@ class DetectLanguageResult(DictMixin):
:type id: str
:param primary_language: The primary language detected in the document.
:type primary_language: ~azure.ai.textanalytics.DetectedLanguage
:param warnings: Warnings encountered while processing document. Results will still be returned
if there are warnings, but they may not be fully accurate.
:type warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
:param statistics: If show_stats=true was specified in the request this
field will contain information about the document payload.
:type statistics:
Expand All @@ -141,12 +148,14 @@ class DetectLanguageResult(DictMixin):
def __init__(self, **kwargs):
self.id = kwargs.get("id", None)
self.primary_language = kwargs.get("primary_language", None)
self.warnings = kwargs.get("warnings", [])
self.statistics = kwargs.get("statistics", None)
self.is_error = False

def __repr__(self):
return "DetectLanguageResult(id={}, primary_language={}, statistics={}, is_error={})" \
.format(self.id, repr(self.primary_language), repr(self.statistics), self.is_error)[:1024]
return "DetectLanguageResult(id={}, primary_language={}, warnings={}, statistics={}, "\
"is_error={})".format(self.id, repr(self.primary_language), repr(self.warnings),
repr(self.statistics), self.is_error)[:1024]


class CategorizedEntity(DictMixin):
Expand Down Expand Up @@ -235,6 +244,32 @@ def __repr__(self):
return "TextAnalyticsError(code={}, message={}, target={})" \
.format(self.code, self.message, self.target)[:1024]

class TextAnalyticsWarning(DictMixin):
    """A warning attached to a document-level response.

    Carries the warning code together with a message that explains why
    the response has a warning.

    :param code: Warning code. Possible values include: 'LongWordsInDocument',
        'DocumentTruncated'.
    :type code: str
    :param message: Warning message.
    :type message: str
    """

    def __init__(self, **kwargs):
        # Both fields are optional keyword arguments; a missing one is stored as None.
        self.code = kwargs.get("code")
        self.message = kwargs.get("message")

    @classmethod
    def _from_generated(cls, warning):
        """Build an instance by copying ``code`` and ``message`` from ``warning``."""
        code, message = warning.code, warning.message
        return cls(code=code, message=message)

    def __repr__(self):
        # The rendered representation is capped at 1024 characters.
        rendered = "TextAnalyticsWarning(code={}, message={})".format(self.code, self.message)
        return rendered[:1024]


class ExtractKeyPhrasesResult(DictMixin):
"""ExtractKeyPhrasesResult is a result object which contains
Expand All @@ -248,6 +283,9 @@ class ExtractKeyPhrasesResult(DictMixin):
The number of key phrases returned is proportional to the number of words
in the input document.
:type key_phrases: list[str]
:param warnings: Warnings encountered while processing document. Results will still be returned
if there are warnings, but they may not be fully accurate.
:type warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
:param statistics: If show_stats=true was specified in the request this
field will contain information about the document payload.
:type statistics:
Expand All @@ -259,12 +297,13 @@ class ExtractKeyPhrasesResult(DictMixin):
def __init__(self, **kwargs):
self.id = kwargs.get("id", None)
self.key_phrases = kwargs.get("key_phrases", None)
self.warnings = kwargs.get("warnings", [])
self.statistics = kwargs.get("statistics", None)
self.is_error = False

def __repr__(self):
return "ExtractKeyPhrasesResult(id={}, key_phrases={}, statistics={}, is_error={})" \
.format(self.id, self.key_phrases, repr(self.statistics), self.is_error)[:1024]
return "ExtractKeyPhrasesResult(id={}, key_phrases={}, warnings={}, statistics={}, is_error={})" \
.format(self.id, self.key_phrases, repr(self.warnings), repr(self.statistics), self.is_error)[:1024]


class RecognizeLinkedEntitiesResult(DictMixin):
Expand All @@ -278,6 +317,9 @@ class RecognizeLinkedEntitiesResult(DictMixin):
:param entities: Recognized well-known entities in the document.
:type entities:
list[~azure.ai.textanalytics.LinkedEntity]
:param warnings: Warnings encountered while processing document. Results will still be returned
if there are warnings, but they may not be fully accurate.
:type warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
:param statistics: If show_stats=true was specified in the request this
field will contain information about the document payload.
:type statistics:
Expand All @@ -289,12 +331,13 @@ class RecognizeLinkedEntitiesResult(DictMixin):
def __init__(self, **kwargs):
self.id = kwargs.get("id", None)
self.entities = kwargs.get("entities", None)
self.warnings = kwargs.get("warnings", [])
self.statistics = kwargs.get("statistics", None)
self.is_error = False

def __repr__(self):
return "RecognizeLinkedEntitiesResult(id={}, entities={}, statistics={}, is_error={})" \
.format(self.id, repr(self.entities), repr(self.statistics), self.is_error)[:1024]
return "RecognizeLinkedEntitiesResult(id={}, entities={}, warnings={}, statistics={}, is_error={})" \
.format(self.id, repr(self.entities), repr(self.warnings), repr(self.statistics), self.is_error)[:1024]


class AnalyzeSentimentResult(DictMixin):
Expand All @@ -310,6 +353,9 @@ class AnalyzeSentimentResult(DictMixin):
Neutral, Positive, or Mixed). Possible values include: 'positive',
'neutral', 'negative', 'mixed'
:type sentiment: str
:param warnings: Warnings encountered while processing document. Results will still be returned
if there are warnings, but they may not be fully accurate.
:type warnings: list[~azure.ai.textanalytics.TextAnalyticsWarning]
:param statistics: If show_stats=true was specified in the request this
field will contain information about the document payload.
:type statistics:
Expand All @@ -328,15 +374,17 @@ class AnalyzeSentimentResult(DictMixin):
def __init__(self, **kwargs):
self.id = kwargs.get("id", None)
self.sentiment = kwargs.get("sentiment", None)
self.warnings = kwargs.get("warnings", [])
self.statistics = kwargs.get("statistics", None)
self.confidence_scores = kwargs.get("confidence_scores", None)
self.sentences = kwargs.get("sentences", None)
self.is_error = False

def __repr__(self):
return "AnalyzeSentimentResult(id={}, sentiment={}, statistics={}, confidence_scores={}, sentences={}, " \
"is_error={})".format(self.id, self.sentiment, repr(self.statistics), repr(self.confidence_scores),
repr(self.sentences), self.is_error)[:1024]
return "AnalyzeSentimentResult(id={}, sentiment={}, warnings={}, statistics={}, confidence_scores={}, "\
"sentences={}, is_error={})".format(
self.id, self.sentiment, repr(self.warnings), repr(self.statistics),
repr(self.confidence_scores), repr(self.sentences), self.is_error)[:1024]


class TextDocumentStatistics(DictMixin):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
DetectedLanguage,
DocumentError,
SentimentConfidenceScores,
TextAnalyticsError
TextAnalyticsError,
TextAnalyticsWarning
)

class CSODataV4Format(ODataV4Format):
Expand Down Expand Up @@ -80,6 +81,7 @@ def language_result(language):
return DetectLanguageResult(
id=language.id,
primary_language=DetectedLanguage._from_generated(language.detected_languages[0]), # pylint: disable=protected-access
warnings=[TextAnalyticsWarning._from_generated(w) for w in language.warnings], # pylint: disable=protected-access
statistics=TextDocumentStatistics._from_generated(language.statistics), # pylint: disable=protected-access
)

Expand All @@ -89,6 +91,7 @@ def entities_result(entity):
return RecognizeEntitiesResult(
id=entity.id,
entities=[CategorizedEntity._from_generated(e) for e in entity.entities], # pylint: disable=protected-access
warnings=[TextAnalyticsWarning._from_generated(w) for w in entity.warnings], # pylint: disable=protected-access
statistics=TextDocumentStatistics._from_generated(entity.statistics), # pylint: disable=protected-access
)

Expand All @@ -98,6 +101,7 @@ def linked_entities_result(entity):
return RecognizeLinkedEntitiesResult(
id=entity.id,
entities=[LinkedEntity._from_generated(e) for e in entity.entities], # pylint: disable=protected-access
warnings=[TextAnalyticsWarning._from_generated(w) for w in entity.warnings], # pylint: disable=protected-access
statistics=TextDocumentStatistics._from_generated(entity.statistics), # pylint: disable=protected-access
)

Expand All @@ -107,6 +111,7 @@ def key_phrases_result(phrases):
return ExtractKeyPhrasesResult(
id=phrases.id,
key_phrases=phrases.key_phrases,
warnings=[TextAnalyticsWarning._from_generated(w) for w in phrases.warnings], # pylint: disable=protected-access
statistics=TextDocumentStatistics._from_generated(phrases.statistics), # pylint: disable=protected-access
)

Expand All @@ -116,6 +121,7 @@ def sentiment_result(sentiment):
return AnalyzeSentimentResult(
id=sentiment.id,
sentiment=sentiment.sentiment,
warnings=[TextAnalyticsWarning._from_generated(w) for w in sentiment.warnings], # pylint: disable=protected-access
statistics=TextDocumentStatistics._from_generated(sentiment.statistics), # pylint: disable=protected-access
confidence_scores=SentimentConfidenceScores._from_generated(sentiment.confidence_scores), # pylint: disable=protected-access
sentences=[SentenceSentiment._from_generated(s) for s in sentiment.sentences], # pylint: disable=protected-access
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ def callback(resp):
_LOGGER.info("raw_response: {}".format(resp.raw_response))

async with text_analytics_client:
result = await text_analytics_client.analyze_sentiment(
result = await text_analytics_client.extract_key_phrases(
documents,
show_stats=True,
model_version="latest",
raw_response_hook=callback
)
for doc in result:
_LOGGER.warning("Doc with id {} has these warnings: {}".format(doc.id, doc.warnings))


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ def callback(resp):
_LOGGER.info("model_version: {}".format(resp.model_version))
_LOGGER.info("raw_response: {}".format(resp.raw_response))

result = text_analytics_client.analyze_sentiment(
result = text_analytics_client.extract_key_phrases(
documents,
show_stats=True,
model_version="latest",
raw_response_hook=callback
)
for doc in result:
_LOGGER.warning("Doc with id {} has these warnings: {}".format(doc.id, doc.warnings))


if __name__ == '__main__':
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
interactions:
- request:
body: '{"documents": [{"id": "1", "text": "This won''t actually create a warning
:''(", "language": "en"}]}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '98'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/1.0.0b5 Python/3.7.7 (Darwin-17.7.0-x86_64-i386-64bit)
Python/3.7.7 (Darwin-17.7.0-x86_64-i386-64bit)
method: POST
uri: https://westus2.ppe.cognitiveservices.azure.com/text/analytics/v3.0/sentiment?showStats=false
response:
body:
string: '{"documents":[{"id":"1","sentiment":"negative","confidenceScores":{"positive":0.0,"neutral":0.02,"negative":0.98},"sentences":[{"sentiment":"negative","confidenceScores":{"positive":0.0,"neutral":0.02,"negative":0.98},"offset":0,"length":40,"text":"This
won''t actually create a warning :''("}],"warnings":[]}],"errors":[],"modelVersion":"2019-10-01"}'
headers:
apim-request-id:
- c1f122eb-deb0-4369-b857-2d91e3d0e348
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Fri, 24 Apr 2020 18:51:45 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '276'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
interactions:
- request:
body: '{"documents": [{"id": "1", "text": "Thisisaveryveryverylongtextwhichgoesonforalongtimeandwhichalmostdoesn''tseemtostopatanygivenpointintime.ThereasonforthistestistotryandseewhathappenswhenwesubmitaveryveryverylongtexttoLanguage.Thisshouldworkjustfinebutjustincaseitisalwaysgoodtohaveatestcase.ThisallowsustotestwhathappensifitisnotOK.Ofcourseitisgoingtobeokbutthenagainitisalsobettertobesure!",
"language": "en"}]}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '413'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/1.0.0b5 Python/3.7.7 (Darwin-17.7.0-x86_64-i386-64bit)
Python/3.7.7 (Darwin-17.7.0-x86_64-i386-64bit)
method: POST
uri: https://westus2.ppe.cognitiveservices.azure.com/text/analytics/v3.0/keyPhrases?showStats=false
response:
body:
string: '{"documents":[{"id":"1","keyPhrases":["Thisisaveryveryverylongtextwhichgoesonforalongtimeandwhichalmost"],"warnings":[{"code":"LongWordsInDocument","message":"The
document contains very long words (longer than 64 characters). These words
will be truncated and may result in unreliable model predictions."}]}],"errors":[],"modelVersion":"2019-10-01"}'
headers:
apim-request-id:
- bbaeaafd-7ae8-46d9-88bb-a0eae6fdab85
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Fri, 24 Apr 2020 18:40:40 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '4'
status:
code: 200
message: OK
version: 1
Loading