Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

**Breaking changes**
- `score` attribute of `DetectedLanguage` has been renamed to `confidence_score`
- Removed `grapheme_offset` and `grapheme_length` from `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`
- `TextDocumentStatistics` attribute `grapheme_count` has been renamed to `character_count`

## 1.0.0b4 (2020-04-07)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,6 @@ class CategorizedEntity(DictMixin):
:type category: str
:param subcategory: Entity subcategory, such as Age/Year/TimeRange etc
:type subcategory: str
:param grapheme_offset: Start position (in Unicode characters) for the
entity text.
:type grapheme_offset: int
:param grapheme_length: Length (in Unicode characters) for the entity
text.
:type grapheme_length: int
:param confidence_score: Confidence score between 0 and 1 of the extracted
entity.
:type confidence_score: float
Expand All @@ -183,8 +177,6 @@ def __init__(self, **kwargs):
self.text = kwargs.get('text', None)
self.category = kwargs.get('category', None)
self.subcategory = kwargs.get('subcategory', None)
self.grapheme_offset = kwargs.get('grapheme_offset', None)
self.grapheme_length = kwargs.get('grapheme_length', None)
self.confidence_score = kwargs.get('confidence_score', None)

@classmethod
Expand All @@ -193,15 +185,13 @@ def _from_generated(cls, entity):
text=entity.text,
category=entity.category,
subcategory=entity.subcategory,
grapheme_offset=entity.offset,
grapheme_length=entity.length,
confidence_score=entity.confidence_score,
)

def __repr__(self):
return "CategorizedEntity(text={}, category={}, subcategory={}, grapheme_offset={}, grapheme_length={}, " \
"confidence_score={})".format(self.text, self.category, self.subcategory, self.grapheme_offset,
self.grapheme_length, self.confidence_score)[:1024]
return "CategorizedEntity(text={}, category={}, subcategory={}, confidence_score={})".format(
self.text, self.category, self.subcategory, self.confidence_score
)[:1024]


class TextAnalyticsError(DictMixin):
Expand Down Expand Up @@ -391,30 +381,30 @@ class TextDocumentStatistics(DictMixin):
"""TextDocumentStatistics contains information about
the document payload.

:param grapheme_count: Number of text elements recognized in
:param character_count: Number of text elements recognized in
the document.
:type grapheme_count: int
:type character_count: int
:param transaction_count: Number of transactions for the
document.
:type transaction_count: int
"""

def __init__(self, **kwargs):
self.grapheme_count = kwargs.get("grapheme_count", None)
self.character_count = kwargs.get("character_count", None)
self.transaction_count = kwargs.get("transaction_count", None)

@classmethod
def _from_generated(cls, stats):
if stats is None:
return None
return cls(
grapheme_count=stats.characters_count,
character_count=stats.characters_count,
transaction_count=stats.transactions_count,
)

def __repr__(self):
return "TextDocumentStatistics(grapheme_count={}, transaction_count={})" \
.format(self.grapheme_count, self.transaction_count)[:1024]
return "TextDocumentStatistics(character_count={}, transaction_count={})" \
.format(self.character_count, self.transaction_count)[:1024]


class DocumentError(DictMixin):
Expand Down Expand Up @@ -549,32 +539,23 @@ class LinkedEntityMatch(DictMixin):
:type confidence_score: float
:param text: Entity text as appears in the request.
:type text: str
:param grapheme_offset: Start position (in Unicode characters) for the
entity match text.
:type grapheme_offset: int
:param grapheme_length: Length (in Unicode characters) for the entity
match text.
:type grapheme_length: int
"""

def __init__(self, **kwargs):
self.confidence_score = kwargs.get("confidence_score", None)
self.text = kwargs.get("text", None)
self.grapheme_offset = kwargs.get("grapheme_offset", None)
self.grapheme_length = kwargs.get("grapheme_length", None)

@classmethod
def _from_generated(cls, match):
return cls(
confidence_score=match.confidence_score,
text=match.text,
grapheme_offset=match.offset,
grapheme_length=match.length
text=match.text
)

def __repr__(self):
return "LinkedEntityMatch(confidence_score={}, text={}, grapheme_offset={}, grapheme_length={})" \
.format(self.confidence_score, self.text, self.grapheme_offset, self.grapheme_length)[:1024]
return "LinkedEntityMatch(confidence_score={}, text={})".format(
self.confidence_score, self.text
)[:1024]


class TextDocumentInput(MultiLanguageInput):
Expand Down Expand Up @@ -654,34 +635,26 @@ class SentenceSentiment(DictMixin):
and 1 for the sentence for all labels.
:type confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:param grapheme_offset: The sentence offset from the start of the
document.
:type grapheme_offset: int
:param grapheme_length: The length of the sentence by Unicode standard.
:type grapheme_length: int
"""

def __init__(self, **kwargs):
self.text = kwargs.get("text", None)
self.sentiment = kwargs.get("sentiment", None)
self.confidence_scores = kwargs.get("confidence_scores", None)
self.grapheme_offset = kwargs.get("grapheme_offset", None)
self.grapheme_length = kwargs.get("grapheme_length", None)

@classmethod
def _from_generated(cls, sentence):
return cls(
text=sentence.text,
sentiment=sentence.sentiment,
confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores), # pylint: disable=protected-access
grapheme_offset=sentence.offset,
grapheme_length=sentence.length
)

def __repr__(self):
return "SentenceSentiment(text={}, sentiment={}, confidence_scores={}, grapheme_offset={}, "\
"grapheme_length={})".format(self.text, self.sentiment, repr(self.confidence_scores),
self.grapheme_offset, self.grapheme_length
return "SentenceSentiment(text={}, sentiment={}, confidence_scores={})".format(
self.text,
self.sentiment,
repr(self.confidence_scores)
)[:1024]


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ def test_all_successful_passing_dict(self, client):
for entity in doc.entities:
self.assertIsNotNone(entity.text)
self.assertIsNotNone(entity.category)
self.assertIsNotNone(entity.grapheme_offset)
self.assertIsNotNone(entity.grapheme_length)
self.assertIsNotNone(entity.confidence_score)

@GlobalTextAnalyticsAccountPreparer()
Expand All @@ -63,8 +61,6 @@ def test_all_successful_passing_text_document_input(self, client):
for entity in doc.entities:
self.assertIsNotNone(entity.text)
self.assertIsNotNone(entity.category)
self.assertIsNotNone(entity.grapheme_offset)
self.assertIsNotNone(entity.grapheme_length)
self.assertIsNotNone(entity.confidence_score)

@GlobalTextAnalyticsAccountPreparer()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ async def test_all_successful_passing_dict(self, client):
for entity in doc.entities:
self.assertIsNotNone(entity.text)
self.assertIsNotNone(entity.category)
self.assertIsNotNone(entity.grapheme_offset)
self.assertIsNotNone(entity.grapheme_length)
self.assertIsNotNone(entity.confidence_score)

@GlobalTextAnalyticsAccountPreparer()
Expand All @@ -79,8 +77,6 @@ async def test_all_successful_passing_text_document_input(self, client):
for entity in doc.entities:
self.assertIsNotNone(entity.text)
self.assertIsNotNone(entity.category)
self.assertIsNotNone(entity.grapheme_offset)
self.assertIsNotNone(entity.grapheme_length)
self.assertIsNotNone(entity.confidence_score)

@GlobalTextAnalyticsAccountPreparer()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ def test_detect_language(self, resource_group, location, text_analytics_account,
def test_repr(self):
detected_language = _models.DetectedLanguage(name="English", iso6391_name="en", confidence_score=1.0)

categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age",
grapheme_offset=0, grapheme_length=8, confidence_score=0.899)
categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age", confidence_score=0.899)

text_document_statistics = _models.TextDocumentStatistics(grapheme_count=14, transaction_count=18)
text_document_statistics = _models.TextDocumentStatistics(character_count=14, transaction_count=18)

warnings = [_models.TextAnalyticsWarning(code="LongWordsInDocument", message="The document contains very long words (longer than 64 characters). These words will be truncated and may result in unreliable model predictions.")]

Expand Down Expand Up @@ -63,8 +62,7 @@ def test_repr(self):
id="1", key_phrases=["dog", "cat", "bird"], warnings=warnings, statistics=text_document_statistics, is_error=False
)

linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates", grapheme_offset=0,
grapheme_length=8)
linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates")

linked_entity = _models.LinkedEntity(
name="Bill Gates",
Expand All @@ -85,9 +83,7 @@ def test_repr(self):
sentence_sentiment = _models.SentenceSentiment(
text="This is a sentence.",
sentiment="neutral",
confidence_scores=sentiment_confidence_score_per_label,
grapheme_offset=0,
grapheme_length=10
confidence_scores=sentiment_confidence_score_per_label
)

analyze_sentiment_result = _models.AnalyzeSentimentResult(
Expand All @@ -114,60 +110,56 @@ def test_repr(self):
)

self.assertEqual("DetectedLanguage(name=English, iso6391_name=en, confidence_score=1.0)", repr(detected_language))
self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, grapheme_offset=0, "
"grapheme_length=8, confidence_score=0.899)",
self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, confidence_score=0.899)",
repr(categorized_entity))
self.assertEqual("TextDocumentStatistics(grapheme_count=14, transaction_count=18)",
self.assertEqual("TextDocumentStatistics(character_count=14, transaction_count=18)",
repr(text_document_statistics))
self.assertEqual("RecognizeEntitiesResult(id=1, entities=[CategorizedEntity(text=Bill Gates, category=Person, "
"subcategory=Age, grapheme_offset=0, grapheme_length=8, confidence_score=0.899)], "
"subcategory=Age, confidence_score=0.899)], "
"warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "
"These words will be truncated and may result in unreliable model predictions.)], "
"statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), "
"statistics=TextDocumentStatistics(character_count=14, transaction_count=18), "
"is_error=False)", repr(recognize_entities_result))
self.assertEqual("DetectLanguageResult(id=1, primary_language=DetectedLanguage(name=English, "
"iso6391_name=en, confidence_score=1.0), "
"warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "
"These words will be truncated and may result in unreliable model predictions.)], "
"statistics=TextDocumentStatistics(grapheme_count=14, "
"statistics=TextDocumentStatistics(character_count=14, "
"transaction_count=18), is_error=False)", repr(detect_language_result))
self.assertEqual("TextAnalyticsError(code=invalidRequest, message=The request is invalid, target=request)",
repr(text_analytics_error))
self.assertEqual("ExtractKeyPhrasesResult(id=1, key_phrases=['dog', 'cat', 'bird'], "
"warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "
"These words will be truncated and may result in unreliable model predictions.)], "
"statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), is_error=False)",
"statistics=TextDocumentStatistics(character_count=14, transaction_count=18), is_error=False)",
repr(extract_key_phrases_result))
self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, grapheme_length=8)",
self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)",
repr(linked_entity_match))
self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, "
"grapheme_offset=0, grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, "
"grapheme_offset=0, grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, "
self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), "
"LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], "
"language=English, data_source_entity_id=Bill Gates, "
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)", repr(linked_entity))
self.assertEqual("RecognizeLinkedEntitiesResult(id=1, entities=[LinkedEntity(name=Bill Gates, "
"matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, "
"grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, "
"grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, "
"matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), "
"LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], language=English, data_source_entity_id=Bill Gates, "
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)], "
"warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "
"These words will be truncated and may result in unreliable model predictions.)], "
"statistics=TextDocumentStatistics(grapheme_count=14, "
"statistics=TextDocumentStatistics(character_count=14, "
"transaction_count=18), is_error=False)", repr(recognize_linked_entities_result))
self.assertEqual("SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02)",
repr(sentiment_confidence_score_per_label))
self.assertEqual("SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores=SentimentConfidenceScores("
"positive=0.99, neutral=0.05, negative=0.02), grapheme_offset=0, grapheme_length=10)",
"positive=0.99, neutral=0.05, negative=0.02))",
repr(sentence_sentiment))
self.assertEqual("AnalyzeSentimentResult(id=1, sentiment=positive, "
"warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "
"These words will be truncated and may result in unreliable model predictions.)], "
"statistics=TextDocumentStatistics("
"grapheme_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores"
"character_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores"
"(positive=0.99, neutral=0.05, negative=0.02), "
"sentences=[SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores="
"SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02), "
"grapheme_offset=0, grapheme_length=10)], "
"is_error=False)",
"SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02))], is_error=False)",
repr(analyze_sentiment_result))
self.assertEqual("DocumentError(id=1, error=TextAnalyticsError(code=invalidRequest, "
"message=The request is invalid, target=request), is_error=True)", repr(document_error))
Expand Down