From 374d2e39978e25e88b4d049bbf401d02821bec62 Mon Sep 17 00:00:00 2001 From: iscai-msft Date: Fri, 1 May 2020 18:25:04 -0400 Subject: [PATCH] removed grapheme_offset and grapheme_length, renamed grapheme_count to character_count --- .../azure-ai-textanalytics/CHANGELOG.md | 2 + .../azure/ai/textanalytics/_models.py | 61 ++++++------------- .../tests/test_recognize_entities.py | 4 -- .../tests/test_recognize_entities_async.py | 4 -- .../tests/test_text_analytics.py | 48 ++++++--------- 5 files changed, 39 insertions(+), 80 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md index 2a59a9623687..686177a7bb87 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md +++ b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md @@ -8,6 +8,8 @@ **Breaking changes** - `score` attribute of `DetectedLanguage` has been renamed to `confidence_score` +- Removed `grapheme_offset` and `grapheme_length` from `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch` +- `TextDocumentStatistics` attribute `grapheme_count` has been renamed to `character_count` ## 1.0.0b4 (2020-04-07) diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index 36bc56ba0d5a..692178449da8 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -168,12 +168,6 @@ class CategorizedEntity(DictMixin): :type category: str :param subcategory: Entity subcategory, such as Age/Year/TimeRange etc :type subcategory: str - :param grapheme_offset: Start position (in Unicode characters) for the - entity text. - :type grapheme_offset: int - :param grapheme_length: Length (in Unicode characters) for the entity - text. - :type grapheme_length: int :param confidence_score: Confidence score between 0 and 1 of the extracted entity. :type confidence_score: float @@ -183,8 +177,6 @@ def __init__(self, **kwargs): self.text = kwargs.get('text', None) self.category = kwargs.get('category', None) self.subcategory = kwargs.get('subcategory', None) - self.grapheme_offset = kwargs.get('grapheme_offset', None) - self.grapheme_length = kwargs.get('grapheme_length', None) self.confidence_score = kwargs.get('confidence_score', None) @classmethod @@ -193,15 +185,13 @@ def _from_generated(cls, entity): text=entity.text, category=entity.category, subcategory=entity.subcategory, - grapheme_offset=entity.offset, - grapheme_length=entity.length, confidence_score=entity.confidence_score, ) def __repr__(self): - return "CategorizedEntity(text={}, category={}, subcategory={}, grapheme_offset={}, grapheme_length={}, " \ - "confidence_score={})".format(self.text, self.category, self.subcategory, self.grapheme_offset, - self.grapheme_length, self.confidence_score)[:1024] + return "CategorizedEntity(text={}, category={}, subcategory={}, confidence_score={})".format( + self.text, self.category, self.subcategory, self.confidence_score + )[:1024] class TextAnalyticsError(DictMixin): @@ -391,16 +381,16 @@ class TextDocumentStatistics(DictMixin): """TextDocumentStatistics contains information about the document payload. - :param grapheme_count: Number of text elements recognized in + :param character_count: Number of text elements recognized in the document. - :type grapheme_count: int + :type character_count: int :param transaction_count: Number of transactions for the document. :type transaction_count: int """ def __init__(self, **kwargs): - self.grapheme_count = kwargs.get("grapheme_count", None) + self.character_count = kwargs.get("character_count", None) self.transaction_count = kwargs.get("transaction_count", None) @classmethod @@ -408,13 +398,13 @@ def _from_generated(cls, stats): if stats is None: return None return cls( - grapheme_count=stats.characters_count, + character_count=stats.characters_count, transaction_count=stats.transactions_count, ) def __repr__(self): - return "TextDocumentStatistics(grapheme_count={}, transaction_count={})" \ - .format(self.grapheme_count, self.transaction_count)[:1024] + return "TextDocumentStatistics(character_count={}, transaction_count={})" \ + .format(self.character_count, self.transaction_count)[:1024] class DocumentError(DictMixin): @@ -549,32 +539,23 @@ class LinkedEntityMatch(DictMixin): :type confidence_score: float :param text: Entity text as appears in the request. :type text: str - :param grapheme_offset: Start position (in Unicode characters) for the - entity match text. - :type grapheme_offset: int - :param grapheme_length: Length (in Unicode characters) for the entity - match text. - :type grapheme_length: int """ def __init__(self, **kwargs): self.confidence_score = kwargs.get("confidence_score", None) self.text = kwargs.get("text", None) - self.grapheme_offset = kwargs.get("grapheme_offset", None) - self.grapheme_length = kwargs.get("grapheme_length", None) @classmethod def _from_generated(cls, match): return cls( confidence_score=match.confidence_score, - text=match.text, - grapheme_offset=match.offset, - grapheme_length=match.length + text=match.text ) def __repr__(self): - return "LinkedEntityMatch(confidence_score={}, text={}, grapheme_offset={}, grapheme_length={})" \ - .format(self.confidence_score, self.text, self.grapheme_offset, self.grapheme_length)[:1024] + return "LinkedEntityMatch(confidence_score={}, text={})".format( + self.confidence_score, self.text + )[:1024] class TextDocumentInput(MultiLanguageInput): @@ -654,19 +635,12 @@ class SentenceSentiment(DictMixin): and 1 for the sentence for all labels. :type confidence_scores: ~azure.ai.textanalytics.SentimentConfidenceScores - :param grapheme_offset: The sentence offset from the start of the - document. - :type grapheme_offset: int - :param grapheme_length: The length of the sentence by Unicode standard. - :type grapheme_length: int """ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.sentiment = kwargs.get("sentiment", None) self.confidence_scores = kwargs.get("confidence_scores", None) - self.grapheme_offset = kwargs.get("grapheme_offset", None) - self.grapheme_length = kwargs.get("grapheme_length", None) @classmethod def _from_generated(cls, sentence): @@ -674,14 +648,13 @@ def _from_generated(cls, sentence): text=sentence.text, sentiment=sentence.sentiment, confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores), # pylint: disable=protected-access - grapheme_offset=sentence.offset, - grapheme_length=sentence.length ) def __repr__(self): - return "SentenceSentiment(text={}, sentiment={}, confidence_scores={}, grapheme_offset={}, "\ - "grapheme_length={})".format(self.text, self.sentiment, repr(self.confidence_scores), - self.grapheme_offset, self.grapheme_length + return "SentenceSentiment(text={}, sentiment={}, confidence_scores={})".format( + self.text, + self.sentiment, + repr(self.confidence_scores) )[:1024] diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities.py index 0cb514de1f00..04a13c52f5aa 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities.py @@ -44,8 +44,6 @@ def test_all_successful_passing_dict(self, client): for entity in doc.entities: self.assertIsNotNone(entity.text) self.assertIsNotNone(entity.category) - self.assertIsNotNone(entity.grapheme_offset) - self.assertIsNotNone(entity.grapheme_length) self.assertIsNotNone(entity.confidence_score) @GlobalTextAnalyticsAccountPreparer() @@ -63,8 +61,6 @@ def test_all_successful_passing_text_document_input(self, client): for entity in doc.entities: self.assertIsNotNone(entity.text) self.assertIsNotNone(entity.category) - self.assertIsNotNone(entity.grapheme_offset) - self.assertIsNotNone(entity.grapheme_length) self.assertIsNotNone(entity.confidence_score) @GlobalTextAnalyticsAccountPreparer() diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities_async.py index 6a0982defc89..a254580a6bcf 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities_async.py @@ -60,8 +60,6 @@ async def test_all_successful_passing_dict(self, client): for entity in doc.entities: self.assertIsNotNone(entity.text) self.assertIsNotNone(entity.category) - self.assertIsNotNone(entity.grapheme_offset) - self.assertIsNotNone(entity.grapheme_length) self.assertIsNotNone(entity.confidence_score) @GlobalTextAnalyticsAccountPreparer() @@ -79,8 +77,6 @@ async def test_all_successful_passing_text_document_input(self, client): for entity in doc.entities: self.assertIsNotNone(entity.text) self.assertIsNotNone(entity.category) - self.assertIsNotNone(entity.grapheme_offset) - self.assertIsNotNone(entity.grapheme_length) self.assertIsNotNone(entity.confidence_score) @GlobalTextAnalyticsAccountPreparer() diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_text_analytics.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_text_analytics.py index 5fc2101612df..017c8c0b624a 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_text_analytics.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_text_analytics.py @@ -29,10 +29,9 @@ def test_detect_language(self, resource_group, location, text_analytics_account, def test_repr(self): detected_language = _models.DetectedLanguage(name="English", iso6391_name="en", confidence_score=1.0) - categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age", - grapheme_offset=0, grapheme_length=8, confidence_score=0.899) + categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age", confidence_score=0.899) - text_document_statistics = _models.TextDocumentStatistics(grapheme_count=14, transaction_count=18) + text_document_statistics = _models.TextDocumentStatistics(character_count=14, transaction_count=18) warnings = [_models.TextAnalyticsWarning(code="LongWordsInDocument", message="The document contains very long words (longer than 64 characters). These words will be truncated and may result in unreliable model predictions.")] @@ -63,8 +62,7 @@ def test_repr(self): id="1", key_phrases=["dog", "cat", "bird"], warnings=warnings, statistics=text_document_statistics, is_error=False ) - linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates", grapheme_offset=0, - grapheme_length=8) + linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates") linked_entity = _models.LinkedEntity( name="Bill Gates", @@ -85,9 +83,7 @@ def test_repr(self): sentence_sentiment = _models.SentenceSentiment( text="This is a sentence.", sentiment="neutral", - confidence_scores=sentiment_confidence_score_per_label, - grapheme_offset=0, - grapheme_length=10 + confidence_scores=sentiment_confidence_score_per_label ) analyze_sentiment_result = _models.AnalyzeSentimentResult( @@ -114,60 +110,56 @@ def test_repr(self): ) self.assertEqual("DetectedLanguage(name=English, iso6391_name=en, confidence_score=1.0)", repr(detected_language)) - self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, grapheme_offset=0, " - "grapheme_length=8, confidence_score=0.899)", + self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, confidence_score=0.899)", repr(categorized_entity)) - self.assertEqual("TextDocumentStatistics(grapheme_count=14, transaction_count=18)", + self.assertEqual("TextDocumentStatistics(character_count=14, transaction_count=18)", repr(text_document_statistics)) self.assertEqual("RecognizeEntitiesResult(id=1, entities=[CategorizedEntity(text=Bill Gates, category=Person, " - "subcategory=Age, grapheme_offset=0, grapheme_length=8, confidence_score=0.899)], " + "subcategory=Age, confidence_score=0.899)], " "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). " "These words will be truncated and may result in unreliable model predictions.)], " - "statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), " + "statistics=TextDocumentStatistics(character_count=14, transaction_count=18), " "is_error=False)", repr(recognize_entities_result)) self.assertEqual("DetectLanguageResult(id=1, primary_language=DetectedLanguage(name=English, " "iso6391_name=en, confidence_score=1.0), " "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). " "These words will be truncated and may result in unreliable model predictions.)], " - "statistics=TextDocumentStatistics(grapheme_count=14, " + "statistics=TextDocumentStatistics(character_count=14, " "transaction_count=18), is_error=False)", repr(detect_language_result)) self.assertEqual("TextAnalyticsError(code=invalidRequest, message=The request is invalid, target=request)", repr(text_analytics_error)) self.assertEqual("ExtractKeyPhrasesResult(id=1, key_phrases=['dog', 'cat', 'bird'], " "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). " "These words will be truncated and may result in unreliable model predictions.)], " - "statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), is_error=False)", + "statistics=TextDocumentStatistics(character_count=14, transaction_count=18), is_error=False)", repr(extract_key_phrases_result)) - self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, grapheme_length=8)", + self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)", repr(linked_entity_match)) - self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, " - "grapheme_offset=0, grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, " - "grapheme_offset=0, grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, " + self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), " + "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], " + "language=English, data_source_entity_id=Bill Gates, " "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)", repr(linked_entity)) self.assertEqual("RecognizeLinkedEntitiesResult(id=1, entities=[LinkedEntity(name=Bill Gates, " - "matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, " - "grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, " - "grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, " + "matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), " + "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], language=English, data_source_entity_id=Bill Gates, " "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)], " "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). " "These words will be truncated and may result in unreliable model predictions.)], " - "statistics=TextDocumentStatistics(grapheme_count=14, " + "statistics=TextDocumentStatistics(character_count=14, " "transaction_count=18), is_error=False)", repr(recognize_linked_entities_result)) self.assertEqual("SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02)", repr(sentiment_confidence_score_per_label)) self.assertEqual("SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores=SentimentConfidenceScores(" - "positive=0.99, neutral=0.05, negative=0.02), grapheme_offset=0, grapheme_length=10)", + "positive=0.99, neutral=0.05, negative=0.02))", repr(sentence_sentiment)) self.assertEqual("AnalyzeSentimentResult(id=1, sentiment=positive, " "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). " "These words will be truncated and may result in unreliable model predictions.)], " "statistics=TextDocumentStatistics(" - "grapheme_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores" + "character_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores" "(positive=0.99, neutral=0.05, negative=0.02), " "sentences=[SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores=" - "SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02), " - "grapheme_offset=0, grapheme_length=10)], " - "is_error=False)", + "SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02))], is_error=False)", repr(analyze_sentiment_result)) self.assertEqual("DocumentError(id=1, error=TextAnalyticsError(code=invalidRequest, " "message=The request is invalid, target=request), is_error=True)", repr(document_error))