[text analytics] Remove offset and length #11190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged

iscai-msft merged 1 commit into Azure:feature/text_analytics_v3.0 from iscai-msft:remove_offset_length

May 1, 2020

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,8 @@ @@
     **Breaking changes**
     - `score` attribute of `DetectedLanguage` has been renamed to `confidence_score`
+    - Removed `grapheme_offset` and `grapheme_length` from `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`
+    - `TextDocumentStatistics` attribute `grapheme_count` has been renamed to `character_count`
     ## 1.0.0b4 (2020-04-07)
@@ Expand Down @@

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -168,12 +168,6 @@ class CategorizedEntity(DictMixin):
  
        :type category: str

        :param subcategory: Entity subcategory, such as Age/Year/TimeRange etc

        :type subcategory: str

        :param grapheme_offset: Start position (in Unicode characters) for the

            entity text.

        :type grapheme_offset: int

        :param grapheme_length: Length (in Unicode characters) for the entity

            text.

        :type grapheme_length: int

        :param confidence_score: Confidence score between 0 and 1 of the extracted

            entity.

        :type confidence_score: float

    @@ -183,8 +177,6 @@ def __init__(self, **kwargs):
  
            self.text = kwargs.get('text', None)

            self.category = kwargs.get('category', None)

            self.subcategory = kwargs.get('subcategory', None)

            self.grapheme_offset = kwargs.get('grapheme_offset', None)

            self.grapheme_length = kwargs.get('grapheme_length', None)

            self.confidence_score = kwargs.get('confidence_score', None)

        @classmethod

    @@ -193,15 +185,13 @@ def _from_generated(cls, entity):
  
                text=entity.text,

                category=entity.category,

                subcategory=entity.subcategory,

                grapheme_offset=entity.offset,

                grapheme_length=entity.length,

                confidence_score=entity.confidence_score,

            )

        def __repr__(self):

            return "CategorizedEntity(text={}, category={}, subcategory={}, grapheme_offset={}, grapheme_length={}, " \

                   "confidence_score={})".format(self.text, self.category, self.subcategory, self.grapheme_offset,

                                      self.grapheme_length, self.confidence_score)[:1024]

            return "CategorizedEntity(text={}, category={}, subcategory={}, confidence_score={})".format(

                self.text, self.category, self.subcategory, self.confidence_score

            )[:1024]

    class TextAnalyticsError(DictMixin):

    @@ -391,30 +381,30 @@ class TextDocumentStatistics(DictMixin):
  
        """TextDocumentStatistics contains information about

        the document payload.

        :param grapheme_count: Number of text elements recognized in

        :param character_count: Number of text elements recognized in

            the document.

        :type grapheme_count: int

        :type character_count: int

        :param transaction_count: Number of transactions for the

            document.

        :type transaction_count: int

        """

        def __init__(self, **kwargs):

            self.grapheme_count = kwargs.get("grapheme_count", None)

            self.character_count = kwargs.get("character_count", None)

            self.transaction_count = kwargs.get("transaction_count", None)

        @classmethod

        def _from_generated(cls, stats):

            if stats is None:

                return None

            return cls(

                grapheme_count=stats.characters_count,

                character_count=stats.characters_count,

                transaction_count=stats.transactions_count,

            )

        def __repr__(self):

            return "TextDocumentStatistics(grapheme_count={}, transaction_count={})" \

                .format(self.grapheme_count, self.transaction_count)[:1024]

            return "TextDocumentStatistics(character_count={}, transaction_count={})" \

                .format(self.character_count, self.transaction_count)[:1024]

    class DocumentError(DictMixin):

    @@ -549,32 +539,23 @@ class LinkedEntityMatch(DictMixin):
  
        :type confidence_score: float

        :param text: Entity text as appears in the request.

        :type text: str

        :param grapheme_offset: Start position (in Unicode characters) for the

            entity match text.

        :type grapheme_offset: int

        :param grapheme_length: Length (in Unicode characters) for the entity

            match text.

        :type grapheme_length: int

        """

        def __init__(self, **kwargs):

            self.confidence_score = kwargs.get("confidence_score", None)

            self.text = kwargs.get("text", None)

            self.grapheme_offset = kwargs.get("grapheme_offset", None)

            self.grapheme_length = kwargs.get("grapheme_length", None)

        @classmethod

        def _from_generated(cls, match):

            return cls(

                confidence_score=match.confidence_score,

                text=match.text,

                grapheme_offset=match.offset,

                grapheme_length=match.length

                text=match.text

            )

        def __repr__(self):

            return "LinkedEntityMatch(confidence_score={}, text={}, grapheme_offset={}, grapheme_length={})" \

                .format(self.confidence_score, self.text, self.grapheme_offset, self.grapheme_length)[:1024]

            return "LinkedEntityMatch(confidence_score={}, text={})".format(

                self.confidence_score, self.text

            )[:1024]

    class TextDocumentInput(MultiLanguageInput):

    @@ -654,34 +635,26 @@ class SentenceSentiment(DictMixin):
  
            and 1 for the sentence for all labels.

        :type confidence_scores:

            ~azure.ai.textanalytics.SentimentConfidenceScores

        :param grapheme_offset: The sentence offset from the start of the

            document.

        :type grapheme_offset: int

        :param grapheme_length: The length of the sentence by Unicode standard.

        :type grapheme_length: int

        """

        def __init__(self, **kwargs):

            self.text = kwargs.get("text", None)

            self.sentiment = kwargs.get("sentiment", None)

            self.confidence_scores = kwargs.get("confidence_scores", None)

            self.grapheme_offset = kwargs.get("grapheme_offset", None)

            self.grapheme_length = kwargs.get("grapheme_length", None)

        @classmethod

        def _from_generated(cls, sentence):

            return cls(

                text=sentence.text,

                sentiment=sentence.sentiment,

                confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores),  # pylint: disable=protected-access

                grapheme_offset=sentence.offset,

                grapheme_length=sentence.length

            )

        def __repr__(self):

            return "SentenceSentiment(text={}, sentiment={}, confidence_scores={}, grapheme_offset={}, "\

                "grapheme_length={})".format(self.text, self.sentiment, repr(self.confidence_scores),

                self.grapheme_offset, self.grapheme_length

            return "SentenceSentiment(text={}, sentiment={}, confidence_scores={})".format(

                self.text,

                self.sentiment,

                repr(self.confidence_scores)

            )[:1024]

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -44,8 +44,6 @@ def test_all_successful_passing_dict(self, client): @@
                 for entity in doc.entities:
                     self.assertIsNotNone(entity.text)
                     self.assertIsNotNone(entity.category)
-                    self.assertIsNotNone(entity.grapheme_offset)
-                    self.assertIsNotNone(entity.grapheme_length)
                     self.assertIsNotNone(entity.confidence_score)
         @GlobalTextAnalyticsAccountPreparer()
@@ Expand All @@
                 for entity in doc.entities:
                     self.assertIsNotNone(entity.text)
                     self.assertIsNotNone(entity.category)
-                    self.assertIsNotNone(entity.grapheme_offset)
-                    self.assertIsNotNone(entity.grapheme_length)
                     self.assertIsNotNone(entity.confidence_score)
         @GlobalTextAnalyticsAccountPreparer()
@@ Expand Down @@

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_entities_async.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -60,8 +60,6 @@ async def test_all_successful_passing_dict(self, client): @@
                 for entity in doc.entities:
                     self.assertIsNotNone(entity.text)
                     self.assertIsNotNone(entity.category)
-                    self.assertIsNotNone(entity.grapheme_offset)
-                    self.assertIsNotNone(entity.grapheme_length)
                     self.assertIsNotNone(entity.confidence_score)
         @GlobalTextAnalyticsAccountPreparer()
@@ Expand All @@
                 for entity in doc.entities:
                     self.assertIsNotNone(entity.text)
                     self.assertIsNotNone(entity.category)
-                    self.assertIsNotNone(entity.grapheme_offset)
-                    self.assertIsNotNone(entity.grapheme_length)
                     self.assertIsNotNone(entity.confidence_score)
         @GlobalTextAnalyticsAccountPreparer()
@@ Expand Down @@

sdk/textanalytics/azure-ai-textanalytics/tests/test_text_analytics.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -29,10 +29,9 @@ def test_detect_language(self, resource_group, location, text_analytics_account,
  
        def test_repr(self):

            detected_language = _models.DetectedLanguage(name="English", iso6391_name="en", confidence_score=1.0)

            categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age",

                                                           grapheme_offset=0, grapheme_length=8, confidence_score=0.899)

            categorized_entity = _models.CategorizedEntity(text="Bill Gates", category="Person", subcategory="Age", confidence_score=0.899)

            text_document_statistics = _models.TextDocumentStatistics(grapheme_count=14, transaction_count=18)

            text_document_statistics = _models.TextDocumentStatistics(character_count=14, transaction_count=18)

            warnings = [_models.TextAnalyticsWarning(code="LongWordsInDocument", message="The document contains very long words (longer than 64 characters). These words will be truncated and may result in unreliable model predictions.")]

    @@ -63,8 +62,7 @@ def test_repr(self):
  
                    id="1", key_phrases=["dog", "cat", "bird"], warnings=warnings, statistics=text_document_statistics, is_error=False

                )

            linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates", grapheme_offset=0,

                                                            grapheme_length=8)

            linked_entity_match = _models.LinkedEntityMatch(confidence_score=0.999, text="Bill Gates")

            linked_entity = _models.LinkedEntity(

                name="Bill Gates",

    @@ -85,9 +83,7 @@ def test_repr(self):
  
            sentence_sentiment = _models.SentenceSentiment(

                text="This is a sentence.",

                sentiment="neutral",

                confidence_scores=sentiment_confidence_score_per_label,

                grapheme_offset=0,

                grapheme_length=10

                confidence_scores=sentiment_confidence_score_per_label

            )

            analyze_sentiment_result = _models.AnalyzeSentimentResult(

    @@ -114,60 +110,56 @@ def test_repr(self):
  
            )

            self.assertEqual("DetectedLanguage(name=English, iso6391_name=en, confidence_score=1.0)", repr(detected_language))

            self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, grapheme_offset=0, "

                             "grapheme_length=8, confidence_score=0.899)",

            self.assertEqual("CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, confidence_score=0.899)",

                             repr(categorized_entity))

            self.assertEqual("TextDocumentStatistics(grapheme_count=14, transaction_count=18)",

            self.assertEqual("TextDocumentStatistics(character_count=14, transaction_count=18)",

                             repr(text_document_statistics))

            self.assertEqual("RecognizeEntitiesResult(id=1, entities=[CategorizedEntity(text=Bill Gates, category=Person, "

                             "subcategory=Age, grapheme_offset=0, grapheme_length=8, confidence_score=0.899)], "

                             "subcategory=Age, confidence_score=0.899)], "

                             "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "

                             "These words will be truncated and may result in unreliable model predictions.)], "

                             "statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), "

                             "statistics=TextDocumentStatistics(character_count=14, transaction_count=18), "

                             "is_error=False)", repr(recognize_entities_result))

            self.assertEqual("DetectLanguageResult(id=1, primary_language=DetectedLanguage(name=English, "

                             "iso6391_name=en, confidence_score=1.0), "

                             "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "

                             "These words will be truncated and may result in unreliable model predictions.)], "

                             "statistics=TextDocumentStatistics(grapheme_count=14, "

                             "statistics=TextDocumentStatistics(character_count=14, "

                             "transaction_count=18), is_error=False)", repr(detect_language_result))

            self.assertEqual("TextAnalyticsError(code=invalidRequest, message=The request is invalid, target=request)",

                             repr(text_analytics_error))

            self.assertEqual("ExtractKeyPhrasesResult(id=1, key_phrases=['dog', 'cat', 'bird'], "

                             "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "

                             "These words will be truncated and may result in unreliable model predictions.)], "

                             "statistics=TextDocumentStatistics(grapheme_count=14, transaction_count=18), is_error=False)",

                             "statistics=TextDocumentStatistics(character_count=14, transaction_count=18), is_error=False)",

                             repr(extract_key_phrases_result))

            self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, grapheme_length=8)",

            self.assertEqual("LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)",

                             repr(linked_entity_match))

            self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, "

                             "grapheme_offset=0, grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, "

                             "grapheme_offset=0, grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, "

            self.assertEqual("LinkedEntity(name=Bill Gates, matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), "

                             "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], "

                             "language=English, data_source_entity_id=Bill Gates, "

                             "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)", repr(linked_entity))

            self.assertEqual("RecognizeLinkedEntitiesResult(id=1, entities=[LinkedEntity(name=Bill Gates, "

                             "matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, "

                             "grapheme_length=8), LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, grapheme_offset=0, "

                             "grapheme_length=8)], language=English, data_source_entity_id=Bill Gates, "

                             "matches=[LinkedEntityMatch(confidence_score=0.999, text=Bill Gates), "

                             "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates)], language=English, data_source_entity_id=Bill Gates, "

                             "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)], "

                             "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "

                             "These words will be truncated and may result in unreliable model predictions.)], "

                             "statistics=TextDocumentStatistics(grapheme_count=14, "

                             "statistics=TextDocumentStatistics(character_count=14, "

                             "transaction_count=18), is_error=False)", repr(recognize_linked_entities_result))

            self.assertEqual("SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02)",

                             repr(sentiment_confidence_score_per_label))

            self.assertEqual("SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores=SentimentConfidenceScores("

                             "positive=0.99, neutral=0.05, negative=0.02), grapheme_offset=0, grapheme_length=10)",

                             "positive=0.99, neutral=0.05, negative=0.02))",

                             repr(sentence_sentiment))

            self.assertEqual("AnalyzeSentimentResult(id=1, sentiment=positive, "

                             "warnings=[TextAnalyticsWarning(code=LongWordsInDocument, message=The document contains very long words (longer than 64 characters). "

                             "These words will be truncated and may result in unreliable model predictions.)], "

                             "statistics=TextDocumentStatistics("

                             "grapheme_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores"

                             "character_count=14, transaction_count=18), confidence_scores=SentimentConfidenceScores"

                             "(positive=0.99, neutral=0.05, negative=0.02), "

                             "sentences=[SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores="

                             "SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02), "

                             "grapheme_offset=0, grapheme_length=10)], "

                             "is_error=False)",

                             "SentimentConfidenceScores(positive=0.99, neutral=0.05, negative=0.02))], is_error=False)",

                             repr(analyze_sentiment_result))

            self.assertEqual("DocumentError(id=1, error=TextAnalyticsError(code=invalidRequest, "

                             "message=The request is invalid, target=request), is_error=True)", repr(document_error))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[text analytics] Remove offset and length #11190

Uh oh!

Diff view

Diff view

There are no files selected for viewing