Skip to content

Commit d25ce0e

Browse files
authored
[text analytics] return None for offset and length for v3.0 (Azure#13382)
1 parent 077e344 commit d25ce0e

20 files changed

+681
-15
lines changed

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
- We are now targeting the service's v3.1-preview.1 API as the default. If you would like to still use version v3.0 of the service,
77
pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient
88
- We have added an API `recognize_pii_entities` which returns entities containing personal information for a batch of documents. Only available for API version v3.1-preview.1 and up.
9-
- Added `offset` and `length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`.
9+
- Added `offset` and `length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. These properties are only available for API versions v3.1-preview.1 and up.
1010
- `length` is the number of characters in the text of these models
1111
- `offset` is the offset of the text from the start of the document
1212
- We have now added support for opinion mining. To use this feature, you need to make sure you are using the service's

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
# Licensed under the MIT License.
55
# ------------------------------------
66
import re
7-
from ._generated.v3_0.models._models import (
7+
from ._generated.models import (
88
LanguageInput,
9-
MultiLanguageInput
9+
MultiLanguageInput,
1010
)
1111

12+
from ._generated.v3_0 import models as _v3_0_models
13+
1214
def _get_indices(relation):
1315
return [int(s) for s in re.findall(r"\d+", relation)]
1416

@@ -207,9 +209,9 @@ class CategorizedEntity(DictMixin):
207209
:ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc
208210
:vartype subcategory: str
209211
:ivar int offset: The entity text offset from the start of the document.
210-
Returned in unicode code points.
212+
Returned in unicode code points. Only returned for API versions v3.1-preview.1 and up.
211213
:ivar int length: The length of the entity text. Returned
212-
in unicode code points.
214+
in unicode code points. Only returned for API versions v3.1-preview.1 and up.
213215
:ivar confidence_score: Confidence score between 0 and 1 of the extracted
214216
entity.
215217
:vartype confidence_score: float
@@ -225,12 +227,19 @@ def __init__(self, **kwargs):
225227

226228
@classmethod
227229
def _from_generated(cls, entity):
230+
offset = entity.offset
231+
length = entity.length
232+
if isinstance(entity, _v3_0_models.Entity):
233+
# we do not return offset and length for v3.0 since
234+
# the correct encoding was not introduced for v3.0
235+
offset = None
236+
length = None
228237
return cls(
229238
text=entity.text,
230239
category=entity.category,
231240
subcategory=entity.subcategory,
232-
offset=entity.offset,
233-
length=entity.length,
241+
offset=offset,
242+
length=length,
234243
confidence_score=entity.confidence_score,
235244
)
236245

@@ -640,9 +649,9 @@ class LinkedEntityMatch(DictMixin):
640649
:vartype confidence_score: float
641650
:ivar text: Entity text as appears in the request.
642651
:ivar int offset: The linked entity match text offset from the start of the document.
643-
Returned in unicode code points.
652+
Returned in unicode code points. Only returned for API versions v3.1-preview.1 and up.
644653
:ivar int length: The length of the linked entity match text. Returned
645-
in unicode code points.
654+
in unicode code points. Only returned for API versions v3.1-preview.1 and up.
646655
:vartype text: str
647656
"""
648657

@@ -654,11 +663,18 @@ def __init__(self, **kwargs):
654663

655664
@classmethod
656665
def _from_generated(cls, match):
666+
offset = match.offset
667+
length = match.length
668+
if isinstance(match, _v3_0_models.Match):
669+
# we do not return offset and length for v3.0 since
670+
# the correct encoding was not introduced for v3.0
671+
offset = None
672+
length = None
657673
return cls(
658674
confidence_score=match.confidence_score,
659675
text=match.text,
660-
offset=match.offset,
661-
length=match.length
676+
offset=offset,
677+
length=length
662678
)
663679

664680
def __repr__(self):
@@ -745,9 +761,9 @@ class SentenceSentiment(DictMixin):
745761
:vartype confidence_scores:
746762
~azure.ai.textanalytics.SentimentConfidenceScores
747763
:ivar int offset: The sentence offset from the start of the document. Returned
748-
in unicode code points.
764+
in unicode code points. Only returned for API versions v3.1-preview.1 and up.
749765
:ivar int length: The length of the sentence. Returned
750-
in unicode code points.
766+
in unicode code points. Only returned for API versions v3.1-preview.1 and up.
751767
:ivar mined_opinions: The list of opinions mined from this sentence.
752768
For example in "The food is good, but the service is bad", we would
753769
mine these two opinions "food is good", "service is bad". Only returned
@@ -766,6 +782,13 @@ def __init__(self, **kwargs):
766782

767783
@classmethod
768784
def _from_generated(cls, sentence, results):
785+
offset = sentence.offset
786+
length = sentence.length
787+
if isinstance(sentence, _v3_0_models.SentenceSentiment):
788+
# we do not return offset and length for v3.0 since
789+
# the correct encoding was not introduced for v3.0
790+
offset = None
791+
length = None
769792
if hasattr(sentence, "aspects"):
770793
mined_opinions = (
771794
[MinedOpinion._from_generated(aspect, results) for aspect in sentence.aspects] # pylint: disable=protected-access
@@ -777,8 +800,8 @@ def _from_generated(cls, sentence, results):
777800
text=sentence.text,
778801
sentiment=sentence.sentiment,
779802
confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores), # pylint: disable=protected-access
780-
offset=sentence.offset,
781-
length=sentence.length,
803+
offset=offset,
804+
length=length,
782805
mined_opinions=mined_opinions
783806
)
784807

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
4+
inside", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '99'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
23+
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
24+
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
25+
headers:
26+
apim-request-id:
27+
- 94e0a047-a7be-4d12-a4ec-81ef3f496950
28+
content-type:
29+
- application/json; charset=utf-8
30+
csp-billing-usage:
31+
- CognitiveServices.TextAnalytics.BatchScoring=1
32+
date:
33+
- Thu, 27 Aug 2020 20:56:20 GMT
34+
strict-transport-security:
35+
- max-age=31536000; includeSubDomains; preload
36+
transfer-encoding:
37+
- chunked
38+
x-content-type-options:
39+
- nosniff
40+
x-envoy-upstream-service-time:
41+
- '78'
42+
status:
43+
code: 200
44+
message: OK
45+
version: 1
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
4+
inside", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '99'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
23+
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
24+
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
25+
headers:
26+
apim-request-id:
27+
- c1dc9d16-85c8-420d-95a1-76b21edbb06f
28+
content-type:
29+
- application/json; charset=utf-8
30+
csp-billing-usage:
31+
- CognitiveServices.TextAnalytics.BatchScoring=1
32+
date:
33+
- Fri, 28 Aug 2020 18:31:18 GMT
34+
strict-transport-security:
35+
- max-age=31536000; includeSubDomains; preload
36+
transfer-encoding:
37+
- chunked
38+
x-content-type-options:
39+
- nosniff
40+
x-envoy-upstream-service-time:
41+
- '81'
42+
status:
43+
code: 200
44+
message: OK
45+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
4+
inside", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Content-Length:
9+
- '99'
10+
Content-Type:
11+
- application/json
12+
User-Agent:
13+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
14+
method: POST
15+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
16+
response:
17+
body:
18+
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
19+
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
20+
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
21+
headers:
22+
apim-request-id: 0577ce48-c371-418e-b478-cc085c7ecaf8
23+
content-type: application/json; charset=utf-8
24+
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
25+
date: Thu, 27 Aug 2020 20:56:21 GMT
26+
strict-transport-security: max-age=31536000; includeSubDomains; preload
27+
transfer-encoding: chunked
28+
x-content-type-options: nosniff
29+
x-envoy-upstream-service-time: '79'
30+
status:
31+
code: 200
32+
message: OK
33+
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.0/sentiment?showStats=false
34+
version: 1
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
4+
inside", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Content-Length:
9+
- '99'
10+
Content-Type:
11+
- application/json
12+
User-Agent:
13+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
14+
method: POST
15+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
16+
response:
17+
body:
18+
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
19+
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
20+
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
21+
headers:
22+
apim-request-id: 22d88cc1-51fb-48e0-a335-d14b72e1d125
23+
content-type: application/json; charset=utf-8
24+
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
25+
date: Fri, 28 Aug 2020 18:31:18 GMT
26+
strict-transport-security: max-age=31536000; includeSubDomains; preload
27+
transfer-encoding: chunked
28+
x-content-type-options: nosniff
29+
x-envoy-upstream-service-time: '92'
30+
status:
31+
code: 200
32+
message: OK
33+
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
34+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
4+
and Paul Allen", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '108'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/entities/recognition/general?showStats=false
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","entities":[{"text":"Microsoft","category":"Organization","offset":0,"length":9,"confidenceScore":0.82},{"text":"Bill
23+
Gates","category":"Person","offset":25,"length":10,"confidenceScore":0.84},{"text":"Paul
24+
Allen","category":"Person","offset":40,"length":10,"confidenceScore":0.89}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
25+
headers:
26+
apim-request-id:
27+
- 8ebab42d-0090-4d36-8e52-721f4c4b87d7
28+
content-type:
29+
- application/json; charset=utf-8
30+
csp-billing-usage:
31+
- CognitiveServices.TextAnalytics.BatchScoring=1
32+
date:
33+
- Thu, 27 Aug 2020 20:56:21 GMT
34+
strict-transport-security:
35+
- max-age=31536000; includeSubDomains; preload
36+
transfer-encoding:
37+
- chunked
38+
x-content-type-options:
39+
- nosniff
40+
x-envoy-upstream-service-time:
41+
- '82'
42+
status:
43+
code: 200
44+
message: OK
45+
version: 1
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
4+
and Paul Allen", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '108'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/general?showStats=false&stringIndexType=UnicodeCodePoint
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","entities":[{"text":"Microsoft","category":"Organization","offset":0,"length":9,"confidenceScore":0.82},{"text":"Bill
23+
Gates","category":"Person","offset":25,"length":10,"confidenceScore":0.84},{"text":"Paul
24+
Allen","category":"Person","offset":40,"length":10,"confidenceScore":0.89}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
25+
headers:
26+
apim-request-id:
27+
- c588af7e-ff6c-4bca-9be0-bc50b81df611
28+
content-type:
29+
- application/json; charset=utf-8
30+
csp-billing-usage:
31+
- CognitiveServices.TextAnalytics.BatchScoring=1
32+
date:
33+
- Fri, 28 Aug 2020 18:31:19 GMT
34+
strict-transport-security:
35+
- max-age=31536000; includeSubDomains; preload
36+
transfer-encoding:
37+
- chunked
38+
x-content-type-options:
39+
- nosniff
40+
x-envoy-upstream-service-time:
41+
- '80'
42+
status:
43+
code: 200
44+
message: OK
45+
version: 1

0 commit comments

Comments
 (0)