Skip to content

Commit 9f4e92b

Browse files
authored
[text analytics] add sample stories and improve documents (#15429)
1 parent 10d4675 commit 9f4e92b

21 files changed

+627
-344
lines changed

sdk/textanalytics/azure-ai-textanalytics/README.md

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ The input for each operation is passed as a **list** of documents.
147147

148148
Each document can be passed as a string in the list, e.g.
149149
```python
150-
documents = ["I hated the movie. It was so slow!", "The movie made it into my top ten favorites.", "What a great movie!"]
150+
documents = ["I hated the movie. It was so slow!", "The movie made it into my top ten favorites. What a great movie!"]
151151
```
152152

153153
or, if you wish to pass in a per-item document `id` or `language`/`country_hint`, they can be passed as a list of
@@ -158,8 +158,7 @@ or a dict-like representation of the object:
158158
```python
159159
documents = [
160160
{"id": "1", "language": "en", "text": "I hated the movie. It was so slow!"},
161-
{"id": "2", "language": "en", "text": "The movie made it into my top ten favorites."},
162-
{"id": "3", "language": "en", "text": "What a great movie!"}
161+
{"id": "2", "language": "en", "text": "The movie made it into my top ten favorites. What a great movie!"},
163162
]
164163
```
165164

@@ -210,7 +209,7 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
210209
text_analytics_client = TextAnalyticsClient(endpoint, credential)
211210

212211
documents = [
213-
"I did not like the restaurant. The food was too spicy.",
212+
"I did not like the restaurant. The food was somehow both too spicy and underseasoned. Additionally, I thought the location was too far away from the playhouse.",
214213
"The restaurant was decorated beautifully. The atmosphere was unlike any other restaurant I've been to.",
215214
"The food was yummy. :)"
216215
]
@@ -244,8 +243,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
244243
text_analytics_client = TextAnalyticsClient(endpoint, credential)
245244

246245
documents = [
247-
"Microsoft was founded by Bill Gates and Paul Allen.",
248-
"Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.",
246+
"""
247+
Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond. Redmond is a
248+
city in King County, Washington, United States, located 15 miles east of Seattle.
249+
""",
249250
"Jeff bought three dozen eggs because there was a 50% discount."
250251
]
251252

@@ -280,7 +281,7 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
280281
text_analytics_client = TextAnalyticsClient(endpoint, credential)
281282

282283
documents = [
283-
"Microsoft was founded by Bill Gates and Paul Allen.",
284+
"Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond.",
284285
"Easter Island, a Chilean territory, is a remote volcanic island in Polynesia."
285286
]
286287

@@ -318,8 +319,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
318319
text_analytics_client = TextAnalyticsClient(endpoint, credential)
319320

320321
documents = [
321-
"The employee's SSN is 859-98-0987.",
322-
"The employee's phone number is 555-555-5555."
322+
"""
323+
We have an employee called Parker who cleans up after customers. The employee's
324+
SSN is 859-98-0987, and their phone number is 555-555-5555.
325+
"""
323326
]
324327
response = text_analytics_client.recognize_pii_entities(documents, language="en")
325328
result = [doc for doc in response if not doc.is_error]
@@ -351,8 +354,10 @@ text_analytics_client = TextAnalyticsClient(endpoint, credential)
351354

352355
documents = [
353356
"Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.",
354-
"I need to take my cat to the veterinarian.",
355-
"I will travel to South America in the summer."
357+
"""
358+
I need to take my cat to the veterinarian. He has been sick recently, and I need to take him
359+
before I travel to South America for the summer.
360+
""",
356361
]
357362

358363
response = text_analytics_client.extract_key_phrases(documents, language="en")
@@ -379,7 +384,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
379384
text_analytics_client = TextAnalyticsClient(endpoint, credential)
380385

381386
documents = [
382-
"This is written in English.",
387+
"""
388+
This whole document is written in English. In order for the whole document to be written
389+
in English, every sentence also has to be written in English, which it is.
390+
""",
383391
"Il documento è scritto in italiano.",
384392
"Dies ist in deutscher Sprache verfasst."
385393
]

sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ async def alternative_document_input(self):
3737
text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
3838

3939
documents = [
40-
{"id": "0", "language": "en", "text": "I had the best day of my life."},
41-
{"id": "1", "language": "en",
40+
{"id": "0", "country_hint": "US", "text": "I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. I developed a deep connection with my instructor as well."},
41+
{"id": "1", "country_hint": "GB",
4242
"text": "This was a waste of my time. The speaker put me to sleep."},
43-
{"id": "2", "language": "es", "text": "No tengo dinero ni nada que dar..."},
44-
{"id": "3", "language": "fr",
43+
{"id": "2", "country_hint": "MX", "text": "No tengo dinero ni nada que dar..."},
44+
{"id": "3", "country_hint": "FR",
4545
"text": "L'hôtel n'était pas très confortable. L'éclairage était trop sombre."}
4646
]
4747
async with text_analytics_client:

sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
This sample demonstrates how to analyze sentiment in documents.
1414
An overall and per-sentence sentiment is returned.
1515
16+
In this sample we will be a skydiving company going through reviews people have left for our company.
17+
We will extract the reviews that we are certain have a positive sentiment and post them onto our
18+
website to attract more divers.
19+
1620
USAGE:
1721
python sample_analyze_sentiment_async.py
1822
@@ -28,6 +32,14 @@
2832
class AnalyzeSentimentSampleAsync(object):
2933

3034
async def analyze_sentiment_async(self):
35+
print(
36+
"In this sample we will be combing through reviews customers have left about their "
37+
"experience using our skydiving company, Contoso."
38+
)
39+
print(
40+
"We start out with a list of reviews. Let us extract the reviews we are sure are "
41+
"positive, so we can display them on our website and get even more customers!"
42+
)
3143
# [START analyze_sentiment_async]
3244
from azure.core.credentials import AzureKeyCredential
3345
from azure.ai.textanalytics.aio import TextAnalyticsClient
@@ -36,38 +48,64 @@ async def analyze_sentiment_async(self):
3648
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
3749

3850
text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
51+
3952
documents = [
40-
"I had the best day of my life.",
41-
"This was a waste of my time. The speaker put me to sleep.",
42-
"No tengo dinero ni nada que dar...",
43-
"L'hôtel n'était pas très confortable. L'éclairage était trop sombre."
53+
"""I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more.
54+
I developed a deep connection with my instructor as well, and I feel as if I've made a life-long friend in her.""",
55+
"""This was a waste of my time. All of the views on this drop are extremely boring, all I saw was grass. 0/10 would
56+
not recommend to any divers, even first timers.""",
57+
"""This was pretty good! The sights were ok, and I had fun with my instructors! Can't complain too much about my experience""",
58+
"""I only have one word for my experience: WOW!!! I can't believe I have had such a wonderful skydiving company right
59+
in my backyard this whole time! I will definitely be a repeat customer, and I want to take my grandmother skydiving too,
60+
I know she'll love it!"""
4461
]
4562

4663
async with text_analytics_client:
4764
result = await text_analytics_client.analyze_sentiment(documents)
4865

4966
docs = [doc for doc in result if not doc.is_error]
5067

68+
print("Let's visualize the sentiment of each of these documents")
5169
for idx, doc in enumerate(docs):
5270
print("Document text: {}".format(documents[idx]))
5371
print("Overall sentiment: {}".format(doc.sentiment))
5472
# [END analyze_sentiment_async]
55-
print("Overall confidence scores: positive={}; neutral={}; negative={} \n".format(
56-
doc.confidence_scores.positive,
57-
doc.confidence_scores.neutral,
58-
doc.confidence_scores.negative,
59-
))
60-
for sentence in doc.sentences:
61-
print("Sentence '{}' has sentiment: {}".format(sentence.text, sentence.sentiment))
62-
print("...Sentence is {} characters from the start of the document and is {} characters long".format(
63-
sentence.offset, len(sentence.text)
64-
))
65-
print("...Sentence confidence scores: positive={}; neutral={}; negative={}".format(
66-
sentence.confidence_scores.positive,
67-
sentence.confidence_scores.neutral,
68-
sentence.confidence_scores.negative,
69-
))
70-
print("------------------------------------")
73+
74+
print("Now, let us extract all of the positive reviews")
75+
positive_reviews = [doc for doc in docs if doc.sentiment == 'positive']
76+
77+
print("We want to be very confident that our reviews are positive since we'll be posting them on our website.")
78+
print("We're going to confirm our chosen reviews are positive using two different tests")
79+
80+
print(
81+
"First, we are going to check how confident the sentiment analysis model is that a document is positive. "
82+
"Let's go with a 90% confidence."
83+
)
84+
positive_reviews = [
85+
review for review in positive_reviews
86+
if review.confidence_scores.positive >= 0.9
87+
]
88+
89+
print(
90+
"Finally, we also want to make sure every sentence is positive so we only showcase our best selves!"
91+
)
92+
positive_reviews_final = []
93+
for idx, review in enumerate(positive_reviews):
94+
print("Looking at positive review #{}".format(idx + 1))
95+
any_sentence_not_positive = False
96+
for sentence in review.sentences:
97+
print("...Sentence '{}' has sentiment '{}' with confidence scores '{}'".format(
98+
sentence.text,
99+
sentence.sentiment,
100+
sentence.confidence_scores
101+
)
102+
)
103+
if sentence.sentiment != 'positive':
104+
any_sentence_not_positive = True
105+
if not any_sentence_not_positive:
106+
positive_reviews_final.append(review)
107+
108+
print("We now have the final list of positive reviews we are going to display on our website!")
71109

72110

73111
async def main():

0 commit comments

Comments
 (0)