Skip to content

Commit 7c86949

Browse files
committed
1.16.1
1 parent 903720b commit 7c86949

File tree

7 files changed

+24
-25
lines changed

7 files changed

+24
-25
lines changed

demos/aspect_term_extraction/checkpoints-v1.16.json

+1-1
Large diffs are not rendered by default.

demos/aspect_term_extraction/deploy_demo.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,17 @@
55
from findfile import find_files
66

77
from pyabsa import ATEPCCheckpointManager
8-
from pyabsa.functional.dataset.dataset_manager import download_datasets_from_github, ABSADatasetList
8+
from pyabsa.functional.dataset.dataset_manager import download_datasets_from_github, ABSADatasetList, detect_infer_dataset
99

1010
download_datasets_from_github(os.getcwd())
1111

12+
dataset_items = {dataset.name: dataset for dataset in ABSADatasetList()}
1213

1314
def get_example(dataset):
14-
filter_key_words = ['.py', '.md', 'readme', 'log', 'result', 'zip', '.state_dict', '.model', '.png', 'acc_', 'f1_', '.origin', '.adv', '.csv']
15-
dataset_file = {'train': [], 'test': [], 'valid': []}
16-
search_path = './'
17-
task = 'apc_datasets'
18-
dataset_file['test'] += find_files(search_path, [dataset, 'test', task, '.inference'], exclude_key=['.adv', '.org', '.defense', 'train.'] + filter_key_words)
15+
task = 'apc'
16+
dataset_file = detect_infer_dataset(dataset_items[dataset], task)
1917

20-
for fname in dataset_file['test']:
18+
for fname in dataset_file:
2119
lines = []
2220
if isinstance(fname, str):
2321
fname = [fname]
@@ -33,7 +31,7 @@ def get_example(dataset):
3331

3432

3533
dataset_dict = {dataset.name: get_example(dataset.name) for dataset in ABSADatasetList()}
36-
aspect_extractor = ATEPCCheckpointManager.get_aspect_extractor(checkpoint='english')
34+
aspect_extractor = ATEPCCheckpointManager.get_aspect_extractor(checkpoint='multilingual')
3735

3836

3937
def perform_inference(text, dataset):
@@ -70,6 +68,7 @@ def perform_inference(text, dataset):
7068
gr.Markdown("You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)")
7169
dataset_ids = gr.Radio(choices=[dataset.name for dataset in ABSADatasetList()[:-1]], value='Laptop14', label="Datasets")
7270
inference_button = gr.Button("Let's go!")
71+
gr.Markdown("There is a [demo](https://huggingface.co/spaces/yangheng/PyABSA-ATEPC-Chinese) specialized for the Chinese language")
7372
gr.Markdown("This demo supports many other languages as well; you can try and explore the results of other languages by yourself.")
7473

7574
with gr.Column():

demos/aspect_term_extraction/extract_aspects_multilingual.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# github: https://github.com/yangheng95
66
# Copyright (C) 2021. All Rights Reserved.
77

8-
from pyabsa import ATEPCCheckpointManager, available_checkpoints
8+
from pyabsa import ATEPCCheckpointManager, available_checkpoints, ABSADatasetList
99

1010
checkpoint_map = available_checkpoints(from_local=False)
1111

@@ -25,6 +25,7 @@
2525
# 从Google Drive下载提供的预训练模型
2626
aspect_extractor = ATEPCCheckpointManager.get_aspect_extractor(checkpoint='multilingual')
2727

28+
examples = ABSADatasetList.Phone
2829
atepc_result = aspect_extractor.extract_aspect(inference_source=examples, # list-support only, for current
2930
print_result=True, # print the result
3031
pred_sentiment=True, # Predict the sentiment of extracted aspect terms

pyabsa/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# Copyright (C) 2021. All Rights Reserved.
88

99

10-
__version__ = '1.16.0'
10+
__version__ = '1.16.1'
1111

1212
__name__ = 'pyabsa'
1313

pyabsa/functional/checkpoint/checkpoint_manager.py

-4
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,6 @@ def get_aspect_extractor(checkpoint: str = None,
101101
102102
:param checkpoint: zipped checkpoint name, or checkpoint path or checkpoint name queried from google drive
103103
This param is for someone who wants to load a checkpoint not registered in PyABSA
104-
:param sentiment_map: label to text index map (deprecated and has no effect)
105-
:param auto_device: True or False, otherwise 'cuda', 'cpu' works
106-
:param eval_batch_size: eval batch_size in modeling
107-
108104
:return:
109105
"""
110106
if os.path.exists(checkpoint):

pyabsa/functional/dataset/dataset_manager.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class ABSADatasetList(list):
5454
Restaurant16 = DatasetItem('Restaurant16', '116.Restaurant16')
5555

5656
# Twitter
57-
ACL_Twitter = DatasetItem('Twitter', '101.Twitter')
57+
ACL_Twitter = DatasetItem('Twitter', '101.ACL_Twitter')
5858

5959
MAMS = DatasetItem('MAMS', '109.MAMS')
6060

@@ -83,7 +83,7 @@ class ABSADatasetList(list):
8383
# assembled dataset
8484
Chinese = DatasetItem('Chinese', ['107.Phone', '103.Camera', '106.Notebook', '104.Car', '105.MOOC'])
8585
Binary_Polarity_Chinese = DatasetItem('Chinese', ['107.Phone', '103.Camera', '106.Notebook', '104.Car'])
86-
Triple_Polarity_Chinese = DatasetItem('Chinese', ['105.MOOC'])
86+
Triple_Polarity_Chinese = DatasetItem('Chinese3way', ['105.MOOC'])
8787

8888
SemEval2016Task5 = DatasetItem('SemEval2016Task5', ['120.SemEval2016Task5'])
8989
Arabic_SemEval2016Task5 = DatasetItem('Arabic_SemEval2016Task5', ['122.Arabic'])
@@ -92,10 +92,10 @@ class ABSADatasetList(list):
9292
Turkish_SemEval2016Task5 = DatasetItem('Turkish_SemEval2016Task5', ['128.Turkish'])
9393
Russian_SemEval2016Task5 = DatasetItem('Russian_SemEval2016Task5', ['126.Russian'])
9494
French_SemEval2016Task5 = DatasetItem('French_SemEval2016Task5', ['125.French'])
95-
English_SemEval2016Task5 = DatasetItem('English_SemEval2016Task5', ['125.English'])
95+
English_SemEval2016Task5 = DatasetItem('English_SemEval2016Task5', ['124.English'])
9696

97-
English = DatasetItem('English', ['Laptop14', 'Restaurant14', 'Restaurant16', 'ACL_Twitter',
98-
'MAMS', 'Television', 'TShirt', 'Yelp', 'MOOC_En'])
97+
English = DatasetItem('English', ['113.Laptop14', '114.Restaurant14', '116.Restaurant16', '101.ACL_Twitter',
98+
'109.MAMS', '117.Television', '118.TShirt', '119.Yelp', '121.MOOC_En'])
9999

100100
# Abandon rest15 dataset due to data leakage, See https://github.com/yangheng95/PyABSA/issues/53
101101
SemEval = DatasetItem('SemEval', ['113.Laptop14', '114.Restaurant14', '116.Restaurant16'])
@@ -108,11 +108,11 @@ def __init__(self):
108108
dataset_list = [
109109
self.Laptop14, self.Restaurant14, self.Restaurant15, self.Restaurant16,
110110
self.ACL_Twitter, self.MAMS, self.Television, self.TShirt,
111-
self.Phone, self.Car, self.Notebook, self.Camera,
112-
self.Binary_Polarity_Chinese, self.Triple_Polarity_Chinese,
113-
self.Shampoo, self.MOOC, self.MOOC_En,
114-
self.English, self.SemEval,
115-
self.Restaurant, self.Multilingual
111+
self.Phone, self.Car, self.Notebook, self.Camera, self.MOOC, self.MOOC_En,
112+
self.Chinese, self.Arabic_SemEval2016Task5, self.Dutch_SemEval2016Task5,
113+
self.Spanish_SemEval2016Task5, self.Turkish_SemEval2016Task5, self.Russian_SemEval2016Task5,
114+
self.French_SemEval2016Task5, self.English_SemEval2016Task5,
115+
self.English, self.SemEval, self.Restaurant, self.Multilingual
116116
]
117117
super().__init__(dataset_list)
118118

release-note.json

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
{
2+
"1.16.1": {
3+
"1": "fix some DatasetItem"
4+
},
25
"1.16.0": {
36
"1": "Fix a checkpoint downloading and inflation bug which prevents loading a checkpoint from huggingface spaces",
47
"2": "Fix a important bug which cause unexpected low performance when performing ATEPC inference for Chinese language (and possibly other non-latin languages)",

0 commit comments

Comments
 (0)