update lac (#2187)
jm12138 authored Dec 29, 2022
1 parent 3fcf01d commit 497e3ce
Showing 6 changed files with 53 additions and 67 deletions.
9 changes: 7 additions & 2 deletions modules/text/lexical_analysis/lac/README.md
@@ -245,7 +245,8 @@
- For more information about PaddleHub Serving, see: [Serving Deployment](../../../../docs/docs_ch/tutorial/serving.md)
- ### Gradio APP Support
Since PaddleHub 2.3.1, the Gradio APP of lac can be accessed in a browser at http://127.0.0.1:8866/gradio/lac.
## V. Update History
@@ -287,6 +288,10 @@
Removed the fluid API
* 2.4.0
Added Gradio APP support
- ```shell
$ hub install lac==2.3.0
$ hub install lac==2.4.0
```
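For context, here is a minimal usage sketch of the updated module, mirroring the parameters exercised in test.py further below; it assumes PaddleHub and the lac module are installed as shown above:

```python
import paddlehub as hub

# Load the installed lac module.
lac = hub.Module(name="lac")

# Single sentence, segmentation only (matches test_cut1 in test.py).
print(lac.cut(text="今天是个好日子", use_gpu=False, batch_size=1, return_tag=False))
# -> ['今天', '是', '个', '好日子']

# Batch of sentences with POS / NER tags (matches test_cut5, but on CPU).
texts = ["今天是个好日子", "天气预报说今天要下雨", "下一班地铁马上就要到了"]
print(lac.cut(text=texts, use_gpu=False, batch_size=2, return_tag=True))
```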
1 change: 0 additions & 1 deletion modules/text/lexical_analysis/lac/ahocorasick.py
@@ -1,4 +1,3 @@
# -*- coding: UTF-8 -*-
"""
This module wraps an Aho-Corasick automaton in the Ahocorasick class for multi-pattern matching against a dictionary.
"""
2 changes: 0 additions & 2 deletions modules/text/lexical_analysis/lac/custom.py
@@ -1,8 +1,6 @@
# -*- coding: UTF-8 -*-
"""
This module implements user-defined dictionary support.
"""

from io import open

from .ahocorasick import Ahocorasick
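custom.py backs lac's user-defined dictionary feature on top of the Ahocorasick wrapper above. A rough sketch of how a user dictionary is typically attached through the module follows; the set_user_dict / del_user_dict names and the dictionary file format are assumptions not confirmed by this diff:

```python
import paddlehub as hub

lac = hub.Module(name="lac")

# Assumed interface: load a plain-text user dictionary (one entry per line,
# e.g. "天气预报" or "天气预报/n"); the exact method name and file format
# are not shown in this commit.
lac.set_user_dict("user.dict")
print(lac.cut(text="天气预报说今天要下雨", use_gpu=False, batch_size=1, return_tag=True))

# Assumed interface: detach the custom dictionary again.
lac.del_user_dict()
```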
11 changes: 9 additions & 2 deletions modules/text/lexical_analysis/lac/module.py
@@ -1,4 +1,3 @@
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -33,7 +32,7 @@ def __init__(self, *args):

@moduleinfo(
name="lac",
version="2.3.0",
version="2.4.0",
summary=
"Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition",
author="baidu-nlp",
@@ -412,3 +411,11 @@ def check_input_data(self, args):
raise DataFormatError

return input_data

def create_gradio_app(self):
import gradio as gr
return gr.Interface(self.cut,
gr.Text(label='text'),
gr.JSON(label='results'),
title='lac',
allow_flagging='never')
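The new create_gradio_app hook is what backs the http://127.0.0.1:8866/gradio/lac link mentioned in the README. As a small sketch, the interface can also be launched directly; launch() is standard Gradio API rather than part of this diff, and gradio must be installed:

```python
import paddlehub as hub

lac = hub.Module(name="lac")

# Build the Gradio interface added in 2.4.0 and serve it locally.
app = lac.create_gradio_app()
app.launch()  # standard Gradio call: starts a local web UI for lac.cut
```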
1 change: 0 additions & 1 deletion modules/text/lexical_analysis/lac/processor.py
@@ -1,4 +1,3 @@
# -*- coding:utf-8 -*-
import io

import numpy as np
96 changes: 37 additions & 59 deletions modules/text/lexical_analysis/lac/test.py
@@ -8,6 +8,7 @@


class TestHubModule(unittest.TestCase):

@classmethod
def setUpClass(cls) -> None:
cls.text = "今天是个好日子"
@@ -19,74 +20,51 @@ def tearDownClass(cls) -> None:
shutil.rmtree('inference')

def test_cut1(self):
results = self.module.cut(
text=self.text,
use_gpu=False,
batch_size=1,
return_tag=False
)
results = self.module.cut(text=self.text, use_gpu=False, batch_size=1, return_tag=False)
self.assertEqual(results, ['今天', '是', '个', '好日子'])

def test_cut2(self):
results = self.module.cut(
text=self.texts,
use_gpu=False,
batch_size=1,
return_tag=False
)
self.assertEqual(results, [
{'word': ['今天', '是', '个', '好日子']},
{'word': ['天气预报', '说', '今天', '要', '下雨']},
{'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']}
])
results = self.module.cut(text=self.texts, use_gpu=False, batch_size=1, return_tag=False)
self.assertEqual(results, [{
'word': ['今天', '是', '个', '好日子']
}, {
'word': ['天气预报', '说', '今天', '要', '下雨']
}, {
'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']
}])

def test_cut3(self):
results = self.module.cut(
text=self.texts,
use_gpu=False,
batch_size=2,
return_tag=False
)
self.assertEqual(results, [
{'word': ['今天', '是', '个', '好日子']},
{'word': ['天气预报', '说', '今天', '要', '下雨']},
{'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']}
])
results = self.module.cut(text=self.texts, use_gpu=False, batch_size=2, return_tag=False)
self.assertEqual(results, [{
'word': ['今天', '是', '个', '好日子']
}, {
'word': ['天气预报', '说', '今天', '要', '下雨']
}, {
'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']
}])

def test_cut4(self):
results = self.module.cut(
text=self.texts,
use_gpu=True,
batch_size=2,
return_tag=False
)
self.assertEqual(results, [
{'word': ['今天', '是', '个', '好日子']},
{'word': ['天气预报', '说', '今天', '要', '下雨']},
{'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']}
])
results = self.module.cut(text=self.texts, use_gpu=True, batch_size=2, return_tag=False)
self.assertEqual(results, [{
'word': ['今天', '是', '个', '好日子']
}, {
'word': ['天气预报', '说', '今天', '要', '下雨']
}, {
'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']
}])

def test_cut5(self):
results = self.module.cut(
text=self.texts,
use_gpu=True,
batch_size=2,
return_tag=True
)
self.assertEqual(results, [
{
'word': ['今天', '是', '个', '好日子'],
'tag': ['TIME', 'v', 'q', 'n']
},
{
'word': ['天气预报', '说', '今天', '要', '下雨'],
'tag': ['n', 'v', 'TIME', 'v', 'v']
},
{
'word': ['下', '一班', '地铁', '马上', '就要', '到', '了'],
'tag': ['f', 'm', 'n', 'd', 'v', 'v', 'xc']
}
])
results = self.module.cut(text=self.texts, use_gpu=True, batch_size=2, return_tag=True)
self.assertEqual(results, [{
'word': ['今天', '是', '个', '好日子'],
'tag': ['TIME', 'v', 'q', 'n']
}, {
'word': ['天气预报', '说', '今天', '要', '下雨'],
'tag': ['n', 'v', 'TIME', 'v', 'v']
}, {
'word': ['下', '一班', '地铁', '马上', '就要', '到', '了'],
'tag': ['f', 'm', 'n', 'd', 'v', 'v', 'xc']
}])

def test_save_inference_model(self):
self.module.save_inference_model('./inference/model')
