@@ -1296,10 +1296,10 @@ def test_uni_parser_frequency():
1296
1296
def test_sync_crawl ():
1297
1297
from concurrent .futures import ThreadPoolExecutor
1298
1298
uni = Uniparser ()
1299
- uni .pop_frequency ('https://www.baidu.com /robots.txt' )
1299
+ uni .pop_frequency ('https://www.python.org /robots.txt' )
1300
1300
uni = Uniparser ()
1301
1301
crawler_rule = CrawlerRule .loads (
1302
- r'''{"name":"Test Frequency","request_args":{"method":"get","url":"https://www.baidu.com /robots.txt","headers":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}},"parse_rules":[{"name":"__request__","chain_rules":[["udf","['https://www.baidu.com /robots.txt'] * 4",""]],"childs":""}],"regex":"^https://www.baidu.com /robots.txt"}'''
1302
+ r'''{"name":"Test Frequency","request_args":{"method":"get","url":"https://www.python.org /robots.txt","headers":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}},"parse_rules":[{"name":"__request__","chain_rules":[["udf","['https://www.python.org /robots.txt'] * 4",""]],"childs":""}],"regex":"^https://www.python.org /robots.txt"}'''
1303
1303
)
1304
1304
start_time = time .time ()
1305
1305
pool = ThreadPoolExecutor ()
@@ -1312,7 +1312,7 @@ def test_sync_crawl():
1312
1312
# print(cost_time)
1313
1313
assert cost_time < test_count
1314
1314
# Set a frequency limit for this URL; the call below passes (1, 2), presumably 1 download per 2 sec (confirm)
1315
- uni .set_frequency ('https://www.baidu.com /robots.txt' , 1 , 2 )
1315
+ uni .set_frequency ('https://www.python.org /robots.txt' , 1 , 2 )
1316
1316
start_time = time .time ()
1317
1317
pool = ThreadPoolExecutor ()
1318
1318
tasks = [
@@ -1325,9 +1325,9 @@ def test_sync_crawl():
1325
1325
1326
1326
async def test_async_crawl ():
1327
1327
uni = Uniparser ()
1328
- uni .pop_frequency ('https://www.baidu.com /robots.txt' )
1328
+ uni .pop_frequency ('https://www.python.org /robots.txt' )
1329
1329
crawler_rule = CrawlerRule .loads (
1330
- r'''{"name":"Test Frequency","request_args":{"method":"get","url":"https://www.baidu.com /robots.txt","headers":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}},"parse_rules":[{"name":"nonsense","chain_rules":[["udf","['https://www.baidu.com /robots.txt'] * 4",""]],"childs":""}],"regex":"^https://www.baidu.com /robots.txt"}'''
1330
+ r'''{"name":"Test Frequency","request_args":{"method":"get","url":"https://www.python.org /robots.txt","headers":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}},"parse_rules":[{"name":"nonsense","chain_rules":[["udf","['https://www.python.org /robots.txt'] * 4",""]],"childs":""}],"regex":"^https://www.python.org /robots.txt"}'''
1331
1331
)
1332
1332
start_time = time .time ()
1333
1333
test_count = 3
@@ -1340,7 +1340,7 @@ async def test_async_crawl():
1340
1340
# print(cost_time)
1341
1341
assert cost_time < test_count
1342
1342
# Set an async frequency limit for this URL; the call below passes (1, 2), presumably 1 download per 2 sec (confirm)
1343
- uni .set_async_frequency ('https://www.baidu.com /robots.txt' , 1 , 2 )
1343
+ uni .set_async_frequency ('https://www.python.org /robots.txt' , 1 , 2 )
1344
1344
start_time = time .time ()
1345
1345
tasks = [
1346
1346
asyncio .ensure_future (uni .adownload (crawler_rule ))
@@ -1393,24 +1393,24 @@ def _partial_test_parser():
1393
1393
from uniparser .config import GlobalConfig
1394
1394
GlobalConfig .GLOBAL_TIMEOUT = 5
1395
1395
for case in (
1396
- # test_utils,
1397
- # test_context_parser,
1398
- # test_css_parser,
1399
- # test_selectolax_parser,
1400
- # test_xml_parser,
1401
- # test_re_parser,
1402
- # test_jsonpath_parser,
1403
- # test_objectpath_parser,
1404
- # test_jmespath_parser,
1405
- # test_python_parser,
1406
- # test_udf_parser,
1407
- # test_loader_parser,
1408
- # test_time_parser,
1409
- # test_uni_parser,
1410
- # test_crawler_rule,
1411
- # test_default_usage,
1412
- # test_crawler_storage,
1413
- # test_uni_parser_frequency,
1396
+ test_utils ,
1397
+ test_context_parser ,
1398
+ test_css_parser ,
1399
+ test_selectolax_parser ,
1400
+ test_xml_parser ,
1401
+ test_re_parser ,
1402
+ test_jsonpath_parser ,
1403
+ test_objectpath_parser ,
1404
+ test_jmespath_parser ,
1405
+ test_python_parser ,
1406
+ test_udf_parser ,
1407
+ test_loader_parser ,
1408
+ test_time_parser ,
1409
+ test_uni_parser ,
1410
+ test_crawler_rule ,
1411
+ test_default_usage ,
1412
+ test_crawler_storage ,
1413
+ test_uni_parser_frequency ,
1414
1414
test_crawler ,
1415
1415
test_object ,
1416
1416
):
0 commit comments