1
1
import { Route } from '@/types' ;
2
- import cache from '@/utils/cache' ;
3
- import got from '@/utils/got' ;
4
- import { load } from 'cheerio' ;
5
- import { parseDate } from '@/utils/parse-date' ;
6
- import timezone from '@/utils/timezone' ;
2
+ import { parse } from 'querystring' ;
3
+ import axios from 'axios' ;
7
4
8
5
export const route : Route = {
9
- path : '/zhengce/govall/ :advance?' ,
6
+ path : '/zhengce/:advance?' ,
10
7
categories : [ 'government' ] ,
11
- example : '/gov/zhengce/govall/orpro=555¬pro=2&search_field=title ' ,
12
- parameters : { advance : '高级搜索选项,将作为请求参数直接添加到url后。目前已知的选项及其意义如下 。' } ,
8
+ example : '/gov/zhengce/searchWord=医保 ' ,
9
+ parameters : { advance : '高级搜索选项,将作为请求参数直接添加到url后。目前可用的选项仅searchWord,因为不同组合对应不同AthenaAppKey,出于稳定性和实用性考虑暂不实现 。' } ,
13
10
features : {
14
11
requireConfig : false ,
15
12
requirePuppeteer : false ,
@@ -20,69 +17,91 @@ export const route: Route = {
20
17
} ,
21
18
radar : [
22
19
{
23
- source : [ 'www.gov.cn/' ] ,
24
- target : '/zhengce/govall ' ,
20
+ source : [ 'sousuo. www.gov.cn/' ] ,
21
+ target : '/zhengce' ,
25
22
} ,
26
23
] ,
27
24
name : '信息稿件' ,
28
- maintainers : [ 'ciaranchen' ] ,
25
+ maintainers : [ 'ciaranchen' , 'zll17' ] ,
29
26
handler,
30
- url : 'www.gov.cn/ ' ,
31
- description : `| 选项 | 意义 | 备注 |
27
+ url : 'sousuo. www.gov.cn' ,
28
+ description : `| 选项 | 意义 | 默认 |
32
29
| :-----------------------------: | :----------------------------------------------: | :----------------------------: |
33
- | orpro | 包含以下任意一个关键词。 | 用空格分隔。 |
34
- | allpro | 包含以下全部关键词 | |
35
- | notpro | 不包含以下关键词 | |
36
- | inpro | 完整不拆分的关键词 | |
37
- | searchfield | title: 搜索词在标题中;content: 搜索词在正文中。 | 默认为空,即网页的任意位置。 |
38
- | pubmintimeYear, pubmintimeMonth | 从某年某月 | 单独使用月份参数无法只筛选月份 |
39
- | pubmaxtimeYear, pubmaxtimeMonth | 到某年某月 | 单独使用月份参数无法只筛选月份 |
40
- | colid | 栏目 | 比较复杂,不建议使用 |` ,
30
+ | searchWord | 搜索关键词 | "" |
31
+ | orderBy | time: 按发布时间排序;related: 按相关度排序 | time |
32
+ | searchBy | title: 仅搜索标题;all: 搜索全文 | title |
33
+ | granularity | ALL: 时间不限;LAST_WEEK: 一周内;LAST_MONTH: 一月内;LAST_YEAR: 一年内;CUSTOM: 自定义时间 | ALL |
34
+ | beginDateTime | 当granularity为CUSTOM时需添加 | yyyy-MM-dd |
35
+ | endDateTime | 当granularity为CUSTOM时需添加 | yyyy-MM-dd |` ,
41
36
} ;
42
37
38
/**
 * Query the gov.cn search backend for a keyword and return the raw JSON body.
 *
 * Sends one POST request to the Athena "forward" endpoint with a fixed query:
 * first page, 20 results, ordered by time, matching on title, no date limit.
 * Only the search keyword varies between calls.
 *
 * @param searchWord - Keyword to search for; defaults to '农业' when omitted.
 * @returns The response body (`response.data`) as delivered by axios.
 */
async function fetchData(searchWord = '农业') {
    const response = await axios({
        method: 'post',
        url: 'https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE',
        headers: {
            // NOTE: `Accept-Encoding`, `Host` and `Connection` are intentionally not set here.
            // The HTTP client derives `Host` from the URL and manages connection reuse itself,
            // and advertising encodings such as `zstd` that the client does not decode could
            // make the server answer with a body that cannot be parsed as JSON.
            Accept: 'application/json, text/javascript, */*; q=0.01',
            'Accept-Language': 'zh-CN,zh;q=0.9,ru;q=0.8,en;q=0.7',
            // App key and app name are sent URL-encoded, exactly as captured.
            // NOTE(review): presumably tied to this specific endpoint/query combination — confirm before reuse.
            AthenaAppKey: 'QLswV4i%2Br6EJk0udvQJoax2erH1NAH3CUcGuTvIeczjwvq28TC7AXoaYfa76nhzAEHYIjC%2Fh5lEHUJqKSUOdNtaOBi%2FUhQfVZI7u1PHes%2BhI%2B7GZ7tahykzv%2BFO4mCNorDViLTNranNi0axlXGzM0yTAgu%2FXBNJyaVd5SI89CIw%3D',
            AthenaAppName: '%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2',
            'Content-Type': 'application/json;charset=UTF-8',
            Origin: 'https://sousuo.www.gov.cn',
            'Sec-Ch-Ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
        },
        data: JSON.stringify({
            code: '17da70961a7',
            // NOTE(review): fixed sample history sent with every request; unclear whether the backend requires it — confirm.
            historySearchWords: ['医保', '教育'],
            dataTypeId: '107',
            orderBy: 'time',
            searchBy: 'title',
            appendixType: '',
            granularity: 'ALL',
            trackTotalHits: true,
            beginDateTime: '',
            endDateTime: '',
            isSearchForced: 0,
            filters: [],
            pageNo: 1,
            pageSize: 20,
            customFilter: { operator: 'and', properties: [] },
            searchWord,
        }),
    });
    return response.data;
}
81
+
43
82
async function handler ( ctx ) {
44
83
const advance = ctx . req . param ( 'advance' ) ;
45
- const link = `http://sousuo.gov.cn/list.htm` ;
84
+ const advanceParams = advance ? parse ( advance ) : { } ;
85
+ const searchWord = advanceParams . searchWord || '' ;
86
+
87
+ const link = `https://sousuo.www.gov.cn/sousuo/search.shtml` ;
88
+
46
89
const params = new URLSearchParams ( {
47
- n : 20 ,
48
- t : 'govall' ,
49
- sort : 'pubtime' ,
50
- advance : 'true' ,
90
+ code : '17da70961a7' ,
91
+ dataTypeId : '107' ,
51
92
} ) ;
52
93
const query = `${ params . toString ( ) } &${ advance } ` ;
53
- const res = await got . get ( link , {
54
- searchParams : query . replaceAll ( / ( [ \u4E00 - \u9FA5 ] ) / g, ( str ) => encodeURIComponent ( str ) ) ,
55
- } ) ;
56
- const $ = load ( res . data ) ;
57
94
58
- const list = $ ( 'body > div.dataBox > table > tbody > tr' )
59
- . slice ( 1 )
60
- . toArray ( )
61
- . map ( ( elem ) => {
62
- elem = $ ( elem ) ;
63
- return {
64
- title : elem . find ( 'td:nth-child(2) > a' ) . text ( ) ,
65
- link : elem . find ( 'td:nth-child(2) > a' ) . attr ( 'href' ) ,
66
- pubDate : timezone ( parseDate ( elem . find ( 'td:nth-child(5)' ) . text ( ) ) , 8 ) ,
67
- } ;
68
- } ) ;
95
+ const res = await fetchData ( searchWord ) ;
96
+
97
+ const list = res . result . data . middle . list . map ( ( item ) => ( {
98
+ title : item . title ,
99
+ link : item . url ,
100
+ pubDate : new Date ( item . time ) . toISOString ( ) , // 转换为ISO格式的时间字符串
101
+ description : item . content ,
102
+ } ) ) ;
69
103
70
- const items = await Promise . all (
71
- list . map ( ( item ) =>
72
- cache . tryGet ( item . link , async ( ) => {
73
- let description = '' ;
74
- try {
75
- const contentData = await got ( item . link ) ;
76
- const $ = load ( contentData . data ) ;
77
- description = $ ( '#UCAP-CONTENT' ) . html ( ) ;
78
- } catch {
79
- description = '文章已被删除' ;
80
- }
81
- item . description = description ;
82
- return item ;
83
- } )
84
- )
85
- ) ;
104
+ const items = await Promise . all ( list . map ( ( item ) => item ) ) ;
86
105
87
106
return {
88
107
title : '信息稿件 - 中国政府网' ,
0 commit comments