Skip to content

Commit 12104d2

Browse files
committed
fix(routes/gov/zhengce): Fetch the retrieval results for the given keyword on gov.cn
1 parent c909ce5 commit 12104d2

File tree

2 files changed

+288
-278
lines changed

2 files changed

+288
-278
lines changed

lib/routes/gov/zhengce/govall.ts

+76-57
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import { Route } from '@/types';
2-
import cache from '@/utils/cache';
3-
import got from '@/utils/got';
4-
import { load } from 'cheerio';
5-
import { parseDate } from '@/utils/parse-date';
6-
import timezone from '@/utils/timezone';
2+
import { parse } from 'querystring';
3+
import axios from 'axios';
74

85
export const route: Route = {
9-
path: '/zhengce/govall/:advance?',
6+
path: '/zhengce/:advance?',
107
categories: ['government'],
11-
example: '/gov/zhengce/govall/orpro=555&notpro=2&search_field=title',
12-
parameters: { advance: '高级搜索选项,将作为请求参数直接添加到url后。目前已知的选项及其意义如下。' },
8+
example: '/gov/zhengce/searchWord=医保',
9+
parameters: { advance: '高级搜索选项,将作为请求参数直接添加到url后。目前可用的选项仅searchWord,因为不同组合对应不同AthenaAppKey,出于稳定性和实用性考虑暂不实现。' },
1310
features: {
1411
requireConfig: false,
1512
requirePuppeteer: false,
@@ -20,69 +17,91 @@ export const route: Route = {
2017
},
2118
radar: [
2219
{
23-
source: ['www.gov.cn/'],
24-
target: '/zhengce/govall',
20+
source: ['sousuo.www.gov.cn/'],
21+
target: '/zhengce',
2522
},
2623
],
2724
name: '信息稿件',
28-
maintainers: ['ciaranchen'],
25+
maintainers: ['ciaranchen', 'zll17'],
2926
handler,
30-
url: 'www.gov.cn/',
31-
description: `| 选项 | 意义 | 备注 |
27+
url: 'sousuo.www.gov.cn',
28+
description: `| 选项 | 意义 | 默认 |
3229
| :-----------------------------: | :----------------------------------------------: | :----------------------------: |
33-
| orpro | 包含以下任意一个关键词。 | 用空格分隔。 |
34-
| allpro | 包含以下全部关键词 | |
35-
| notpro | 不包含以下关键词 | |
36-
| inpro | 完整不拆分的关键词 | |
37-
| searchfield | title: 搜索词在标题中;content: 搜索词在正文中。 | 默认为空,即网页的任意位置。 |
38-
| pubmintimeYear, pubmintimeMonth | 从某年某月 | 单独使用月份参数无法只筛选月份 |
39-
| pubmaxtimeYear, pubmaxtimeMonth | 到某年某月 | 单独使用月份参数无法只筛选月份 |
40-
| colid | 栏目 | 比较复杂,不建议使用 |`,
30+
| searchWord | 搜索关键词 | "" |
31+
| orderBy | time: 按发布时间排序;related: 按相关度排序 | time |
32+
| searchBy | title: 仅搜索标题;all: 搜索全文 | title |
33+
| granularity | ALL: 时间不限;LAST_WEEK: 一周内;LAST_MONTH: 一月内;LAST_YEAR: 一年内;CUSTOM: 自定义时间 | ALL |
34+
| beginDateTime | 当granularity为CUSTOM时需添加 | yyyy-MM-dd |
35+
| endDateTime | 当granularity为CUSTOM时需添加 | yyyy-MM-dd |`,
4136
};
4237

38+
/**
 * Optional search settings forwarded in the request payload.
 * Field names mirror the options listed in the route description table.
 */
interface ZhengceSearchOptions {
    /** 'time' or 'related'. */
    orderBy?: string;
    /** 'title' or 'all'. */
    searchBy?: string;
    /** 'ALL', 'LAST_WEEK', 'LAST_MONTH', 'LAST_YEAR' or 'CUSTOM'. */
    granularity?: string;
    /** yyyy-MM-dd; documented as required when granularity is 'CUSTOM'. */
    beginDateTime?: string;
    /** yyyy-MM-dd; documented as required when granularity is 'CUSTOM'. */
    endDateTime?: string;
}

/**
 * Queries the gov.cn search backend for a keyword and returns the decoded
 * response body.
 *
 * @param searchWord - Keyword to search for. The default only applies when the
 *   argument is `undefined`; an empty string is sent to the server as-is.
 * @param options - Optional overrides for ordering, search scope and time
 *   range. Omitted fields fall back to the values that were previously
 *   hard-coded, so existing single-argument callers behave as before.
 * @returns The `data` property of the axios response (the parsed body).
 * @throws Whatever axios rejects with on network failure or a non-2xx status.
 */
async function fetchData(searchWord = '农业', options: ZhengceSearchOptions = {}) {
    const { orderBy = 'time', searchBy = 'title', granularity = 'ALL', beginDateTime = '', endDateTime = '' } = options;

    const response = await axios({
        method: 'post',
        url: 'https://sousuoht.www.gov.cn/athena/forward/2B22E8E39E850E17F95A016A74FCB6B673336FA8B6FEC0E2955907EF9AEE06BE',
        headers: {
            Accept: 'application/json, text/javascript, */*; q=0.01',
            // No explicit Accept-Encoding: a browser-copied value (br, zstd) advertises
            // encodings the HTTP client may not be able to decode; let the client
            // negotiate what it actually supports.
            // No explicit Host either: the client derives it from the request URL.
            'Accept-Language': 'zh-CN,zh;q=0.9,ru;q=0.8,en;q=0.7',
            // NOTE(review): presumably these two values are tied to this endpoint and
            // dataTypeId (the route docs mention a per-combination AthenaAppKey) — confirm
            // before changing the payload shape.
            AthenaAppKey: 'QLswV4i%2Br6EJk0udvQJoax2erH1NAH3CUcGuTvIeczjwvq28TC7AXoaYfa76nhzAEHYIjC%2Fh5lEHUJqKSUOdNtaOBi%2FUhQfVZI7u1PHes%2BhI%2B7GZ7tahykzv%2BFO4mCNorDViLTNranNi0axlXGzM0yTAgu%2FXBNJyaVd5SI89CIw%3D',
            AthenaAppName: '%E5%9B%BD%E7%BD%91%E6%90%9C%E7%B4%A2',
            Connection: 'keep-alive',
            'Content-Type': 'application/json;charset=UTF-8',
            Origin: 'https://sousuo.www.gov.cn',
            'Sec-Ch-Ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
        },
        data: JSON.stringify({
            code: '17da70961a7',
            // NOTE(review): fixed sample history kept unchanged; unclear whether the
            // server requires a non-empty list — confirm before removing.
            historySearchWords: ['医保', '教育'],
            dataTypeId: '107',
            orderBy,
            searchBy,
            appendixType: '',
            granularity,
            trackTotalHits: true,
            beginDateTime,
            endDateTime,
            isSearchForced: 0,
            filters: [],
            pageNo: 1,
            pageSize: 20,
            customFilter: { operator: 'and', properties: [] },
            searchWord,
        }),
    });

    return response.data;
}
81+
4382
async function handler(ctx) {
4483
const advance = ctx.req.param('advance');
45-
const link = `http://sousuo.gov.cn/list.htm`;
84+
const advanceParams = advance ? parse(advance) : {};
85+
const searchWord = advanceParams.searchWord || '';
86+
87+
const link = `https://sousuo.www.gov.cn/sousuo/search.shtml`;
88+
4689
const params = new URLSearchParams({
47-
n: 20,
48-
t: 'govall',
49-
sort: 'pubtime',
50-
advance: 'true',
90+
code: '17da70961a7',
91+
dataTypeId: '107',
5192
});
5293
const query = `${params.toString()}&${advance}`;
53-
const res = await got.get(link, {
54-
searchParams: query.replaceAll(/([\u4E00-\u9FA5])/g, (str) => encodeURIComponent(str)),
55-
});
56-
const $ = load(res.data);
5794

58-
const list = $('body > div.dataBox > table > tbody > tr')
59-
.slice(1)
60-
.toArray()
61-
.map((elem) => {
62-
elem = $(elem);
63-
return {
64-
title: elem.find('td:nth-child(2) > a').text(),
65-
link: elem.find('td:nth-child(2) > a').attr('href'),
66-
pubDate: timezone(parseDate(elem.find('td:nth-child(5)').text()), 8),
67-
};
68-
});
95+
const res = await fetchData(searchWord);
96+
97+
const list = res.result.data.middle.list.map((item) => ({
98+
title: item.title,
99+
link: item.url,
100+
pubDate: new Date(item.time).toISOString(), // 转换为ISO格式的时间字符串
101+
description: item.content,
102+
}));
69103

70-
const items = await Promise.all(
71-
list.map((item) =>
72-
cache.tryGet(item.link, async () => {
73-
let description = '';
74-
try {
75-
const contentData = await got(item.link);
76-
const $ = load(contentData.data);
77-
description = $('#UCAP-CONTENT').html();
78-
} catch {
79-
description = '文章已被删除';
80-
}
81-
item.description = description;
82-
return item;
83-
})
84-
)
85-
);
104+
const items = await Promise.all(list.map((item) => item));
86105

87106
return {
88107
title: '信息稿件 - 中国政府网',

0 commit comments

Comments
 (0)