Skip to content

Commit fa2fcc6

Browse files
authored
Merge pull request #30 from zenuo/fix_google_dom_changed
Fix google dom changed
2 parents 0a59b4b + 11dfd30 commit fa2fcc6

File tree

4 files changed

+50
-47
lines changed

4 files changed

+50
-47
lines changed

README.md

+23
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,26 @@ $ python3 gogo.py stop
147147
## 实例集合
148148

149149
欢迎通过Issue分享实例供学习使用🏇
150+
151+
## 实现思路
152+
153+
```javascript
154+
// User-Agent: Mozilla/5.0 (Mobile; Nokia 8110 4G; rv:48.0) Gecko/48.0 Firefox/48.0 KAIOS/2.5
155+
156+
let searchResultElements = Array.from(document.getElementsByTagName("a"))
157+
.filter(a => a.hasAttribute("href")
158+
&& a.getAttribute("href").startsWith("/url?")
159+
&& a.childElementCount == 2
160+
&& a.childNodes[0].tagName == "H3");
161+
162+
let entries = searchResultElements.map(e => {
163+
let url = new URL(e).searchParams.get("q");
164+
let name = e.children[0].textContent;
165+
let desc = e.parentNode.parentElement.children[2].textContent;
166+
return {
167+
"url": url,
168+
"name": name,
169+
"desc": desc
170+
};
171+
})
172+
```

gogo-server/src/main/java/zenuo/gogo/core/processor/impl/GoogleSearchResultProviderImpl.java

+19-38
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
package zenuo.gogo.core.processor.impl;
22

33
import io.netty.handler.codec.http.HttpResponseStatus;
4+
import io.netty.handler.codec.http.QueryStringDecoder;
45
import lombok.RequiredArgsConstructor;
56
import lombok.extern.slf4j.Slf4j;
67
import org.apache.http.client.methods.HttpGet;
78
import org.jsoup.Jsoup;
89
import org.jsoup.nodes.Document;
910
import org.jsoup.nodes.Element;
10-
import org.jsoup.select.Elements;
1111
import zenuo.gogo.core.processor.IHttpClientProvider;
1212
import zenuo.gogo.core.processor.ISearchResultProvider;
1313
import zenuo.gogo.model.Entry;
1414
import zenuo.gogo.model.SearchResponse;
15-
import zenuo.gogo.util.StringUtils;
1615
import zenuo.gogo.util.UserAgentUtils;
1716

1817
import javax.inject.Inject;
@@ -21,6 +20,7 @@
2120
import java.nio.charset.StandardCharsets;
2221
import java.util.ArrayList;
2322
import java.util.List;
23+
import java.util.stream.Collectors;
2424

2525
/**
2626
* 谷歌搜索
@@ -54,45 +54,26 @@ public SearchResponse search(String key, int page) {
5454
} catch (IOException e) {
5555
throw new RuntimeException(e);
5656
}
57-
final Elements webResults = document.getElementsByClass("g");
58-
if (webResults.isEmpty()) {
59-
log.error("pattern changed");
60-
return patternChanged(builder);
61-
}
62-
final List<Entry> entries = new ArrayList<>();
63-
//traverse search result entries
64-
for (Element result : webResults) {
65-
//entry builder
66-
final Entry.EntryBuilder entryBuilder = Entry.builder();
67-
//name
68-
final Element name = result.getElementsByClass("LC20lb DKV0Md").first();
69-
if (name == null) {
57+
final List<Element> searchResultElements = document.getElementsByTag("a").stream()
58+
.filter(a -> a.hasAttr("href")
59+
&& a.attr("href").startsWith("/url?")
60+
&& a.childrenSize() == 2
61+
&& "h3".equals(a.child(0).tagName()))
62+
.collect(Collectors.toList());
63+
final List<Entry> entries = new ArrayList<>(searchResultElements.size());
64+
builder.entries(entries);
65+
for (Element element : searchResultElements) {
66+
final QueryStringDecoder decoder = new QueryStringDecoder(element.attr("href"));
67+
final List<String> q = decoder.parameters().get("q");
68+
if (q == null) {
7069
continue;
7170
}
72-
entryBuilder.name(StringUtils.escapeHtmlEntities(name.text()));
73-
//url
74-
final Element url = name.parent();
75-
entryBuilder.url(url.attr("href"));
76-
//description
77-
final Element desc = result.getElementsByClass("aCOpRe").first();
78-
entryBuilder.desc(StringUtils.escapeHtmlEntities(desc.text()));
79-
//build
80-
final Entry entry = entryBuilder.build();
81-
//name and url are not null
82-
if (entry.getName() != null && entry.getUrl() != null) {
83-
entries.add(entry);
84-
}
71+
final Entry entry = new Entry();
72+
entries.add(entry);
73+
entry.setUrl(q.get(0));
74+
entry.setName(element.child(0).text());
75+
entry.setDesc(element.parent().parent().child(2).text());
8576
}
86-
final Elements videoResults = document.getElementsByClass("y8AWGd llvJ5e");
87-
for (Element videoResult : videoResults) {
88-
final Element a = videoResult.child(0);
89-
entries.add(Entry.builder()
90-
.url(a.attr("href"))
91-
.name(a.child(1).text())
92-
.desc(videoResult.child(2).text())
93-
.build());
94-
}
95-
builder.entries(entries);
9677
return builder.status(HttpResponseStatus.OK).build();
9778
}
9879

gogo-server/src/main/java/zenuo/gogo/core/processor/impl/LintProcessorImpl.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void process(ChannelHandlerContext ctx, FullHttpRequest request, QueryStr
6363
request,
6464
ResponseType.API,
6565
body,
66-
response.getStatus());
66+
HttpResponseStatus.OK);
6767
}
6868
}
6969

@@ -125,11 +125,10 @@ LintResponse response(final String key) {
125125
builder.key(key);
126126
try {
127127
final List<String> lints = lint(key);
128-
builder.lints(lints).status(HttpResponseStatus.OK);
128+
builder.lints(lints);
129129
} catch (Exception e) {
130130
log.error("lint {}", key, e);
131-
builder.error(e.getMessage())
132-
.status(HttpResponseStatus.GATEWAY_TIMEOUT);
131+
builder.error(e.getMessage());
133132
}
134133
return builder.build();
135134
}

gogo-server/src/main/java/zenuo/gogo/util/UserAgentUtils.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ public final class UserAgentUtils {
1111
/**
1212
* 用户代理字符串数组
1313
*/
14-
private static String[] USER_AGENTS = {
15-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
16-
"Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
17-
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
18-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
14+
private static final String[] USER_AGENTS = {
15+
"Mozilla/5.0 (Mobile; Nokia 8110 4G; rv:46.0) Gecko/46.0 Firefox/46.0 KAIOS/2.5",
16+
"Mozilla/5.0 (Mobile; Nokia 8110 4G; rv:47.0) Gecko/47.0 Firefox/47.0 KAIOS/2.5",
17+
"Mozilla/5.0 (Mobile; Nokia 8110 4G; rv:48.0) Gecko/48.0 Firefox/48.0 KAIOS/2.5",
18+
"Mozilla/5.0 (Mobile; Nokia 8110 4G; rv:49.0) Gecko/49.0 Firefox/49.0 KAIOS/2.5",
1919
};
2020
/**
2121
* 最后一次使用的索引

0 commit comments

Comments
 (0)