|
1 | 1 | package zenuo.gogo.core.processor.impl;
|
2 | 2 |
|
3 | 3 | import io.netty.handler.codec.http.HttpResponseStatus;
|
| 4 | +import io.netty.handler.codec.http.QueryStringDecoder; |
4 | 5 | import lombok.RequiredArgsConstructor;
|
5 | 6 | import lombok.extern.slf4j.Slf4j;
|
6 | 7 | import org.apache.http.client.methods.HttpGet;
|
7 | 8 | import org.jsoup.Jsoup;
|
8 | 9 | import org.jsoup.nodes.Document;
|
9 | 10 | import org.jsoup.nodes.Element;
|
10 |
| -import org.jsoup.select.Elements; |
11 | 11 | import zenuo.gogo.core.processor.IHttpClientProvider;
|
12 | 12 | import zenuo.gogo.core.processor.ISearchResultProvider;
|
13 | 13 | import zenuo.gogo.model.Entry;
|
14 | 14 | import zenuo.gogo.model.SearchResponse;
|
15 |
| -import zenuo.gogo.util.StringUtils; |
16 | 15 | import zenuo.gogo.util.UserAgentUtils;
|
17 | 16 |
|
18 | 17 | import javax.inject.Inject;
|
|
21 | 20 | import java.nio.charset.StandardCharsets;
|
22 | 21 | import java.util.ArrayList;
|
23 | 22 | import java.util.List;
|
| 23 | +import java.util.stream.Collectors; |
24 | 24 |
|
25 | 25 | /**
|
26 | 26 | * 谷歌搜索
|
@@ -54,45 +54,26 @@ public SearchResponse search(String key, int page) {
|
54 | 54 | } catch (IOException e) {
|
55 | 55 | throw new RuntimeException(e);
|
56 | 56 | }
|
57 |
| - final Elements webResults = document.getElementsByClass("g"); |
58 |
| - if (webResults.isEmpty()) { |
59 |
| - log.error("pattern changed"); |
60 |
| - return patternChanged(builder); |
61 |
| - } |
62 |
| - final List<Entry> entries = new ArrayList<>(); |
63 |
| - //traverse search result entries |
64 |
| - for (Element result : webResults) { |
65 |
| - //entry builder |
66 |
| - final Entry.EntryBuilder entryBuilder = Entry.builder(); |
67 |
| - //name |
68 |
| - final Element name = result.getElementsByClass("LC20lb DKV0Md").first(); |
69 |
| - if (name == null) { |
| 57 | + final List<Element> searchResultElements = document.getElementsByTag("a").stream() |
| 58 | + .filter(a -> a.hasAttr("href") |
| 59 | + && a.attr("href").startsWith("/url?") |
| 60 | + && a.childrenSize() == 2 |
| 61 | + && "h3".equals(a.child(0).tagName())) |
| 62 | + .collect(Collectors.toList()); |
| 63 | + final List<Entry> entries = new ArrayList<>(searchResultElements.size()); |
| 64 | + builder.entries(entries); |
| 65 | + for (Element element : searchResultElements) { |
| 66 | + final QueryStringDecoder decoder = new QueryStringDecoder(element.attr("href")); |
| 67 | + final List<String> q = decoder.parameters().get("q"); |
| 68 | + if (q == null) { |
70 | 69 | continue;
|
71 | 70 | }
|
72 |
| - entryBuilder.name(StringUtils.escapeHtmlEntities(name.text())); |
73 |
| - //url |
74 |
| - final Element url = name.parent(); |
75 |
| - entryBuilder.url(url.attr("href")); |
76 |
| - //description |
77 |
| - final Element desc = result.getElementsByClass("aCOpRe").first(); |
78 |
| - entryBuilder.desc(StringUtils.escapeHtmlEntities(desc.text())); |
79 |
| - //build |
80 |
| - final Entry entry = entryBuilder.build(); |
81 |
| - //name and url are not null |
82 |
| - if (entry.getName() != null && entry.getUrl() != null) { |
83 |
| - entries.add(entry); |
84 |
| - } |
| 71 | + final Entry entry = new Entry(); |
| 72 | + entries.add(entry); |
| 73 | + entry.setUrl(q.get(0)); |
| 74 | + entry.setName(element.child(0).text()); |
| 75 | + entry.setDesc(element.parent().parent().child(2).text()); |
85 | 76 | }
|
86 |
| - final Elements videoResults = document.getElementsByClass("y8AWGd llvJ5e"); |
87 |
| - for (Element videoResult : videoResults) { |
88 |
| - final Element a = videoResult.child(0); |
89 |
| - entries.add(Entry.builder() |
90 |
| - .url(a.attr("href")) |
91 |
| - .name(a.child(1).text()) |
92 |
| - .desc(videoResult.child(2).text()) |
93 |
| - .build()); |
94 |
| - } |
95 |
| - builder.entries(entries); |
96 | 77 | return builder.status(HttpResponseStatus.OK).build();
|
97 | 78 | }
|
98 | 79 |
|
|
0 commit comments