dipu-bd · dipu-bd · Sep 11, 2024 · Sep 5, 2024 · Sep 10, 2024
diff --git a/sources/zh/piaotian.py b/sources/zh/piaotian.py
@@ -2,6 +2,14 @@
 import logging
 
 from lncrawl.core.crawler import Crawler
+import urllib.parse
+
+headers = {
+    "Cache-Control": "no-cache",
+    "Content-Type": "application/x-www-form-urlencoded",
+    "Origin": "https://www.piaotia.com",
+    "Referer": "https://www.piaotia.com/modules/article/search.php",
+}
 
 logger = logging.getLogger(__name__)
 
@@ -13,9 +21,52 @@ class PiaoTian(Crawler):
     base_url = [
         "https://www.piaotian.com",
         "https://www.ptwxz.com",
-        "https://www.piaotia.com"
+        "https://www.piaotia.com",
     ]
 
+    def search_novel(self, query):
+        query = urllib.parse.quote(query.encode("gbk"))
+        search = urllib.parse.quote(" 搜 索 ".encode("gbk"))
+        data = f"searchtype=articlename&searchkey={query}&Submit={search}"
+        headers["Origin"] = self.home_url
+        headers["Referer"] = novel_search_url % self.home_url
+
+        response = self.post_response(
+            novel_search_url % self.home_url,
+            headers=headers,
+            data=data,
+        )
+        soup = self.make_soup(response, "gbk")
+
+        results = []
+
+        # if there is only one result, the search page redirects to bookinfo page of that result
+        if response.url.startswith("%sbookinfo/" % self.home_url):
+            author = soup.select('div#content table tr td[width]')[2].get_text()
+            author = author.replace(u'\xa0', "").replace("作 者：", "")
+            results.append(
+                {
+                    "title": soup.select_one("div#content table table table h1").get_text(),
+                    "url": response.url,
+                    "info": f"Author: {author}",
+                }
+            )
+
+        else:
+            for data in soup.select("div#content table tr")[1:]:
+                title = data.select_one("td a").get_text()
+                author = data.select("td")[2].get_text()
+                url = data.select_one("td a")["href"]
+
+                results.append(
+                    {
+                        "title": title,
+                        "url": url,
+                        "info": f"Author: {author}",
+                    }
+                )
+        return results
+
     def read_novel_info(self):
         # Transform bookinfo page into chapter list page
         # https://www.piaotia.com/bookinfo/8/8866.html -> https://www.piaotia.com/html/8/8866/