from collections import deque

def load():
    """Load the module"""
    registerFunction("catalog %s %S", catalog_search_handler, "catalog <board> <regex>")
    registerFunction("board %s %S", board_search_handler, "board <board> <regex>")
registerModule("ThreadSearch", load)
|
def sanitise(string):
    """Strips a string of all non-alphanumeric characters"""
    return re.sub(r"[^a-zA-Z0-9 ]", "", string)
|
def catalog_search_handler(channel, sender, board, user_regex):
    """Handler for initiating catalog search"""
    results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=True)
    process_results(channel, sender, results_data)
|
def board_search_handler(channel, sender, board, user_regex):
    """Handler for initiating full board search"""
    results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=False)
    process_results(channel, sender, results_data)
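
# Example invocation, assuming the bot framework passes the channel and the
# sender's nick, then one word for %s and the rest of the line for %S (an
# assumption about the framework's format specifiers, not confirmed here):
#
#   <nick> board g \brust\b
#   -> board_search_handler("#channel", "nick", "g", r"\brust\b")
#
#   <nick> catalog tv anime
#   -> catalog_search_handler("#channel", "nick", "tv", "anime")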
|
def process_results(channel, sender, results_data):
    """Process the resulting data of a search and present it"""
    max_num_urls_displayed = 3
    search_parameters = results_data["search_parameters"]
    post_numbers = results_data["post_numbers"]

    if not post_numbers:
        sendMessage(channel, "{0}: No results for {1} on {2}".format(sender, search_parameters["string"],
                                                                     search_parameters["user_board"]))
    else:
        post_template = "https://boards.4chan.org/{0}/thread/{1}"
        urls = [post_template.format(search_parameters["board"], post_num) for post_num in post_numbers]
        if len(urls) > max_num_urls_displayed:
            # Too many results to list inline; hand the full list to nnmm,
            # a paste/shortening helper assumed to be provided elsewhere by the bot.
            message = nnmm('\n'.join(urls))
        else:
            message = " ".join(urls[:max_num_urls_displayed])
        sendMessage(channel, "{0}: {1}".format(sender, message))
|
def get_json_data(url):
    """Returns a json data object from a given url, or None on a 404."""
    response = None
    try:
        response = requests.get(url)
        if response.status_code == 404:
            log.error("url {}: 404".format(url))
            return None
        return json.loads(response.text)
    except Exception:
        if response is None:
            exception_string = "url: {0}\n{1}".format(url, traceback.format_exc())
        else:
            exception_string = "url: {0} status_code: {1}\n{2}".format(
                url, response.status_code, traceback.format_exc())
        log.error(exception_string)
        print(exception_string)
        raise
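
# For reference, the 4chan read-only API endpoints used below return roughly
# the following shapes (based on the public a.4cdn.org API; field sets may
# change, so treat this as a sketch rather than a contract):
#
#   /{board}/threads.json     -> [{"page": 1, "threads": [{"no": 570368, ...}, ...]}, ...]
#   /{board}/catalog.json     -> [{"page": 1, "threads": [OP posts with "no", "sub",
#                                  "com", "name", "filename", "ext", ...]}, ...]
#   /{board}/thread/{no}.json -> {"posts": [{"no": 570368, "com": "...", ...}, ...]}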
|
def search_thread(results_deque, thread_num, search_parameters):
    """
    Searches every post in thread thread_num on a board for the
    string provided. Appends matching post numbers to results_deque.
    """
    json_url = "https://a.4cdn.org/{0}/thread/{1}.json".format(search_parameters["board"], thread_num)
    thread_json = get_json_data(json_url)
    if thread_json is None:
        return

    regex_match = search_parameters["compiled_regex"].match
    sections = search_parameters["sections"]
    deque_append = results_deque.append
    for post in thread_json["posts"]:
        # Match against the concatenated text sections of the post and append
        # at most one result per matching post.
        user_text = "".join(post[s] for s in sections if s in post)
        if regex_match(user_text) is not None:
            deque_append("{0}#p{1}".format(thread_num, post["no"]))
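
# A matching post produces an entry such as "570368#p570401"; process_results
# later combines it with the board to form a URL like
# https://boards.4chan.org/g/thread/570368#p570401 (numbers illustrative only).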
|
def search_catalog_page(results_deque, page, search_parameters):
    """Will be run by the threading module. Searches all the 4chan
    threads on a catalog page and adds matching thread numbers to the shared deque"""
    regex_match = search_parameters["compiled_regex"].match
    sections = search_parameters["sections"]
    deque_append = results_deque.append
    for thread in page["threads"]:
        user_text = "".join(thread[s] for s in sections if s in thread)
        if regex_match(user_text) is not None:
            deque_append(thread["no"])
|
def perform_concurrent_4chan_search(board, user_regex, catalog_search=False):
    """Search a thread or catalog on 4chan using several threads concurrently, then return relevant data"""
    thread_join_timeout_seconds = 10
    # deque.append is thread-safe in CPython, so worker threads can share this
    # without an explicit lock.
    results_deque = deque()
    json_url = "https://a.4cdn.org/{0}/{1}.json".format(board, "catalog" if catalog_search else "threads")
    sections = ["com", "name", "ext", "email", "sub", "filename"]
    json_data = get_json_data(json_url)
    search_regex = re.compile(user_regex, re.UNICODE | re.IGNORECASE)
    search_parameters = {"sections": sections, "board": sanitise(board), "string": user_regex,
                         "compiled_regex": search_regex, "user_board": board}
    results_data = {"post_numbers": results_deque, "search_parameters": search_parameters}
    thread_pool = []

    if json_data is None:
        return results_data

    for page in json_data:
        if catalog_search:
            # Catalog search: one worker thread per catalog page.
            t = Thread(None, target=search_catalog_page, args=(results_deque, page, search_parameters))
            t.start()
            thread_pool.append(t)
        else:
            # Full board search: one worker thread per 4chan thread on the board.
            for thread in page["threads"]:
                t = Thread(None, target=search_thread, args=(results_deque, thread["no"], search_parameters))
                t.start()
                thread_pool.append(t)

    for _thread in thread_pool:
        if _thread.is_alive():
            _thread.join(float(thread_join_timeout_seconds))

    return results_data
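
# Typical use from IRC, via the commands registered in load() (channel, nick
# and thread numbers are illustrative):
#
#   <nick> catalog g \brust\b
#   <bot>  nick: https://boards.4chan.org/g/thread/570368 https://boards.4chan.org/g/thread/570412
#
#   <nick> board g \brust\b
#   <bot>  nick: No results for \brust\b on g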