Skip to content
This repository was archived by the owner on Nov 24, 2020. It is now read-only.

Commit f3aa9db

Browse files
made some optimisations, and fixed the way fields were being searched
1 parent 4d9d2f1 commit f3aa9db

File tree

2 files changed

+107
-97
lines changed

2 files changed

+107
-97
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,7 @@ config.py
99
#ignore logs
1010
log
1111
.idea
12+
# vim swapfiles
13+
.*.swp
14+
# vim backup files
15+
*.*~

modules/threadsearch.py

+103-97
Original file line numberDiff line numberDiff line change
@@ -16,119 +16,125 @@
1616
from collections import deque
1717

1818
def load():
19-
"""Load the module"""
20-
registerFunction("catalog %s %S", catalog_search_handler, "catalog <board> <regex>")
21-
registerFunction("board %s %S", board_search_handler, "board <board> <regex>")
19+
"""Load the module"""
20+
registerFunction("catalog %s %S", catalog_search_handler, "catalog <board> <regex>")
21+
registerFunction("board %s %S", board_search_handler, "board <board> <regex>")
2222
registerModule("ThreadSearch", load)
2323

2424
def sanitise(string):
25-
"""Strips a string of all non-alphanumeric characters"""
26-
return re.sub(r"[^a-zA-Z0-9 ]", "", string)
25+
"""Strips a string of all non-alphanumeric characters"""
26+
return re.sub(r"[^a-zA-Z0-9 ]", "", string)
2727

2828
def catalog_search_handler(channel, sender, board, user_regex):
29-
"""Handler for initiating catalog search"""
30-
results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=True)
31-
process_results(channel, sender, results_data)
29+
"""Handler for initiating catalog search"""
30+
results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=True)
31+
process_results(channel, sender, results_data)
3232

3333
def board_search_handler(channel, sender, board, user_regex):
34-
"""Handler for initiating full board search"""
35-
results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=False)
36-
process_results(channel, sender, results_data)
34+
"""Handler for initiating full board search"""
35+
results_data = perform_concurrent_4chan_search(board, user_regex, catalog_search=False)
36+
process_results(channel, sender, results_data)
3737

3838
def process_results(channel, sender, results_data):
39-
"""Process the resulting data of a search and present it"""
40-
max_num_urls_displayed = 3
41-
search_parameters = results_data["search_parameters"]
42-
post_numbers = results_data["post_numbers"]
43-
44-
if len(post_numbers) <= 0:
45-
sendMessage(channel, "{0}: No results for {1} on {2}".format(sender, search_parameters["string"],
46-
search_parameters["user_board"]))
47-
else:
48-
post_template = "https://boards.4chan.org/{0}/thread/{1}"
49-
urls = [post_template.format(search_parameters["board"], post_num) for post_num in post_numbers]
50-
if len(urls) > max_num_urls_displayed:
51-
message = nnmm('\n'.join(urls))
52-
else:
53-
message = " ".join(urls[:max_num_urls_displayed])
54-
sendMessage(channel, "{0}: {1}".format(sender, message))
39+
"""Process the resulting data of a search and present it"""
40+
max_num_urls_displayed = 3
41+
search_parameters = results_data["search_parameters"]
42+
post_numbers = results_data["post_numbers"]
43+
44+
if len(post_numbers) <= 0:
45+
sendMessage(channel, "{0}: No results for {1} on {2}".format(sender, search_parameters["string"],
46+
search_parameters["user_board"]))
47+
else:
48+
post_template = "https://boards.4chan.org/{0}/thread/{1}"
49+
urls = [post_template.format(search_parameters["board"], post_num) for post_num in post_numbers]
50+
if len(urls) > max_num_urls_displayed:
51+
message = nnmm('\n'.join(urls))
52+
else:
53+
message = " ".join(urls[:max_num_urls_displayed])
54+
sendMessage(channel, "{0}: {1}".format(sender, message))
5555

5656
def get_json_data(url):
57-
"""Returns a json data object from a given url."""
58-
response = None
59-
try:
60-
response = requests.get(url)
61-
if response.status_code == 404:
62-
log.error("url {}: 404".format(url))
63-
return None
64-
json_data = json.loads(response.text.encode())
65-
return json_data
66-
except Exception as e:
67-
if response is None:
68-
exception_string = "url: {0}\n{1}".format(url, traceback.format_exc())
69-
else:
70-
exception_string = "url: {0} status_code: {1}\n{2}".format(
71-
url, response.status_code, traceback.format_exc())
72-
log.error(exception_string)
73-
print(exception_string)
74-
raise
57+
"""Returns a json data object from a given url."""
58+
response = None
59+
try:
60+
response = requests.get(url)
61+
if response.status_code == 404:
62+
log.error("url {}: 404".format(url))
63+
return None
64+
json_data = json.loads(response.text.encode())
65+
return json_data
66+
except Exception as e:
67+
if response is None:
68+
exception_string = "url: {0}\n{1}".format(url, traceback.format_exc())
69+
else:
70+
exception_string = "url: {0} status_code: {1}\n{2}".format(
71+
url, response.status_code, traceback.format_exc())
72+
log.error(exception_string)
73+
print(exception_string)
74+
raise
7575

7676
def search_thread(results_deque, thread_num, search_parameters):
77-
"""
78-
Searches every post in thread thread_num on a board for the
79-
string provided. Returns a list of matching post numbers.
80-
"""
81-
json_url = "https://a.4cdn.org/{0}/thread/{1}.json".format(search_parameters["board"], thread_num)
82-
thread_json = get_json_data(json_url)
83-
if thread_json is None:
84-
return
85-
86-
regex_match = search_parameters["compiled_regex"].match
87-
for post in thread_json["posts"]:
88-
user_text = "".join([post[s] for s in search_parameters["sections"] if s in post])
89-
if regex_match(user_text) is not None:
90-
results_deque.append("{0}#p{1}".format(thread_num, post["no"]))
77+
"""
78+
Searches every post in thread thread_num on a board for the
79+
string provided. Returns a list of matching post numbers.
80+
"""
81+
json_url = "https://a.4cdn.org/{0}/thread/{1}.json".format(search_parameters["board"], thread_num)
82+
thread_json = get_json_data(json_url)
83+
if thread_json is None:
84+
return
85+
86+
regex_match = search_parameters["compiled_regex"].match
87+
sections = search_parameters["sections"]
88+
deque_append = results_deque.append
89+
for post in thread_json["posts"]:
90+
[deque_append("{0}#p{1}".format(thread_num, post["no"])) for item in map(post.__getitem__, filter(post.__contains__, sections)) if regex_match(item)]
91+
#user_text = "".join([post[s] for s in search_parameters["sections"] if s in post])
92+
#if regex_match(user_text) is not None:
93+
# results_deque.append("{0}#p{1}".format(thread_num, post["no"]))
9194

9295
def search_catalog_page(results_deque, page, search_parameters):
93-
"""Will be run by the threading module. Searches all the
94-
4chan threads on a page and adds matching results to synchronised queue"""
95-
regex_match = search_parameters["compiled_regex"].match
96-
for thread in page["threads"]:
97-
user_text = "".join([thread[s] for s in search_parameters["sections"] if s in thread])
98-
if regex_match(user_text) is not None:
99-
results_deque.append(thread["no"])
96+
"""Will be run by the threading module. Searches all the
97+
4chan threads on a page and adds matching results to synchronised queue"""
98+
regex_match = search_parameters["compiled_regex"].match
99+
sections = search_parameters["sections"]
100+
deque_append = results_deque.append
101+
for thread in page["threads"]:
102+
[deque_append(thread["no"]) for item in map(thread.__getitem__, filter(thread.__contains__, sections)) if regex_match(item)]
103+
#user_text = "".join([thread[s] for s in search_parameters["sections"] if s in thread])
104+
#if regex_match(user_text) is not None:
105+
# results_deque.append(thread["no"])
100106

101107
def perform_concurrent_4chan_search(board, user_regex, catalog_search=False):
102-
"""Search a thread or catalog on 4chan using several threads concurrently, then return relevant data"""
103-
thread_join_timeout_seconds = 10
104-
results_deque = deque()
105-
json_url = "https://a.4cdn.org/{0}/{1}.json".format(board, "catalog" if catalog_search else "threads")
106-
sections = ["com", "name", "trip", "email", "sub", "filename"]
107-
json_data = get_json_data(json_url)
108-
search_regex = re.compile(user_regex, re.UNICODE + re.IGNORECASE)
109-
search_parameters = {"sections": sections, "board": sanitise(board), "string": user_regex,
110-
"compiled_regex": search_regex, "user_board": board}
111-
results_data = {"post_numbers": results_deque, "search_parameters": search_parameters}
112-
thread_pool = []
113-
114-
if json_data is None:
115-
return results_data
116-
117-
for page in json_data:
118-
if catalog_search:
119-
t = Thread(None, target=search_catalog_page, args=(results_deque, page, search_parameters))
120-
t.start()
121-
thread_pool.append(t)
122-
else:
123-
for thread in page["threads"]:
124-
t = Thread(None, target=search_thread, args=(results_deque, thread["no"], search_parameters))
125-
t.start()
126-
thread_pool.append(t)
127-
128-
for _thread in thread_pool:
129-
if _thread.is_alive():
130-
_thread.join(float(thread_join_timeout_seconds))
131-
132-
return results_data
108+
"""Search a thread or catalog on 4chan using several threads concurrently, then return relevant data"""
109+
thread_join_timeout_seconds = 10
110+
results_deque = deque()
111+
json_url = "https://a.4cdn.org/{0}/{1}.json".format(board, "catalog" if catalog_search else "threads")
112+
sections = ["com", "name", "ext", "email", "sub", "filename"]
113+
json_data = get_json_data(json_url)
114+
search_regex = re.compile(user_regex, re.UNICODE + re.IGNORECASE)
115+
search_parameters = {"sections": sections, "board": sanitise(board), "string": user_regex,
116+
"compiled_regex": search_regex, "user_board": board}
117+
results_data = {"post_numbers": results_deque, "search_parameters": search_parameters}
118+
thread_pool = []
119+
120+
if json_data is None:
121+
return results_data
122+
123+
for page in json_data:
124+
if catalog_search:
125+
t = Thread(None, target=search_catalog_page, args=(results_deque, page, search_parameters))
126+
t.start()
127+
thread_pool.append(t)
128+
else:
129+
for thread in page["threads"]:
130+
t = Thread(None, target=search_thread, args=(results_deque, thread["no"], search_parameters))
131+
t.start()
132+
thread_pool.append(t)
133+
134+
for _thread in thread_pool:
135+
if _thread.is_alive():
136+
_thread.join(float(thread_join_timeout_seconds))
137+
138+
return results_data
133139

134140

0 commit comments

Comments
 (0)