Skip to content

Commit b1f4487

Browse files
committed
Drafting ThreadsafeBurstyRateLimiterState
Related to #133
1 parent c5618f1 commit b1f4487

File tree

4 files changed

+51
-7
lines changed

4 files changed

+51
-7
lines changed

minet/crawl/crawler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ def __init__(
403403
"spoof_ua": spoof_ua,
404404
"use_pycurl": use_pycurl,
405405
"compressed": compressed,
406-
"known_encoding": known_encoding
406+
"known_encoding": known_encoding,
407407
}
408408

409409
def __repr__(self):

minet/rate_limiting.py

+48-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
from typing import Optional
2+
13
import time
24
import functools
5+
from threading import Event, Lock
36

47

5-
class RateLimiter(object):
8+
class RateLimiter:
69
"""
710
Naive rate limiter context manager with smooth output.
811
@@ -70,7 +73,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
7073
return self.exit()
7174

7275

73-
class RetryableIterator(object):
76+
class RetryableIterator:
7477
"""
7578
Iterator exposing a #.retry method that will make sure the next item
7679
is the same as the current one.
@@ -99,7 +102,7 @@ def retry(self):
99102
self.retried = True
100103

101104

102-
class RateLimitedIterator(object):
105+
class RateLimitedIterator:
103106
"""
104107
Handy iterator wrapper that will yield its items while respecting a given
105108
rate limit and that will not sleep needlessly when the iterator is
@@ -143,7 +146,7 @@ def __iter__(self):
143146
self.rate_limiter.exit()
144147

145148

146-
class RateLimiterState(object):
149+
class RateLimiterState:
147150
def __init__(self, max_per_period: int, period: float = 1.0):
148151
max_per_second = max_per_period / period
149152
self.min_interval = 1.0 / max_per_second
@@ -163,6 +166,47 @@ def update(self):
163166
self.last_entry = time.perf_counter()
164167

165168

169+
class ThreadsafeBurstyRateLimiterState:
    """
    Shared state for a thread-safe "bursty" rate limiter.

    Up to `max_per_period` calls to #.wait_if_needed are let through
    without any delay (the burst). Once the burst is exhausted, callers are
    blocked until the current window, which started with the first call of
    the burst and lasts `period` seconds, is over. A new burst then starts.

    Args:
        max_per_period: maximum number of calls allowed per window.
            Must be at least 1.
        period: duration of a window, in seconds.

    Raises:
        ValueError: if `max_per_period` is lower than 1, since no call
            could ever be let through.
    """

    def __init__(self, max_per_period: int, period: float = 1.0):
        if max_per_period < 1:
            raise ValueError("max_per_period should be >= 1")

        self.max_per_period = max_per_period
        self.period = period

        # Number of calls already let through in the current window
        self.current_burst = 0

        # perf_counter timestamp at which the current window ends, or None
        # if no window has been opened yet
        self.time_of_next_burst: Optional[float] = None

        # The event is cleared while one thread sleeps until the end of the
        # window, so that other threads park on it instead of busy-looping.
        self.event = Event()

        # The lock protects `current_burst` and `time_of_next_burst`
        self.lock = Lock()

        self.event.set()

    def wait_if_needed(self):
        """
        Block the calling thread until it is allowed to proceed, then
        account for the call in the current burst.
        """
        while True:
            # Park here while another thread is sleeping out the window
            self.event.wait()

            with self.lock:
                now = time.perf_counter()

                # The window is over: forget it so a new burst can start
                if (
                    self.time_of_next_burst is not None
                    and now >= self.time_of_next_burst
                ):
                    self.time_of_next_burst = None
                    self.current_burst = 0

                if self.current_burst < self.max_per_period:
                    # The first call of a burst opens the window
                    if self.time_of_next_burst is None:
                        self.time_of_next_burst = now + self.period

                    self.current_burst += 1
                    return

                # Burst is exhausted. If another thread is already sleeping
                # (it cleared the event after we passed the wait above), go
                # back and park on the event rather than sleeping ourselves.
                if not self.event.is_set():
                    continue

                self.event.clear()

                # Strictly positive here, else the window would have been
                # reset above.
                remaining = self.time_of_next_burst - now

            # Sleeping outside of the lock so other threads are not stuck
            # on it; they wait on the event instead.
            try:
                time.sleep(remaining)
            finally:
                # Always wake waiters, even if the sleep was interrupted,
                # else they would be blocked forever.
                self.event.set()

            # Loop again: the window should now be over, and this call still
            # needs to be counted in the new burst.
208+
209+
166210
def rate_limited(max_per_period, period=1.0):
167211
state = RateLimiterState(max_per_period, period)
168212

minet/twitter/api_scraper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ def request(self, url, headers=None, method="GET"):
481481
pool_manager=self.pool_manager,
482482
spoof_ua=True,
483483
method=method,
484-
headers=headers
484+
headers=headers,
485485
)
486486

487487
# def acquire_guest_token(self):

minet/web.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,7 @@ def request_jsonrpc(
11271127
url,
11281128
pool_manager=pool_manager,
11291129
method="POST",
1130-
json_body={"method": method, "params": params}
1130+
json_body={"method": method, "params": params},
11311131
)
11321132

11331133

0 commit comments

Comments
 (0)