Skip to content

Commit

Permalink
More debug logging
Browse files Browse the repository at this point in the history
To help diagnose when things are going wrong, this commit adds some
additional debug logging that records when requests happen, what parameters
are being used, sleep behavior, and error responses from web.archive.org.

Closes #138
  • Loading branch information
edsu committed Nov 1, 2023
1 parent 03b8388 commit 7e15ea2
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
6 changes: 6 additions & 0 deletions wayback/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,17 +400,21 @@ def send(self, *args, **kwargs):
retries = 0
while True:
try:
logger.debug("sending %s", kwargs)
result = super().send(*args, **kwargs)
if retries >= maximum or not self.should_retry(result):
if result.status_code == 429:
logger.warning("caught rate limit error: %s", result.content)
raise RateLimitError(result)
return result
except WaybackSession.handleable_errors as error:
logging.warn("caught exception during request: %s", error)
response = getattr(error, 'response', None)
if response:
read_and_close(response)

if retries >= maximum:
logger.error("Too many retries %s >= %s", retries, maximum)
raise WaybackRetryError(retries, total_time, error) from error
elif not self.should_retry_error(error):
raise
Expand All @@ -419,6 +423,7 @@ def send(self, *args, **kwargs):
if retries > 0:
seconds = self.backoff * 2 ** (retries - 1)
total_time += seconds
logger.debug("retrying after sleep of %s seconds", seconds)
time.sleep(seconds)

retries += 1
Expand Down Expand Up @@ -712,6 +717,7 @@ def search(self, url, *, match_type=None, limit=1000, offset=None,
sent_query, next_query = next_query, None
with _utils.rate_limited(self.session.search_calls_per_second,
group='search'):
logger.debug("querying %s %s", CDX_SEARCH_URL, sent_query)
response = self.session.request('GET', CDX_SEARCH_URL,
params=sent_query)
try:
Expand Down
6 changes: 4 additions & 2 deletions wayback/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def set_memento_url_mode(url, mode):


@contextmanager
def rate_limited(calls_per_second=2, group='default'):
def rate_limited(calls_per_second=1.5, group='default'):
"""
A context manager that restricts entries to its body to occur only N times
per second (N can be a float). The current thread will be put to sleep in
Expand All @@ -226,7 +226,9 @@ def rate_limited(calls_per_second=2, group='default'):
minimum_wait = 1.0 / calls_per_second
current_time = time.time()
if current_time - last_call < minimum_wait:
time.sleep(minimum_wait - (current_time - last_call))
secs = minimum_wait - (current_time - last_call)
logging.debug("sleeping %s", secs)
time.sleep(secs)
_last_call_by_group[group] = time.time()
yield

Expand Down

0 comments on commit 7e15ea2

Please sign in to comment.