Skip to content

Commit

Permalink
More debug logging (#139)
Browse files Browse the repository at this point in the history
To help diagnose when things are going wrong, this commit adds some additional debug logging covering when requests are happening, what parameters are being used, sleep behavior, and error responses from web.archive.org.

Closes #138.

Co-authored-by: Rob Brackett <[email protected]>
  • Loading branch information
edsu and Mr0grog committed Nov 23, 2023
1 parent ac28660 commit 6baec33
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
10 changes: 9 additions & 1 deletion wayback/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,25 +400,33 @@ def send(self, *args, **kwargs):
retries = 0
while True:
try:
logger.debug('sending HTTP request %s "%s", %s', args[0].method, args[0].url, kwargs)
result = super().send(*args, **kwargs)
if retries >= maximum or not self.should_retry(result):
if result.status_code == 429:
raise RateLimitError(result)
return result
else:
# TODO: parse and use Retry-After header if present.
# TODO: add additional delay for 429 responses.
logger.debug('Received error response (status: %s), will retry', result.status_code)
except WaybackSession.handleable_errors as error:
response = getattr(error, 'response', None)
if response:
read_and_close(response)

if retries >= maximum:
raise WaybackRetryError(retries, total_time, error) from error
elif not self.should_retry_error(error):
elif self.should_retry_error(error):
logger.warn('Caught exception during request, will retry: %s', error)
else:
raise

# The first retry has no delay.
if retries > 0:
seconds = self.backoff * 2 ** (retries - 1)
total_time += seconds
logger.debug('Will retry after sleeping for %s seconds...', seconds)
time.sleep(seconds)

retries += 1
Expand Down
4 changes: 3 additions & 1 deletion wayback/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ def rate_limited(calls_per_second=1, group='default'):
minimum_wait = 1.0 / calls_per_second
current_time = time.time()
if current_time - last_call < minimum_wait:
time.sleep(minimum_wait - (current_time - last_call))
seconds = minimum_wait - (current_time - last_call)
logger.debug('Hit %s rate limit, sleeping for %s seconds', group, seconds)
time.sleep(seconds)
_last_call_by_group[group] = time.time()
yield

Expand Down

0 comments on commit 6baec33

Please sign in to comment.