Skip to content

Commit ebbb388

Browse files
authored
Merge pull request #37 from dataiku/feature/sc-86553-429-error-on-multiple-files-uploads
Handling http 429, error on multiple files uploads
2 parents ea2a0ce + de201c8 commit ebbb388

File tree

4 files changed

+118
-4
lines changed

4 files changed

+118
-4
lines changed

python-lib/common.py

+53
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
import re
2+
import datetime
3+
import time
24
try:
35
import urlparse
46
except:
57
import urllib.parse as urlparse
8+
from safe_logger import SafeLogger
9+
from sharepoint_constants import SharePointConstants
10+
11+
logger = SafeLogger("sharepoint-online plugin", ["Authorization", "sharepoint_username", "sharepoint_password", "client_secret"])
612

713

814
def get_rel_path(path):
@@ -55,3 +61,50 @@ def parse_query_string_to_dict(query_string):
5561
def parse_url(tenant_name):
5662
url_tokens = urlparse.urlparse(tenant_name.strip('/'))
5763
return url_tokens.scheme, url_tokens.netloc, url_tokens.path
64+
65+
66+
def is_request_performed(response):
    """Tell whether a request attempt produced a response the caller can keep.

    Returns False when ``response`` is None (nothing has been attempted yet)
    or when the status code is 429 or 503. In the 429 / 503 case the response
    is logged and this function sleeps for the delay computed by
    ``decode_retry_after_header`` before returning, so the caller may retry
    as soon as it gets control back. Any other response returns True.
    """
    if response is None:
        return False

    must_retry = response.status_code in [429, 503]
    if not must_retry:
        return True

    logger.warning("Error {}, headers = {}".format(response.status_code, response.headers))
    wait_in_seconds = decode_retry_after_header(response)
    logger.warning("Sleeping for {} seconds".format(wait_in_seconds))
    # Blocking wait: the back-off happens here, not in the caller's loop.
    time.sleep(wait_in_seconds)
    return False
76+
77+
78+
def decode_retry_after_header(response):
    """Return the number of seconds to wait before retrying a throttled request.

    The ``Retry-After`` header of ``response`` may hold either a number of
    seconds ("120") or an HTTP date ("Wed, 21 Oct 2015 07:28:00 GMT").

    :param response: object exposing a ``headers`` mapping.
    :return: the delay in whole seconds. Falls back to
        ``SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY`` when the header is
        missing, unparsable, or designates a date that is not in the future.
    """
    # Function-scope import so the module's import block does not need to change.
    from email.utils import mktime_tz, parsedate_tz

    default_wait = SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY
    raw_header_value = response.headers.get("Retry-After")
    if raw_header_value is None:
        return default_wait

    raw_header_value = raw_header_value.strip()
    try:
        if raw_header_value.isdigit():
            # int() stays inside the try: str.isdigit() accepts a few
            # characters (e.g. superscript digits) that int() rejects.
            return int(raw_header_value)

        # Date format, "Wed, 21 Oct 2015 07:28:00 GMT".
        # parsedate_tz is locale-independent and keeps the timezone, and
        # mktime_tz yields a UTC epoch value, so the comparison with
        # time.time() is correct whatever the local timezone is.
        parsed_date = parsedate_tz(raw_header_value) if raw_header_value else None
        if parsed_date is None:
            raise ValueError("unsupported Retry-After value '{}'".format(raw_header_value))
        seconds_until_retry = int(mktime_tz(parsed_date) - time.time())
    except (IndexError, OverflowError, TypeError, ValueError) as err:
        logger.error("decode_retry_after_header error {}".format(err))
        return default_wait

    if seconds_until_retry > 0:
        # Target date in the future: wait the full remaining time (days included).
        return seconds_until_retry
    return default_wait
95+
96+
97+
class ItemsLimit(object):
    """Counter that tells when more than ``records_limit`` items have been seen.

    A ``records_limit`` of -1 (the default) means "no limit": ``is_reached``
    then always returns False and does not count anything.
    """

    def __init__(self, records_limit=-1):
        self.has_no_limit = (records_limit == -1)
        self.records_limit = records_limit
        self.counter = 0

    def is_reached(self, number_of_new_records=None):
        """Register new records and report whether the limit is now exceeded.

        :param number_of_new_records: how many records to add to the counter.
            None (the default) counts a single record; an explicit 0 adds nothing.
        :return: True once the counter is strictly greater than the limit,
            always False when no limit is set.
        """
        if self.has_no_limit:
            return False
        # Test against None, not truthiness, so a batch of 0 records
        # is not silently counted as one.
        if number_of_new_records is None:
            number_of_new_records = 1
        self.counter += number_of_new_records
        return self.counter > self.records_limit

    def add_record(self):
        """Count one more record without checking the limit."""
        self.counter += 1

python-lib/sharepoint_client.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from robust_session import RobustSession
1414
from sharepoint_constants import SharePointConstants
1515
from dss_constants import DSSConstants
16-
from common import is_email_address, get_value_from_path, parse_url, get_value_from_paths
16+
from common import is_email_address, get_value_from_path, parse_url, get_value_from_paths, is_request_performed, ItemsLimit
1717
from safe_logger import SafeLogger
1818

1919

@@ -820,12 +820,17 @@ def __init__(self, sharepoint_user_name, sharepoint_password, sharepoint_url, sh
820820
self.form_digest_value = self.get_form_digest_value()
821821

822822
def get(self, url, headers=None, params=None):
    """Send an authenticated GET request, retrying while the server throttles it.

    The request is re-sent as long as ``is_request_performed`` rejects the
    response (HTTP 429 / 503, after waiting the advertised delay) and the
    retry budget of ``SharePointConstants.MAX_RETRIES`` is not exhausted.

    :param url: target URL.
    :param headers: optional extra headers; the dict passed in is left untouched.
    :param params: optional query string parameters.
    :return: the last response received, which may still be a 429 / 503
        when the retry budget ran out.
    """
    retries_limit = ItemsLimit(SharePointConstants.MAX_RETRIES)
    # Work on a copy so the Accept / Authorization values never leak into
    # a dict the caller may reuse for other requests.
    headers = dict(headers or {})
    headers["Accept"] = DSSConstants.APPLICATION_JSON
    headers["Authorization"] = self.get_authorization_bearer()
    response = None
    while not is_request_performed(response) and not retries_limit.is_reached():
        # Same timeout as post(), so a stalled connection cannot hang forever.
        response = requests.get(url, headers=headers, params=params, timeout=SharePointConstants.TIMEOUT_SEC)
    return response
827831

828832
def post(self, url, headers=None, json=None, data=None, params=None):
833+
retries_limit = ItemsLimit(SharePointConstants.MAX_RETRIES)
829834
headers = headers or {}
830835
default_headers = {
831836
"Accept": DSSConstants.APPLICATION_JSON_NOMETADATA,
@@ -835,7 +840,10 @@ def post(self, url, headers=None, json=None, data=None, params=None):
835840
if self.form_digest_value:
836841
default_headers.update({"X-RequestDigest": self.form_digest_value})
837842
default_headers.update(headers)
838-
return requests.post(url, headers=default_headers, json=json, data=data, params=params, timeout=SharePointConstants.TIMEOUT_SEC)
843+
response = None
844+
while not is_request_performed(response) and not retries_limit.is_reached():
845+
response = requests.post(url, headers=default_headers, json=json, data=data, params=params, timeout=SharePointConstants.TIMEOUT_SEC)
846+
return response
839847

840848
@staticmethod
841849
def close():

python-lib/sharepoint_constants.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ class SharePointConstants(object):
33
COMMENT_COLUMN = 'comment'
44
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
55
DEFAULT_VIEW_ENDPOINT = "DefaultView/ViewFields"
6+
DEFAULT_WAIT_BEFORE_RETRY = 60
67
ENTITY_PROPERTY_NAME = 'EntityPropertyName'
78
ERROR_CONTAINER = 'error'
89
EXPENDABLES_FIELDS = {"Author": "Title", "Editor": "Title"}

tests/python/unit/test_common.py

+53-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
1-
from common import get_value_from_path
1+
from common import get_value_from_path, is_request_performed, decode_retry_after_header
2+
from sharepoint_constants import SharePointConstants
23
import pytest
34

45

6+
class MockResponse:
    """Bare-bones stand-in for an HTTP response, holding only what the tests read."""

    def __init__(self, status_code, headers):
        # The two attributes mirror the constructor arguments one to one.
        self.status_code, self.headers = status_code, headers
10+
11+
512
class TestCommonMethods:
613
def setup_class(self):
714
self.dictionary_to_search = {
@@ -15,6 +22,14 @@ def setup_class(self):
1522
self.ok_path_1 = ["a", "b", "c"]
1623
self.ok_path_2 = ["a", "d"]
1724
self.ko_path = ["a", "c"]
25+
self.mock_response_none = None
26+
self.mock_response_http_200 = MockResponse(200, {"Retry-After": "1"})
27+
self.mock_response_http_429_digit_1s = MockResponse(429, {"Retry-After": "1"})
28+
self.mock_response_http_429_no_header = MockResponse(429, {})
29+
self.mock_response_http_503_digit_1s = MockResponse(503, {"Retry-After": "1"})
30+
self.mock_response_http_429_date_in_past = MockResponse(429, {"Retry-After": "Wed, 21 Oct 2015 07:28:00 GMT"})
31+
self.mock_response_http_429_date_in_future = MockResponse(429, {"Retry-After": "Wed, 21 Oct 9999 07:28:00 GMT"})
32+
self.mock_response_http_429_garbage = MockResponse(429, {"Retry-After": "blablablabla"})
1833

1934
def test_get_value_from_path_long_path(self):
2035
key = get_value_from_path(self.dictionary_to_search, self.ok_path_1)
@@ -31,3 +46,40 @@ def test_get_value_from_path_wrong_path(self):
3146
def test_get_value_from_path_wrong_path_custom_reply(self):
3247
key = get_value_from_path(self.dictionary_to_search, self.ko_path, default_reply="ko")
3348
assert key == "ko"
49+
50+
def test_is_request_performed_none(self):
51+
mock_response = None
52+
response = is_request_performed(mock_response)
53+
assert response is False
54+
55+
def test_is_request_performed_error_200(self):
56+
response = is_request_performed(self.mock_response_http_200)
57+
assert response is True
58+
59+
def test_is_request_performed_error_429(self):
60+
response = is_request_performed(self.mock_response_http_429_digit_1s)
61+
assert response is False
62+
63+
def test_is_request_performed_error_503(self):
64+
response = is_request_performed(self.mock_response_http_503_digit_1s)
65+
assert response is False
66+
67+
def test_decode_retry_after_header_seconds(self):
68+
seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_digit_1s)
69+
assert seconds_before_retry == 1
70+
71+
def test_decode_retry_after_header_future_date(self):
72+
seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_date_in_future)
73+
assert seconds_before_retry >= 4000
74+
75+
def test_decode_retry_after_header_past_date(self):
76+
seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_date_in_past)
77+
assert seconds_before_retry == SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY
78+
79+
def test_decode_retry_after_header_garbage(self):
80+
seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_garbage)
81+
assert seconds_before_retry == SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY
82+
83+
def test_decode_retry_after_header_no_header(self):
84+
seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_no_header)
85+
assert seconds_before_retry == SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY

0 commit comments

Comments
 (0)