Skip to content

Commit

Permalink
chore: add delay into data verification (eqasim-org#256)
Browse files Browse the repository at this point in the history
* chore: add delay into data verification

* triggering verification

* printing the error to see what is going on

* add a timeout of 2 minutes

* trying to add retries

* update

* revert
  • Loading branch information
sebhoerl authored Sep 14, 2024
1 parent 66969ab commit 550f343
Showing 1 changed file with 31 additions and 13 deletions.
44 changes: 31 additions & 13 deletions scripts/verify_data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import requests
import time

# The goal of this script is to verify the availability of the data
# that is needed to set up the pipeline

# Tunables read by the availability check in validate().
# Pause length handed to time.sleep().
sleep_time = 5 # seconds
# Per-request timeout passed to session.head().
timeout = 30 # seconds
# Upper bound on HEAD attempts for a single source.
retries = 3

class Report:
def __init__(self):
self.sources = []
Expand All @@ -13,19 +18,32 @@ def register(self, name, url):
def validate(self):
failed = []

for index, source in enumerate(self.sources):
print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"]))

try:
response = requests.head(source["url"])
source["status"] = response.status_code
except:
source["status"] = "error"

print(" Status {}".format(source["status"]))

if source["status"] != 200:
failed.append(source["name"])
with requests.Session() as session:
# A single session is reused for all checks so that the User-Agent header
# set here is sent with every HEAD request.
session.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0" })
for index, source in enumerate(self.sources):
print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"]))

# Attempt counter and stop flag are reset for every source.
retry = 0
success = False

# Keep trying until a response is received or `retries` attempts are used up.
# "success" only means that a response came back; the status code itself
# is evaluated further below.
while not success and retry < retries:
try:
response = session.head(source["url"], timeout = timeout)
source["status"] = response.status_code
success = True
# NOTE(review): this names the built-in TimeoutError. requests signals
# timeouts with requests.exceptions.Timeout, which does not derive from
# the built-in, so this branch is probably never taken and timeouts end
# up as "error" in the generic handler below - confirm and consider
# catching requests.exceptions.Timeout instead.
except TimeoutError:
source["status"] = "timeout"
except Exception as e:
source["status"] = "error"
print(e)

retry += 1
print(" Status {} (retry {}/{})".format(source["status"], retry, retries))

# Wait sleep_time seconds before continuing.
time.sleep(sleep_time)

# Only HTTP 200 counts as available; any other status code as well as the
# "timeout" / "error" markers put the source on the failed list.
if source["status"] != 200:
failed.append(source["name"])

print("Done.")
print("Missing: ", len(failed))
Expand Down

0 comments on commit 550f343

Please sign in to comment.