import json
import pathlib

import airflow
import airflow.utils.dates
import requests
import requests.exceptions as requests_exceptions
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
910
# DAG object that the tasks defined below attach to via their `dag=` argument.
# NOTE: `airflow.utils.dates` is reached by attribute access, so the submodule
# must have been imported somewhere; a bare `import airflow` does not by
# itself guarantee that the attribute exists.
dag = DAG(
    dag_id="listing_2_10",
    # Start date is computed relative to "now" by Airflow's days_ago helper.
    start_date=airflow.utils.dates.days_ago(14),
    schedule_interval="@daily",
)
1616
# Task that shells out to curl and stores the launch listing as
# /tmp/launches.json.
download_launches = BashOperator(
    task_id="download_launches",
    # Adjacent literals are joined at compile time into the exact same
    # command string; splitting them keeps the line within the length limit.
    bash_command=(
        "curl -o /tmp/launches.json "
        "'https://launchlibrary.net/1.4/launch?next=5&mode=verbose'"
    ),
    dag=dag,
)
2222
@@ -30,12 +30,17 @@ def _get_pictures():
3030 launches = json .load (f )
3131 image_urls = [launch ["rocket" ]["imageURL" ] for launch in launches ["launches" ]]
3232 for image_url in image_urls :
33- response = requests .get (image_url )
34- image_filename = image_url .split ("/" )[- 1 ]
35- target_file = f"/tmp/images/{ image_filename } "
36- with open (target_file , "wb" ) as f :
37- f .write (response .content )
38- print (f"Downloaded { image_url } to { target_file } " )
33+ try :
34+ response = requests .get (image_url )
35+ image_filename = image_url .split ("/" )[- 1 ]
36+ target_file = f"/tmp/images/{ image_filename } "
37+ with open (target_file , "wb" ) as f :
38+ f .write (response .content )
39+ print (f"Downloaded { image_url } to { target_file } " )
40+ except requests_exceptions .MissingSchema :
41+ print (f"{ image_url } appears to be an invalid URL." )
42+ except requests_exceptions .ConnectionError :
43+ print (f"Could not connect to { image_url } ." )
3944
4045
4146get_pictures = PythonOperator (
0 commit comments