11from datetime import datetime , timedelta , timezone
22from os import path
3- from urllib .parse import urlparse
3+ from urllib .parse import parse_qs , urlparse
44
55from requests import Session
66
@@ -43,23 +43,33 @@ def detect(self, source, ref=None, extra_args=None):
4343 if not parsed_url .netloc :
4444 return None
4545
46- url_parts = parsed_url .path .split ("/" )
47- if url_parts [- 2 ] == "dataset" :
48- self .dataset_id = url_parts [- 1 ]
46+ url_parts_1 = parsed_url .path .split ("/history/" )
47+ url_parts_2 = url_parts_1 [0 ].split ("/" )
48+ if url_parts_2 [- 2 ] == "dataset" :
49+ self .dataset_id = url_parts_2 [- 1 ]
4950 else :
5051 return None
5152
5253 api_url_path = "/api/3/action/"
5354 api_url = parsed_url ._replace (
54- path = "/" .join (url_parts [:- 2 ]) + api_url_path
55+ path = "/" .join (url_parts_2 [:- 2 ]) + api_url_path , query = ""
5556 ).geturl ()
5657
5758 status_show_url = f"{ api_url } status_show"
5859 resp = self .urlopen (status_show_url )
5960 if resp .status_code == 200 :
61+
62+ # handle the activities
63+ activity_id = None
64+ if parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
65+ activity_id = parse_qs (parsed_url .query ).get ("activity_id" )[0 ]
66+ if len (url_parts_1 ) == 2 :
67+ activity_id = url_parts_1 [- 1 ]
68+
6069 self .version = self ._fetch_version (api_url )
6170 return {
6271 "dataset_id" : self .dataset_id ,
72+ "activity_id" : activity_id ,
6373 "api_url" : api_url ,
6474 "version" : self .version ,
6575 }
@@ -69,11 +79,21 @@ def detect(self, source, ref=None, extra_args=None):
6979 def fetch (self , spec , output_dir , yield_output = False ):
7080 """Fetch a CKAN dataset."""
7181 dataset_id = spec ["dataset_id" ]
82+ activity_id = spec ["activity_id" ]
7283
7384 yield f"Fetching CKAN dataset { dataset_id } .\n "
74- package_show_url = f"{ spec ['api_url' ]} package_show?id={ dataset_id } "
85+
86+ # handle the activities
87+ if activity_id :
88+ fetch_url = (
89+ f"{ spec ['api_url' ]} activity_data_show?"
90+ f"id={ activity_id } &object_type=package"
91+ )
92+ else :
93+ fetch_url = f"{ spec ['api_url' ]} package_show?id={ dataset_id } "
94+
7595 resp = self .urlopen (
76- package_show_url ,
96+ fetch_url ,
7797 headers = {"accept" : "application/json" },
7898 )
7999
0 commit comments