Skip to content

Commit 09e0761

Browse files
committed
2 parents c54ef01 + f10ce59 commit 09e0761

File tree

3 files changed

+83
-58
lines changed

3 files changed

+83
-58
lines changed

ckan-backend-dev/src/ckanext-wri/README.md

+16-13
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ Migrates an RW dataset/metadata to CKAN. It maps all supported RW fields to CKAN
241241

242242
**Parameters:**
243243
- **rw_dataset_id** (string) – The RW UUID of the dataset to migrate (required—unless `gfw_dataset` is provided). Example: `c0b5f4b1-4f3b-4f1e-8f1e-3f4b1f3b4f1e`.
244-
- **application** (string) – The RW application of the dataset to migrate (required). Example: `rw`.
244+
- **rw_application** (string) – The RW application of the dataset to migrate (required—unless `gfw_dataset` is provided, in which case it defaults to `gfw`). Example: `rw`.
245+
- **dx_application** (string) – The destination DX application name (group name) to associate the dataset with (required). Example: `land-carbon-lab`.
245246
- **dataset_slug** (string) – The desired slug of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `slug` value from the RW/GFW APIs. Example: `my-dataset`.
246247
- **dataset_title** (string) – The desired title of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `name` value from the RW API or the `title` value from the GFW API. Example: `My Dataset`.
247248
- **gfw_dataset** (string) – The GFW dataset to migrate (optional). If this dataset also has metadata in the RW API, you should also include `rw_dataset_id`. Example: `gfw_forest_data`.
@@ -260,7 +261,7 @@ A successful request will return the Prefect status of the new migration job.
260261
##### Usage Example
261262

262263
```
263-
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&application=rw&team=migration-test&topics=lucas-topic,nov-16-topic"
264+
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&rw_application=rw&dx_application=land-carbon-lab&team=migration-test&topics=lucas-topic,nov-16-topic"
264265
{
265266
"help": "https://wri.dev.ckan.datopian.com/api/3/action/help_show?name=migration_status",
266267
"success": true,
@@ -283,7 +284,8 @@ A successful request will return the Prefect status of the new migration job.
283284
"lucas-topic",
284285
"nov-16-topic"
285286
],
286-
"application": "rw"
287+
"rw_application": "rw",
288+
"dx_application": "land-carbon-lab"
287289
}
288290
},
289291
"idempotency_key": null,
@@ -443,7 +445,8 @@ You'll need this ID: `"id": "7cd8a09e-1834-4ab5-8b72-bd638e9392ae"` (`result.id`
443445
Add a custom file to the `migration/files` directory and commit it to the repo. Once deployed, you can use the `file_name` parameter to specify it. The file should be a CSV with the following columns:
444446

445447
- `rw_dataset_id` (required—unless `gfw_dataset` is provided)
446-
- `application` (required)
448+
- `rw_application` (required—unless `gfw_dataset` is provided, in which case it defaults to `gfw`)
449+
- `dx_application` (required)
447450
- `team` (optional)
448451
- `topics` (optional)
449452
- `geographic_coverage` (optional)
@@ -461,14 +464,13 @@ Add a custom file to the `migration/files` directory and commit it to the repo.
461464
Example:
462465

463466
```csv
464-
rw_dataset_id,gfw_dataset,application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug
465-
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset
466-
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset
467-
faf79d2c-5e54-4591-9d70-4bd1029c18e6,,crt,agriadapt,atmosphere,Global,John Smith:[email protected],Jane Smith:[email protected],,,
468-
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,
469-
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,
470-
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example
471-
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,
467+
rw_dataset_id,gfw_dataset,rw_application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug,dx_application
468+
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset,aqueduct
469+
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset,aqueduct
470+
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,,global-forest-watch
471+
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,,global-forest-watch
472+
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example,global-forest-watch
473+
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,,global-forest-watch
472474
```
473475

474476
#### POST /api/3/action/migration_status
@@ -508,7 +510,8 @@ The following uses the flow run ID from the `/migrate_dataset` endpoint example
508510
"lucas-topic",
509511
"nov-16-topic"
510512
],
511-
"application": "rw"
513+
"rw_application": "rw",
514+
"dx_application": "land-carbon-lab"
512515
}
513516
},
514517
"idempotency_key": null,

ckan-backend-dev/src/ckanext-wri/ckanext/wri/logic/action/create.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@
113113
"gfw_dataset",
114114
"gfw_only",
115115
"gfw_version",
116-
"application",
116+
"rw_application",
117+
"dx_application",
117118
"team",
118119
"topics",
119120
"layer_ids",
@@ -278,7 +279,8 @@ def trigger_migration(context: Context, data_dict: DataDict):
278279
@logic.side_effect_free
279280
def migrate_dataset(context: Context, data_dict: DataDict):
280281
dataset_id = data_dict.get("rw_dataset_id")
281-
application = data_dict.get("application")
282+
dx_application = data_dict.get("dx_application")
283+
rw_application = data_dict.get("rw_application")
282284
gfw_dataset = data_dict.get("gfw_dataset")
283285

284286
data_dict = _black_white_list("whitelist", data_dict)
@@ -295,9 +297,19 @@ def migrate_dataset(context: Context, data_dict: DataDict):
295297
else:
296298
data_dict["gfw_only"] = True
297299

298-
if not application:
300+
if not rw_application:
299301
if not gfw_dataset:
300-
raise tk.ValidationError(_("Application is required"))
302+
raise tk.ValidationError(_("'rw_application' is required when no 'gfw_dataset' is provided"))
303+
304+
if not dx_application:
305+
raise tk.ValidationError(_("'dx_application' is required to associate the dataset with a DX application"))
306+
307+
try:
308+
tk.get_action("group_show")(
309+
{"ignore_auth": True}, {"id": dx_application, "type": "application"}
310+
)
311+
except logic.NotFound:
312+
raise tk.ValidationError(_("'dx_application' not found: ") + dx_application)
301313

302314
team = data_dict.get("team")
303315
topics = data_dict.get("topics")

migration/tasks/migration_task.py

+51-41
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def normalize_value(value):
246246
return value.strip()
247247

248248

249-
def check_dataset_exists(dataset_id, rw_id=None, application=None):
249+
def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
250250
"""
251251
Check if dataset exists in CKAN.
252252
"""
@@ -255,9 +255,9 @@ def check_dataset_exists(dataset_id, rw_id=None, application=None):
255255
dataset = ckan.action.package_show(id=dataset_id)
256256
return True, dataset
257257
except ckanapi.errors.NotFound:
258-
if rw_id and application:
258+
if rw_id and dx_application and rw_application:
259259
dataset = ckan.action.package_search(
260-
fq=f"+rw_id:{rw_id} +application:{application}"
260+
fq=f"+rw_id:{rw_id} +(groups:{dx_application} OR application:{rw_application})"
261261
)
262262

263263
dataset_count = dataset.get("count")
@@ -273,6 +273,10 @@ def check_dataset_exists(dataset_id, rw_id=None, application=None):
273273
log.warning("Using the first dataset found.")
274274

275275
return dataset_count > 0, dataset_results[0] if dataset_count > 0 else None
276+
else:
277+
log.error(
278+
f"Missing required parameters: rw_id, dx_application, rw_application: {rw_id}, {dx_application}, {rw_application}"
279+
)
276280

277281
return False, None
278282

@@ -291,7 +295,8 @@ def get_datasets_from_csv(file_name):
291295
dataset = {}
292296
dataset_id = row.get("rw_dataset_id")
293297
gfw_dataset = row.get("gfw_dataset")
294-
application = row.get("application")
298+
rw_application = row.get("rw_application")
299+
dx_application = row.get("dx_application")
295300
gfw_only = row.get("gfw_only") or False
296301

297302
if not dataset_id:
@@ -300,10 +305,10 @@ def get_datasets_from_csv(file_name):
300305
else:
301306
dataset_id = gfw_dataset
302307
gfw_only = True
303-
application = "gfw"
308+
rw_application = "gfw"
304309

305-
if not application:
306-
raise ValueError("'application' required")
310+
if not rw_application and not dx_application:
311+
raise ValueError("Both 'rw_application' and 'dx_application' required")
307312

308313
team = row.get("team")
309314
topics = row.get("topics")
@@ -325,7 +330,8 @@ def get_datasets_from_csv(file_name):
325330
"rw_dataset_id": dataset_id,
326331
"gfw_dataset": gfw_dataset,
327332
"gfw_only": gfw_only,
328-
"application": application,
333+
"rw_application": rw_application,
334+
"dx_application": dx_application,
329335
"team": team,
330336
"topics": topics,
331337
"authors": authors,
@@ -347,7 +353,8 @@ def send_migration_dataset(data_dict):
347353

348354
dataset_id = data_dict.get("rw_dataset_id")
349355
gfw_dataset = data_dict.get("gfw_dataset")
350-
application = data_dict.get("application")
356+
rw_application = data_dict.get("rw_application")
357+
dx_application = data_dict.get("dx_application")
351358
gfw_only = data_dict.get("gfw_only")
352359
gfw_version = data_dict.get("gfw_version")
353360
dataset_slug = data_dict.get("dataset_slug")
@@ -359,13 +366,13 @@ def send_migration_dataset(data_dict):
359366
else:
360367
dataset_id = gfw_dataset
361368
gfw_only = True
362-
application = "gfw"
369+
rw_application = "gfw"
363370

364-
if not application:
365-
raise ValueError("'application' required")
371+
if not rw_application and not dx_application:
372+
raise ValueError("Both 'rw_application' and 'dx_application' required")
366373

367374
dataset = get_dataset_from_api(
368-
dataset_id, application, gfw_dataset, gfw_only, gfw_version
375+
dataset_id, rw_application, gfw_dataset, gfw_only, gfw_version
369376
)
370377
external_dataset_slug = (
371378
dataset.get("dataset", {}).get("slug") if not gfw_only else dataset_id
@@ -471,7 +478,10 @@ def migrate_dataset(data_dict):
471478

472479
dataset_name = data_dict.get("name")
473480
dataset_exists, dataset = check_dataset_exists(
474-
dataset_name, data_dict.get("rw_id"), data_dict.get("application")
481+
dataset_name,
482+
data_dict.get("dx_application"),
483+
data_dict.get("rw_application"),
484+
data_dict.get("rw_id"),
475485
)
476486

477487
log_name = f'{dataset_name if dataset_name else "Unknown dataset"} -'
@@ -879,7 +889,11 @@ def unstringify_agents(agents, agent_type, log, log_name):
879889

880890
name, email = agent.split(":")
881891
name = name.strip() if name else None
882-
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
892+
email = (
893+
email.strip()
894+
if email and email_validator(email, agent_type, log, log_name)
895+
else None
896+
)
883897

884898
if not name or not email:
885899
log.error(
@@ -900,7 +914,11 @@ def unstringify_agents(agents, agent_type, log, log_name):
900914
name = agent.get("name")
901915
email = agent.get("email")
902916
name = name.strip() if name else None
903-
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
917+
email = (
918+
email.strip()
919+
if email and email_validator(email, agent_type, log, log_name)
920+
else None
921+
)
904922

905923
if not name or not email:
906924
log.error(
@@ -938,7 +956,8 @@ def stringify_agents(data_dict):
938956
def prepare_dataset(data_dict, original_data_dict, gfw_only=False):
939957
log = get_run_logger()
940958

941-
application = original_data_dict.get("application")
959+
rw_application = original_data_dict.get("rw_application")
960+
dx_application = original_data_dict.get("dx_application")
942961
team = original_data_dict.get("team")
943962
topics = original_data_dict.get("topics")
944963
whitelist = original_data_dict.get("whitelist")
@@ -979,31 +998,12 @@ def get_value(key, default="", data_object=None):
979998

980999
base_name = dataset_slug or f'{get_value("name", data_object="dataset")}'
9811000

982-
dataset_application = get_value("application")
983-
requested_application = application
984-
9851001
warnings = []
9861002

987-
if not requested_application:
988-
warnings.append(
989-
f"Requested application not found, using application: {application}"
990-
)
991-
requested_application = dataset_application
992-
993-
if dataset_application and type(dataset_application) == list:
994-
application = [a.lower() for a in dataset_application]
995-
996-
if requested_application not in application:
997-
warnings.append(
998-
f"Requested application not found in dataset applications: {application}"
999-
)
1000-
warnings.append(f"Requested application: {requested_application}")
1001-
1002-
application = requested_application
10031003
gfw_title = None
10041004

1005-
if gfw_only or application == "gfw":
1006-
application = "gfw"
1005+
if gfw_only or rw_application == "gfw":
1006+
rw_application = "gfw"
10071007
gfw_title = get_value("title", data_object="metadata")
10081008

10091009
if not gfw_title and layer_names:
@@ -1012,7 +1012,7 @@ def get_value(key, default="", data_object=None):
10121012
if len(layer_name) == 1:
10131013
gfw_title = layer_name[0]
10141014

1015-
name = munge_title_to_name(f"{base_name} {application}")
1015+
name = dataset_slug or munge_title_to_name(f"{base_name} {rw_application}")
10161016

10171017
log_name = f'{name if name else "Unknown dataset"} -'
10181018

@@ -1090,7 +1090,6 @@ def get_value(key, default="", data_object=None):
10901090
"approval_status": approval_status,
10911091
"is_approved": is_approved,
10921092
"draft": is_draft,
1093-
"application": application,
10941093
"visibility_type": visibility_type,
10951094
}
10961095

@@ -1156,9 +1155,20 @@ def get_value(key, default="", data_object=None):
11561155
if valid_topics:
11571156
required_dataset_values["groups"] = valid_topics
11581157

1158+
try:
1159+
application_dict = ckan.action.group_show(id=dx_application)
1160+
required_dataset_values["groups"] = required_dataset_values.get(
1161+
"groups", []
1162+
) + [{"name": application_dict["name"]}]
1163+
except ckanapi.errors.NotFound:
1164+
log.error(f"{log_name} Application not found: {dx_application}")
1165+
log.error(
1166+
f"{log_name} The process will continue, but the dataset will not be associated with the desired application"
1167+
)
1168+
11591169
resources = []
11601170

1161-
if application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
1171+
if rw_application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
11621172
required_dataset_values["rw_id"] = resource["dataset_id"]
11631173

11641174
for layer in layers:

0 commit comments

Comments
 (0)