Skip to content

Commit 9399810

Browse files
committed
Revert "Merge branch 'dev' of https://github.com/wri/wri-odp into dev"
This reverts commit 09e0761, reversing changes made to c54ef01.
1 parent 7ec8708 commit 9399810

File tree

3 files changed

+58
-83
lines changed

3 files changed

+58
-83
lines changed

ckan-backend-dev/src/ckanext-wri/README.md

+13-16
Original file line number · Diff line number · Diff line change
@@ -241,8 +241,7 @@ Migrates an RW dataset/metadata to CKAN. It maps all supported RW fields to CKAN
241241

242242
**Parameters:**
243243
- **rw_dataset_id** (string) – The RW UUID of the dataset to migrate (required—unless `gfw_dataset` is provided). Example: `c0b5f4b1-4f3b-4f1e-8f1e-3f4b1f3b4f1e`.
244-
- **rw_application** (string) – The RW application of the dataset to migrate (required). Example: `rw`.
245-
- **dx_application** (string) – The destination DX application name (group name) to associate the dataset with (required). Example: `land-carbon-lab`.
244+
- **application** (string) – The RW application of the dataset to migrate (required). Example: `rw`.
246245
- **dataset_slug** (string) – The desired slug of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `slug` value from the RW/GFW APIs. Example: `my-dataset`.
247246
- **dataset_title** (string) – The desired title of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `name` value from the RW API or the `title` value from the GFW API. Example: `My Dataset`.
248247
- **gfw_dataset** (string) – The GFW dataset to migrate (optional). If this dataset also has metadata in the RW API, you should also include `rw_dataset_id`. Example: `gfw_forest_data`.
@@ -261,7 +260,7 @@ A successful request will return the Prefect status of the new migration job.
261260
##### Usage Example
262261

263262
```
264-
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&rw_application=rw&dx_application=land-carbon-lab&team=migration-test&topics=lucas-topic,nov-16-topic"
263+
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&application=rw&team=migration-test&topics=lucas-topic,nov-16-topic"
265264
{
266265
"help": "https://wri.dev.ckan.datopian.com/api/3/action/help_show?name=migration_status",
267266
"success": true,
@@ -284,8 +283,7 @@ A successful request will return the Prefect status of the new migration job.
284283
"lucas-topic",
285284
"nov-16-topic"
286285
],
287-
"rw_application": "rw",
288-
"dx_application": "land-carbon-lab"
286+
"application": "rw"
289287
}
290288
},
291289
"idempotency_key": null,
@@ -445,8 +443,7 @@ You'll need this ID: `"id": "7cd8a09e-1834-4ab5-8b72-bd638e9392ae"` (`result.id`
445443
Add a custom file to the `migration/files` directory and commit it to the repo. Once deployed, you can use the `file_name` parameter to specify it. The file should be a CSV with the following columns:
446444

447445
- `rw_dataset_id` (required—unless `gfw_dataset` is provided)
448-
- `rw_application` (required)
449-
- `dx_application` (required)
446+
- `application` (required)
450447
- `team` (optional)
451448
- `topics` (optional)
452449
- `geographic_coverage` (optional)
@@ -464,13 +461,14 @@ Add a custom file to the `migration/files` directory and commit it to the repo.
464461
Example:
465462

466463
```csv
467-
rw_dataset_id,gfw_dataset,rw_application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug,dx_application
468-
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset,aqueduct
469-
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset,aqueduct
470-
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,,global-forest-watch
471-
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,,global-forest-watch
472-
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example,global-forest-watch
473-
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,,global-forest-watch
464+
rw_dataset_id,gfw_dataset,application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug
465+
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset
466+
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset
467+
faf79d2c-5e54-4591-9d70-4bd1029c18e6,,crt,agriadapt,atmosphere,Global,John Smith:[email protected],Jane Smith:[email protected],,,
468+
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,
469+
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,
470+
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example
471+
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,
474472
```
475473

476474
#### POST /api/3/action/migration_status
@@ -510,8 +508,7 @@ The following uses the flow run ID from the `/migrate_dataset` endpoint example
510508
"lucas-topic",
511509
"nov-16-topic"
512510
],
513-
"rw_application": "rw",
514-
"dx_application": "land-carbon-lab"
511+
"application": "rw"
515512
}
516513
},
517514
"idempotency_key": null,

ckan-backend-dev/src/ckanext-wri/ckanext/wri/logic/action/create.py

+4-16
Original file line number · Diff line number · Diff line change
@@ -113,8 +113,7 @@
113113
"gfw_dataset",
114114
"gfw_only",
115115
"gfw_version",
116-
"rw_application",
117-
"dx_application",
116+
"application",
118117
"team",
119118
"topics",
120119
"layer_ids",
@@ -279,8 +278,7 @@ def trigger_migration(context: Context, data_dict: DataDict):
279278
@logic.side_effect_free
280279
def migrate_dataset(context: Context, data_dict: DataDict):
281280
dataset_id = data_dict.get("rw_dataset_id")
282-
dx_application = data_dict.get("dx_application")
283-
rw_application = data_dict.get("rw_application")
281+
application = data_dict.get("application")
284282
gfw_dataset = data_dict.get("gfw_dataset")
285283

286284
data_dict = _black_white_list("whitelist", data_dict)
@@ -297,19 +295,9 @@ def migrate_dataset(context: Context, data_dict: DataDict):
297295
else:
298296
data_dict["gfw_only"] = True
299297

300-
if not rw_application:
298+
if not application:
301299
if not gfw_dataset:
302-
raise tk.ValidationError(_("'rw_application' is required when no 'gfw_dataset' is provided"))
303-
304-
if not dx_application:
305-
raise tk.ValidationError(_("'dx_application' is required to associate the dataset with a DX application"))
306-
307-
try:
308-
tk.get_action("group_show")(
309-
{"ignore_auth": True}, {"id": dx_application, "type": "application"}
310-
)
311-
except logic.NotFound:
312-
raise tk.ValidationError(_("'dx_application' not found: ") + dx_application)
300+
raise tk.ValidationError(_("Application is required"))
313301

314302
team = data_dict.get("team")
315303
topics = data_dict.get("topics")

migration/tasks/migration_task.py

+41-51
Original file line number · Diff line number · Diff line change
@@ -246,7 +246,7 @@ def normalize_value(value):
246246
return value.strip()
247247

248248

249-
def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
249+
def check_dataset_exists(dataset_id, rw_id=None, application=None):
250250
"""
251251
Check if dataset exists in CKAN.
252252
"""
@@ -255,9 +255,9 @@ def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
255255
dataset = ckan.action.package_show(id=dataset_id)
256256
return True, dataset
257257
except ckanapi.errors.NotFound:
258-
if rw_id and dx_application and rw_application:
258+
if rw_id and application:
259259
dataset = ckan.action.package_search(
260-
fq=f"+rw_id:{rw_id} +(groups:{dx_application} OR application:{rw_application})"
260+
fq=f"+rw_id:{rw_id} +application:{application}"
261261
)
262262

263263
dataset_count = dataset.get("count")
@@ -273,10 +273,6 @@ def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
273273
log.warning("Using the first dataset found.")
274274

275275
return dataset_count > 0, dataset_results[0] if dataset_count > 0 else None
276-
else:
277-
log.error(
278-
f"Missing required parameters: rw_id, dx_application, rw_application: {rw_id}, {dx_application}, {rw_application}"
279-
)
280276

281277
return False, None
282278

@@ -295,8 +291,7 @@ def get_datasets_from_csv(file_name):
295291
dataset = {}
296292
dataset_id = row.get("rw_dataset_id")
297293
gfw_dataset = row.get("gfw_dataset")
298-
rw_application = row.get("rw_application")
299-
dx_application = row.get("dx_application")
294+
application = row.get("application")
300295
gfw_only = row.get("gfw_only") or False
301296

302297
if not dataset_id:
@@ -305,10 +300,10 @@ def get_datasets_from_csv(file_name):
305300
else:
306301
dataset_id = gfw_dataset
307302
gfw_only = True
308-
rw_application = "gfw"
303+
application = "gfw"
309304

310-
if not rw_application and not dx_application:
311-
raise ValueError("Both 'rw_application' and 'dx_application' required")
305+
if not application:
306+
raise ValueError("'application' required")
312307

313308
team = row.get("team")
314309
topics = row.get("topics")
@@ -330,8 +325,7 @@ def get_datasets_from_csv(file_name):
330325
"rw_dataset_id": dataset_id,
331326
"gfw_dataset": gfw_dataset,
332327
"gfw_only": gfw_only,
333-
"rw_application": rw_application,
334-
"dx_application": dx_application,
328+
"application": application,
335329
"team": team,
336330
"topics": topics,
337331
"authors": authors,
@@ -353,8 +347,7 @@ def send_migration_dataset(data_dict):
353347

354348
dataset_id = data_dict.get("rw_dataset_id")
355349
gfw_dataset = data_dict.get("gfw_dataset")
356-
rw_application = data_dict.get("rw_application")
357-
dx_application = data_dict.get("dx_application")
350+
application = data_dict.get("application")
358351
gfw_only = data_dict.get("gfw_only")
359352
gfw_version = data_dict.get("gfw_version")
360353
dataset_slug = data_dict.get("dataset_slug")
@@ -366,13 +359,13 @@ def send_migration_dataset(data_dict):
366359
else:
367360
dataset_id = gfw_dataset
368361
gfw_only = True
369-
rw_application = "gfw"
362+
application = "gfw"
370363

371-
if not rw_application and not dx_application:
372-
raise ValueError("Both 'rw_application' and 'dx_application' required")
364+
if not application:
365+
raise ValueError("'application' required")
373366

374367
dataset = get_dataset_from_api(
375-
dataset_id, rw_application, gfw_dataset, gfw_only, gfw_version
368+
dataset_id, application, gfw_dataset, gfw_only, gfw_version
376369
)
377370
external_dataset_slug = (
378371
dataset.get("dataset", {}).get("slug") if not gfw_only else dataset_id
@@ -478,10 +471,7 @@ def migrate_dataset(data_dict):
478471

479472
dataset_name = data_dict.get("name")
480473
dataset_exists, dataset = check_dataset_exists(
481-
dataset_name,
482-
data_dict.get("dx_application"),
483-
data_dict.get("rw_application"),
484-
data_dict.get("rw_id"),
474+
dataset_name, data_dict.get("rw_id"), data_dict.get("application")
485475
)
486476

487477
log_name = f'{dataset_name if dataset_name else "Unknown dataset"} -'
@@ -889,11 +879,7 @@ def unstringify_agents(agents, agent_type, log, log_name):
889879

890880
name, email = agent.split(":")
891881
name = name.strip() if name else None
892-
email = (
893-
email.strip()
894-
if email and email_validator(email, agent_type, log, log_name)
895-
else None
896-
)
882+
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
897883

898884
if not name or not email:
899885
log.error(
@@ -914,11 +900,7 @@ def unstringify_agents(agents, agent_type, log, log_name):
914900
name = agent.get("name")
915901
email = agent.get("email")
916902
name = name.strip() if name else None
917-
email = (
918-
email.strip()
919-
if email and email_validator(email, agent_type, log, log_name)
920-
else None
921-
)
903+
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
922904

923905
if not name or not email:
924906
log.error(
@@ -956,8 +938,7 @@ def stringify_agents(data_dict):
956938
def prepare_dataset(data_dict, original_data_dict, gfw_only=False):
957939
log = get_run_logger()
958940

959-
rw_application = original_data_dict.get("rw_application")
960-
dx_application = original_data_dict.get("dx_application")
941+
application = original_data_dict.get("application")
961942
team = original_data_dict.get("team")
962943
topics = original_data_dict.get("topics")
963944
whitelist = original_data_dict.get("whitelist")
@@ -998,12 +979,31 @@ def get_value(key, default="", data_object=None):
998979

999980
base_name = dataset_slug or f'{get_value("name", data_object="dataset")}'
1000981

982+
dataset_application = get_value("application")
983+
requested_application = application
984+
1001985
warnings = []
1002986

987+
if not requested_application:
988+
warnings.append(
989+
f"Requested application not found, using application: {application}"
990+
)
991+
requested_application = dataset_application
992+
993+
if dataset_application and type(dataset_application) == list:
994+
application = [a.lower() for a in dataset_application]
995+
996+
if requested_application not in application:
997+
warnings.append(
998+
f"Requested application not found in dataset applications: {application}"
999+
)
1000+
warnings.append(f"Requested application: {requested_application}")
1001+
1002+
application = requested_application
10031003
gfw_title = None
10041004

1005-
if gfw_only or rw_application == "gfw":
1006-
rw_application = "gfw"
1005+
if gfw_only or application == "gfw":
1006+
application = "gfw"
10071007
gfw_title = get_value("title", data_object="metadata")
10081008

10091009
if not gfw_title and layer_names:
@@ -1012,7 +1012,7 @@ def get_value(key, default="", data_object=None):
10121012
if len(layer_name) == 1:
10131013
gfw_title = layer_name[0]
10141014

1015-
name = dataset_slug or munge_title_to_name(f"{base_name} {rw_application}")
1015+
name = munge_title_to_name(f"{base_name} {application}")
10161016

10171017
log_name = f'{name if name else "Unknown dataset"} -'
10181018

@@ -1090,6 +1090,7 @@ def get_value(key, default="", data_object=None):
10901090
"approval_status": approval_status,
10911091
"is_approved": is_approved,
10921092
"draft": is_draft,
1093+
"application": application,
10931094
"visibility_type": visibility_type,
10941095
}
10951096

@@ -1155,20 +1156,9 @@ def get_value(key, default="", data_object=None):
11551156
if valid_topics:
11561157
required_dataset_values["groups"] = valid_topics
11571158

1158-
try:
1159-
application_dict = ckan.action.group_show(id=dx_application)
1160-
required_dataset_values["groups"] = required_dataset_values.get(
1161-
"groups", []
1162-
) + [{"name": application_dict["name"]}]
1163-
except ckanapi.errors.NotFound:
1164-
log.error(f"{log_name} Application not found: {dx_application}")
1165-
log.error(
1166-
f"{log_name} The process will continue, but the dataset will not be associated with the desired application"
1167-
)
1168-
11691159
resources = []
11701160

1171-
if rw_application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
1161+
if application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
11721162
required_dataset_values["rw_id"] = resource["dataset_id"]
11731163

11741164
for layer in layers:

0 commit comments

Comments (0)