Skip to content

Commit 9399810

Browse files
committed
Revert "Merge branch 'dev' of https://github.com/wri/wri-odp into dev"
This reverts commit 09e0761, reversing changes made to c54ef01.
1 parent 7ec8708 commit 9399810

File tree

3 files changed

+58
-83
lines changed

3 files changed

+58
-83
lines changed

ckan-backend-dev/src/ckanext-wri/README.md

+13-16
Original file line number · Diff line number · Diff line change
@@ -241,8 +241,7 @@ Migrates an RW dataset/metadata to CKAN. It maps all supported RW fields to CKAN
241241

242242
**Parameters:**
243243
- **rw_dataset_id** (string) – The RW UUID of the dataset to migrate (required—unless `gfw_dataset` is provided). Example: `c0b5f4b1-4f3b-4f1e-8f1e-3f4b1f3b4f1e`.
244-
- **rw_application** (string) – The RW application of the dataset to migrate (required). Example: `rw`.
245-
- **dx_application** (string) – The destination DX application name (group name) to associate the dataset with (required). Example: `land-carbon-lab`.
244+
- **application** (string) – The RW application of the dataset to migrate (required). Example: `rw`.
246245
- **dataset_slug** (string) – The desired slug of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `slug` value from the RW/GFW APIs. Example: `my-dataset`.
247246
- **dataset_title** (string) – The desired title of the dataset to migrate (optional). If you use this option, you will need to include this parameter each time you call `migrate_dataset` for this dataset. This value will override the `name` value from the RW API or the `title` value from the GFW API. Example: `My Dataset`.
248247
- **gfw_dataset** (string) – The GFW dataset to migrate (optional). If this dataset also has metadata in the RW API, you should also include `rw_dataset_id`. Example: `gfw_forest_data`.
@@ -261,7 +260,7 @@ A successful request will return the Prefect status of the new migration job.
261260
##### Usage Example
262261

263262
```
264-
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&rw_application=rw&dx_application=land-carbon-lab&team=migration-test&topics=lucas-topic,nov-16-topic"
263+
% curl -H "Authorization: YOUR_API_TOKEN" "https://wri.dev.ckan.datopian.com/api/3/action/migrate_dataset?rw_dataset_id=c12446ce-174f-4ffb-b2f7-77ecb0116aba&application=rw&team=migration-test&topics=lucas-topic,nov-16-topic"
265264
{
266265
"help": "https://wri.dev.ckan.datopian.com/api/3/action/help_show?name=migration_status",
267266
"success": true,
@@ -284,8 +283,7 @@ A successful request will return the Prefect status of the new migration job.
284283
"lucas-topic",
285284
"nov-16-topic"
286285
],
287-
"rw_application": "rw",
288-
"dx_application": "land-carbon-lab"
286+
"application": "rw"
289287
}
290288
},
291289
"idempotency_key": null,
@@ -445,8 +443,7 @@ You'll need this ID: `"id": "7cd8a09e-1834-4ab5-8b72-bd638e9392ae"` (`result.id`
445443
Add a custom file to the `migration/files` directory and commit it to the repo. Once deployed, you can use the `file_name` parameter to specify it. The file should be a CSV with the following columns:
446444

447445
- `rw_dataset_id` (required—unless `gfw_dataset` is provided)
448-
- `rw_application` (required)
449-
- `dx_application` (required)
446+
- `application` (required)
450447
- `team` (optional)
451448
- `topics` (optional)
452449
- `geographic_coverage` (optional)
@@ -464,13 +461,14 @@ Add a custom file to the `migration/files` directory and commit it to the repo.
464461
Example:
465462

466463
```csv
467-
rw_dataset_id,gfw_dataset,rw_application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug,dx_application
468-
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset,aqueduct
469-
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset,aqueduct
470-
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,,global-forest-watch
471-
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,,global-forest-watch
472-
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example,global-forest-watch
473-
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,,global-forest-watch
464+
rw_dataset_id,gfw_dataset,application,team,topics,geographic_coverage,authors,maintainers,layer_ids,dataset_title,dataset_slug
465+
d491f094-ad6e-4015-b248-1d1cd83667fa,,aqueduct-water-risk,aqueduct,"freshwater,surface-water-bodies",Global,,John Smith:[email protected];Jane Smith:[email protected],,An Aqueduct Dataset,an-aqueduct-dataset
466+
b318381e-485d-46c9-8958-c9a9d75d7e91,,aqueduct-water-risk,aqueduct,"freshwater,water-risks",Global,John Smith:[email protected];Jane Smith:[email protected],,,Another Aqueduct Dataset,another-aqueduct-dataset
467+
faf79d2c-5e54-4591-9d70-4bd1029c18e6,,crt,agriadapt,atmosphere,Global,John Smith:[email protected],Jane Smith:[email protected],,,
468+
,gfw_forest_flux_forest_age_category,gfw,global-forest-watch,"land,ghg-emissions,forest",,,John Smith:[email protected],,,
469+
,gfw_forest_flux_removal_forest_type,gfw,global-forest-watch,"land,ghg-emissions,forest",,Jane Smith:[email protected],John Smith:[email protected],,Another Title Example,
470+
47a8e6cc-ea40-44a8-b1fc-6cf4fcc7d868,nasa_viirs_fire_alerts,gfw,global-forest-watch,"land,natural-hazards,forest",Global,,,2462cceb-41de-4bd2-8251-a6f75fe4e3d5,,another-slug-example
471+
c92b6411-f0e5-4606-bbd9-138e40e50eb8,,gfw,global-forest-watch,"land,forest",,Jane Smith:[email protected],,"0cba3c4f-2d3b-4fb1-8c93-c951dc1da84b,2351399c-ef2c-48da-9485-20698190acb0",,
474472
```
475473

476474
#### POST /api/3/action/migration_status
@@ -510,8 +508,7 @@ The following uses the flow run ID from the `/migrate_dataset` endpoint example
510508
"lucas-topic",
511509
"nov-16-topic"
512510
],
513-
"rw_application": "rw",
514-
"dx_application": "land-carbon-lab"
511+
"application": "rw"
515512
}
516513
},
517514
"idempotency_key": null,

ckan-backend-dev/src/ckanext-wri/ckanext/wri/logic/action/create.py

+4-16
Original file line number · Diff line number · Diff line change
@@ -113,8 +113,7 @@
113113
"gfw_dataset",
114114
"gfw_only",
115115
"gfw_version",
116-
"rw_application",
117-
"dx_application",
116+
"application",
118117
"team",
119118
"topics",
120119
"layer_ids",
@@ -279,8 +278,7 @@ def trigger_migration(context: Context, data_dict: DataDict):
279278
@logic.side_effect_free
280279
def migrate_dataset(context: Context, data_dict: DataDict):
281280
dataset_id = data_dict.get("rw_dataset_id")
282-
dx_application = data_dict.get("dx_application")
283-
rw_application = data_dict.get("rw_application")
281+
application = data_dict.get("application")
284282
gfw_dataset = data_dict.get("gfw_dataset")
285283

286284
data_dict = _black_white_list("whitelist", data_dict)
@@ -297,19 +295,9 @@ def migrate_dataset(context: Context, data_dict: DataDict):
297295
else:
298296
data_dict["gfw_only"] = True
299297

300-
if not rw_application:
298+
if not application:
301299
if not gfw_dataset:
302-
raise tk.ValidationError(_("'rw_application' is required when no 'gfw_dataset' is provided"))
303-
304-
if not dx_application:
305-
raise tk.ValidationError(_("'dx_application' is required to associate the dataset with a DX application"))
306-
307-
try:
308-
tk.get_action("group_show")(
309-
{"ignore_auth": True}, {"id": dx_application, "type": "application"}
310-
)
311-
except logic.NotFound:
312-
raise tk.ValidationError(_("'dx_application' not found: ") + dx_application)
300+
raise tk.ValidationError(_("Application is required"))
313301

314302
team = data_dict.get("team")
315303
topics = data_dict.get("topics")

migration/tasks/migration_task.py

+41-51
Original file line number · Diff line number · Diff line change
@@ -246,7 +246,7 @@ def normalize_value(value):
246246
return value.strip()
247247

248248

249-
def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
249+
def check_dataset_exists(dataset_id, rw_id=None, application=None):
250250
"""
251251
Check if dataset exists in CKAN.
252252
"""
@@ -255,9 +255,9 @@ def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
255255
dataset = ckan.action.package_show(id=dataset_id)
256256
return True, dataset
257257
except ckanapi.errors.NotFound:
258-
if rw_id and dx_application and rw_application:
258+
if rw_id and application:
259259
dataset = ckan.action.package_search(
260-
fq=f"+rw_id:{rw_id} +(groups:{dx_application} OR application:{rw_application})"
260+
fq=f"+rw_id:{rw_id} +application:{application}"
261261
)
262262

263263
dataset_count = dataset.get("count")
@@ -273,10 +273,6 @@ def check_dataset_exists(dataset_id, dx_application, rw_application, rw_id):
273273
log.warning("Using the first dataset found.")
274274

275275
return dataset_count > 0, dataset_results[0] if dataset_count > 0 else None
276-
else:
277-
log.error(
278-
f"Missing required parameters: rw_id, dx_application, rw_application: {rw_id}, {dx_application}, {rw_application}"
279-
)
280276

281277
return False, None
282278

@@ -295,8 +291,7 @@ def get_datasets_from_csv(file_name):
295291
dataset = {}
296292
dataset_id = row.get("rw_dataset_id")
297293
gfw_dataset = row.get("gfw_dataset")
298-
rw_application = row.get("rw_application")
299-
dx_application = row.get("dx_application")
294+
application = row.get("application")
300295
gfw_only = row.get("gfw_only") or False
301296

302297
if not dataset_id:
@@ -305,10 +300,10 @@ def get_datasets_from_csv(file_name):
305300
else:
306301
dataset_id = gfw_dataset
307302
gfw_only = True
308-
rw_application = "gfw"
303+
application = "gfw"
309304

310-
if not rw_application and not dx_application:
311-
raise ValueError("Both 'rw_application' and 'dx_application' required")
305+
if not application:
306+
raise ValueError("'application' required")
312307

313308
team = row.get("team")
314309
topics = row.get("topics")
@@ -330,8 +325,7 @@ def get_datasets_from_csv(file_name):
330325
"rw_dataset_id": dataset_id,
331326
"gfw_dataset": gfw_dataset,
332327
"gfw_only": gfw_only,
333-
"rw_application": rw_application,
334-
"dx_application": dx_application,
328+
"application": application,
335329
"team": team,
336330
"topics": topics,
337331
"authors": authors,
@@ -353,8 +347,7 @@ def send_migration_dataset(data_dict):
353347

354348
dataset_id = data_dict.get("rw_dataset_id")
355349
gfw_dataset = data_dict.get("gfw_dataset")
356-
rw_application = data_dict.get("rw_application")
357-
dx_application = data_dict.get("dx_application")
350+
application = data_dict.get("application")
358351
gfw_only = data_dict.get("gfw_only")
359352
gfw_version = data_dict.get("gfw_version")
360353
dataset_slug = data_dict.get("dataset_slug")
@@ -366,13 +359,13 @@ def send_migration_dataset(data_dict):
366359
else:
367360
dataset_id = gfw_dataset
368361
gfw_only = True
369-
rw_application = "gfw"
362+
application = "gfw"
370363

371-
if not rw_application and not dx_application:
372-
raise ValueError("Both 'rw_application' and 'dx_application' required")
364+
if not application:
365+
raise ValueError("'application' required")
373366

374367
dataset = get_dataset_from_api(
375-
dataset_id, rw_application, gfw_dataset, gfw_only, gfw_version
368+
dataset_id, application, gfw_dataset, gfw_only, gfw_version
376369
)
377370
external_dataset_slug = (
378371
dataset.get("dataset", {}).get("slug") if not gfw_only else dataset_id
@@ -478,10 +471,7 @@ def migrate_dataset(data_dict):
478471

479472
dataset_name = data_dict.get("name")
480473
dataset_exists, dataset = check_dataset_exists(
481-
dataset_name,
482-
data_dict.get("dx_application"),
483-
data_dict.get("rw_application"),
484-
data_dict.get("rw_id"),
474+
dataset_name, data_dict.get("rw_id"), data_dict.get("application")
485475
)
486476

487477
log_name = f'{dataset_name if dataset_name else "Unknown dataset"} -'
@@ -889,11 +879,7 @@ def unstringify_agents(agents, agent_type, log, log_name):
889879

890880
name, email = agent.split(":")
891881
name = name.strip() if name else None
892-
email = (
893-
email.strip()
894-
if email and email_validator(email, agent_type, log, log_name)
895-
else None
896-
)
882+
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
897883

898884
if not name or not email:
899885
log.error(
@@ -914,11 +900,7 @@ def unstringify_agents(agents, agent_type, log, log_name):
914900
name = agent.get("name")
915901
email = agent.get("email")
916902
name = name.strip() if name else None
917-
email = (
918-
email.strip()
919-
if email and email_validator(email, agent_type, log, log_name)
920-
else None
921-
)
903+
email = email.strip() if email and email_validator(email, agent_type, log, log_name) else None
922904

923905
if not name or not email:
924906
log.error(
@@ -956,8 +938,7 @@ def stringify_agents(data_dict):
956938
def prepare_dataset(data_dict, original_data_dict, gfw_only=False):
957939
log = get_run_logger()
958940

959-
rw_application = original_data_dict.get("rw_application")
960-
dx_application = original_data_dict.get("dx_application")
941+
application = original_data_dict.get("application")
961942
team = original_data_dict.get("team")
962943
topics = original_data_dict.get("topics")
963944
whitelist = original_data_dict.get("whitelist")
@@ -998,12 +979,31 @@ def get_value(key, default="", data_object=None):
998979

999980
base_name = dataset_slug or f'{get_value("name", data_object="dataset")}'
1000981

982+
dataset_application = get_value("application")
983+
requested_application = application
984+
1001985
warnings = []
1002986

987+
if not requested_application:
988+
warnings.append(
989+
f"Requested application not found, using application: {application}"
990+
)
991+
requested_application = dataset_application
992+
993+
if dataset_application and type(dataset_application) == list:
994+
application = [a.lower() for a in dataset_application]
995+
996+
if requested_application not in application:
997+
warnings.append(
998+
f"Requested application not found in dataset applications: {application}"
999+
)
1000+
warnings.append(f"Requested application: {requested_application}")
1001+
1002+
application = requested_application
10031003
gfw_title = None
10041004

1005-
if gfw_only or rw_application == "gfw":
1006-
rw_application = "gfw"
1005+
if gfw_only or application == "gfw":
1006+
application = "gfw"
10071007
gfw_title = get_value("title", data_object="metadata")
10081008

10091009
if not gfw_title and layer_names:
@@ -1012,7 +1012,7 @@ def get_value(key, default="", data_object=None):
10121012
if len(layer_name) == 1:
10131013
gfw_title = layer_name[0]
10141014

1015-
name = dataset_slug or munge_title_to_name(f"{base_name} {rw_application}")
1015+
name = munge_title_to_name(f"{base_name} {application}")
10161016

10171017
log_name = f'{name if name else "Unknown dataset"} -'
10181018

@@ -1090,6 +1090,7 @@ def get_value(key, default="", data_object=None):
10901090
"approval_status": approval_status,
10911091
"is_approved": is_approved,
10921092
"draft": is_draft,
1093+
"application": application,
10931094
"visibility_type": visibility_type,
10941095
}
10951096

@@ -1155,20 +1156,9 @@ def get_value(key, default="", data_object=None):
11551156
if valid_topics:
11561157
required_dataset_values["groups"] = valid_topics
11571158

1158-
try:
1159-
application_dict = ckan.action.group_show(id=dx_application)
1160-
required_dataset_values["groups"] = required_dataset_values.get(
1161-
"groups", []
1162-
) + [{"name": application_dict["name"]}]
1163-
except ckanapi.errors.NotFound:
1164-
log.error(f"{log_name} Application not found: {dx_application}")
1165-
log.error(
1166-
f"{log_name} The process will continue, but the dataset will not be associated with the desired application"
1167-
)
1168-
11691159
resources = []
11701160

1171-
if rw_application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
1161+
if application not in ["aqueduct", "aqueduct-water-risk"] and not gfw_only:
11721162
required_dataset_values["rw_id"] = resource["dataset_id"]
11731163

11741164
for layer in layers:

0 commit comments

Comments (0)