@@ -202,7 +202,7 @@ def iso_language_code(value):
202
202
return value
203
203
204
204
205
- def url_validator (value ):
205
+ def url_validator (key , value ):
206
206
"""
207
207
Check that the value is a valid URL. Must start with "http://" or "https://".
208
208
"ftp://" is not currently supported.
@@ -217,17 +217,40 @@ def url_validator(value):
217
217
return value
218
218
219
219
if isinstance (value , str ):
220
+ # Attempt to handle markdown links
221
+ match = re .match (r"\[.*?\]\((https?://.*?)\)" , value )
222
+
223
+ if match :
224
+ log .info (f"{ key } - Markdown link found: { value } " )
225
+
226
+ extracted_url = match .group (1 )
227
+
228
+ log .info (f"{ key } - Attempting to extract URL: { extracted_url } " )
229
+
230
+ valid_markdown_url = url_validator (key , extracted_url )
231
+
232
+ if valid_markdown_url :
233
+ log .info (f"{ key } - URL successfully extracted: { valid_markdown_url } " )
234
+
235
+ return valid_markdown_url
236
+ else :
237
+ log .error (f"{ key } - Failed to extract URL from markdown link: { extracted_url } " )
238
+
239
+ return ""
240
+
220
241
if not value .startswith ("http://" ) and not value .startswith ("https://" ):
221
- error_message = ' Value must start with "http://" or "https://"'
242
+ error_message = f' { key } - Value must start with "http://" or "https://"'
222
243
else :
223
- error_message = "URL is not a string"
244
+ error_message = (
245
+ f"{ key } - Must be a string\n Value: { value } \n Value type: { type (value )} "
246
+ )
224
247
225
248
if error_message :
226
249
log .error (error_message )
227
250
228
251
return ""
229
- else :
230
- return value
252
+
253
+ return value
231
254
232
255
233
256
def normalize_value (value ):
@@ -506,7 +529,7 @@ def migrate_dataset(data_dict):
506
529
dataset_value = dataset .get (key )
507
530
508
531
if (
509
- not all (v in ["" , None ] for v in [dataset_value , value ])
532
+ not all (v in ["" , [], None ] for v in [dataset_value , value ])
510
533
and dataset_value != value
511
534
and key
512
535
not in [
@@ -1063,7 +1086,12 @@ def get_value(key, default="", data_object=None):
1063
1086
language = iso_language_code (language )
1064
1087
1065
1088
citation = get_value ("citation" )
1089
+
1066
1090
learn_more_link = get_value ("learn_more" ) or get_value ("learn_more_link" )
1091
+
1092
+ if learn_more_link :
1093
+ learn_more_link = url_validator ("learn_more" , learn_more_link )
1094
+
1067
1095
function = get_value ("functions" )
1068
1096
1069
1097
if function in [None , "" ]:
@@ -1075,7 +1103,7 @@ def get_value(key, default="", data_object=None):
1075
1103
data_download_link = get_value ("data_download_link" )
1076
1104
1077
1105
if data_download_link :
1078
- data_download_link = url_validator (data_download_link )
1106
+ data_download_link = url_validator ("data_download_link" , data_download_link )
1079
1107
1080
1108
extras = dataset .get ("extras" , [])
1081
1109
0 commit comments