1
1
import re
2
2
import pandas as pd
3
+
3
4
from urllib .parse import urlparse
5
+ from shapely .geometry .base import BaseGeometry
6
+ from shapely .validation import explain_validity
7
+
8
+ from .types import PYTHON_TYPES
4
9
5
10
# Loose e-mail shape check: one or more non-"@" characters, an "@", then a
# domain part that contains at least one literal dot. Written as a raw string
# so that `\.` reaches the regex engine instead of being treated as an
# (invalid) Python string escape.
REGEX_EMAIL = re.compile(r"[^@]+@[^@]+\.[^@]+")
6
11
# Lower-case hyphenated UUID: the version nibble must be 1-5 and the variant
# nibble one of 8, 9, a, b. `\Z` anchors at the absolute end of the string, so
# a trailing newline is rejected. Written as a raw string so that `\Z` is not
# treated as an (invalid) Python string escape.
REGEX_UUID = re.compile(
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\Z"
)
@@ -12,6 +17,11 @@ def validate_column(data, rules):
12
17
# Skip validation for NaN values or implement special handling if required
13
18
continue
14
19
20
+ dtype = rules .get ('type' )
21
+ python_type = PYTHON_TYPES .get (dtype )
22
+ if python_type is not None and not isinstance (value , python_type ):
23
+ return [f"Value '{ value } ' is not of type { dtype } ." ]
24
+
15
25
if isinstance (value , str ):
16
26
issues = validate_string (value , rules )
17
27
elif isinstance (value , (int , float )):
@@ -20,6 +30,8 @@ def validate_column(data, rules):
20
30
issues = validate_array (value , rules )
21
31
elif isinstance (value , dict ):
22
32
issues = validate_object (value , rules )
33
+ elif isinstance (value , BaseGeometry ):
34
+ issues = validate_geometry (value , rules )
23
35
else :
24
36
continue
25
37
@@ -28,6 +40,21 @@ def validate_column(data, rules):
28
40
29
41
return []
30
42
43
+ # Geometry validation
44
def validate_geometry(value, rules):
    """Check a geometry against the column rules and collect any problems.

    Args:
        value: The geometry to check. It must expose ``geom_type`` and be
            accepted by ``explain_validity``.
        rules: Mapping of rule names to settings. Only ``"geometryTypes"``
            is read here: a collection of allowed geometry type names. A
            missing, ``None`` or empty entry disables the type check.

    Returns:
        A list of human-readable issue messages; empty when none were found.
    """
    issues = []

    # `or ()` covers both a missing key and an explicit None, which the
    # default argument of `dict.get` alone would not.
    allowed_types = rules.get("geometryTypes") or ()
    if allowed_types and value.geom_type not in allowed_types:
        allowed = ", ".join(allowed_types)
        issues.append(
            f"Geometry type '{value.geom_type}' is not one of the allowed types: {allowed}"
        )

    # Any explanation other than the 'Valid Geometry' marker is reported
    # verbatim as the reason the geometry is invalid.
    why = explain_validity(value)
    if why != "Valid Geometry":
        issues.append(f"Geometry {value} is not valid: {why}")

    return issues
57
+
31
58
# String validation
32
59
def validate_string (value , rules ):
33
60
issues = []
0 commit comments