@@ -98,13 +98,19 @@ def test_validation_errors_field(dummy_schema):
98
98
"SPIDERMON_VALIDATION_ERRORS_FIELD" : "custom_validation_field" ,
99
99
}
100
100
101
- item = {"no" : "schema" }
102
-
103
101
crawler = get_crawler (settings_dict = settings )
104
102
pipeline = ItemValidationPipeline .from_crawler (crawler )
103
+
104
+ # Instantiate validation field if not defined
105
+ item = {"no" : "schema" }
105
106
item = pipeline .process_item (item , None )
106
107
assert "custom_validation_field" in item
107
108
109
+ # Instantiate validation field if None
110
+ item = {"no" : "schema" , "custom_validation_field" : None }
111
+ item = pipeline .process_item (item , None )
112
+ assert item ["custom_validation_field" ] is not None
113
+
108
114
109
115
def test_add_error_to_items_undefined_validation_field (dummy_schema ):
110
116
settings = {
@@ -138,20 +144,84 @@ class DataclassItem:
138
144
foo : str
139
145
140
146
item = DataclassItem (foo = "invalid" )
141
- # Does not support item assignment
147
+ # Supports item assignment but does not support field
148
+ with pytest .raises (KeyError , match = "custom_validation_field" ):
149
+ item = pipeline .process_item (item , None )
150
+
151
+
152
+ def test_not_configured ():
153
+ # No validators
154
+ settings = {
155
+ "SPIDERMON_ENABLED" : True ,
156
+ "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS" : True ,
157
+ "SPIDERMON_VALIDATION_ERRORS_FIELD" : "custom_validation_field" ,
158
+ }
159
+ crawler = get_crawler (settings_dict = settings )
142
160
with pytest .raises (
143
- TypeError , match = "'DataclassItem' object does not support item assignment "
161
+ scrapy . exceptions . NotConfigured , match = "No validators were found "
144
162
):
145
- item = pipeline . process_item ( item , None )
163
+ ItemValidationPipeline . from_crawler ( crawler )
146
164
147
- @dataclass
148
- class DataclassItemWithItemAssignment :
149
- foo : str
165
+ # Invalid validator type
166
+ settings = {
167
+ "SPIDERMON_ENABLED" : True ,
168
+ "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS" : True ,
169
+ "SPIDERMON_VALIDATION_SCHEMAS" : object (),
170
+ "SPIDERMON_VALIDATION_ERRORS_FIELD" : "custom_validation_field" ,
171
+ }
172
+ crawler = get_crawler (settings_dict = settings )
173
+ with pytest .raises (
174
+ scrapy .exceptions .NotConfigured ,
175
+ match = r"Invalid <.*> type for <.*> settings" ,
176
+ ):
177
+ ItemValidationPipeline .from_crawler (crawler )
150
178
151
- def __setitem__ (self , key , value ):
152
- setattr (self , key , value )
179
+ # Invalid schema type
180
+ settings = {
181
+ "SPIDERMON_ENABLED" : True ,
182
+ "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS" : True ,
183
+ "SPIDERMON_VALIDATION_SCHEMAS" : [False ],
184
+ "SPIDERMON_VALIDATION_ERRORS_FIELD" : "custom_validation_field" ,
185
+ }
186
+ crawler = get_crawler (settings_dict = settings )
187
+ with pytest .raises (
188
+ scrapy .exceptions .NotConfigured ,
189
+ match = r"Invalid schema, jsonschemas must be defined as:.*" ,
190
+ ):
191
+ ItemValidationPipeline .from_crawler (crawler )
153
192
154
- item = DataclassItemWithItemAssignment (foo = "invalid" )
155
- # Supports item assignment but does not support field
156
- with pytest .raises (KeyError , match = "custom_validation_field" ):
157
- item = pipeline .process_item (item , None )
193
+
194
def test_drop_invalid_item(dummy_schema):
    """Expect ``DropItem`` from ``process_item`` when item dropping is enabled.

    The pipeline is built from a crawler whose settings enable
    ``SPIDERMON_VALIDATION_DROP_ITEMS_WITH_ERRORS`` and register
    ``dummy_schema`` as the only validation schema.
    """
    # NOTE(review): presumably {"foo": "invalid"} violates ``dummy_schema``;
    # the fixture is defined outside this view -- confirm.
    drop_settings = dict(
        SPIDERMON_ENABLED=True,
        SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS=True,
        SPIDERMON_VALIDATION_SCHEMAS=[dummy_schema],
        SPIDERMON_VALIDATION_DROP_ITEMS_WITH_ERRORS=True,
        SPIDERMON_VALIDATION_ERRORS_FIELD="custom_validation_field",
    )
    validation_pipeline = ItemValidationPipeline.from_crawler(
        get_crawler(settings_dict=drop_settings)
    )

    with pytest.raises(scrapy.exceptions.DropItem):
        validation_pipeline.process_item({"foo": "invalid"}, None)
209
+
210
+
211
def test_ignore_classes_without_schema(dummy_schema):
    """Process an item whose class has no schema entry and expect no error.

    ``SPIDERMON_VALIDATION_SCHEMAS`` maps only ``scrapy.Item`` to
    ``dummy_schema``; the item under test is a plain dataclass instance.
    The test passes as long as ``process_item`` does not raise.
    """

    @dataclass
    class DummyItem:
        foo: str = "bar"

    per_class_settings = dict(
        SPIDERMON_ENABLED=True,
        SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS=True,
        SPIDERMON_VALIDATION_SCHEMAS={scrapy.Item: dummy_schema},
        SPIDERMON_VALIDATION_DROP_ITEMS_WITH_ERRORS=True,
        SPIDERMON_VALIDATION_ERRORS_FIELD="custom_validation_field",
    )
    validation_pipeline = ItemValidationPipeline.from_crawler(
        get_crawler(settings_dict=per_class_settings)
    )

    # Dropping is enabled above, so a validation failure here would surface
    # as an exception rather than pass silently.
    validation_pipeline.process_item(DummyItem(), None)
0 commit comments