diff --git a/itemloaders/__init__.py b/itemloaders/__init__.py
index 70a66de..d606a7e 100644
--- a/itemloaders/__init__.py
+++ b/itemloaders/__init__.py
@@ -5,7 +5,7 @@
 """
 from contextlib import suppress
 
-from itemadapter import ItemAdapter
+from itemadapter import get_field_meta_from_class, ItemAdapter
 from parsel.utils import extract_regex, flatten
 
 from itemloaders.common import wrap_loader_context
@@ -24,6 +24,24 @@ def unbound_method(method):
     return method
 
 
+class _Context(dict):
+    """Loader context whose ``'item'`` entry is resolved on first lookup.
+
+    While the stored ``'item'`` value is ``None``, ``context['item']`` asks
+    the given item loader for its item and caches the result.
+    """
+
+    def __init__(self, item_loader, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._item_loader = item_loader
+
+    def __getitem__(self, key):
+        value = super().__getitem__(key)
+        if key == 'item' and value is None:
+            value = self[key] = self._item_loader.item
+        return value
+
+
 class ItemLoader:
     """
     Return a new Item Loader for populating the given item. If no item is
@@ -102,17 +120,15 @@ class Product:
     def __init__(self, item=None, selector=None, parent=None, **context):
         self.selector = selector
         context.update(selector=selector)
-        if item is None:
-            item = self.default_item_class()
-        self._local_item = item
+        self._item = item
         context['item'] = item
-        self.context = context
+        self.context = _Context(parent or self, **context)
         self.parent = parent
         self._local_values = {}
-        # values from initial item
-        for field_name, value in ItemAdapter(item).items():
-            self._values.setdefault(field_name, [])
-            self._values[field_name] += arg_to_iter(value)
+        if item is not None:
+            for field_name, value in ItemAdapter(item).items():
+                self._values.setdefault(field_name, [])
+                self._values[field_name] += arg_to_iter(value)
 
     @property
     def _values(self):
@@ -126,37 +142,29 @@ def item(self):
         if self.parent is not None:
             return self.parent.item
         else:
-            return self._local_item
+            if self._item is None:
+                self._item = self.default_item_class(**self._values)
+            return self._item
 
     def nested_xpath(self, xpath, **context):
         """
         Create a nested loader with an xpath selector.
         The supplied selector is applied relative to selector associated
-        with this :class:`ItemLoader`. The nested loader shares the item
-        with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
-        :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
+        with this :class:`ItemLoader`.
         """
         selector = self.selector.xpath(xpath)
         context.update(selector=selector)
-        subloader = self.__class__(
-            item=self.item, parent=self, **context
-        )
-        return subloader
+        return self.__class__(parent=self, **context)
 
     def nested_css(self, css, **context):
         """
         Create a nested loader with a css selector.
         The supplied selector is applied relative to selector associated
-        with this :class:`ItemLoader`. The nested loader shares the item
-        with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
-        :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
+        with this :class:`ItemLoader`.
         """
         selector = self.selector.css(css)
         context.update(selector=selector)
-        subloader = self.__class__(
-            item=self.item, parent=self, **context
-        )
-        return subloader
+        return self.__class__(parent=self, **context)
 
     def add_value(self, field_name, value, *processors, **kw):
         """
@@ -305,7 +313,21 @@ def get_output_processor(self, field_name):
         return unbound_method(proc)
 
     def _get_item_field_attr(self, field_name, key, default=None):
-        field_meta = ItemAdapter(self.item).get_field_meta(field_name)
+        """Return the ``key`` metadata of ``field_name``, or ``default``.
+
+        Metadata comes from the root loader's item when one exists, and
+        from ``default_item_class`` otherwise, so that no item is created.
+        """
+        # ``item`` always resolves to the root loader's item, so nested
+        # loaders at any depth must consult the root rather than themselves.
+        root = self
+        while root.parent is not None:
+            root = root.parent
+        if root._item is not None:
+            field_meta = ItemAdapter(root._item).get_field_meta(field_name)
+        else:
+            item_class = self.default_item_class
+            field_meta = get_field_meta_from_class(item_class, field_name)
         return field_meta.get(key, default)
 
     def _process_input_value(self, field_name, value):
diff --git a/setup.py b/setup.py
index 85e0d3a..ea5d0f4 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@
         'w3lib>=1.17.0',
         'parsel>=1.5.0',
         'jmespath>=0.9.5',
-        'itemadapter>=0.1.0',
+        'itemadapter>=0.1.1',
     ],
     # extras_require=extras_require,
 )
diff --git a/tests/test_delayed_object_creation.py b/tests/test_delayed_object_creation.py
new file mode 100644
index 0000000..ee144ae
--- /dev/null
+++ b/tests/test_delayed_object_creation.py
@@ -0,0 +1,61 @@
+from collections.abc import MutableMapping
+from unittest import TestCase
+
+from parsel import Selector
+
+from itemloaders import ItemLoader
+
+
+EXPECTED_ERROR = RuntimeError
+
+
+class UninitializableItem(dict):
+
+    def __init__(self, *args, **kwargs):
+        raise EXPECTED_ERROR
+
+
+class UninitializableItemLoader(ItemLoader):
+    default_item_class = UninitializableItem
+
+
+class DelayedObjectCreationTestCase(TestCase):
+
+    def test_loader_creation(self):
+        UninitializableItemLoader()
+
+    def test_add(self):
+        selector = Selector(text="")
+        il = UninitializableItemLoader(selector=selector)
+        il.add_value('key', 'value')
+        il.add_css('key', 'html')
+        il.add_xpath('key', '//html')
+
+    def test_context(self):
+        il = UninitializableItemLoader()
+        context = il.context
+        with self.assertRaises(EXPECTED_ERROR):
+            context['item']
+
+    def test_load_item(self):
+        il = UninitializableItemLoader()
+        with self.assertRaises(EXPECTED_ERROR):
+            il.load_item()
+
+    def test_nested_loader_creation(self):
+        selector = Selector(text="")
+        il = UninitializableItemLoader(selector=selector)
+        il.nested_css('html')
+        il.nested_xpath('//html')
+
+    def test_nested_load_item(self):
+        selector = Selector(text="")
+        il = UninitializableItemLoader(selector=selector)
+
+        css_il = il.nested_css('html')
+        with self.assertRaises(EXPECTED_ERROR):
+            css_il.load_item()
+
+        xpath_il = il.nested_xpath('//html')
+        with self.assertRaises(EXPECTED_ERROR):
+            xpath_il.load_item()
diff --git a/tests/test_nested_loader.py b/tests/test_nested_loader.py
index 1e193d3..9971ff8 100644
--- a/tests/test_nested_loader.py
+++ b/tests/test_nested_loader.py
@@ -98,3 +98,26 @@ def test_nested_load_item(self):
         self.assertEqual(item['name'], ['marta'])
         self.assertEqual(item['url'], ['http://www.scrapy.org'])
         self.assertEqual(item['image'], ['/images/logo.png'])
+
+    def test_nested_from_item(self):
+        """Check that everything works as usual when the nested selector has a
+        parent item"""
+        item = {'foo': 'bar'}
+        loader = ItemLoader(selector=self.selector, item=item)
+        nl1 = loader.nested_xpath('//footer')
+        nl2 = nl1.nested_xpath('img')
+
+        loader.add_xpath('name', '//header/div/text()')
+        nl1.add_xpath('url', 'a/@href')
+        nl2.add_xpath('image', '@src')
+
+        item = loader.load_item()
+
+        assert item is loader.item
+        assert item is nl1.item
+        assert item is nl2.item
+
+        self.assertEqual(item['foo'], ['bar'])
+        self.assertEqual(item['name'], ['marta'])
+        self.assertEqual(item['url'], ['http://www.scrapy.org'])
+        self.assertEqual(item['image'], ['/images/logo.png'])
diff --git a/tests/test_output_processor.py b/tests/test_output_processor.py
index 54bb1fe..14175b4 100644
--- a/tests/test_output_processor.py
+++ b/tests/test_output_processor.py
@@ -4,8 +4,13 @@
 from itemloaders.processors import Identity, Compose, TakeFirst
 
 
-class TestOutputProcessorDict(unittest.TestCase):
-    def test_output_processor(self):
+def take_first(value):
+    return value[0]
+
+
+class TestOutputProcessor(unittest.TestCase):
+
+    def test_item_class(self):
 
         class TempDict(dict):
             def __init__(self, *args, **kwargs):
@@ -22,9 +27,8 @@ class TempLoader(ItemLoader):
         self.assertIsInstance(item, TempDict)
         self.assertEqual(dict(item), {'temp': 0.3})
 
+    def test_item_object(self):
 
-class TestOutputProcessorItem(unittest.TestCase):
-    def test_output_processor(self):
         class TempLoader(ItemLoader):
             default_input_processor = Identity()
             default_output_processor = Compose(TakeFirst())
@@ -35,3 +39,14 @@ class TempLoader(ItemLoader):
         item = loader.load_item()
         self.assertIsInstance(item, dict)
         self.assertEqual(dict(item), {'temp': 0.3})
+
+    def test_unbound_processor(self):
+        """Ensure that a processor not taking a `self` parameter does not break
+        anything"""
+
+        class TempLoader(ItemLoader):
+            default_output_processor = take_first
+
+        loader = TempLoader()
+        loader.add_value('foo', 'bar')
+        self.assertEqual(loader.load_item(), {'foo': 'bar'})
diff --git a/tox.ini b/tox.ini
index 47c65ce..f074fa1 100644
--- a/tox.ini
+++ b/tox.ini
@@ -8,7 +8,7 @@ deps =
 
 commands =
     py.test \
-        --cov-report=term --cov-report=html --cov-report= --cov=itemloaders \
+        --cov-report=term --cov-report=html --cov-report=term-missing --cov=itemloaders \
         --doctest-modules \
         {posargs:itemloaders tests}
 