From c35a59a001abd21bb6231cd98e2e901fe5cd4b9e Mon Sep 17 00:00:00 2001 From: Victor Ruiz Date: Mon, 6 May 2024 15:28:01 +0200 Subject: [PATCH] Add support for fluent interface / method chaining in add_x or replace_x methods --- itemloaders/__init__.py | 58 ++++++++++++++++++++++++++--------- tests/test_selector_loader.py | 16 ++++++++++ 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/itemloaders/__init__.py b/itemloaders/__init__.py index 74a9970..f13bd77 100644 --- a/itemloaders/__init__.py +++ b/itemloaders/__init__.py @@ -191,7 +191,7 @@ def add_value( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Process and then add the given ``value`` for the given field. @@ -205,6 +205,9 @@ def add_value( multiple fields may be added. And the processed value should be a dict with field_name mapped to values. + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: loader.add_value('name', 'Color TV') @@ -212,6 +215,7 @@ def add_value( loader.add_value('length', '100') loader.add_value('name', 'name: foo', TakeFirst(), re='name: (.+)') loader.add_value(None, {'name': 'foo', 'sex': 'male'}) + """ value = self.get_value(value, *processors, re=re, **kw) if value is None: @@ -221,6 +225,7 @@ def add_value( self._add_value(k, v) else: self._add_value(field_name, value) + return self def replace_value( self, @@ -229,10 +234,13 @@ def replace_value( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`add_value` but replaces the collected data with the new value instead of adding it. + + :returns: The current ItemLoader instance for method chaining. 
+ :rtype: ItemLoader """ value = self.get_value(value, *processors, re=re, **kw) if value is None: @@ -242,6 +250,7 @@ def replace_value( self._replace_value(k, v) else: self._replace_value(field_name, value) + return self def _add_value(self, field_name: str, value: Any) -> None: value = arg_to_iter(value) @@ -387,7 +396,7 @@ def add_xpath( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a value, which is used to extract a list of strings from the @@ -398,6 +407,9 @@ def add_xpath( :param xpath: the XPath to extract data from :type xpath: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet:

<p class="product-name">Color TV</p>

@@ -407,7 +419,7 @@ def add_xpath( """ values = self._get_xpathvalues(xpath, **kw) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) def replace_xpath( self, @@ -416,12 +428,16 @@ def replace_xpath( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`add_xpath` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + """ values = self._get_xpathvalues(xpath, **kw) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) def get_xpath( self, @@ -468,7 +484,7 @@ def add_css( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives a CSS selector instead of a value, which is used to extract a list of unicode strings @@ -479,15 +495,19 @@ def add_css( :param css: the CSS selector to extract data from :type css: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet:

<p class="product-name">Color TV</p>

loader.add_css('name', 'p.product-name') # HTML snippet:

<p id="price">the price is $1200</p>

loader.add_css('price', 'p#price', re='the price is (.*)') + """ values = self._get_cssvalues(css) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) def replace_css( self, @@ -496,12 +516,16 @@ def replace_css( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`add_css` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + """ values = self._get_cssvalues(css) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) def get_css( self, @@ -545,7 +569,7 @@ def add_jmes( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector instead of a value, which is used to extract a list of unicode strings @@ -556,6 +580,9 @@ def add_jmes( :param jmes: the JMESPath selector to extract data from :type jmes: str + :returns: The current ItemLoader instance for method chaining. + :rtype: ItemLoader + Examples:: # HTML snippet: {"name": "Color TV"} @@ -564,7 +591,7 @@ def add_jmes( loader.add_jmes('price', TakeFirst(), re='the price is (.*)') """ values = self._get_jmesvalues(jmes) - self.add_value(field_name, values, *processors, re=re, **kw) + return self.add_value(field_name, values, *processors, re=re, **kw) def replace_jmes( self, @@ -573,12 +600,15 @@ def replace_jmes( *processors: Callable[..., Any], re: Union[str, Pattern[str], None] = None, **kw: Any, - ) -> None: + ) -> Self: """ Similar to :meth:`add_jmes` but replaces collected data instead of adding it. + + :returns: The current ItemLoader instance for method chaining. 
+ :rtype: ItemLoader """ values = self._get_jmesvalues(jmes) - self.replace_value(field_name, values, *processors, re=re, **kw) + return self.replace_value(field_name, values, *processors, re=re, **kw) def get_jmes( self, diff --git a/tests/test_selector_loader.py b/tests/test_selector_loader.py index 484c239..d52e2e3 100644 --- a/tests/test_selector_loader.py +++ b/tests/test_selector_loader.py @@ -273,3 +273,19 @@ def test_replace_jmes_re(self): self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)") self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) + + def test_fluent_interface(self): + loader = ItemLoader(selector=self.selector) + item = ( + loader.add_xpath("name", "//body/text()") + .replace_xpath("name", "//div/text()") + .add_css("description", "div::text") + .replace_css("description", "p::text") + .add_value("url", "http://example.com") + .replace_value("url", "http://foo") + .load_item() + ) + self.assertEqual( + item, + {"name": ["marta"], "description": ["paragraph"], "url": ["http://foo"]}, + )