diff --git a/README.md b/README.md deleted file mode 100644 index 6040b8a..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# scrapy-itemloaders - -This is an extraction of `ItemLoader` from `scrapy`'s codebase. \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..6f7c439 --- /dev/null +++ b/README.rst @@ -0,0 +1,76 @@ +=========== +itemloaders +=========== + +.. image:: https://img.shields.io/pypi/v/itemloaders.svg + :target: https://pypi.python.org/pypi/itemloaders + :alt: PyPI Version + +.. image:: https://img.shields.io/pypi/pyversions/itemloaders.svg + :target: https://pypi.python.org/pypi/itemloaders + :alt: Supported Python Versions + +.. image:: https://travis-ci.com/scrapy/itemloaders.svg?branch=master + :target: https://travis-ci.com/scrapy/itemloaders + :alt: Build Status + +.. image:: https://codecov.io/github/scrapy/itemloaders/coverage.svg?branch=master + :target: https://codecov.io/gh/scrapy/itemloaders + :alt: Coverage report + +.. image:: https://readthedocs.org/projects/itemloaders/badge/?version=latest + :target: https://itemloaders.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + + +``itemloaders`` is a library that helps you collect data from HTML and XML sources. + +It comes in handy to extract data from web pages, as it supports +data extraction using CSS and XPath Selectors. + +It's especially useful when you need to standardize the data from many sources. +For example, it allows you to have all your casting and parsing rules in a +single place. + +Here is an example to get you started:: + + from itemloaders import ItemLoader + from parsel import Selector + + html_data = ''' + <!DOCTYPE html> + <html> + <head> + <title>Some random product page</title> + </head> + <body> + <div class="product_name">Some random product page</div> + <p id="price">$ 100.12</p> + </body> + </html> + ''' + loader = ItemLoader(selector=Selector(html_data)) + loader.add_xpath('name', '//div[@class="product_name"]/text()') + loader.add_xpath('name', '//div[@class="product_title"]/text()') + loader.add_css('price', '#price::text') + loader.add_value('last_updated', 'today') # you can also use literal values + item = loader.load_item() + item + # {'name': ['Some random product page'], 'price': ['$ 100.12'], 'last_updated': ['today']} + +For more information, check out the `documentation <https://itemloaders.readthedocs.io/en/latest/>`_. + +Contributing +============ + +All contributions are welcome! + +* If you want to review some code, check open + `Pull Requests here <https://github.com/scrapy/itemloaders/pulls>`_ + +* If you want to submit a code change + + * File an `issue here <https://github.com/scrapy/itemloaders/issues>`_, if there isn't one yet + * Fork this repository + * Create a branch to work on your changes + * Push your local branch and submit a Pull Request diff --git a/setup.py b/setup.py index f6aa18c..db137d1 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -with open('README.md') as f: +with open('README.rst') as f: long_description = f.read() setup(