Skip to content

Commit

Permalink
Fix some websites scrapers (#99)
Browse files Browse the repository at this point in the history
* chore: add clean command to package.json file

* feat(scraper): add URL to several scrapers

* fix(scraper): add missing field
  • Loading branch information
ricardofelgueiras authored May 27, 2024
1 parent 3c2afe9 commit 2cd850d
Show file tree
Hide file tree
Showing 16 changed files with 168 additions and 29 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"preview": "vite preview",
"lint": "eslint src -c ./.eslintrc.cjs --ext .jsx,.js,.ts,.tsx",
"check-types": "tsc --noEmit",
"prepare": "husky"
"prepare": "husky",
"clean": "rm -rf dist"
},
"prettier": "@rows/prettier-config",
"dependencies": {
Expand Down
8 changes: 8 additions & 0 deletions src/scrappers/babymarket-list-products.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ elementParser:
query: '.product__brand'
type: text

- title: Item model
query: '.product__model'
type: text

- title: Item Description
query: '.product__title'
type: text
Expand All @@ -17,3 +21,7 @@ elementParser:
- title: Price
query: '.product__price'
type: text

- title: URL
query: .product__link
type: link
10 changes: 9 additions & 1 deletion src/scrappers/babyone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ elementParser:
query: .product-name
type: text

- title: Item model
  # Class selector: the leading '.' is required. Without it the query is a
  # CSS type selector that only matches a <product-model> element.
  query: '.product-model'
  type: text

- title: Item brand
query: .product-brand
type: text
Expand All @@ -17,4 +21,8 @@ elementParser:

- title: Price
query: .sale-price
type: text
type: text

- title: URL
query: .product-link
type: link
14 changes: 13 additions & 1 deletion src/scrappers/babypark-de-list-products.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ url:
listElementsQuery: '#kuLandingProductsListUl > li'
elementParser:

- title: Item brand
  # Class selector: the leading '.' is required. Without it the query is a
  # CSS type selector that only matches a <kuBrand> element.
  query: '.kuBrand'
  type: text

- title: Item model
  # Class selector: the leading '.' is required. Without it the query is a
  # CSS type selector that only matches a <kuModel> element.
  query: '.kuModel'
  type: text

- title: Item Description
query: '.kuName'
type: text
Expand All @@ -13,4 +21,8 @@ elementParser:

- title: Price
query: '.kuSalePrice'
type: text
type: text

- title: URL
query: a
type: link
13 changes: 12 additions & 1 deletion src/scrappers/babypark-nl-list-products.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@ url:
- https://www.babypark.nl/search/*
listElementsQuery: '#kuLandingProductsListUl > li'
elementParser:
- title: Item brand
  # Class selector: the leading '.' is required. Without it the query is a
  # CSS type selector that only matches a <kuBrand> element.
  query: '.kuBrand'
  type: text

- title: Item model
  # Class selector: the leading '.' is required. Without it the query is a
  # CSS type selector that only matches a <kuModel> element.
  query: '.kuModel'
  type: text

- title: Item Description
query: '.kuName'
type: text
Expand All @@ -13,4 +20,8 @@ elementParser:

- title: Price
query: '.kuSalePrice'
type: text
type: text

- title: URL
query: a
type: link
17 changes: 12 additions & 5 deletions src/scrappers/bebe9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ url:
- https://www.bebe9.com/*
listElementsQuery: '.products-list > article'
elementParser:
- title: Item brand
query: .product-brand
type: text

- title: Item Description
query: .product-name
- title: Item model
query: '.product-model'
type: text

- title: Item brand
query: .product-brand
- title: Item Description
query: .product-name
type: text

- title: RRP
Expand All @@ -17,4 +20,8 @@ elementParser:

- title: Price
query: .product-price
type: text
type: text

- title: URL
query: a
type: link
17 changes: 12 additions & 5 deletions src/scrappers/bol.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ url:
- https://www.bol.com/*
listElementsQuery: .product-item, .product-item--row
elementParser:
- title: Item brand
query: '.product-brand'
type: text

- title: Item Description
query: .product-title
- title: Item model
query: '.product-model'
type: text

- title: Brand
query: '[itemprop="name"]'
- title: Item Description
query: .product-title
type: text

- title: RRP
Expand All @@ -17,4 +20,8 @@ elementParser:

- title: Price
query: .promo-price
type: text
type: text

- title: URL
query: a
type: link
17 changes: 14 additions & 3 deletions src/scrappers/cdiscount.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
url:
- https://www.cdiscount.com/*
listElementsQuery: '[data-cs-override-id="Offres-LP"] > li'
listElementsQuery: ul > li[data-sku]
elementParser:
- title: Item brand
query: '.brand'
type: text

- title: Item model
query: '.model'
type: text

- title: Item Description
query: h2
Expand All @@ -12,5 +19,9 @@ elementParser:
type: text

- title: Price
query: .c-price
type: text
query: .prdtBILPrice > .price
type: text

- title: URL
query: a
type: link
17 changes: 16 additions & 1 deletion src/scrappers/dreambaby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,26 @@ url:
- https://www.dreambaby.be/*
listElementsQuery: '[data-name="Product"]'
elementParser:
- title: Item brand
query: '.brand'
type: text

- title: Item model
query: '.model'
type: text

- title: Item Description
query: 'h3'
type: text

- title: RRP
query: 'empty'
type: text

- title: Price
query: '[itemprop="price"]'
type: text
type: text

- title: URL
query: a
type: link
17 changes: 12 additions & 5 deletions src/scrappers/johnlewis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ url:
- https://www.johnlewis.com/*
listElementsQuery: article
elementParser:
- title: Item brand
query: '[data-test="product-title"] > h2 > span:nth-child(1)'
type: text

- title: Item Description
query: '[data-test="product-title"] > h2 > span:nth-child(2)'
- title: Item model
query: '.model'
type: text

- title: Item brand
query: '[data-test="product-title"] > h2 > span:nth-child(1)'
- title: Item Description
query: '[data-test="product-title"] > h2 > span:nth-child(2)'
type: text

- title: RRP
Expand All @@ -17,4 +20,8 @@ elementParser:

- title: Price
query: '[data-test="product-card-price-now"]'
type: text
type: text

- title: URL
query: a
type: link
14 changes: 13 additions & 1 deletion src/scrappers/jollyroom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ url:
listElementsQuery: '.product-info'
elementParser:

- title: Item brand
query: '[itemprop="brand"]'
type: text

- title: Item model
query: '[itemprop="model"]'
type: text

- title: Item Description
query: h3
type: text
Expand All @@ -13,4 +21,8 @@ elementParser:

- title: Price
query: .price
type: text
type: text

- title: URL
query: a
type: link
10 changes: 9 additions & 1 deletion src/scrappers/pinkorblue-list-products-it.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ elementParser:
query: '.product__brand'
type: text

- title: Item model
query: '.product__model'
type: text

- title: Item Description
query: '.product__title'
type: text
Expand All @@ -16,4 +20,8 @@ elementParser:

- title: Price
query: '.product__price'
type: text
type: text

- title: URL
query: .product__link
type: link
10 changes: 9 additions & 1 deletion src/scrappers/pinkorblue-list-products-nl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ elementParser:
query: '.product__brand'
type: text

- title: Item model
query: '.product__model'
type: text

- title: Item Description
query: '.product__title'
type: text
Expand All @@ -16,4 +20,8 @@ elementParser:

- title: Price
query: '.product__price'
type: text
type: text

- title: URL
query: .product__link
type: link
10 changes: 9 additions & 1 deletion src/scrappers/pinkorblue-list-products.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ elementParser:
query: '.product__brand'
type: text

- title: Item model
query: '.product__model'
type: text

- title: Item Description
query: '.product__title'
type: text
Expand All @@ -16,4 +20,8 @@ elementParser:

- title: Price
query: '.product__price'
type: text
type: text

- title: URL
query: .product__link
type: link
10 changes: 9 additions & 1 deletion src/scrappers/rosaoazul-es-list-products.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ elementParser:
query: '.product__brand'
type: text

- title: Item model
query: '.product__model'
type: text

- title: Item Description
query: '.product__title'
type: text
Expand All @@ -24,4 +28,8 @@ elementParser:

- title: Price
query: '.product__price'
type: text
type: text

- title: URL
query: .product__link
type: link
Loading

0 comments on commit 2cd850d

Please sign in to comment.