Skip to content

Commit

Permalink
[shopify] use API for product listings (#1793)
Browse files — browse the repository at this point in the history
  • Loading branch information
mikf committed Aug 28, 2021
1 parent 292fffc commit 95157e0
Showing 1 changed file with 14 additions and 41 deletions.
55 changes: 14 additions & 41 deletions gallery_dl/extractor/shopify.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,6 @@

from .common import BaseExtractor, Message
from .. import text
import re


class ShopifyExtractor(BaseExtractor):
Expand All @@ -27,17 +26,7 @@ def items(self):
data = self.metadata()
yield Message.Directory, data

headers = {"X-Requested-With": "XMLHttpRequest"}
for url in self.products():
response = self.request(
url + ".json", headers=headers, fatal=False)
if response.status_code >= 400:
self.log.warning('Skipping %s ("%s: %s")',
url, response.status_code, response.reason)
continue
product = response.json()["product"]
del product["image"]

for product in self.products():
for num, image in enumerate(product.pop("images"), 1):
text.nameext_from_url(image["src"], image)
image.update(data)
Expand Down Expand Up @@ -84,34 +73,16 @@ def metadata(self):
return self.request(self.item_url + ".json").json()

def products(self):
params = {"page": 1}
fetch = True
last = None

for pattern in (
r"/collections/[\w-]+/products/[\w-]+",
r"href=[\"'](/products/[\w-]+)",
):
search_re = re.compile(pattern)

while True:
if fetch:
page = self.request(self.item_url, params=params).text
urls = search_re.findall(page)

if len(urls) < 3:
if last:
return
fetch = False
break
fetch = True

for path in urls:
if last == path:
continue
last = path
yield self.root + path
params["page"] += 1
url = self.item_url + "/products.json"

while url:
response = self.request(url)
yield from response.json()["products"]

url = response.links.get("next")
if not url:
return
url = url["url"]


class ShopifyProductExtractor(ShopifyExtractor):
Expand All @@ -132,4 +103,6 @@ class ShopifyProductExtractor(ShopifyExtractor):
)

def products(self):
return (self.item_url,)
product = self.request(self.item_url + ".json").json()["product"]
del product["image"]
return (product,)

0 comments on commit 95157e0

Please sign in to comment.