Skip to content

Commit

Permalink
beget fix (#281)
Browse files Browse the repository at this point in the history
  • Loading branch information
rsolovyeaws authored Apr 20, 2024
2 parents e4b8176 + 9d62675 commit b013b92
Showing 1 changed file with 40 additions and 21 deletions.
61 changes: 40 additions & 21 deletions src/grabber/nsreg/spiders/nsreg_beget.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
# -*- coding: utf-8 -*-
import scrapy

from ..base_site_spider import BaseSpiderComponent, EMPTY_PRICE, find_price
from ..items import NsregItem
import re
import logging


def find_price_beget(re_pattern, price):
    """Extract a numeric price from raw text scraped off a beget.com page.

    Args:
        re_pattern: regex whose first capture group holds the digits.
        price: raw node text; may be None (xpath miss) or contain
            surrounding words/whitespace.

    Returns:
        The price as a canonical float string (e.g. '300' -> '300.0'),
        or None when the text is missing or no number can be extracted.
    """
    # response.xpath(...).get() returns None when the node is absent —
    # don't let that crash the whole spider callback.
    if price is None:
        return None

    price = str(price).strip()
    if m := re.match(re_pattern, price):
        price = m.group(1)
        # Drop thousands-separator spaces inside the captured digits.
        price = re.sub(r'\s', '', price)

    try:
        # Normalise to a canonical float string ('300' -> '300.0').
        price = f'{float(price)}'
    except ValueError:
        # Regex missed or the page layout changed: log and fail soft
        # instead of raising out of the parse callback.
        logging.warning('find_price_beget: could not parse price from %r', price)
        return None

    logging.info('price = %s', price)
    return price


class NsregWebnamesSpider(scrapy.Spider):
Expand All @@ -20,43 +33,49 @@ def __init__(self, name=None, **kwargs):
regex={
'price_reg': r'([0-9]{3})',
'price_prolong': r'([0-9]{3})',
'price_change': r'.*?(\d+)\s*рублей.*',
'price_change': r'.*Стоимость переноса составляет (\d+) рублей.*',
},
path={
'price_reg': '/html/body/div[1]/div/div/div[2]/div[1]/div[2]/div[1]/div/div[2]/div/div[1]/div/div[1]/p[2]/text()',
'price_prolong': '/html/body/div[1]/div/div/div[2]/div[1]/div[2]/div[1]/div/div[2]/div/div[1]/div/div[2]/p[2]/text()',
'price_change': '/html/body/div[1]/div/div/div[3]/div[1]/div/div/div/div[3]/div[2]/div/div[1]/ul[3]/li[1]/text()',
'price_reg': '/html/body/div[1]/div/div/div/div[2]/div/div[2]/div[1]/div/div[2]/div/div[1]/div/div[1]/p[2]/text()',
'price_prolong': '//*[@id="app"]/div/div[2]/div[1]/div[2]/div[1]/div/div[2]/div/div[1]/div/div[2]/p[2]/text()',
'price_change': '/html/body/div[1]/div/div/div/div[3]/div[1]/div/div/div/div[3]/div[2]/div/div[1]/ul[3]/li[1]/text()',
},
)

def parse_price_change(self, response):
    """Parse the domain-transfer KB page and yield an item carrying price_change."""
    raw_text = response.xpath(self.component.path['price_change']).get()
    parsed = find_price(self.component.regex['price_change'], raw_text)

    item = NsregItem()
    item['name'] = self.site_names[0]

    prices = item.get('price', EMPTY_PRICE)
    prices['price_change'] = parsed
    item['price'] = prices

    yield item
def start_requests(self):
    """Seed the crawl with every start URL.

    Attaches the 'beget=begetok' cookie (normally set by the site's
    JavaScript bootstrap) so the server returns the real page content.
    """
    session_cookies = {'beget': 'begetok'}
    yield from (
        scrapy.Request(start_url, cookies=session_cookies, callback=self.parse)
        for start_url in self.start_urls
    )

def parse(self, response):
    """Parse the beget.com pricing page for registration/prolongation prices,
    then follow the KB page to collect the transfer (price_change) price.

    NOTE(review): this body appears to be merge/diff residue — it contains
    BOTH the old flow (bare yield of a Request at the top, then `return item`)
    AND the new flow (the `request = scrapy.Request(...)` tail), and everything
    after `return item` is unreachable. Needs manual reconciliation.
    """
    cookies = {'beget': 'begetok'}

    # Registration price from the main pricing page.
    price_reg = response.xpath(self.component.path['price_reg']).get()
    price_reg = find_price(self.component.regex['price_reg'], price_reg)

    # Prolongation price from the same page.
    price_prolong = response.xpath(self.component.path['price_prolong']).get()
    price_prolong = find_price(self.component.regex['price_prolong'], price_prolong)

    # NOTE(review): this Request carries no cookies and no item in meta —
    # presumably superseded by the `request = ...` block below; confirm.
    yield scrapy.Request(
        'https://beget.com/ru/kb/how-to/domains/kak-perenesti-domeny-v-beget',
        callback=self.parse_price_change,
    )

    item = NsregItem()
    item['name'] = self.site_names[0]
    price = item.get('price', EMPTY_PRICE)
    price['price_reg'] = price_reg
    price['price_prolong'] = price_prolong
    item['price'] = price

    # NOTE(review): `return item` inside a generator ends iteration here;
    # the code below it never executes.
    return item
    request = scrapy.Request(
        'https://beget.com/ru/kb/how-to/domains/kak-perenesti-domeny-v-beget',
        callback=self.parse_price_change,
        cookies=cookies
    )
    request.meta['item'] = item  # Pass the item along with the request
    yield request

def parse_price_change(self, response):
    """Complete the item handed over via request.meta with price_change and yield it."""
    carried_item = response.meta['item']

    raw = response.xpath(self.component.path['price_change']).get()
    extracted = find_price_beget(self.component.regex['price_change'], raw)

    prices = carried_item.get('price', EMPTY_PRICE)
    prices['price_change'] = extracted
    carried_item['price'] = prices

    yield carried_item

0 comments on commit b013b92

Please sign in to comment.