Commit 681db1b

Refactor/models import (#311)

andprov authored Jun 1, 2024
2 parents ad7eeda + cad550f
Showing 7 changed files with 35 additions and 174 deletions.
12 changes: 12 additions & 0 deletions src/grabber/injector.py
@@ -0,0 +1,12 @@
+import os
+import sys
+from pathlib import Path
+
+import django
+
+
+def load_django_settings(path: Path) -> None:
+    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "website.settings")
+    sys.path.append(str(path / "src" / "website"))
+    django.setup()
+    os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
24 changes: 0 additions & 24 deletions src/grabber/nsreg/manage.py

This file was deleted.

104 changes: 0 additions & 104 deletions src/grabber/nsreg/models.py

This file was deleted.

17 changes: 10 additions & 7 deletions src/grabber/nsreg/pipelines.py
@@ -5,17 +5,20 @@


 # useful for handling different item types with a single interface
-from .models import ParseHistory, Price, Registrator
+from catalog.models import ParseHistory, Price, Registrator


 class NsregPipeline:

     def process_item(self, item, spider):
-        price = item.get('price', {
-            'price_reg': None,
-            'price_prolong': None,
-            'price_change': None,
-        })
+        price = item.get(
+            "price",
+            {
+                "price_reg": None,
+                "price_prolong": None,
+                "price_change": None,
+            },
+        )

         Price.objects.create(
             price_reg=price["price_reg"],
@@ -30,4 +33,4 @@ def process_item(self, item, spider):
         return item

     def close_spider(self, spider):
-        ...
+        pass
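
The reworked default in process_item guards against items scraped without any price data. A small illustration of the fallback (the item contents here are hypothetical):

    # Hypothetical item yielded by a spider that found no prices:
    item = {"name": "Example Registrar", "website": "https://example.ru"}

    price = item.get(
        "price",
        {"price_reg": None, "price_prolong": None, "price_change": None},
    )

    # Missing prices become NULLs in Price.objects.create() rather than
    # raising a KeyError on price["price_reg"].
    assert price["price_reg"] is None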
38 changes: 10 additions & 28 deletions src/grabber/nsreg/settings.py
@@ -13,21 +13,22 @@
 import os
 import time
 from pathlib import Path
-from dotenv import load_dotenv

-load_dotenv()
+from injector import load_django_settings

 os.environ["TZ"] = "UTC"
 time.tzset()

 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 GRABBER_DIR = Path(__file__).resolve().parent.parent
 PROJECT_DIR = GRABBER_DIR.parent.parent

+load_django_settings(PROJECT_DIR)
+
-BOT_NAME = 'nsreg'
+BOT_NAME = "nsreg"

-SPIDER_MODULES = ['nsreg.spiders']
-NEWSPIDER_MODULE = 'nsreg.spiders'
+SPIDER_MODULES = ["nsreg.spiders"]
+NEWSPIDER_MODULE = "nsreg.spiders"


 # Crawl responsibly by identifying yourself (and your website) on the user-agent
@@ -80,7 +81,7 @@
 # Configure item pipelines
 # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 ITEM_PIPELINES = {
-    'nsreg.pipelines.NsregPipeline': 300,
+    "nsreg.pipelines.NsregPipeline": 300,
 }

 # Enable and configure the AutoThrottle extension (disabled by default)
@@ -105,26 +106,7 @@
 # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

 # Set settings whose default value is deprecated to a future-proof value
-REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.7'
-TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
-
-USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
-
-
-# DJANGO ORM SETTINGS
-DATABASES = {
-    'default': {
-        "ENGINE": "django.db.backends.postgresql",
-        "HOST": os.environ['HOSTNAME_DB'],
-        "NAME": os.environ['DATABASE_NAME'],
-        "USER": os.environ['USERNAME_DB'],
-        "PASSWORD": os.environ['PASSWORD_DB'],
-        "PORT": os.environ['PORT_DB'],
-    }
-}
-INSTALLED_APP = "catalog",
-
-DEFAULT_SETTINGS = {
-    "DATABASES": DATABASES,
-    "INSTALLED_APP": INSTALLED_APP,
-}
+REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
+TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+
+USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
6 changes: 3 additions & 3 deletions src/grabber/nsreg/spiders/monitor.py
@@ -1,6 +1,6 @@
 import scrapy

-from .. import models
+from catalog.models import ParseHistory, Registrator


 def has_data_changed(company, data):
@@ -17,7 +17,7 @@ class NSRegSpider(scrapy.Spider):
     start_urls = ['https://cctld.ru/domains/reg/']

     def __init__(self, name=None, **kwargs):
-        models.ParseHistory.objects.create()
+        ParseHistory.objects.create()

     def parse(self, response):
         for reg in response.xpath('//*[@id="registrator-list"]/div/div'):
@@ -29,7 +29,7 @@ def parse(self, response):
                 'website': reg.xpath('div/a/@href').get()
             }

-            company, created = models.Registrator.objects.get_or_create(name=data['name'])
+            company, created = Registrator.objects.get_or_create(name=data['name'])

             # Check whether the data has changed
             if created or has_data_changed(company, data):
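
The body of has_data_changed is collapsed in this view. A plausible implementation, given the name and website fields the spider collects, might compare each scraped value against the stored Registrator; both the field set and the comparison logic are assumptions:

    def has_data_changed(company, data):
        # Assumed sketch: the real collapsed implementation may differ.
        for field, value in data.items():
            if getattr(company, field, None) != value:
                return True
        return False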
8 changes: 0 additions & 8 deletions src/website/catalog/models.py
@@ -22,14 +22,6 @@ class Meta:
         app_label = 'catalog'


-# class Domain(models.Model):
-#     id = models.BigAutoField(primary_key=True)
-#     name = models.CharField(max_length=255)
-#
-#     def __str__(self):
-#         return self.name
-
-
 class ParseHistory(models.Model):
     id = models.BigAutoField(primary_key=True)
     date = models.DateTimeField("Дата парсинга", auto_now=True)
