Skip to content

Commit

Permalink
Upgrade PointOfInterest structure, more changes after fresh rebase.
Browse files (browse the repository at this point in the history)
  • Loading branch information
murchik committed Jun 7, 2022
1 parent d6d9072 commit a93833e
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
20 changes: 11 additions & 9 deletions scraping/spiders/poland_rjps.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ def parse(self, response, category: str):
'address': self._get_address(response),
'lat': '',
'lng': '',
'category': category or DEFAULT_CATEGORY,
'organizations': '',
'description': self._get_description(response)
'categories': [category or DEFAULT_CATEGORY],
'organizations': ['Poland RJPS'],
'description': self._get_description(response),
'phone': self._get_phone(response),
'email': self._get_email(response),
'url': self._get_website(response),
}

def _get_name(self, response):
Expand All @@ -74,11 +77,9 @@ def _get_address(self, response):
return ''.join(lines)

def _get_description(self, response):
rows = [self._get_email(response),
self._get_phone(response),
self._get_website(response),
self._get_update_date(response)]

rows = [
self._get_update_date(response),
]
return '\n'.join(map(self._clean_spaces, rows))

def _get_email(self, response):
Expand All @@ -88,7 +89,8 @@ def _get_phone(self, response):
return response.css('div[title=Telefon] > div > span.wrap-anywhere::text').get() or ''

def _get_website(self, response):
return response.css('div[title="Strona www"] > div > div::text').get() or ''
url = response.css('div[title="Strona www"] > div > div::text').get() or ''
return url.strip()

def _get_update_date(self, response):
data = self._clean_spaces(response.css('body > div > div > div > div.data-aktualizacji::text').get()) or ''
Expand Down
12 changes: 6 additions & 6 deletions tests/spiders/test_poland_rjps.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,16 @@ def test_parse_address(self, normal_place):
assert address == '16-060 Zabłudów ul. Rynek 8'

def test_parse_email(self, normal_place):
description = PolandRJPSSpider().parse(normal_place, category='test').get('description')
assert '[email protected]' in description
point = PolandRJPSSpider().parse(normal_place, category='test')
assert '[email protected]' == point.get('email')

def test_parse_phone(self, normal_place):
description = PolandRJPSSpider().parse(normal_place, category='test').get('description')
assert 'tel. 85 7188100' in description
point = PolandRJPSSpider().parse(normal_place, category='test')
assert 'tel. 85 7188100' == point.get('phone')

def test_parse_website(self, normal_place):
description = PolandRJPSSpider().parse(normal_place, category='test').get('description')
assert 'http://bip.mops.um.zabludow.wrotapodlasia.pl' in description
point = PolandRJPSSpider().parse(normal_place, category='test')
assert 'http://bip.mops.um.zabludow.wrotapodlasia.pl' == point.get('url')

def test_parse_update_date(self, normal_place):
description = PolandRJPSSpider().parse(normal_place, category='test').get('description')
Expand Down
11 changes: 6 additions & 5 deletions tests/test_address_santizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from models.point_of_interest import PointOfInterest
from implemented import ConvertSpreadsheetData


@pytest.fixture
def point_of_interest():
return PointOfInterest(
Expand All @@ -15,9 +16,9 @@ def point_of_interest():
address='ul. Zamenhofa 1, 00-153',
lat='',
lng='',
categories="General",
organizations='Fundacja “Nasz Wybór”',
description='Crisis support center'
categories=['General'],
organizations=['Fundacja “Nasz Wybór”'],
description='Crisis support center',
)


Expand All @@ -34,7 +35,7 @@ def fake_init_google_maps(key):


def test_address_sanitizer(point_of_interest: PointOfInterest):

addressSanitizer: AddressSanitizer = ConvertSpreadsheetData(
make_places_request=fake_make_places_request,
init_google_maps=fake_init_google_maps).address_sanitizer
Expand All @@ -46,7 +47,7 @@ def test_address_sanitizer(point_of_interest: PointOfInterest):
#Test with IO

# def test_address_sanitizer(point_of_interest: PointOfInterest):

# addressSanitizer = AddressSanitizer(Settings(), print)
# addressSanitizer.settings.sanitize_address = True
# result = addressSanitizer.sanitize([point_of_interest])
Expand Down

0 comments on commit a93833e

Please sign in to comment.