Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Edmonton #333

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions ca_ab_edmonton/people.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,33 @@
import re

from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "https://www.edmonton.ca/city_government/city_organization/city-councillors.aspx"
MAYOR_PAGE = "https://www.edmonton.ca/city_government/city_organization/the-mayor.aspx"
COUNCIL_PAGE = "https://www.edmonton.ca/city_government/city_organization/city-councillors"
MAYOR_PAGE = "https://www.edmonton.ca/city_government/city_organization/the-mayor"


class EdmontonPersonScraper(CanadianScraper):
def scrape(self):
yield self.scrape_mayor()
page = self.lxmlize(COUNCIL_PAGE)

councillors = page.xpath('//div[contains(@class, "documentexcerpt-module__item")]')
councillors = page.xpath('.//div[contains(@class, "feature-box__title")]')
assert len(councillors), "No councillors found"
for cell in councillors:
name = cell[1].text
if name == "Vacant":
name = cell.xpath("./a")[0].text_content()
if "Vacant" in name:
continue

page_url = cell[0].attrib["href"]
page_url = cell.xpath("./a/@href")[0]
page = self.lxmlize(page_url)
district_name = page.xpath('//h1[contains(@class, "page-title")]')[0].text_content()
district_name = page.xpath(
'//h1[contains(@class, "page-title")]|//h1[contains(@class, "page-title page-title--black-content-page")]'
)[0].text_content()
district, name = district_name.split(" - ", 1)
district = district.replace("Ward ", "")
if " " in district and re.search("[^A-Za-z ]", district):
district = district.split()[0]

p = Person(primary_org="legislature", name=name, district=district, role="Councillor")
p.add_source(COUNCIL_PAGE)
Expand Down
37 changes: 25 additions & 12 deletions country-ca.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5046,6 +5046,7 @@ ocd-division/country:ca/csd:3521024/ward:2,Ward 2,,,,,,,,,,,,,,
ocd-division/country:ca/csd:3521024/ward:3,Ward 3,,,,,,,,,,,,,,
ocd-division/country:ca/csd:3521024/ward:4,Ward 4,,,,,,,,,,,,,,
ocd-division/country:ca/csd:3521024/ward:5,Ward 5,,,,,,,,,,,,,,
ocd-division/country:ca/csd:3521024/ward:6,Ward 6,,,,,,,,,,,,,,
ocd-division/country:ca/csd:3522001,East Garafraxa,,,,,TP,,East Garafraxa,,Township of East Garafraxa,ocd-division/country:ca/cd:3522,,,,
ocd-division/country:ca/csd:3522008,Amaranth,,,,,TP,,Amaranth,,Township of Amaranth,ocd-division/country:ca/cd:3522,,,,
ocd-division/country:ca/csd:3522010,Grand Valley,,,,,T,,Grand Valley,,Town of Grand Valley,ocd-division/country:ca/cd:3522,,,,
Expand Down Expand Up @@ -7411,18 +7412,30 @@ ocd-division/country:ca/csd:4811052/ward:8,Ward 8,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811056,Fort Saskatchewan,,,,,CY,N,Fort Saskatchewan,,City of Fort Saskatchewan,,,,,
ocd-division/country:ca/csd:4811059,Sturgeon County,,,,,MD,Y,Sturgeon County,,Municipality of Sturgeon County,,,,,
ocd-division/country:ca/csd:4811061,Edmonton,,,,,CY,Y,Edmonton,,City of Edmonton,,,,,
ocd-division/country:ca/csd:4811061/ward:1,Ward 1,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:10,Ward 10,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:11,Ward 11,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:12,Ward 12,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:2,Ward 2,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:3,Ward 3,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:4,Ward 4,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:5,Ward 5,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:6,Ward 6,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:7,Ward 7,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:8,Ward 8,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:9,Ward 9,,,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:1,Ward 1,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:10,Ward 10,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:11,Ward 11,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:12,Ward 12,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:2,Ward 2,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:3,Ward 3,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:4,Ward 4,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:5,Ward 5,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:6,Ward 6,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:7,Ward 7,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:8,Ward 8,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:9,Ward 9,,2020-12-06,,,,,,,,,,,,
ocd-division/country:ca/csd:4811061/ward:anirniq,Anirniq,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:dene,Dene,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:ipiihkoohkanipiaohtsi,Ipiihkoohkanipiaohtsi,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:karhiio,Karhiio,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:metis,Métis,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:nakota_isga,Nakota Isga,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:o~day~min,O-day'min,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:papastew,papastew,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:pihesiwin,pihêsiwin,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:sipiwiyiniwak,sipiwiyiniwak,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:sspomitapi,Sspomitapi,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811061/ward:tastawiyiniwak,tastawiyiniwak,,,,,,,,,,,,,,2020-12-07
ocd-division/country:ca/csd:4811062,St. Albert,,,,,CY,N,St. Albert,,City of St. Albert,,,,,
ocd-division/country:ca/csd:4811064,Gibbons,,,,,T,N,Gibbons,,Town of Gibbons,,,,,
ocd-division/country:ca/csd:4811065,Redwater,,,,,T,N,Redwater,,Town of Redwater,,,,,
Expand Down