diff --git a/ca_ab_edmonton/people.py b/ca_ab_edmonton/people.py index 6a69f0a3..6bf1f521 100644 --- a/ca_ab_edmonton/people.py +++ b/ca_ab_edmonton/people.py @@ -1,8 +1,10 @@ +import re + from utils import CanadianPerson as Person from utils import CanadianScraper -COUNCIL_PAGE = "https://www.edmonton.ca/city_government/city_organization/city-councillors.aspx" -MAYOR_PAGE = "https://www.edmonton.ca/city_government/city_organization/the-mayor.aspx" +COUNCIL_PAGE = "https://www.edmonton.ca/city_government/city_organization/city-councillors" +MAYOR_PAGE = "https://www.edmonton.ca/city_government/city_organization/the-mayor" class EdmontonPersonScraper(CanadianScraper): @@ -10,17 +12,22 @@ def scrape(self): yield self.scrape_mayor() page = self.lxmlize(COUNCIL_PAGE) - councillors = page.xpath('//div[contains(@class, "documentexcerpt-module__item")]') + councillors = page.xpath('.//div[contains(@class, "feature-box__title")]') assert len(councillors), "No councillors found" for cell in councillors: - name = cell[1].text - if name == "Vacant": + name = cell.xpath("./a")[0].text_content() + if "Vacant" in name: continue - page_url = cell[0].attrib["href"] + page_url = cell.xpath("./a/@href")[0] page = self.lxmlize(page_url) - district_name = page.xpath('//h1[contains(@class, "page-title")]')[0].text_content() + district_name = page.xpath( + '//h1[contains(@class, "page-title")]|//h1[contains(@class, "page-title page-title--black-content-page")]' + )[0].text_content() district, name = district_name.split(" - ", 1) + district = district.replace("Ward ", "") + if " " in district and re.search("[^A-Za-z ]", district): + district = district.split()[0] p = Person(primary_org="legislature", name=name, district=district, role="Councillor") p.add_source(COUNCIL_PAGE) diff --git a/country-ca.csv b/country-ca.csv index eea6a7ad..3dbb69c6 100644 --- a/country-ca.csv +++ b/country-ca.csv @@ -5046,6 +5046,7 @@ ocd-division/country:ca/csd:3521024/ward:2,Ward 2,,,,,,,,,,,,,, ocd-division/country:ca/csd:3521024/ward:3,Ward 3,,,,,,,,,,,,,, ocd-division/country:ca/csd:3521024/ward:4,Ward 4,,,,,,,,,,,,,, ocd-division/country:ca/csd:3521024/ward:5,Ward 5,,,,,,,,,,,,,, +ocd-division/country:ca/csd:3521024/ward:6,Ward 6,,,,,,,,,,,,,, ocd-division/country:ca/csd:3522001,East Garafraxa,,,,,TP,,East Garafraxa,,Township of East Garafraxa,ocd-division/country:ca/cd:3522,,,, ocd-division/country:ca/csd:3522008,Amaranth,,,,,TP,,Amaranth,,Township of Amaranth,ocd-division/country:ca/cd:3522,,,, ocd-division/country:ca/csd:3522010,Grand Valley,,,,,T,,Grand Valley,,Town of Grand Valley,ocd-division/country:ca/cd:3522,,,, @@ -7411,18 +7412,30 @@ ocd-division/country:ca/csd:4811052/ward:8,Ward 8,,,,,,,,,,,,,, ocd-division/country:ca/csd:4811056,Fort Saskatchewan,,,,,CY,N,Fort Saskatchewan,,City of Fort Saskatchewan,,,,, ocd-division/country:ca/csd:4811059,Sturgeon County,,,,,MD,Y,Sturgeon County,,Municipality of Sturgeon County,,,,, ocd-division/country:ca/csd:4811061,Edmonton,,,,,CY,Y,Edmonton,,City of Edmonton,,,,, -ocd-division/country:ca/csd:4811061/ward:1,Ward 1,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:10,Ward 10,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:11,Ward 11,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:12,Ward 12,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:2,Ward 2,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:3,Ward 3,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:4,Ward 4,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:5,Ward 5,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:6,Ward 6,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:7,Ward 7,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:8,Ward 8,,,,,,,,,,,,,, -ocd-division/country:ca/csd:4811061/ward:9,Ward 9,,,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:1,Ward 1,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:10,Ward 10,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:11,Ward 11,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:12,Ward 12,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:2,Ward 2,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:3,Ward 3,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:4,Ward 4,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:5,Ward 5,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:6,Ward 6,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:7,Ward 7,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:8,Ward 8,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:9,Ward 9,,2020-12-06,,,,,,,,,,,, +ocd-division/country:ca/csd:4811061/ward:anirniq,Anirniq,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:dene,Dene,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:ipiihkoohkanipiaohtsi,Ipiihkoohkanipiaohtsi,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:karhiio,Karhiio,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:metis,Métis,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:nakota_isga,Nakota Isga,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:o~day~min,O-day'min,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:papastew,papastew,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:pihesiwin,pihêsiwin,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:sipiwiyiniwak,sipiwiyiniwak,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:sspomitapi,Sspomitapi,,,,,,,,,,,,,,2020-12-07 +ocd-division/country:ca/csd:4811061/ward:tastawiyiniwak,tastawiyiniwak,,,,,,,,,,,,,,2020-12-07 ocd-division/country:ca/csd:4811062,St. Albert,,,,,CY,N,St. Albert,,City of St. Albert,,,,, ocd-division/country:ca/csd:4811064,Gibbons,,,,,T,N,Gibbons,,Town of Gibbons,,,,, ocd-division/country:ca/csd:4811065,Redwater,,,,,T,N,Redwater,,Town of Redwater,,,,,