From ca9e1b910d6b2de9713739ab9cd1b1baacc9e785 Mon Sep 17 00:00:00 2001
From: Forest Gregg <fgregg@uchicago.edu>
Date: Sun, 12 Jul 2020 09:37:38 -0400
Subject: [PATCH] ann arbor scraper, orgs and bills

---
 annarbor/__init__.py    |  47 ++++++++
 annarbor/bills.py       | 236 ++++++++++++++++++++++++++++++++++++++++
 annarbor/events.py      |   9 ++
 annarbor/people.py      |  49 +++++++++
 annarbor/vote_events.py |   9 ++
 5 files changed, 350 insertions(+)
 create mode 100644 annarbor/__init__.py
 create mode 100644 annarbor/bills.py
 create mode 100644 annarbor/events.py
 create mode 100644 annarbor/people.py
 create mode 100644 annarbor/vote_events.py

diff --git a/annarbor/__init__.py b/annarbor/__init__.py
new file mode 100644
index 00000000..28a03cdc
--- /dev/null
+++ b/annarbor/__init__.py
@@ -0,0 +1,47 @@
+# encoding=utf-8
+from pupa.scrape import Jurisdiction, Organization
+# from .events import AnnarborEventScraper
+from .people import AnnarborPersonScraper
+from .bills import AnnarborBillScraper
+# from .vote_events import AnnarborVoteEventScraper
+
+
+class Annarbor(Jurisdiction):
+    division_id = "ocd-division/country:us/state:mi/place:ann_arbor"
+    classification = "legislature"
+    name = "City of Ann Arbor"
+    url = "https://www.a2gov.org/departments/city-council/Pages/Home.aspx"
+    scrapers = {
+        # "events": AnnarborEventScraper,
+        "people": AnnarborPersonScraper,
+        "bills": AnnarborBillScraper,
+        # "vote_events": AnnarborVoteEventScraper,
+    }
+
+    legislative_sessions = [{"identifier": "{}".format(year),
+                             "name": "{} Session".format(year),
+                             "start_date": "{}-11-15".format(year),
+                             "end_date": "{}-11-14".format(year + 1)}
+                            for year in range(2007, 2018)]
+    legislative_sessions.append({"identifier": "2018",
+                                 "name": "2018 Session",
+                                 "start_date": "2018-11-15",
+                                 "end_date": "2020-11-14"})
+    legislative_sessions.append({"identifier": "before"})
+
+    def get_organizations(self):
+        council = Organization(name="Ann Arbor City Council", classification="legislature")
+
+        # OPTIONAL: add posts to your organizaion using this format,
+        # where label is a human-readable description of the post (eg "Ward 8 councilmember")
+        # and role is the position type (eg councilmember, alderman, mayor...)
+        # skip entirely if you're not writing a people scraper.
+        # vorg.add_post(label="position_description", role="position_type")
+
+        yield council
+
+        clerk = Organization('City Clerk', classification='executive')
+        yield clerk
+
+        mayor = Organization("Mayor's Office", classification='executive')
+        yield mayor
diff --git a/annarbor/bills.py b/annarbor/bills.py
new file mode 100644
index 00000000..04db33b3
--- /dev/null
+++ b/annarbor/bills.py
@@ -0,0 +1,236 @@
+from legistar.bills import LegistarAPIBillScraper
+from pupa.scrape import Bill, VoteEvent, Scraper
+from pupa.utils import _make_pseudo_id
+import datetime
+import pytz
+
+
+class AnnarborBillScraper(LegistarAPIBillScraper, Scraper):
+    BASE_URL = 'http://webapi.legistar.com/v1/a2gov'
+    BASE_WEB_URL = 'https://a2gov.legistar.com'
+    TIMEZONE = "America/Detroit"
+
+    VOTE_OPTIONS = {'non-voting': 'not voting',
+                    'yea': 'yes',
+                    'nay': 'no',
+                    'recused': 'excused',
+                    }
+
+    def session(self, action_date):
+        localize = pytz.timezone(self.TIMEZONE).localize
+        if action_date < localize(datetime.datetime(2007, 11, 15)):
+            return "before"
+        elif action_date < localize(datetime.datetime(2008, 11, 15)):
+            return "2007"
+        elif action_date < localize(datetime.datetime(2009, 11, 15)):
+            return "2008"
+        elif action_date < localize(datetime.datetime(2010, 11, 15)):
+            return "2009"
+        elif action_date < localize(datetime.datetime(2011, 11, 15)):
+            return "2010"
+        elif action_date < localize(datetime.datetime(2012, 11, 15)):
+            return "2011"
+        elif action_date < localize(datetime.datetime(2013, 11, 15)):
+            return "2012"
+        elif action_date < localize(datetime.datetime(2014, 11, 15)):
+            return "2013"
+        elif action_date < localize(datetime.datetime(2015, 11, 15)):
+            return "2014"
+        elif action_date < localize(datetime.datetime(2016, 11, 15)):
+            return "2015"
+        elif action_date < localize(datetime.datetime(2017, 11, 15)):
+            return "2016"
+        elif action_date < localize(datetime.datetime(2018, 11, 15)):
+            return "2017"
+        elif action_date < localize(datetime.datetime(2020, 11, 15)):
+            return "2018"
+        elif action_date < localize(datetime.datetime(2022, 11, 15)):
+            return "2020"
+        import pdb
+        pdb.set_trace()
+
+    def sponsorships(self, matter_id):
+        for i, sponsor in enumerate(self.sponsors(matter_id)):
+            sponsorship = {}
+            if i == 0:
+                sponsorship['primary'] = True
+                sponsorship['classification'] = "Primary"
+            else:
+                sponsorship['primary'] = False
+                sponsorship['classification'] = "Regular"
+
+            sponsor_name = sponsor['MatterSponsorName'].strip()
+
+            sponsorship['name'] = sponsor_name
+            sponsorship['entity_type'] = 'person'
+
+            yield sponsorship
+
+    def actions(self, matter_id):
+        old_action = None
+        actions = self.history(matter_id)
+
+        for action in actions:
+            action_description = action['MatterHistoryActionName']
+            action_date = action['MatterHistoryActionDate']
+            action_text = action['MatterHistoryActionText']
+            responsible_org = action['MatterHistoryActionBodyName']
+            if responsible_org == 'City Council':
+                responsible_org = 'Ann Arbor City Council'
+
+            action_date = self.toTime(action_date).date()
+
+            responsible_person = None
+
+            bill_action = {'description': action_description,
+                           'date': action_date,
+                           'organization': {'name': responsible_org},
+                           'classification': None,
+                           'responsible person': responsible_person,
+                           }
+            if action_text:
+                bill_action['extras'] = {'text': action_text}
+
+            if bill_action != old_action:
+                old_action = bill_action
+            else:
+                continue
+
+            vote_possible = (action['MatterHistoryEventId'] is not None
+                             and action['MatterHistoryRollCallFlag'] is not None
+                             and action['MatterHistoryPassedFlag'] is not None)
+
+            if vote_possible:
+
+                bool_result = action['MatterHistoryPassedFlag']
+                result = 'pass' if bool_result else 'fail'
+
+                votes = (result, self.votes(action['MatterHistoryId']))
+            else:
+                votes = (None, [])
+
+            yield bill_action, votes, action['MatterHistoryId']
+
+    def scrape(self, window=3):
+        window = float(window)
+        if window:
+            n_days_ago = datetime.datetime.utcnow() - datetime.timedelta(window)
+        else:
+            n_days_ago = None
+
+        for matter in self.matters(n_days_ago):
+            matter_id = matter['MatterId']
+
+            date = matter['MatterIntroDate']
+            title = matter['MatterTitle']
+            identifier = matter['MatterFile']
+
+            # If a bill has a duplicate action item that's causing the entire scrape
+            # to fail, add it to the `problem_bills` array to skip it.
+            # For the time being...nothing to skip!
+
+            problem_bills = set()
+
+            if identifier in problem_bills:
+                continue
+
+            if not all((date, title, identifier)):
+                continue
+
+            if date == '5012-05-07T00:00:00':
+                continue
+            bill_session = self.session(self.toTime(date))
+            # bill_type = BILL_TYPES[matter['MatterTypeName']]
+
+            if identifier.startswith('S'):
+                alternate_identifiers = [identifier]
+                identifier = identifier[1:]
+            else:
+                alternate_identifiers = []
+
+            bill = Bill(identifier=identifier,
+                        legislative_session=bill_session,
+                        title=title,
+                        # classification=bill_type,
+                        from_organization={"name": "Ann Arbor City Council"})
+
+            legistar_web = matter['legistar_url']
+
+            legistar_api = 'http://webapi.legistar.com/v1/a2gov/matters/{0}'.format(matter_id)
+
+            bill.add_source(legistar_web, note='web')
+            bill.add_source(legistar_api, note='api')
+
+            for identifier in alternate_identifiers:
+                bill.add_identifier(identifier)
+
+            for action, vote, history_id in self.actions(matter_id):
+                responsible_person = action.pop('responsible person')
+                act = bill.add_action(**action)
+
+                if responsible_person:
+                    act.add_related_entity(responsible_person,
+                                           'person',
+                                           entity_id=_make_pseudo_id(name=responsible_person))
+
+                if action['description'] == 'Referred':
+                    body_name = matter['MatterBodyName']
+                    if body_name != 'City Council':
+                        act.add_related_entity(body_name,
+                                               'organization',
+                                               entity_id=_make_pseudo_id(name=body_name))
+
+                result, votes = vote
+                if result:
+                    vote_event = VoteEvent(legislative_session=bill.legislative_session,
+                                           motion_text=action['description'],
+                                           organization=action['organization'],
+                                           classification=None,
+                                           start_date=action['date'],
+                                           result=result,
+                                           bill=bill)
+
+                    # this is abusing the identifier, which is
+                    # supposed to be something from upstream
+                    # like rollcall # 132
+                    vote_event.identifier = str(history_id)
+
+                    if 'extras' in action:
+                        vote_event.extras['text'] = action['extras']['text']
+
+                    vote_event.add_source(legistar_web)
+                    vote_event.add_source(legistar_api + '/histories')
+
+                    for vote in votes:
+                        vote_value = vote['VoteValueName']
+                        if vote_value is None:
+                            continue
+                        raw_option = vote_value.lower()
+                        clean_option = self.VOTE_OPTIONS.get(raw_option,
+                                                             raw_option)
+                        vote_event.vote(clean_option,
+                                        vote['VotePersonName'].strip())
+
+                    yield vote_event
+
+            for sponsorship in self.sponsorships(matter_id):
+                bill.add_sponsorship(**sponsorship)
+
+            for topic in self.topics(matter_id):
+                bill.add_subject(topic['MatterIndexName'].strip())
+
+            for attachment in self.attachments(matter_id):
+                if attachment['MatterAttachmentName']:
+                    bill.add_version_link(attachment['MatterAttachmentName'],
+                                          attachment['MatterAttachmentHyperlink'],
+                                          media_type="application/pdf")
+
+            bill.extras = {'local_classification': matter['MatterTypeName']}
+
+            text = self.text(matter_id)
+
+            if text:
+                if text['MatterTextPlain']:
+                    bill.extras['plain_text'] = text['MatterTextPlain']
+
+            yield bill
diff --git a/annarbor/events.py b/annarbor/events.py
new file mode 100644
index 00000000..e6a2c845
--- /dev/null
+++ b/annarbor/events.py
@@ -0,0 +1,9 @@
+from pupa.scrape import Scraper
+from pupa.scrape import Event
+
+
+class AnnarborEventScraper(Scraper):
+
+    def scrape(self):
+        # needs to be implemented
+        pass
diff --git a/annarbor/people.py b/annarbor/people.py
new file mode 100644
index 00000000..67d60dbf
--- /dev/null
+++ b/annarbor/people.py
@@ -0,0 +1,49 @@
+from pupa.scrape import Organization, Scraper
+from legistar.people import LegistarAPIPersonScraper
+
+
+class AnnarborPersonScraper(LegistarAPIPersonScraper, Scraper):
+    BASE_URL = 'http://webapi.legistar.com/v1/a2gov'
+    WEB_URL = 'https://a2gov.legistar.com'
+    TIMEZONE = "America/Detroit"
+
+    def scrape(self):
+
+        for body in self.bodies():
+            body_type = body['BodyTypeName'].lower()
+            body_name = body['BodyName']
+
+            is_commission = ('commission' in body_type
+                             or body_name in {'Airport Advisory Committee',
+                                              'Election Commission',
+                                              'Building Board of Appeals',
+                                              'Taxicab Board',
+                                              "Employees' Retirement System Board of Trustees"})
+
+            if body_type == 'council committee':
+                o = Organization(body_name,
+                                 classification='committee',
+                                 parent_id={'name': 'Ann Arbor City Council'})
+
+                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
+                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')
+
+            elif is_commission:
+                o = Organization(body_name,
+                                 classification='commission')
+
+                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
+                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')
+
+            elif 'component unit' in body_type or 'community services area' in body_type:
+                o = Organization(body_name,
+                                 classification='corporation')
+
+                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
+                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')
+
+            else:
+                print(body)
+                continue
+
+            yield o
diff --git a/annarbor/vote_events.py b/annarbor/vote_events.py
new file mode 100644
index 00000000..fb68bb97
--- /dev/null
+++ b/annarbor/vote_events.py
@@ -0,0 +1,9 @@
+from pupa.scrape import Scraper
+from pupa.scrape import VoteEvent
+
+
+class AnnarborVoteEventScraper(Scraper):
+
+    def scrape(self):
+        # needs to be implemented
+        pass