Skip to content

Commit e5c3c3f

Browse files
authored
People: Cleanup/Remove cli cmd and data migration script for agency address parsed from portrait field
TYPE: Feature LINK: ogc-1053
1 parent cc5f3c0 commit e5c3c3f

File tree

3 files changed

+0
-455
lines changed

3 files changed

+0
-455
lines changed

Diff for: src/onegov/people/cli.py

-288
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import sys
22

33
import click
4-
import re
54
import transaction
65

76
from collections import OrderedDict
8-
from bs4 import BeautifulSoup
97

108
from onegov.core.cli import command_group
119
from onegov.core.cli import abort
12-
from onegov.people import Agency
1310
from onegov.people.models import Person
1411
from openpyxl import load_workbook
1512
from openpyxl import Workbook
@@ -168,288 +165,3 @@ def _import(request: 'CoreRequest', app: 'Framework') -> None:
168165
click.secho(f'Imported {count} person(s)', fg='green')
169166

170167
return _import
171-
172-
173-
# Address patterns, tried by `parse_and_split_address_field` in the order
# p2, p3, p4, p1, p6 (most specific first).
p2 = re.compile(r'(.*), (.*)Postadresse: (.*), (.*)')
p3 = re.compile(r'(.*), (Postfach), (.*)')
p4 = re.compile(r'(.*), (.*), (.*)')
p1 = re.compile(r'(.*), (.*)')
p6 = re.compile(r'(.*)\n(.*)')
# street name and optional building number; no longer consulted by
# `parse_and_split_address_field` (see the fallback there), the name is kept
# so that anything referring to it keeps working
p5 = re.compile(r'([A-Za-z ]*) ?(\d+[a-z]?)?')


def parse_and_split_address_field(address: str) -> tuple[str, str, str, str]:
    """
    Parsing the `address` field to split into location address and code/city
    as well as postal address and code/city.

    Semicolons are removed first. The patterns are then tried from the most
    to the least specific one; the first that matches decides the result.
    If none matches, the whole address is returned as postal address.

    :param address: str
    :return: tuple: (location_address, location_code_city,
        postal_address, postal_code_city)
    """
    # sanitize address: drop semicolons, together with a directly following
    # blank if there is one
    address = address.replace('; ', '').replace(';', '')

    if not address:
        return '', '', '', ''

    # '<location>, <code city>Postadresse: <postal>, <code city>'
    if m := p2.match(address):
        return m.group(1), m.group(2), m.group(3), m.group(4)

    # three comma separated parts; p3 has to be asked before p4 because the
    # two may split a string containing 'Postfach' at different commas
    if m := (p3.match(address) or p4.match(address)):
        return '', '', m.group(1) + '\n' + m.group(2), m.group(3)

    # '<postal>, <code city>'
    if m := p1.match(address):
        return '', '', m.group(1), m.group(2)

    # multi-line address without ', ': the last line is the code/city
    if p6.match(address):
        postal_addr, postal_pcc = address.rsplit('\n', 1)
        return '', '', postal_addr, postal_pcc

    # Nothing structured was recognised. The former fallback matched the
    # ASCII-only pattern p5 against the start of the string and kept only
    # that prefix, so 'Zürcherstrasse 12' was cut down to 'Z'. Keeping the
    # address as it is gives the same result wherever p5 covered the whole
    # string and loses nothing elsewhere.
    return '', '', address, ''
238-
239-
240-
@cli.command('migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates onegov_agency people address field.

    Migrate data from onegov_agency table 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_agency/bs migrate-people-address-field

        onegov-people --select /onegov_agency/bs migrate-people-address-field
        --dry-run

    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        session = app.session()
        click.secho("Migrate data from table 'people' column 'address' "
                    "field to 'location_address', 'location_code_city', "
                    "'postal_address' and 'postal_code_city ..",
                    fg='yellow')

        total_count = 0  # every person seen
        migration_count = 0  # persons that had an address to split
        for total_count, person in enumerate(session.query(Person), start=1):
            if person.address:
                parts = parse_and_split_address_field(person.address)
                person.location_address = parts[0]
                person.location_code_city = parts[1]
                person.postal_address = parts[2]
                person.postal_code_city = parts[3]
                migration_count += 1

        if dry_run:
            # discard the assignments made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(f'Migrated all {migration_count} address(es) of totally '
                    f'{total_count} people', fg='green')

    return _migrate
292-
293-
294-
@cli.command('onegov-migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def onegov_migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates people address field everywhere in onegov.

    Migrate data from 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_town6/ebikon
        onegov-migrate-people-address-field

        onegov-people --select /onegov_org/risch
        onegov-migrate-people-address-field --dry-run

    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        click.secho(f'Request url: {request.url}..')
        session = app.session()
        click.secho("Onegov migrate data from table 'people' column "
                    "'address' field to 'location_address', "
                    "'location_code_city', 'postal_address' and "
                    "'postal_code_city ..",
                    fg='yellow')

        total_count = 0  # every person seen
        migration_count = 0  # persons that had an address to split
        for total_count, person in enumerate(session.query(Person), start=1):
            if person.address:
                parts = parse_and_split_address_field(person.address)
                person.location_address = parts[0]
                person.location_code_city = parts[1]
                person.postal_address = parts[2]
                person.postal_code_city = parts[3]
                migration_count += 1

        if dry_run:
            # discard the assignments made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(f'Migrated all {migration_count} address(es) of totally '
                    f'{total_count} people', fg='green')

    return _migrate
349-
350-
351-
re_postal_code_city_ch = re.compile(r'\d{4} .*')  # e.g. '1234 Mein Ort'
re_postal_code_city_de = re.compile(r'D-\d{5} .*')  # e.g. 'D-12345 Mein Ort'


def parse_agency_portrait_field_for_address(
    portrait: str
) -> tuple[str, str, str, str]:
    """
    Parsing the `portrait` field of agencies and extract address and
    code/city as well as location address and city if present.

    The html is converted to text and scanned line by line for a postal
    code/city. The line before a hit is taken as the address. When a further
    code/city line is found, the address found before it is treated as the
    location address and the new one as the postal address.

    :param portrait: html str
    :return: tuple: (location_addr, location_pcc ,postal_address,
        postal_code_city)
    """
    location_addr = ''
    location_pcc = ''
    postal_addr = ''
    postal_pcc = ''
    plz_city_found_idx = -1  # index of the previous hit, -1 = none so far

    # convert from html to text using soup
    soup = BeautifulSoup(portrait, "html.parser")
    lines = soup.get_text('\n').split('\n')

    for idx, line in enumerate(lines):
        m = (re_postal_code_city_ch.match(line)
             or re_postal_code_city_de.match(line))
        if not m:
            continue

        # Compare against the sentinel explicitly: a plain truth test is
        # true for -1 (no hit yet) and false for 0, which dropped a first
        # code/city found on the very first line when a second one followed.
        if plz_city_found_idx >= 0:
            # assuming address initially found was location address
            location_addr = postal_addr
            location_pcc = postal_pcc

        postal_pcc = m.group(0)
        # empty if only code/city, no street and number
        postal_addr = lines[idx - 1] if idx > 0 else ''

        # only extend postal address 'Postfach' with street/house number if
        # previous line is at least two lines away
        # Dorfstrasse 1, Postfach, 1234 Govikon
        if (
            'postfach' in postal_addr.lower()
            and (plz_city_found_idx + 2 < idx)
            and idx >= 2 and lines[idx - 2] != ''
        ):
            postal_addr = lines[idx - 2] + '\n' + postal_addr

        plz_city_found_idx = idx

    return location_addr, location_pcc, postal_addr, postal_pcc
403-
404-
405-
@cli.command('extract-address-from-portrait-field')
@click.option('--dry-run/--no-dry-run', default=False)
def extract_address_from_portrait_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Extracts address from onegov_agency agency portrait field.

    Extracts address, postal code and city from onegov_agency table
    'agencies' column 'portrait'.

    Example::

        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field
        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field --dry-run
    """

    def _extract(request: 'CoreRequest', app: 'Framework') -> None:
        session = app.session()
        click.secho("Extract address, postal code and city from table "
                    "'agencies' column 'portrait' to "
                    "'location_address', 'location_code_city', "
                    "'postal_address' and 'postal_code_city ..",
                    fg='yellow')

        total_count = 0  # every agency seen
        extraction_count = 0  # agencies that had a portrait to parse
        for total_count, agency in enumerate(session.query(Agency), start=1):
            if agency.portrait:
                parts = parse_agency_portrait_field_for_address(
                    agency.portrait)
                agency.location_address = parts[0]
                agency.location_code_city = parts[1]
                agency.postal_address = parts[2]
                agency.postal_code_city = parts[3]
                extraction_count += 1

        if dry_run:
            # discard the assignments made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        # called in both modes, i.e. also right after the abort of a dry run
        transaction.commit()
        click.secho(f'Extracted {extraction_count} address(es) of totally '
                    f'{total_count} agencies', fg='green')

    return _extract

Diff for: src/onegov/people/upgrade.py

-58
Original file line numberDiff line numberDiff line change
@@ -200,61 +200,3 @@ def fix_agency_address_column(context: UpgradeContext) -> None:
200200
context.operations.add_column('agencies', Column(
201201
'address', Text, nullable=True
202202
))
203-
204-
205-
@upgrade_task(
    'Remove address columns from agency',
    requires='onegov.people:Fix agency address column'
)
def remove_address_columns_from_agency(context: UpgradeContext) -> None:
    """ Drops the columns 'zip_code', 'city' and 'address' from the table
    'agencies', each one only if it is present.

    """
    for obsolete in ('zip_code', 'city', 'address'):
        if context.has_column('agencies', obsolete):
            context.operations.drop_column('agencies', obsolete)
216-
217-
218-
@upgrade_task('ogc-966 extend agency and person tables with more fields')
def extend_agency_and_person_with_more_fields(context: UpgradeContext) -> None:
    """ Adds nullable text columns, defaulting to an empty string, to the
    tables 'agencies' and 'people'. Columns that already exist are skipped.

    """

    def add_missing_text_columns(table: str, names: tuple[str, ...]) -> None:
        # one nullable text column per name the table does not have yet
        for name in names:
            if context.has_column(table, name):
                continue
            context.add_column_with_defaults(
                table,
                Column(name, Text, nullable=True),
                default=lambda x: ''
            )

    # add columns to table 'agencies'
    add_missing_text_columns('agencies', (
        'email', 'phone', 'phone_direct', 'website',
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city',
        'opening_hours',
    ))

    context.session.flush()

    # add columns to table 'people'
    add_missing_text_columns('people', (
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city', 'website_2',
    ))
249-
250-
251-
@upgrade_task('Add organisation columns to people')
def add_organisation_columns_to_people(context: UpgradeContext) -> None:
    """ Adds the nullable text columns 'organisation' and 'sub_organisation'
    to the table 'people', each one only if it is missing.

    """
    for name in ('organisation', 'sub_organisation'):
        if context.has_column('people', name):
            continue
        context.operations.add_column('people', Column(
            name, Text, nullable=True
        ))

0 commit comments

Comments
 (0)