|
1 | 1 | import sys
|
2 | 2 |
|
3 | 3 | import click
|
4 |
| -import re |
5 | 4 | import transaction
|
6 | 5 |
|
7 | 6 | from collections import OrderedDict
|
8 |
| -from bs4 import BeautifulSoup |
9 | 7 |
|
10 | 8 | from onegov.core.cli import command_group
|
11 | 9 | from onegov.core.cli import abort
|
12 |
| -from onegov.people import Agency |
13 | 10 | from onegov.people.models import Person
|
14 | 11 | from openpyxl import load_workbook
|
15 | 12 | from openpyxl import Workbook
|
@@ -168,288 +165,3 @@ def _import(request: 'CoreRequest', app: 'Framework') -> None:
|
168 | 165 | click.secho(f'Imported {count} person(s)', fg='green')
|
169 | 166 |
|
170 | 167 | return _import
|
171 |
| - |
172 |
| - |
173 |
# Address layouts recognised by `parse_and_split_address_field`. They are
# tried in the order p2, p3, p4, p1, p6, p5 (most specific first).
p2 = re.compile(r'(.*), (.*)Postadresse: (.*), (.*)')
p3 = re.compile(r'(.*), (Postfach), (.*)')
p4 = re.compile(r'(.*), (.*), (.*)')
p1 = re.compile(r'(.*), (.*)')
p6 = re.compile(r'(.*)\n(.*)')
p5 = re.compile(r'([A-Za-z ]*) ?(\d+[a-z]?)?')  # street name and optional
# building number


def parse_and_split_address_field(address: str) -> tuple[str, str, str, str]:
    """
    Parse the `address` field and split it into location address and
    code/city as well as postal address and code/city.

    Semicolons (with or without a following space) are removed before
    parsing. An address that matches none of the known layouts is returned
    unchanged as the postal address, so no part of it is dropped.

    :param address: str
    :return: tuple: (location_address, location_code_city,
        postal_address, postal_code_city)
    """
    # sanitize address
    address = address.replace('; ', '').replace(';', '')

    if not address:
        return '', '', '', ''

    # '<location>, <code city>Postadresse: <postal>, <code city>'
    if m := p2.match(address):
        return m.group(1), m.group(2), m.group(3), m.group(4)

    # Three comma separated parts: the first two form a two-line postal
    # address. p3 has to be tried before p4 because the two patterns split
    # an address with more than two commas differently.
    if m := (p3.match(address) or p4.match(address)):
        return '', '', m.group(1) + '\n' + m.group(2), m.group(3)

    # '<postal address>, <code city>'
    if m := p1.match(address):
        return '', '', m.group(1), m.group(2)

    # Multi-line address without commas: the last line is the code/city.
    if p6.match(address):
        postal_addr, postal_pcc = address.rsplit('\n', 1)
        return '', '', postal_addr, postal_pcc

    # Street name with optional building number. `fullmatch` is required
    # here: every part of p5 is optional, so `match` succeeds on any input
    # and would keep only the leading ASCII part of the address.
    if m := p5.fullmatch(address):
        return '', '', m.group(1) + (m.group(2) or ''), ''

    # No known layout (e.g. non-ASCII street name): keep the address as is.
    return '', '', address, ''
238 |
| - |
239 |
| - |
240 |
@cli.command('migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates onegov_agency people address field.

    Migrate data from onegov_agency table 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_agency/bs migrate-people-address-field

        onegov-people --select /onegov_agency/bs migrate-people-address-field
        --dry-run

    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        session = app.session()
        click.secho(
            "Migrate data from table 'people' column 'address' "
            "field to 'location_address', 'location_code_city', "
            "'postal_address' and 'postal_code_city ..",
            fg='yellow'
        )

        # total_count ends up as the number of rows iterated (0 if none)
        total_count = 0
        migration_count = 0
        for total_count, person in enumerate(session.query(Person), start=1):
            if person.address:
                parts = parse_and_split_address_field(person.address)
                person.location_address = parts[0]
                person.location_code_city = parts[1]
                person.postal_address = parts[2]
                person.postal_code_city = parts[3]
                migration_count += 1

        if dry_run:
            # discard the attribute changes made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        summary = (
            f'Migrated all {migration_count} address(es) of totally '
            f'{total_count} people'
        )
        click.secho(summary, fg='green')

    return _migrate
292 |
| - |
293 |
| - |
294 |
@cli.command('onegov-migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def onegov_migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates people address field everywhere in onegov.

    Migrate data from 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_town6/ebikon
        onegov-migrate-people-address-field

        onegov-people --select /onegov_org/risch
        onegov-migrate-people-address-field --dry-run

    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        click.secho(f'Request url: {request.url}..')
        session = app.session()
        click.secho(
            "Onegov migrate data from table 'people' column "
            "'address' field to 'location_address', "
            "'location_code_city', 'postal_address' and "
            "'postal_code_city ..",
            fg='yellow'
        )

        # total_count ends up as the number of rows iterated (0 if none)
        total_count = 0
        migration_count = 0
        for total_count, person in enumerate(session.query(Person), start=1):
            if person.address:
                parts = parse_and_split_address_field(person.address)
                person.location_address = parts[0]
                person.location_code_city = parts[1]
                person.postal_address = parts[2]
                person.postal_code_city = parts[3]
                migration_count += 1

        if dry_run:
            # discard the attribute changes made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        summary = (
            f'Migrated all {migration_count} address(es) of totally '
            f'{total_count} people'
        )
        click.secho(summary, fg='green')

    return _migrate
349 |
| - |
350 |
| - |
351 |
re_postal_code_city_ch = re.compile(r'\d{4} .*')  # e.g. '1234 Mein Ort'
re_postal_code_city_de = re.compile(r'D-\d{5} .*')  # e.g. 'D-12345 Mein Ort'


def parse_agency_portrait_field_for_address(
    portrait: str
) -> tuple[str, str, str, str]:
    """
    Parse the `portrait` field of agencies and extract address and
    code/city as well as location address and city if present.

    The html is converted to text and scanned line by line. Every line
    starting with a postal code and city marks an address whose street is
    taken from the line above. If several such lines are found, the last one
    is returned as postal address and the one before it as location address.

    :param portrait: html str
    :return: tuple: (location_addr, location_pcc, postal_address,
        postal_code_city)
    """

    location_addr = ''
    location_pcc = ''
    postal_addr = ''
    postal_pcc = ''
    plz_city_found_idx = -1  # index of the last code/city line, -1 if none

    soup = BeautifulSoup(portrait, "html.parser")
    # convert from html to text using soup
    lines = soup.get_text('\n').split('\n')

    for idx, line in enumerate(lines):
        m = (re_postal_code_city_ch.match(line)
             or re_postal_code_city_de.match(line))
        if not m:
            continue

        # Compare against the sentinel explicitly: a plain truth test is
        # true for -1 and false for a previous hit on line 0, which would
        # lose the location address when the text starts with a code/city.
        if plz_city_found_idx >= 0:
            # assuming address initially found was location address
            location_addr = postal_addr
            location_pcc = postal_pcc

        postal_pcc = m.group(0)
        # if only code/city, no street and number
        postal_addr = lines[idx - 1] if idx > 0 else ''

        # only extend postal address 'Postfach' with street/house number if
        # previous line is at least two lines away
        # Dorfstrasse 1, Postfach, 1234 Govikon
        if (
            'postfach' in postal_addr.lower()
            and (plz_city_found_idx + 2 < idx)
            and idx >= 2 and lines[idx - 2] != ''
        ):
            postal_addr = lines[idx - 2] + '\n' + postal_addr

        plz_city_found_idx = idx

    return location_addr, location_pcc, postal_addr, postal_pcc
403 |
| - |
404 |
| - |
405 |
@cli.command('extract-address-from-portrait-field')
@click.option('--dry-run/--no-dry-run', default=False)
def extract_address_from_portrait_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Extracts address from onegov_agency agency portrait field.

    Extracts address, postal code and city from onegov_agency table
    'agencies' column 'portrait'.

    Example::

        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field
        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field --dry-run
    """

    def _extract(request: 'CoreRequest', app: 'Framework') -> None:
        session = app.session()
        click.secho(
            "Extract address, postal code and city from table "
            "'agencies' column 'portrait' to "
            "'location_address', 'location_code_city', "
            "'postal_address' and 'postal_code_city ..",
            fg='yellow'
        )

        # total_count ends up as the number of rows iterated (0 if none)
        total_count = 0
        extraction_count = 0
        for total_count, agency in enumerate(session.query(Agency), start=1):
            if agency.portrait:
                parts = parse_agency_portrait_field_for_address(
                    agency.portrait
                )
                agency.location_address = parts[0]
                agency.location_code_city = parts[1]
                agency.postal_address = parts[2]
                agency.postal_code_city = parts[3]
                extraction_count += 1

        if dry_run:
            # discard the attribute changes made above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        # runs in both modes, after the abort in a dry run
        transaction.commit()
        summary = (
            f'Extracted {extraction_count} address(es) of totally '
            f'{total_count} agencies'
        )
        click.secho(summary, fg='green')

    return _extract
0 commit comments