|
| 1 | +#!/usr/bin/env python |
| 2 | +from copy import copy |
| 3 | +import sys |
| 4 | +from olclient.openlibrary import OpenLibrary |
| 5 | + |
| 6 | +""" |
| 7 | + Removes 'fake' ex-system subjects from Open Library works or editions. |
| 8 | + Takes as CLI argument a filename containing a list of Open Library keys: |
| 9 | + e.g. |
| 10 | + /works/OL1001319W |
| 11 | + /books/OL24710466M |
| 12 | +""" |
| 13 | + |
# System-generated pseudo-subjects, compared case-insensitively and removed
# from both works and editions.
fakes = ['overdrive', 'in library', 'accessible book', 'protected daisy', 'lending library', 'internet archive wishlist']
# only remove these from works:
wfakes = ['large type books', 'popular print disabled books']

# Other unwanted subjects, removed wherever they occur.
otherbad = ['fictiion']

fakes += otherbad

# Record types this script knows how to clean.
VALID_TYPES = ('/type/edition', '/type/work')


def strip_fake_subjects(subjects, targets):
    """Return a new list: *subjects* without fake or duplicate-lowercase entries.

    A subject is dropped when either:
      * its lowercased form is in *targets*, or
      * it is the all-lowercase twin of a differently-cased subject that is
        also present (e.g. 'fiction' is dropped when 'Fiction' is present).

    The order of the surviving subjects is preserved and *subjects* itself
    is not modified.

    :param subjects: list of subject strings taken from a work or edition.
    :param targets: container of lowercased subject names to remove.
    :return: list of the subjects that should be kept.
    """
    present = set(subjects)
    doomed = set()
    for subject in subjects:
        lowered = subject.lower()
        if lowered in targets:
            doomed.add(subject)
        # remove duplicate lowercased subjects
        if lowered != subject and lowered in present:
            doomed.add(lowered)
    # Filtering against a set (instead of repeated list.remove) means a
    # subject flagged by both rules is handled once, and every occurrence
    # of a doomed subject goes away.
    return [subject for subject in subjects if subject not in doomed]


def main(argv=None):
    """Remove fake subjects from every record listed in the input file.

    :param argv: optional argument list (without the program name);
        defaults to ``sys.argv[1:]``. The first argument is the name of a
        file holding one Open Library key per line, e.g. /works/OL1001319W.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('usage: pass the name of a file listing Open Library keys, one per line')

    ol = OpenLibrary()

    # Built once: the target sets do not change from record to record.
    edition_targets = set(fakes)
    work_targets = edition_targets | set(wfakes)

    changes_made = 0
    with open(args[0], 'r') as f:
        for item in f:
            olid = item.strip().replace('/books/', '').replace('/works/', '')
            if not olid:
                # Blank line in the input; there is nothing to look up.
                continue
            book = ol.get(olid)
            type_key = book.type.get('key')
            if type_key not in VALID_TYPES:
                print("Unexpected type for %s -- Skipping!" % olid)
                continue
            subjects = getattr(book, 'subjects', None)
            if not subjects:
                # No subjects on this record, so nothing can be removed.
                continue
            targets = work_targets if type_key == '/type/work' else edition_targets
            cleaned = strip_fake_subjects(subjects, targets)
            if cleaned == subjects:
                continue
            # Update in place so the record keeps the same list object.
            subjects[:] = cleaned
            r = book.save('remove fake subjects')
            if r.status_code != 200:  # Only log unsuccessful saves
                print('%s: %s' % (olid, r))
            else:
                changes_made += 1
    print('%s subject changes saved.' % changes_made)


if __name__ == '__main__':
    main()
0 commit comments