Skip to content

Commit 8bd1bb9

Browse files
committed
1 parent 9b4bc0d commit 8bd1bb9

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

Diff for: tasks/clear-fakes.py

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python
"""
Removes 'fake' ex-system subjects from Open Library works or editions.
Takes as CLI argument a filename containing a list of Open Library keys:
e.g.
/works/OL1001319W
/books/OL24710466M
"""
from copy import copy
import sys

# Subjects removed from both works and editions (matched case-insensitively).
fakes = ['overdrive', 'in library', 'accessible book', 'protected daisy', 'lending library', 'internet archive wishlist']
# only remove these from works:
wfakes = ['large type books', 'popular print disabled books']

# Other unwanted subjects (e.g. misspellings) removed alongside the fakes.
otherbad = ['fictiion']

fakes += otherbad


def filter_subjects(subjects, targets):
    """Return a new list of `subjects` with the unwanted entries dropped.

    An entry is dropped when either:
      * its lowercased form is in `targets`, or
      * it is the all-lowercase spelling of another, differently-cased
        entry in the same list (e.g. 'fiction' next to 'Fiction').

    The relative order of the surviving entries is preserved and the input
    list is not modified.

    subjects -- list of subject strings
    targets  -- container of lowercased subject strings to remove
    """
    present = set(subjects)
    doomed = set()
    for subject in subjects:
        lowered = subject.lower()
        if lowered in targets:
            doomed.add(subject)
        # remove duplicate lowercased subjects
        if lowered != subject and lowered in present:
            doomed.add(lowered)
    # Filtering against a set drops every copy exactly once, so there is no
    # "already removed" failure mode as there is with repeated list.remove().
    return [subject for subject in subjects if subject not in doomed]


def main(argv=None):
    """Strip fake subjects from every record listed in the file argv[1].

    argv -- argument vector; defaults to sys.argv.

    Returns a process exit status: 2 when the filename argument is missing,
    0 otherwise. Unsuccessful saves are printed; successful ones are counted
    and the total is printed at the end.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write('Usage: %s FILE_WITH_OPEN_LIBRARY_KEYS\n' % argv[0])
        return 2

    # Imported here rather than at module level so that usage errors and
    # filter_subjects() work without the Open Library client installed.
    from olclient.openlibrary import OpenLibrary
    ol = OpenLibrary()

    edition_targets = set(fakes)
    work_targets = edition_targets | set(wfakes)

    changes_made = 0
    with open(argv[1], 'r') as f:
        for item in f:
            olid = item.strip().replace('/books/', '').replace('/works/', '')
            if not olid:
                # Blank line in the input list: nothing to look up.
                continue
            book = ol.get(olid)
            type_key = book.type.get('key')
            if type_key not in ('/type/edition', '/type/work'):
                print("Unexpected type for %s -- Skipping!" % olid)
                continue
            if not getattr(book, 'subjects', None):
                # No subjects on this record, so nothing to clean up.
                continue
            orig_subjects = copy(book.subjects)
            targets = work_targets if type_key == '/type/work' else edition_targets
            cleaned = filter_subjects(orig_subjects, targets)
            if cleaned == orig_subjects:
                continue
            # Update the existing list in place, as the record's own list
            # object is what gets saved.
            book.subjects[:] = cleaned
            r = book.save('remove fake subjects')
            if r.status_code != 200:  # Only log unsuccessful saves
                print('%s: %s' % (olid, r))
            else:
                changes_made += 1
    print('%s subject changes saved.' % changes_made)
    return 0


if __name__ == '__main__':
    sys.exit(main())

0 commit comments

Comments
 (0)