Skip to content

Commit 2edde54

Browse files
committed
Fixes a bug that sometimes caused the plugin to fail
1 parent a44b50d commit 2edde54

File tree

3 files changed

+79
-11
lines changed

3 files changed

+79
-11
lines changed

.github/workflows/main.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
- name: Checkout
1111
uses: actions/checkout@v2
1212
- name: Package
13-
run: python3 make_release.py 10.0.0
13+
run: python3 make_release.py 10.0.1
1414
- name: Upload
1515
uses: actions/upload-artifact@v2
1616
with:

DeDRM_plugin/__init__.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# Copyright © 2021 NoDRM
77

88
__license__ = 'GPL v3'
9-
__version__ = '10.0.0'
9+
__version__ = '10.0.1'
1010
__docformat__ = 'restructuredtext en'
1111

1212

@@ -79,6 +79,7 @@
7979
# 7.2.0 - Update for latest KFX changes, and Python 3 Obok fixes.
8080
# 7.2.1 - Whitespace!
8181
# 10.0.0 - First forked version by NoDRM. See CHANGELOG.md for details.
82+
# 10.0.1 - Fixes a bug in the watermark code.
8283

8384
"""
8485
Decrypt DRMed ebooks.
@@ -210,6 +211,8 @@ def postProcessEPUB(self, path_to_ebook):
210211
# It does stuff like de-obfuscating fonts (by calling checkFonts)
211212
# or removing watermarks.
212213

214+
postProcessStart = time.time()
215+
213216
try:
214217
import calibre_plugins.dedrm.prefs as prefs
215218
dedrmprefs = prefs.DeDRM_Prefs()
@@ -224,13 +227,17 @@ def postProcessEPUB(self, path_to_ebook):
224227
# Remove Tolino's CDP watermark file
225228
path_to_ebook = watermark.removeCDPwatermark(self, path_to_ebook) or path_to_ebook
226229

227-
# Remove watermarks (currently just Amazon) from the OPF file
230+
# Remove watermarks (Amazon or LemonInk) from the OPF file
228231
path_to_ebook = watermark.removeOPFwatermarks(self, path_to_ebook) or path_to_ebook
229232

230-
# Remove watermarks (currently just Adobe's resource ID) from all HTML and XHTML files
233+
# Remove watermarks (Adobe or LemonInk) from all HTML and XHTML files
231234
path_to_ebook = watermark.removeHTMLwatermarks(self, path_to_ebook) or path_to_ebook
232235

233-
return path_to_ebook
236+
postProcessEnd = time.time()
237+
print("{0} v{1}: Post-processing took {2:.1f} seconds".format(PLUGIN_NAME, PLUGIN_VERSION, postProcessEnd-postProcessStart))
238+
239+
240+
return path_to_ebook
234241

235242
except:
236243
print("Error while checking settings")

DeDRM_plugin/epubwatermark.py

+67-6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ def removeHTMLwatermarks(object, path_to_ebook):
2929
modded_names = []
3030
modded_contents = []
3131

32+
count_adept = 0
33+
34+
count_lemonink_invisible = 0
35+
count_lemonink_visible = 0
36+
lemonink_trackingID = None
37+
3238
for file in namelist:
3339
if not (file.endswith('.html') or file.endswith('.xhtml') or file.endswith('.xml')):
3440
continue
@@ -40,8 +46,33 @@ def removeHTMLwatermarks(object, path_to_ebook):
4046
# Remove Adobe ADEPT watermarks
4147
# Match optional newline at the beginning, then a "meta" tag with name = "Adept.expected.resource" or "Adept.resource"
4248
# and either a "value" or a "content" attribute holding an Adobe UUID (urn:uuid:...)
49+
pre_remove = str_new
4350
str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>', '', str_new)
4451
str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>', '', str_new)
52+
53+
if (str_new != pre_remove):
54+
count_adept += 1
55+
56+
# Remove eLibri / LemonInk watermark
57+
# Run this in a loop, as it is possible a file has been watermarked twice ...
58+
while True:
59+
pre_remove = str_new
60+
unique_id = re.search(r'<body[^>]+class="[^"]*(t0x[0-9a-fA-F]{25})[^"]*"[^>]*>', str_new)
61+
if (unique_id):
62+
lemonink_trackingID = unique_id.groups()[0]
63+
count_lemonink_invisible += 1
64+
str_new = re.sub(lemonink_trackingID, '', str_new)
65+
pre_remove = str_new
66+
pm = r'(<body[^>]+class="[^"]*"[^>]*>)'
67+
pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
68+
pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
69+
str_new = re.sub(pm, r'\1', str_new)
70+
71+
if (str_new != pre_remove):
72+
count_lemonink_visible += 1
73+
else:
74+
break
75+
4576
except:
4677
traceback.print_exc()
4778
continue
@@ -51,14 +82,15 @@ def removeHTMLwatermarks(object, path_to_ebook):
5182

5283
modded_names.append(file)
5384
modded_contents.append(str_new)
85+
5486

5587
if len(modded_names) == 0:
5688
# No file modified, return original
5789
return path_to_ebook
5890

5991
if len(modded_names) != len(modded_contents):
6092
# Something went terribly wrong, return original
61-
print("Watermark: Error during ADEPT watermark removal")
93+
print("Watermark: Error during watermark removal")
6294
return path_to_ebook
6395

6496
# Re-package with modified files:
@@ -105,12 +137,20 @@ def removeHTMLwatermarks(object, path_to_ebook):
105137
traceback.print_exc()
106138
return path_to_ebook
107139

140+
if (count_adept > 0):
141+
print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(count_adept))
142+
143+
if (count_lemonink_invisible > 0 or count_lemonink_visible > 0):
144+
print("Watermark: Successfully stripped {0} visible and {1} invisible LemonInk watermark(s) (\"{2}\") from ebook."
145+
.format(count_lemonink_visible, count_lemonink_invisible, lemonink_trackingID))
146+
147+
return output
148+
108149
except:
109150
traceback.print_exc()
110151
return path_to_ebook
111152

112-
print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(len(modded_names)))
113-
return output
153+
114154

115155

116156
# Finds the main OPF file, then uses RegEx to remove watermarks
@@ -141,10 +181,27 @@ def removeOPFwatermarks(object, path_to_ebook):
141181
container_str = inf.read(opf_path).decode("utf-8")
142182
container_str_new = container_str
143183

184+
had_amazon = False
185+
had_elibri = False
186+
144187
# Remove Amazon hex watermarks
145188
# Match optional newline at the beginning, then spaces, then a "meta" tag with name = "Watermark" or "Watermark_(hex)" and a "content" attribute holding a hex string.
146-
container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"Watermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
147-
container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"Watermark(_\(hex\))?\"\s*\/>', '', container_str_new)
189+
# The "[Ww]" character class also lets this regex match DuMont watermarks, which use meta name="watermark" with a lowercase "w".
190+
pre_remove = container_str_new
191+
container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"[Ww]atermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
192+
container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"[Ww]atermark(_\(hex\))?\"\s*\/>', '', container_str_new)
193+
if pre_remove != container_str_new:
194+
had_amazon = True
195+
196+
# Remove elibri / lemonink watermark
197+
# Lemonink replaces all "id" fields in the opf with "idX_Y", with X being the watermark and Y being a number for that particular ID.
198+
# The first regex below strips the "Wygenerowane przez elibri dla zamówienia numer ..." comment; the second replaces all "idX_Y" IDs with "id_Y", removing the watermark IDs.
199+
pre_remove = container_str_new
200+
container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<\!\-\-\s*Wygenerowane przez elibri dla zamówienia numer [0-9a-fA-F]+\s*\-\-\>', '', container_str_new)
201+
container_str_new = re.sub(r'\=\"id[0-9]+_([0-9]+)\"', r'="id_\1"', container_str_new)
202+
if pre_remove != container_str_new:
203+
had_elibri = True
204+
148205
except:
149206
traceback.print_exc()
150207
return path_to_ebook
@@ -191,7 +248,11 @@ def removeOPFwatermarks(object, path_to_ebook):
191248
traceback.print_exc()
192249
return path_to_ebook
193250

194-
print("Watermark: Successfully stripped Amazon watermark from OPF file.")
251+
if had_elibri:
252+
print("Watermark: Successfully stripped eLibri watermark from OPF file.")
253+
if had_amazon:
254+
print("Watermark: Successfully stripped Amazon watermark from OPF file.")
255+
195256
return output
196257

197258

0 commit comments

Comments
 (0)