@@ -29,6 +29,12 @@ def removeHTMLwatermarks(object, path_to_ebook):
29
29
modded_names = []
30
30
modded_contents = []
31
31
32
+ count_adept = 0
33
+
34
+ count_lemonink_invisible = 0
35
+ count_lemonink_visible = 0
36
+ lemonink_trackingID = None
37
+
32
38
for file in namelist :
33
39
if not (file .endswith ('.html' ) or file .endswith ('.xhtml' ) or file .endswith ('.xml' )):
34
40
continue
@@ -40,8 +46,33 @@ def removeHTMLwatermarks(object, path_to_ebook):
40
46
# Remove Adobe ADEPT watermarks
41
47
# Match optional newline at the beginning, then a "meta" tag with name = "Adept.expected.resource" or "Adept.resource"
42
48
# and either a "value" or a "content" attribute holding an Adobe UUID
49
+ pre_remove = str_new
43
50
str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>' , '' , str_new )
44
51
str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>' , '' , str_new )
52
+
53
+ if (str_new != pre_remove ):
54
+ count_adept += 1
55
+
56
+ # Remove eLibri / LemonInk watermark
57
+ # Run this in a loop, since a file may have been watermarked more than once.
58
+ while True :
59
+ pre_remove = str_new
60
+ unique_id = re .search (r'<body[^>]+class="[^"]*(t0x[0-9a-fA-F]{25})[^"]*"[^>]*>' , str_new )
61
+ if (unique_id ):
62
+ lemonink_trackingID = unique_id .groups ()[0 ]
63
+ count_lemonink_invisible += 1
64
+ str_new = re .sub (lemonink_trackingID , '' , str_new )
65
+ pre_remove = str_new
66
+ pm = r'(<body[^>]+class="[^"]*"[^>]*>)'
67
+ pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
68
+ pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
69
+ str_new = re .sub (pm , r'\1' , str_new )
70
+
71
+ if (str_new != pre_remove ):
72
+ count_lemonink_visible += 1
73
+ else :
74
+ break
75
+
45
76
except :
46
77
traceback .print_exc ()
47
78
continue
@@ -51,14 +82,15 @@ def removeHTMLwatermarks(object, path_to_ebook):
51
82
52
83
modded_names .append (file )
53
84
modded_contents .append (str_new )
85
+
54
86
55
87
if len (modded_names ) == 0 :
56
88
# No file modified, return original
57
89
return path_to_ebook
58
90
59
91
if len (modded_names ) != len (modded_contents ):
60
92
# Something went terribly wrong, return original
61
- print ("Watermark: Error during ADEPT watermark removal" )
93
+ print ("Watermark: Error during watermark removal" )
62
94
return path_to_ebook
63
95
64
96
# Re-package with modified files:
@@ -105,12 +137,20 @@ def removeHTMLwatermarks(object, path_to_ebook):
105
137
traceback .print_exc ()
106
138
return path_to_ebook
107
139
140
+ if (count_adept > 0 ):
141
+ print ("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook." .format (count_adept ))
142
+
143
+ if (count_lemonink_invisible > 0 or count_lemonink_visible > 0 ):
144
+ print ("Watermark: Successfully stripped {0} visible and {1} invisible LemonInk watermark(s) (\" {2}\" ) from ebook."
145
+ .format (count_lemonink_visible , count_lemonink_invisible , lemonink_trackingID ))
146
+
147
+ return output
148
+
108
149
except :
109
150
traceback .print_exc ()
110
151
return path_to_ebook
111
152
112
- print ("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook." .format (len (modded_names )))
113
- return output
153
+
114
154
115
155
116
156
# Finds the main OPF file, then uses RegEx to remove watermarks
@@ -141,10 +181,27 @@ def removeOPFwatermarks(object, path_to_ebook):
141
181
container_str = inf .read (opf_path ).decode ("utf-8" )
142
182
container_str_new = container_str
143
183
184
+ had_amazon = False
185
+ had_elibri = False
186
+
144
187
# Remove Amazon hex watermarks
145
188
# Match optional newline at the beginning, then spaces, then a "meta" tag with name = "Watermark" or "Watermark_(hex)" and a "content" attribute.
146
- container_str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"Watermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>' , '' , container_str_new )
147
- container_str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"Watermark(_\(hex\))?\"\s*\/>' , '' , container_str_new )
189
+ # The [Ww] character class also lets these patterns match DuMont watermarks, which use a lowercase meta name="watermark".
190
+ pre_remove = container_str_new
191
+ container_str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"[Ww]atermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>' , '' , container_str_new )
192
+ container_str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"[Ww]atermark(_\(hex\))?\"\s*\/>' , '' , container_str_new )
193
+ if pre_remove != container_str_new :
194
+ had_amazon = True
195
+
196
+ # Remove eLibri / LemonInk watermark: the "Wygenerowane przez elibri ..." order-number comment and the watermarked IDs.
197
+ # Lemonink replaces all "id" fields in the opf with "idX_Y", with X being the watermark and Y being a number for that particular ID.
198
+ # This regex replaces all "idX_Y" IDs with "id_Y", removing the watermark IDs.
199
+ pre_remove = container_str_new
200
+ container_str_new = re .sub (r'((\r\n|\r|\n)\s*)?\<\!\-\-\s*Wygenerowane przez elibri dla zamówienia numer [0-9a-fA-F]+\s*\-\-\>' , '' , container_str_new )
201
+ container_str_new = re .sub (r'\=\"id[0-9]+_([0-9]+)\"' , r'="id_\1"' , container_str_new )
202
+ if pre_remove != container_str_new :
203
+ had_elibri = True
204
+
148
205
except :
149
206
traceback .print_exc ()
150
207
return path_to_ebook
@@ -191,7 +248,11 @@ def removeOPFwatermarks(object, path_to_ebook):
191
248
traceback .print_exc ()
192
249
return path_to_ebook
193
250
194
- print ("Watermark: Successfully stripped Amazon watermark from OPF file." )
251
+ if had_elibri :
252
+ print ("Watermark: Successfully stripped eLibri watermark from OPF file." )
253
+ if had_amazon :
254
+ print ("Watermark: Successfully stripped Amazon watermark from OPF file." )
255
+
195
256
return output
196
257
197
258
0 commit comments