Skip to content

Commit d43fb61

Browse files
committed
fix img re pattern
1 parent 50547b0 commit d43fb61

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

geektime_dl/geektime_ebook/maker.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,14 @@ def generate_cover_img(url, output_dir):
4141

4242
def parse_image(content, output_dir):
4343

44-
p = r'img src="(.*?)"'
45-
img_url_list = re.findall(p, content, re.S)
44+
# remove the xxx `style=xxx`
45+
p = r'img (.{1,15}=".*?") src=".*?"'
46+
fucking_styles = re.findall(p, content)
47+
for style in fucking_styles:
48+
content = content.replace(style, '')
49+
50+
p = r'img\s+src="(.*?)"'
51+
img_url_list = re.findall(p, content)
4652
for url in img_url_list:
4753
try:
4854
url_local = str(uuid.uuid4()) + '.jpg'

0 commit comments

Comments
 (0)