-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPaperImageExtract.py
executable file
·145 lines (98 loc) · 4.52 KB
/
PaperImageExtract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3
import argparse
import os
import sys
import re
import urllib.request
def convert_textfile_to_list(file_path):
    """Read a text file and return each line in a list.

    Args:
        file_path (str): Full path to the text file.

    Returns:
        list: List where each index is a line from the text file. Lines keep
            their trailing newline character, exactly as ``readlines``
            returns them.
    """
    # The context manager closes the handle even if reading raises, so the
    # file descriptor is never leaked.
    with open(file_path, "r") as markdown_file:
        return markdown_file.readlines()
def convert_list_to_textfile(line_list, filename, output_folder):
    """Convert list to text file. Each index will be a line in the file.

    The lines are written verbatim; no newline is appended, so each entry
    must already carry its own line ending.

    Args:
        line_list (list): List with each index being a line of the text file.
        filename (str): Filename of the file to create.
        output_folder (str): Folder to put the created file into.
    """
    full_path = os.path.join(output_folder, filename)
    # The context manager closes (and flushes) the file even if writing fails
    with open(full_path, "w") as created_textfile:
        created_textfile.writelines(line_list)
def print_error_and_exit(error_message):
    """Print error message and exit program with a failure status.

    Args:
        error_message (str): Error message to print.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    # Diagnostics belong on stderr so they do not mix with regular output
    print("Error: " + error_message, file=sys.stderr)
    # A bare sys.exit() reports status 0 (success); signal the failure instead
    sys.exit(1)
def download_image(image_url, destination_folder):
    """Retrieve an image from a URL and save it into a specific folder.

    The saved file is named after the last path segment of the URL.

    Args:
        image_url (str): URL of image.
        destination_folder (str): Folder to place image into.
    """
    # Everything after the final slash of the URL is used as the filename
    *_, image_filename = image_url.split("/")
    target_path = f"{destination_folder}/{image_filename}"
    urllib.request.urlretrieve(image_url, target_path)
def convert_markdown_images(file_path, output_folder):
    """Will convert Markdown to have images locally rather than on Dropbox.

    Each line that consists solely of an image tag pointing at
    ``paper-attachments.dropbox.com`` has its image downloaded into
    ``<output_folder>/img`` and its URL replaced with ``./img/<filename>``.
    The altered Markdown is then written into ``output_folder`` under the
    base name of the input file. When no such line is found, an error is
    reported and the program exits before anything is created.

    Args:
        file_path (str): Full path to the text file.
        output_folder (str): Folder to put altered Markdown file and images.
    """
    # Read file to list
    markdown_lines_list = convert_textfile_to_list(file_path)

    # A single pattern both detects an image line and captures its URL.
    # Dots are escaped so they only match a literal "." (an unescaped dot
    # would accept any character, e.g. a look-alike host name), and the
    # character class holds no stray comma.
    image_tag_regex = re.compile(
        r"^!\[.*\]\("
        r"(https://paper-attachments\.dropbox\.com/"
        r"s_[A-Z0-9]{64}_[0-9]{13}_image\.png)"
        r"\)$"
    )

    # Collect (line index, image URL) for every matching line
    matched_images = []
    for index, line in enumerate(markdown_lines_list):
        tag_match = image_tag_regex.match(line)
        if tag_match is not None:
            matched_images.append((index, tag_match.group(1)))

    # If we have no matches, inform user there is nothing to be done
    if not matched_images:
        print_error_and_exit("No images where found in file, there is nothing to be done")

    # Alter Markdown File
    image_folder = os.path.join(output_folder, "img")
    os.mkdir(image_folder)
    for index, image_url in matched_images:
        download_image(image_url, image_folder)
        # Replace Dropbox URL with local path; the link inside the Markdown
        # always uses forward slashes, independent of the host OS.
        image_filename = image_url.split("/")[-1]
        markdown_lines_list[index] = markdown_lines_list[index].replace(
            image_url, "./img/" + image_filename
        )

    # Write altered line list to disk
    convert_list_to_textfile(markdown_lines_list, os.path.basename(file_path), output_folder)
def main():
    """Parse the command line, validate both paths and run the conversion.

    The Markdown path must exist and be a file; the output path must exist,
    be a folder and be empty. The first failed check is reported through
    ``print_error_and_exit``.
    """
    cli = argparse.ArgumentParser(description="Store Dropbox Paper Images Locally")
    cli.add_argument("-f", "--file", required=True, help="Markdown file path.")
    cli.add_argument(
        "-o",
        "--output",
        required=True,
        help="Folder to store altered Markdown file and images.",
    )
    args = cli.parse_args()

    # Validate the Markdown input: report only the first problem found
    file_problem = None
    if not os.path.exists(args.file):
        file_problem = "Specfied markdown file does not exist."
    elif not os.path.isfile(args.file):
        file_problem = "Specfied markdown file is not a file."
    if file_problem is not None:
        print_error_and_exit(file_problem)

    # Validate the output folder: it has to exist, be a folder and be empty
    output_problem = None
    if not os.path.exists(args.output):
        output_problem = "Specfied output folder does not exist."
    elif not os.path.isdir(args.output):
        output_problem = "Specfied output folder is not a folder."
    elif os.listdir(args.output):
        # A non-empty folder might be allowed in future, provided it holds
        # neither an img folder nor the Markdown file already
        output_problem = "Specfied output folder is not empty"
    if output_problem is not None:
        print_error_and_exit(output_problem)

    # All checks passed: convert the Markdown file
    convert_markdown_images(args.file, args.output)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()