-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstd_retriever.py
314 lines (264 loc) · 12.9 KB
/
std_retriever.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
"""
File: std_retriever.py
Description: This script connects to the 3GPP FTP server to download specified standard
files and stores them in the designated 'standards' folder.
Contributors:
Amirpooya Asadollahnejad
David Schön
Marcelo Santibáñez
Adrian Hassa
Created: 2024-10-12
Last Modified: 2024-12-10
Project: 3GPP Requirement Tools
URL: https://github.com/Adrian2901/3gpp-requirements-tools
License: MIT License (see LICENSE file for details)
"""
from ftplib import FTP, error_perm
import json
import re
import os
import zipfile
import pandas as pd
import shutil
import sys
# Make every relative path below resolve against a fixed location:
# PyInstaller's bundle directory (sys._MEIPASS) when running frozen,
# otherwise the folder that contains this script.
if getattr(sys, 'frozen', False):
    os.chdir(sys._MEIPASS)
else:
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

##############################################
# Configuration variables
host = "www.3gpp.org"  # FTP server the client connects to
ftp_directory_path = "Specs/archive"  # remote directory entered right after login
download_folder_path = "zipped_standards"  # local folder receiving the downloaded archives
excel_spec_file = "Specification_list.xlsx"  # spreadsheet searched for matching specifications
standard_specs_folder_path = "standard_specs_folder"  # local folder for the per-series JSON files
##############################################
class FTPClient:
    """Small wrapper around ``ftplib.FTP`` that prints what it is doing.

    The connection is opened and logged in as soon as the object is created.
    """

    def __init__(self, host, user='', passwd=''):
        """Connect to *host* and log in.

        Args:
            host: Name of the FTP server.
            user: Login name, passed unchanged to ``FTP.login``.
            passwd: Password, passed unchanged to ``FTP.login``.

        Raises:
            Exception: Whatever ``ftplib`` raised while connecting or logging
                in; the error is printed first and then re-raised.
        """
        self.host = host
        self.ftp = None  # stays None if the connection could not be opened
        try:
            # Connecting is inside the try so that connection failures are
            # reported the same way as login failures.
            self.ftp = FTP(host)
            self.ftp.login(user=user, passwd=passwd)
            print(f"Logged in to {self.host}")
        except Exception as e:
            print(f"Error connecting to {self.host}: {e}")
            if self.ftp is not None:
                # Do not leak the socket when only the login step failed.
                self.ftp.close()
            raise

    def change_directory(self, path):
        """Change the working directory on the FTP server.

        Raises:
            Exception: Re-raises whatever ``FTP.cwd`` raised after printing it.
        """
        try:
            self.ftp.cwd(path)
            print(f"Changed to directory: {self.ftp.pwd()}")
        except error_perm as e:
            print(f"Permission error changing directory: {e}")
            raise
        except Exception as e:
            print(f"Error changing directory: {e}")
            raise

    def list_directory(self):
        """Return the raw ``LIST`` output of the current directory, one string per line.

        Raises:
            Exception: Re-raises whatever ``FTP.retrlines`` raised after printing it.
        """
        dir_content = []
        try:
            self.ftp.retrlines('LIST', dir_content.append)
        except Exception as e:
            print(f"Error listing directory: {e}")
            raise
        return dir_content

    def download(self, filename, local_path):
        """Download *filename* from the current remote directory into *local_path*.

        A failed transfer is reported on stdout but not raised, so a caller
        that downloads many files can carry on with the next one.  The partly
        written local file is removed in that case so no truncated archive is
        left behind.

        Args:
            filename: Name of the remote file; also used as the local file name.
            local_path: Existing local directory to store the file in.

        Returns:
            True if the file was transferred, False if the transfer failed.

        Raises:
            OSError: If the local file cannot be opened for writing.
        """
        # `local_path` is a directory; build the full path of the target file.
        full_path = os.path.join(local_path, filename)
        failure = None
        with open(full_path, 'wb') as file:
            try:
                self.ftp.retrbinary(f'RETR {filename}', file.write)
            except Exception as e:
                failure = e

        if failure is None:
            print(f"Downloaded {filename} to {local_path}")
            return True

        print(f"An Error occured while downloading {filename} to {local_path}")
        print(f"Reason: {failure}")
        try:
            os.remove(full_path)
        except OSError:
            # Best-effort cleanup; the download failure was already reported.
            pass
        return False

    def close_connection(self):
        """Close the connection if one was opened."""
        if self.ftp:
            self.ftp.close()
            print(f"Disconnected from {self.host}")
def _entry_names(listing):
    """Return the last whitespace-separated token of every non-blank LIST line.

    That token is taken to be the entry name, which holds as long as names
    contain no spaces.
    """
    return [line.split()[-1] for line in listing if line.strip()]


def get_standards(ftp_client: "FTPClient", std_list: str, local_path: str, update):
    """Download the specifications requested in one series JSON file.

    The FTP session must currently be in the directory that holds the
    ``<series_no>_series`` folders; it is back in that directory when the
    function returns normally.

    Args:
        ftp_client: Connected client used for listing, navigating and downloading.
        std_list: Path to a JSON file of the form
            ``{"series_no": ..., "indexes": [{"spec_no": ..., "version": ...}]}``.
            A version of ``"latest"`` selects the last file listed in the
            specification's folder; any other value is used to build the
            file name ``<series_no><spec_no>-<version>.zip``.
        local_path: Destination folder for the downloads; created if missing.
        update: Callable that receives one status message string.
    """
    os.makedirs(local_path, exist_ok=True)  # create the destination if it does not exist
    #clear_folder(local_path)

    with open(std_list, 'r') as series_list:
        series_data = json.load(series_list)
    series_no = series_data["series_no"]
    series_dir = series_no + "_series"

    # Compare whole entry names: a substring test would let "21_series"
    # match a listing line for "121_series".
    if series_dir not in _entry_names(ftp_client.list_directory()):
        print(f'{series_data["series_no"]} was not found.')
        update(f'{series_data["series_no"]} was not found.')
        return
    ftp_client.change_directory(series_dir)

    available_specs = set(_entry_names(ftp_client.list_directory()))
    for index in series_data['indexes']:
        spec_dir = series_no + "." + index['spec_no']
        # Exact match, so "21.101" is not also triggered by "21.101-1"
        # (which previously downloaded the same file twice).
        if spec_dir not in available_specs:
            continue
        try:
            ftp_client.change_directory(spec_dir)
        except Exception as e:
            print(f"Error changing directory to {series_data['series_no'] + '.' + index['spec_no']}: {e}")
            continue

        if index['version'] == 'latest':
            versions = _entry_names(ftp_client.list_directory())
            if not versions:
                ftp_client.change_directory('..')
                continue
            # The original author relies on the server listing files by
            # date, so the last entry is treated as the latest version.
            filename_ = versions[-1]
        else:
            filename_ = series_no + index['spec_no'] + '-' + index['version'] + ".zip"

        # Report before the transfer so the message is shown while it runs.
        update("Downloading " + filename_)
        ftp_client.download(filename_, local_path)
        ftp_client.change_directory('..')  # leave the specification folder

    # We are inside the series folder here, so always step back up to the
    # directory the caller started in.
    ftp_client.change_directory('..')
def unzip_all_in_folder(folder_path, extract_to):  # This function is created by Chat-GPT
    """Extract every zip archive found in *folder_path* into *extract_to*.

    *extract_to* is created if needed and emptied before extraction starts.
    Each archive is deleted from *folder_path* once it has been extracted;
    entries that are not zip files are reported and left in place.
    """
    os.makedirs(extract_to, exist_ok=True)
    clear_folder(extract_to)

    for entry_name in os.listdir(folder_path):
        archive_path = os.path.join(folder_path, entry_name)

        # Anything that is not a valid zip archive is only reported.
        if not zipfile.is_zipfile(archive_path):
            print(f'Skipping {entry_name}, not a zip file.')
            continue

        print(f'Unzipping {entry_name}...')
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(extract_to)

        # The archive is no longer needed once its contents are extracted.
        os.remove(archive_path)
def search_title(folder_path, xlsx_file, config):
    """Select specifications from the spreadsheet and write one JSON file per series.

    Args:
        folder_path: Folder the JSON files are written to. It is created if
            missing and emptied before anything is written.
        xlsx_file: Excel file holding the specification numbers and titles.
            The columns "Spec No", "Title", "Publication", "Status",
            "Technology" and "Type" are read.
        config: Mapping with the keys "phrase", "series_no", "publication",
            "technology", "status", "specification_number" and "type".
            "phrase" is compiled as a case-insensitive regular expression and
            searched in the title (an empty phrase matches every title);
            "series_no", when non-empty, restricts the result to that series;
            each remaining value is either a string that must occur in the
            matching column or a list of strings of which at least one must
            occur.

    Returns:
        Names (not paths) of the ``<series>_series.json`` files written, one
        per series that has at least one matching specification. Every entry
        in such a file requests version ``"latest"``.
    """
    os.makedirs(folder_path, exist_ok=True)
    clear_folder(folder_path)

    phrase = config["phrase"]
    series_no = config["series_no"]

    # If a phrase is provided, create a regular expression based on it;
    # otherwise every title matches.
    pattern = re.compile(phrase, re.IGNORECASE) if phrase else None

    # Spreadsheet column -> keyword (or list of keywords) it has to contain.
    keyword_column_map = {
        "Publication": config["publication"],
        "Status": config["status"],
        "Technology": config["technology"],
        "Spec No": config["specification_number"],
        "Type": config["type"],
    }

    def row_matches(row):
        """Return True when the row satisfies every column criterion."""
        for column, keyword in keyword_column_map.items():
            cell = str(row[column])
            if isinstance(keyword, list):
                if not any(item in cell for item in keyword):
                    return False
            elif keyword not in cell:
                return False
        return True

    df = pd.read_excel(xlsx_file)
    # Row-wise apply is only used when there are rows to filter.
    result = df if df.empty else df[df.apply(row_matches, axis=1)]

    # Collect the matches per series in a dict, so the output does not depend
    # on the spreadsheet being sorted by series and nothing is overwritten.
    specs_by_series = {}
    for _, row in result.iterrows():
        spec_parts = str(row["Spec No"]).split('.')
        if len(spec_parts) < 2:
            continue  # not of the form "<series>.<number>"
        current_series = spec_parts[0]

        # A requested series number excludes every other series.
        if series_no and current_series != series_no:
            continue

        if pattern is not None:
            title = row["Title"]
            # Empty cells are not strings and cannot match a phrase.
            if not isinstance(title, str) or not pattern.search(title):
                continue

        specs_by_series.setdefault(current_series, []).append({
            "spec_no": spec_parts[1],
            "version": "latest"
        })

    # Only series with at least one match get a file; when nothing matched at
    # all the result is an empty list rather than an error.
    file_names = []
    for series, indexes in specs_by_series.items():
        file_name = f"{series}_series.json"
        with open(os.path.join(folder_path, file_name), 'w') as json_file:
            json.dump({"series_no": series, "indexes": indexes}, json_file, indent=4)
        file_names.append(file_name)

    return file_names
def clear_folder(folder_path):  # This function is created by Chat-GPT
    """Remove everything inside *folder_path* while keeping the folder itself.

    Progress is printed to stdout. A missing or already empty folder is only
    reported. An entry that cannot be deleted is reported and skipped, so
    the remaining entries are still processed.
    """
    # Nothing to do when the folder is missing.
    if not os.path.exists(folder_path):
        print("Folder does not exist.")
        return

    entries = os.listdir(folder_path)
    if not entries:
        print("Folder is empty.")
        return

    for entry in entries:
        target = os.path.join(folder_path, entry)
        try:
            if os.path.isfile(target) or os.path.islink(target):
                os.unlink(target)  # plain file or symbolic link
            elif os.path.isdir(target):
                shutil.rmtree(target)  # directory with all its contents
            print(f"Deleted: {target}")
        except Exception as e:
            print(f"Failed to delete {target}. Reason: {e}")
    print("Folder contents cleared.")
#################### script #########################
def download(config, button_handler, update):
    """Run the whole retrieval: search, download, unzip and clean up.

    Any error is reported through *update* instead of being raised, and the
    button is re-enabled no matter how the run ends.

    Args:
        config: Mapping with the search settings read by ``search_title``
            plus "download_folder_path", the folder the archives are
            extracted into.
        button_handler: Object with a ``config(state=...)`` method; it is set
            to "disabled" while the run is in progress and back to "normal"
            afterwards.
        update: Callable that receives one status message string.
    """
    button_handler.config(state="disabled")
    ftp_client = None  # stays None if the connection cannot be established
    try:
        # Read inside the try so a missing key is reported like any other
        # failure instead of leaving the button disabled.
        unzipped_folder_path = config["download_folder_path"]

        # create the connection to FTP server and change to the wanted directory
        ftp_client = FTPClient(host)
        ftp_client.change_directory(ftp_directory_path)

        #* search by title if necessary
        standards = search_title(standard_specs_folder_path, excel_spec_file, config)
        for standard in standards:
            get_standards(ftp_client, os.path.join(standard_specs_folder_path, standard), download_folder_path, update)

        # unzip the downloaded standards
        update("Unzipping downloaded standards...")
        unzip_all_in_folder(download_folder_path, unzipped_folder_path)

        # removing the standards_specs folder
        update("Cleaning up...")
        shutil.rmtree(standard_specs_folder_path)
        update("Done! Check the output folder for the results.")
    except Exception as e:
        update(f"An error occured: {e}. Please try again.")
    finally:
        try:
            # Only close a connection that was actually opened; otherwise a
            # failed connect would raise here and mask the reported error.
            if ftp_client is not None:
                ftp_client.close_connection()
        finally:
            button_handler.config(state="normal")
if __name__ == "__main__":
    # Running this module directly does no work; it only prints a hint that
    # points the user at gui_std_retriever.py.
    usage_hint = "This is a script that retrieves 3GPP standards documents with filters.\nUsage: python gui_std_retriever.py"
    print(usage_hint)