1111import  uuid 
1212import  zipfile 
1313from  concurrent .futures  import  ThreadPoolExecutor 
14- from  email  import  message_from_string 
15- from  typing  import  List , Union 
14+ from  typing  import  List , Union , Dict 
1615from  urllib .parse  import  urlparse 
1716
1817import  backoff 
@@ -34,6 +33,18 @@ def calculate_chunk_size(file_size: int) -> int:
3433
3534    return  1024  *  1024  *  10   # 10 MB 
3635
def extract_disposition_params(content_disposition: str) -> Dict[str, str]:
    """Parse the ``key=value`` parameters of a Content-Disposition header.

    The disposition type itself (e.g. ``attachment``) and any other segment
    without an ``=`` is ignored. Keys are lower-cased; values have surrounding
    whitespace and double quotes removed. If a key occurs more than once, the
    last occurrence wins.

    Semicolons inside a double-quoted value do not terminate the parameter,
    so ``filename="a;b.txt"`` yields ``a;b.txt``.

    Extended parameters of the form ``name*=charset'lang'percent-encoded``
    are kept verbatim under ``name*`` and, when they decode cleanly, are also
    stored decoded under ``name``, taking precedence over a plain ``name=``
    parameter.

    Args:
        content_disposition: Raw header value. ``None`` or an empty string
            yields an empty dict.

    Returns:
        Mapping of lower-cased parameter names to their unquoted values.
    """
    # Imported locally so this function does not depend on the module's
    # top-level imports being extended.
    from urllib.parse import unquote

    # Split on ';' only when outside a double-quoted string.
    segments = []
    current = []
    in_quotes = False
    escaped = False
    for char in content_disposition or "":
        if escaped:
            # Character following a backslash inside quotes is literal.
            escaped = False
        elif in_quotes and char == "\\":
            escaped = True
        elif char == '"':
            in_quotes = not in_quotes
        elif char == ";" and not in_quotes:
            segments.append("".join(current))
            current = []
            continue
        current.append(char)
    segments.append("".join(current))

    params: Dict[str, str] = {}
    decoded_extended: Dict[str, str] = {}
    for segment in segments:
        key, separator, value = segment.partition("=")
        if not separator:
            continue
        key = key.strip().lower()
        value = value.strip().strip('"')
        params[key] = value

        if len(key) > 1 and key.endswith("*"):
            charset, first_quote, remainder = value.partition("'")
            _language, second_quote, encoded = remainder.partition("'")
            if first_quote and second_quote:
                try:
                    decoded_extended[key[:-1]] = unquote(
                        encoded, encoding=charset or "utf-8", errors="strict"
                    )
                except (LookupError, UnicodeDecodeError):
                    # Unknown charset or undecodable bytes: fall back to the
                    # plain parameter, if any.
                    pass

    # Extended (name*) values take precedence over their plain counterparts.
    params.update(decoded_extended)
    return params
47+ 
3748
3849def  download_files_from_urls (job_id : str , urls : Union [str , List [str ]]) ->  List [str ]:
3950    """ 
@@ -55,8 +66,7 @@ def download_file(url: str, path_to_save: str) -> str:
5566            content_disposition  =  response .headers .get ("Content-Disposition" )
5667            file_extension  =  "" 
5768            if  content_disposition :
58-                 msg  =  message_from_string (f"Content-Disposition: { content_disposition }  )
59-                 params  =  dict (msg .items ())
69+                 params  =  extract_disposition_params (content_disposition )
6070                file_extension  =  os .path .splitext (params .get ("filename" , "" ))[1 ]
6171
6272            # If no extension could be determined from 'Content-Disposition', get it from the URL 
@@ -113,15 +123,15 @@ def file(file_url: str) -> dict:
113123
114124    download_response  =  SyncClientSession ().get (file_url , headers = HEADERS , timeout = 30 )
115125
116-     original_file_name  =  []
117-     if  "Content-Disposition"  in  download_response .headers .keys ():
118-         original_file_name  =  re .findall (
119-             "filename=(.+)" , download_response .headers ["Content-Disposition" ]
120-         )
126+     content_disposition  =  download_response .headers .get ("Content-Disposition" )
121127
122-     if  len (original_file_name ) >  0 :
123-         original_file_name  =  original_file_name [0 ]
124-     else :
128+     original_file_name  =  "" 
129+     if  content_disposition :
130+         params  =  extract_disposition_params (content_disposition )
131+ 
132+         original_file_name  =  params .get ("filename" , "" )
133+     
134+     if  not  original_file_name :
125135        download_path  =  urlparse (file_url ).path 
126136        original_file_name  =  os .path .basename (download_path )
127137
0 commit comments