@@ -44,6 +44,8 @@ def get_indexed_files(response):
         if (url.path and
                 url.path != '.' and
                 url.path != '..' and
+                url.path != './' and
+                url.path != '../' and
                 not url.path.startswith('/') and
                 not url.scheme and
                 not url.netloc):
@@ -171,15 +173,15 @@ def process_tasks(initial_tasks, worker, jobs, args=(), tasks_done=None):
 class DownloadWorker(Worker):
     ''' Download a list of files '''
 
-    def init(self, url, directory, retry, timeout, module=None):
+    def init(self, url, directory, retry, timeout, follow_redirects=False, module=None):
         self.session = requests.Session()
         self.session.verify = False
         self.session.mount(url, requests.adapters.HTTPAdapter(max_retries=retry))
         self.module = module
 
-    def do_task(self, filepath, url, directory, retry, timeout, module=None):
+    def do_task(self, filepath, url, directory, retry, timeout, follow_redirects=False, module=None):
         with closing(self.session.get('%s/%s' % (url, filepath),
-                                      allow_redirects=False,
+                                      allow_redirects=follow_redirects,
                                       stream=True,
                                       timeout=timeout,
                                       headers={"User-Agent": USER_AGENT})) as response:
@@ -202,9 +204,9 @@ def do_task(self, filepath, url, directory, retry, timeout, module=None):
 class RecursiveDownloadWorker(DownloadWorker):
     ''' Download a directory recursively '''
 
-    def do_task(self, filepath, url, directory, retry, timeout):
+    def do_task(self, filepath, url, directory, retry, timeout, follow_redirects=False):
         with closing(self.session.get('%s/%s' % (url, filepath),
-                                      allow_redirects=False,
+                                      allow_redirects=follow_redirects,
                                       stream=True,
                                       timeout=timeout,
                                       headers={"User-Agent": USER_AGENT})) as response:
@@ -237,9 +239,9 @@ def do_task(self, filepath, url, directory, retry, timeout):
 class FindRefsWorker(DownloadWorker):
     ''' Find refs/ '''
 
-    def do_task(self, filepath, url, directory, retry, timeout, module):
+    def do_task(self, filepath, url, directory, retry, timeout, follow_redirects=False, module=None):
         response = self.session.get('%s/%s' % (url, filepath),
-                                    allow_redirects=False,
+                                    allow_redirects=follow_redirects,
                                     timeout=timeout,
                                     headers={"User-Agent": USER_AGENT})
         printf('[-] Fetching %s/%s [%d]\n', url, filepath, response.status_code)
@@ -271,11 +273,11 @@ def do_task(self, filepath, url, directory, retry, timeout, module):
 class FindObjectsWorker(DownloadWorker):
     ''' Find objects '''
 
-    def do_task(self, obj, url, directory, retry, timeout, module):
+    def do_task(self, obj, url, directory, retry, timeout, follow_redirects, module):
         # module = ".git" if not url.endswith("/modules") else ""
         filepath = '%s/objects/%s/%s' % (self.module, obj[:2], obj[2:])
         response = self.session.get('%s/%s' % (url, filepath),
-                                    allow_redirects=False,
+                                    allow_redirects=follow_redirects,
                                     timeout=timeout,
                                     headers={"User-Agent": USER_AGENT})
         printf('[-] Fetching %s/%s [%d]\n', url, filepath, response.status_code)
@@ -295,7 +297,7 @@ def do_task(self, obj, url, directory, retry, timeout, module):
         return get_referenced_sha1(obj_file)
 
 
-def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
+def fetch_git(url, directory, jobs, retry, timeout, follow_redirects, module=".git"):
     ''' Dump a git repository into the output directory '''
 
     assert os.path.isdir(directory), '%s is not a directory' % directory
@@ -320,7 +322,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
 
     # check for /.git/HEAD
     printf('[-] Testing %s/%s/HEAD ', url, module)
-    response = requests.get('%s/%s/HEAD' % (url, module), verify=False, allow_redirects=False, headers={"User-Agent": USER_AGENT})
+    response = requests.get('%s/%s/HEAD' % (url, module), verify=False, allow_redirects=follow_redirects, headers={"User-Agent": USER_AGENT})
     printf('[%d]\n', response.status_code)
 
     if response.status_code != 200:
@@ -332,15 +334,15 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
 
     # check for directory listing
    printf('[-] Testing %s/%s/ ', url, module)
-    response = requests.get('%s/%s/' % (url, module), verify=False, allow_redirects=False, headers={"User-Agent": USER_AGENT})
+    response = requests.get('%s/%s/' % (url, module), verify=False, allow_redirects=follow_redirects, headers={"User-Agent": USER_AGENT})
     printf('[%d]\n', response.status_code)
 
     if response.status_code == 200 and is_html(response) and 'HEAD' in get_indexed_files(response):
         printf('[-] Fetching .git recursively\n')
         process_tasks(['.git/', '.gitignore'],
                       RecursiveDownloadWorker,
                       jobs,
-                      args=(url, directory, retry, timeout))
+                      args=(url, directory, retry, timeout, follow_redirects))
 
         printf('[-] Running git checkout .\n')
         os.chdir(directory)
@@ -378,7 +380,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
     process_tasks(tasks,
                   DownloadWorker,
                   jobs,
-                  args=(url, directory, retry, timeout, module))
+                  args=(url, directory, retry, timeout, follow_redirects, module))
 
     if module == ".git":
         modules_path = os.path.join(directory, '.gitmodules')
@@ -392,7 +394,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
             printf("[-] Fetching module: %s\n", module_name)
             # os.makedirs(os.path.abspath(module_dir))
             module_url = url + "/.git/modules"
-            fetch_git(module_url, module_dir, jobs, retry, timeout, module=module_name)
+            fetch_git(module_url, module_dir, jobs, retry, timeout, follow_redirects, module=module_name)
             printf("[+] Done iterating module\n")
 
     # find refs
@@ -420,7 +422,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
     process_tasks(tasks,
                   FindRefsWorker,
                   jobs,
-                  args=(url, directory, retry, timeout, module))
+                  args=(url, directory, retry, timeout, follow_redirects, module))
 
     # find packs
     printf('[-] Finding packs\n')
@@ -439,7 +441,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
     process_tasks(tasks,
                   DownloadWorker,
                   jobs,
-                  args=(url, directory, retry, timeout))
+                  args=(url, directory, retry, timeout, follow_redirects))
 
     # find objects
     printf('[-] Finding objects\n')
@@ -477,8 +479,12 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
     if os.path.exists(index_path):
         index = dulwich.index.Index(index_path)
 
-        for entry in index.iterblobs():
-            objs.add(entry[1].decode())
+        # index.iteritems()
+        for entry in index.iteritems():
+            if isinstance(entry[1], dulwich.index.IndexEntry):
+                objs.add(entry[1].sha.decode())
+            elif hasattr(entry[1], "decode"):
+                objs.add(entry[1].decode())
 
     # use packs to find more objects to fetch, and objects that are packed
     pack_file_dir = os.path.join(directory, module, 'objects', 'pack')
@@ -500,7 +506,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
     process_tasks(objs,
                   FindObjectsWorker,
                   jobs,
-                  args=(url, directory, retry, timeout, module),
+                  args=(url, directory, retry, timeout, follow_redirects, module),
                   tasks_done=packed_objs)
 
     # git checkout
@@ -529,6 +535,9 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
                         help='number of request attempts before giving up')
     parser.add_argument('-t', '--timeout', type=int, default=3,
                         help='maximum time in seconds before giving up')
+    parser.add_argument('-L', '--follow-redirects', default=False,
+                        dest='follow_redirects', action="store_true",
+                        help='follow redirects')
     args = parser.parse_args()
 
     # jobs
@@ -576,7 +585,7 @@ def fetch_git(url, directory, jobs, retry, timeout, module=".git"):
 
     # fetch everything
     path = os.path.realpath(args.directory)
-    code = fetch_git(args.url, args.directory, args.jobs, args.retry, args.timeout)
+    code = fetch_git(args.url, args.directory, args.jobs, args.retry, args.timeout, args.follow_redirects)
     if not os.listdir(path):
         os.rmdir(path)
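As a quick illustration of what the new -L/--follow-redirects option changes (a minimal sketch, not part of the patch; the target URL is hypothetical): requests returns a 30x answer as-is when allow_redirects=False, and walks the redirect chain when it is True.

    import requests

    url = 'http://example.com/.git/HEAD'  # hypothetical target, for illustration only

    # Old behaviour (allow_redirects=False): a 301/302 is returned unchanged,
    # so the dumper sees a non-200 status and treats the file as missing.
    resp = requests.get(url, allow_redirects=False)
    print(resp.status_code, resp.headers.get('Location'))

    # New behaviour with -L (allow_redirects=True): requests follows the
    # redirect chain and returns the final response; intermediate hops are
    # kept in resp.history.
    resp = requests.get(url, allow_redirects=True)
    print(resp.status_code, [r.status_code for r in resp.history])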