-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpdf-hunter.py
executable file
·94 lines (71 loc) · 2.26 KB
/
pdf-hunter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
# Created by @catalyst256/[email protected] - May 2013
# Allows for recreation of pdf files from pcap files
# Usage is ./pdf-hunter.py <pcap file> <file location>
# e.g. ./pdf-hunter.py pdftest.pcap /tmp/out.pdf
import os, logging, sys, hashlib
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
from scapy.all import *
# Byte-string markers: the packet payloads and the files handled below are
# binary data, and b'' literals compare correctly against them on both
# Python 2 (str) and Python 3 (bytes).
ARTIFACT = b'Content-Type: application/pdf'
PDF_START = b'%PDF-'
PDF_END = b'%%EOF'
# NOTE(review): fixed scratch path kept from the original script; two
# concurrent runs would overwrite each other's scratch data.
TMPFILE = '/tmp/tmp.pdf'


def find_pdf_ack(pkts):
    """Return the TCP ACK number that identifies the PDF transfer.

    Scans every packet that has both a TCP layer and a raw payload and looks
    for the PDF Content-Type header text in the payload. When several packets
    match, the ACK of the last one is returned. Returns None when no packet
    matches.
    """
    ack = None
    for pkt in pkts:
        # Requiring TCP here avoids dereferencing a missing layer on
        # non-TCP packets that happen to carry the marker text.
        if not (pkt.haslayer(TCP) and pkt.haslayer(Raw)):
            continue
        if ARTIFACT in pkt.getlayer(Raw).load:
            ack = pkt.getlayer(TCP).ack
    return ack


def collect_stream(pkts, ack):
    """Join the raw payloads of every packet tied to *ack*.

    A packet is selected when it has TCP and raw layers and its TCP ack or
    seq number equals *ack*. Payloads are concatenated in capture order; no
    reordering or de-duplication is attempted.
    """
    chunks = []
    for pkt in pkts:
        if not (pkt.haslayer(TCP) and pkt.haslayer(Raw)):
            continue
        tcp = pkt.getlayer(TCP)
        if tcp.ack == ack or tcp.seq == ack:
            chunks.append(pkt.getlayer(Raw).load)
    return b''.join(chunks)


def trim_to_pdf(lines):
    """Return the lines spanning the PDF document, or None if not found.

    The result starts at the FIRST line containing the start marker and ends
    with the LAST line containing the end marker (inclusive), so everything
    before the PDF header and after the final end marker is dropped. The
    first start marker is used so that a later occurrence of the marker
    inside the document cannot cut off its beginning; the last end marker is
    used so that a document containing several end markers is kept whole.

    Returns None when either marker is missing or when no end marker occurs
    at or after the start line.
    """
    first = None
    last = None
    for index, line in enumerate(lines):
        if first is None and PDF_START in line:
            first = index
        if PDF_END in line:
            last = index
    if first is None or last is None or last < first:
        return None
    return lines[first:last + 1]


def md5_of_file(path):
    """Return the hexadecimal MD5 digest of the file at *path*."""
    with open(path, 'rb') as handle:
        return hashlib.md5(handle.read()).hexdigest()


def main():
    """Rebuild a PDF from a pcap file: argv[1] is the pcap, argv[2] the output.

    Exits with status 1 on bad usage, when no PDF transfer is found in the
    capture, or when the PDF markers cannot be located in the captured data.
    """
    if len(sys.argv) != 3:
        print('Usage is ./pdf-hunter.py <pcap file> <file location>')
        sys.exit(1)

    pkts = rdpcap(sys.argv[1])
    pdffile = sys.argv[2]
    print('[+] Starting the PDF Hunt..')

    # Search through the pcap for a PDF content type and remember its TCP ACK
    ack = find_pdf_ack(pkts)
    if ack is None:
        print('[!] No PDF content type found in ' + str(sys.argv[1]))
        sys.exit(1)

    # Second pass: gather every payload keyed on that ACK
    stream = collect_stream(pkts, ack)

    # Round-trip through the scratch file (as the original script did) to
    # obtain the data as newline-terminated lines; always clean it up.
    try:
        with open(TMPFILE, 'wb') as tmp:
            tmp.write(stream)
        with open(TMPFILE, 'rb') as tmp:
            lines = tmp.readlines()
    finally:
        if os.path.exists(TMPFILE):
            os.remove(TMPFILE)
    print('[-] Temporary file deleted ' + str(TMPFILE))

    # Cut the HTTP headers and any trailing data, keeping only the PDF
    pdf_lines = trim_to_pdf(lines)
    if pdf_lines is None:
        print('[!] Could not locate the PDF start/end markers in the captured data')
        sys.exit(1)

    # The output file is only created once there is something to write, so
    # a failed hunt no longer truncates an existing file at that location.
    with open(pdffile, 'wb') as out:
        out.writelines(pdf_lines)

    # Print the file location + MD5 hash of the retrieved file
    print('[+] File written to: ' + str(pdffile))
    print('[!] File hash is: ' + str(md5_of_file(pdffile)))


if __name__ == '__main__':
    main()