forked from corkami/collisions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf.py
143 lines (102 loc) · 3.13 KB
/
pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
# script to craft MD5 collisions of 2 PDFs via mutool and UniColl
# Ange Albertini 2018
# uses mutool from https://mupdf.com/index.html
import hashlib
import os
import re
import subprocess
import sys
def EnclosedString(d, starts, ends):
    """Return the part of *d* between the first *starts* and the next *ends*.

    The search for *ends* begins right after the first occurrence of
    *starts*; neither delimiter is included in the result.  *d*, *starts*
    and *ends* must all be of the same string type (all text or all bytes).

    Raises:
        ValueError: if *starts* does not occur in *d*, or if *ends* does not
            occur after it.  (Without this check the -1 returned by
            ``find`` would silently be used as a slice bound and produce a
            wrong substring.)
    """
    begin = d.find(starts)
    if begin < 0:
        raise ValueError("start marker %r not found" % (starts,))
    begin += len(starts)

    end = d.find(ends, begin)
    if end < 0:
        raise ValueError(
            "end marker %r not found after %r" % (ends, starts))

    return d[begin:end]
def getCount(d):
    """Return the integer that follows the first ``/Count`` key in *d*.

    *d* is the raw content of a PDF file, given either as text or as bytes.
    Only the digits directly after ``/Count`` and its separating whitespace
    are read, so the value is found whatever token comes next
    (``/Kids``, ``>>``, a newline, ...).

    Raises:
        ValueError: if *d* contains no ``/Count <digits>`` entry.
    """
    # The pattern has to be of the same type as the data being searched.
    if isinstance(d, bytes):
        pattern = br"/Count\s+(\d+)"
    else:
        pattern = r"/Count\s+(\d+)"

    match = re.search(pattern, d)
    if match is None:
        raise ValueError("no /Count entry found")
    return int(match.group(1))
def procreate(l):  # :p
    """Turn object numbers into a space-separated list of PDF references.

    Each item of *l* gets the suffix `` 0 R`` (generation 0), e.g.
    ``["5", "6"]`` becomes ``"5 0 R 6 0 R"``.  Items may be text or bytes;
    the result has the same type as the items.

    An empty *l* yields an empty text string rather than a lone `` 0 R``
    with no object number in front of it.
    """
    numbers = list(l)
    if not numbers:
        return ""

    # Pick separator/suffix literals matching the type of the items.
    if isinstance(numbers[0], bytes):
        separator, suffix = b" ", b" 0 R"
    else:
        separator, suffix = " ", " 0 R"

    return separator.join(number + suffix for number in numbers)
# Scratch files written while the collision is assembled; always removed
# before the script ends, even when a step fails.
_TEMP_FILES = (
    "first.pdf",
    "second.pdf",
    "merged.pdf",
    "hacked.pdf",
    "cleaned.pdf",
)

# Files read from the current directory in addition to the two arguments.
# pdf1.bin / pdf2.bin are the two prefixes glued in front of the cleaned
# document (presumably the precomputed UniColl blocks - confirm).
_REQUIRED_FILES = ("dummy.pdf", "pdf1.bin", "pdf2.bin")

# Head of the crafted document: a catalog plus two page trees, one per
# input file.  The COUNT*/KIDS* fields are filled in by _craft().
_TEMPLATE = """%%PDF-1.4
1 0 obj
<<
/Type /Catalog
%% for alignments (comments will be removed by merging or cleaning)
/MD5_is__ /REALLY_dead_now__
/Pages 2 0 R
%% to make sure we don't get rid of the other pages when garbage collecting
/Fakes 3 0 R
%% placeholder for UniColl collision blocks
/0123456789ABCDEF0123456789ABCDEF012
/0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0
>>
endobj
2 0 obj
<</Type/Pages/Count %(COUNT2)i/Kids[%(KIDS2)s]>>
endobj
3 0 obj
<</Type/Pages/Count %(COUNT1)i/Kids[%(KIDS1)s]>>
endobj
4 0 obj %% overwritten - was a fake page to fool merging
<< >>
endobj
"""


def _mutool(*args):
    """Run ``mutool`` with *args* without going through a shell.

    Passing the arguments as a list keeps file names containing spaces or
    shell metacharacters intact.  The exit status is deliberately ignored:
    the ``clean`` step is expected to report errors.
    """
    # Keep our own messages ahead of mutool's when stdout is buffered.
    sys.stdout.flush()
    subprocess.call(["mutool"] + list(args))


def _read_bytes(path):
    """Return the raw content of the file at *path*."""
    with open(path, "rb") as f:
        return f.read()


def _read_text(path):
    """Return the content of *path* as text, one character per byte.

    latin-1 maps every byte value to the code point of the same number, so
    encoding the result back with latin-1 restores the exact original bytes.
    """
    return _read_bytes(path).decode("latin-1")


def _craft(path1, path2):
    """Write collision1.pdf / collision2.pdf and return their contents.

    Returns a ``(file1, file2)`` pair of byte strings.  The scratch files
    listed in _TEMP_FILES are left for the caller to delete.
    """
    _mutool("merge", "-o", "first.pdf", path1)
    _mutool("merge", "-o", "second.pdf", path2)
    _mutool("merge", "-o", "merged.pdf", "dummy.pdf", path1, path2)

    d1 = _read_text("first.pdf")
    d2 = _read_text("second.pdf")
    dm = _read_text("merged.pdf")

    count1 = getCount(d1)
    count2 = getCount(d2)

    kids = EnclosedString(dm, "/Kids[", "]")
    # we skip the first dummy, and the last " 0 R" string
    pages = kids[:-4].split(" 0 R ")[1:]

    header = _TEMPLATE % {
        "COUNT1": count1,
        "COUNT2": count2,
        "KIDS1": procreate(pages[:count1]),
        "KIDS2": procreate(pages[count1:]),
    }

    objects_start = dm.find("5 0 obj")
    if objects_start < 0:
        raise ValueError('object "5 0 obj" not found in merged.pdf')
    # adjust parents for the first set of pages
    objects = dm[objects_start:].replace(
        "/Parent 2 0 R", "/Parent 3 0 R", count1)

    with open("hacked.pdf", "wb") as f:
        f.write((header + objects).encode("latin-1"))

    # let's adjust offsets - -g to get rid of object 4 by garbage collecting
    # (yes, errors will appear because we modified objects without
    # adjusting XREF)
    print("")
    print("KEEP CALM and IGNORE THE NEXT ERRORS")
    _mutool("clean", "-gggg", "hacked.pdf", "cleaned.pdf")

    # some mutool versions do different stuff :(
    cleaned = _read_bytes("cleaned.pdf").replace(
        b" 65536 f \n0000000016 00000 n \n",
        b" 65536 f \n0000000018 00000 n \n",
        1)

    # Both outputs share everything from offset 192 on; only the prefix
    # placed in front of it differs.
    shared_tail = cleaned[192:]
    file1 = _read_bytes("pdf1.bin") + b"\n" + shared_tail
    file2 = _read_bytes("pdf2.bin") + b"\n" + shared_tail

    with open("collision1.pdf", "wb") as f:
        f.write(file1)
    with open("collision2.pdf", "wb") as f:
        f.write(file2)

    return file1, file2


def main(argv):
    """Craft two PDFs with the same MD5 from the two files named in *argv*.

    *argv* has the layout of ``sys.argv``: program name, first PDF, second
    PDF (anything after that is ignored).  Prints the usage text and exits
    when fewer than two PDFs are given; exits with an error message when a
    required input file is missing or when the resulting files do not share
    the same MD5.
    """
    if len(argv) < 3:
        print("PDF MD5 collider")
        print("Usage: pdf.py <file1.pdf> <file2.pdf>")
        # No arguments at all is a plain request for help; a single
        # argument is a usage error.
        sys.exit(0 if len(argv) == 1 else 1)

    missing = [name for name in _REQUIRED_FILES if not os.path.isfile(name)]
    if missing:
        sys.exit("Missing file(s) in the current directory: %s"
                 % ", ".join(missing))

    try:
        file1, file2 = _craft(argv[1], argv[2])
    finally:
        for name in _TEMP_FILES:
            if os.path.exists(name):
                os.remove(name)

    # An explicit check instead of ``assert`` so it still runs under -O.
    md5 = hashlib.md5(file1).hexdigest()
    if md5 != hashlib.md5(file2).hexdigest():
        sys.exit("ERROR: collision1.pdf and collision2.pdf "
                 "have different MD5 hashes")

    # to prove the files should be 100% valid
    print("")
    _mutool("info", "-X", "collision1.pdf")
    print("")
    print("")
    _mutool("info", "-X", "collision2.pdf")

    print("")
    print("MD5: %s" % md5)
    print("Success!")


if __name__ == "__main__":
    main(sys.argv)