|
| 1 | +#!/usr/bin/env python |
| 2 | +# |
| 3 | +# https://github.com/git/git/blob/master/Documentation/technical/index-format.txt |
| 4 | +# |
| 5 | + |
| 6 | +import binascii |
| 7 | +import collections |
| 8 | +import mmap |
| 9 | +import struct |
| 10 | +import sys |
| 11 | + |
| 12 | + |
def check(boolean, message):
    """Abort the program unless *boolean* is truthy.

    When *boolean* is falsy, ``"error: " + message`` is printed to standard
    output and the process exits with status 1.  Otherwise nothing happens
    and ``None`` is returned.

    Args:
        boolean: Condition that is expected to hold.
        message: Text describing the failure; must be a string.

    Raises:
        SystemExit: With exit code 1 when *boolean* is falsy.
    """
    if not boolean:
        # A parenthesised single argument is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("error: " + message)
        sys.exit(1)
| 18 | + |
| 19 | + |
def parse(filename, pretty=True):
    """Parse a Git index file (version 2 or 3) and yield its contents.

    This is a generator.  The first item yielded is an ``OrderedDict``
    describing the header (keys ``signature``, ``version``, ``entries``).
    Every following item is an ``OrderedDict`` describing one index entry,
    in the order the entries appear in the file.

    Args:
        filename: Path of the index file to read.
        pretty: When true, each seconds/nanoseconds pair is merged into a
            single float (``ctime`` / ``mtime``) and ``mode`` is rendered as
            a zero-padded six-digit octal string.  When false, the raw
            integers are reported (``ctime_seconds``, ``ctime_nanoseconds``,
            ``mtime_seconds``, ``mtime_nanoseconds``, integer ``mode``).

    Raises:
        SystemExit: Through ``check()`` when the signature is not ``DIRC``,
            the version is not 2 or 3, an entry name is not NUL-terminated,
            or the entry padding is missing or contains non-NUL bytes.
        struct.error: When the file ends inside a fixed-size numeric field.
    """
    with open(filename, "rb") as o:
        f = mmap.mmap(o.fileno(), 0, access=mmap.ACCESS_READ)

        def read(fmt):
            # "All binary numbers are in network byte order."
            # Hence "!" = network order, big endian
            fmt = "!" + fmt
            data = f.read(struct.calcsize(fmt))
            return struct.unpack(fmt, data)[0]

        def read_time(entry, key):
            # Timestamps are stored as two 32-bit fields: seconds, then the
            # nanosecond fraction.
            seconds = read("I")
            nanoseconds = read("I")
            if pretty:
                # Divide by a float so the fraction survives on Python 2,
                # where int / int truncates to 0.
                entry[key] = seconds + nanoseconds / 1e9
            else:
                entry[key + "_seconds"] = seconds
                entry[key + "_nanoseconds"] = nanoseconds

        # try/finally guarantees the mapping is released even when the
        # consumer stops iterating early or check() exits the program.
        try:
            index = collections.OrderedDict()

            # 4-byte signature, b"DIRC".  Compare the raw bytes first so a
            # non-ASCII header is reported as a bad file rather than
            # raising UnicodeDecodeError.
            signature = f.read(4)
            check(signature == b"DIRC", "Not a Git index file")
            index["signature"] = signature.decode("ascii")

            # 4-byte version number
            index["version"] = read("I")
            check(index["version"] in {2, 3},
                  "Unsupported version: %s" % index["version"])

            # 32-bit number of index entries, i.e. 4-byte
            index["entries"] = read("I")

            yield index

            for n in range(index["entries"]):
                entry = collections.OrderedDict()

                entry["entry"] = n + 1

                read_time(entry, "ctime")
                read_time(entry, "mtime")

                entry["dev"] = read("I")
                entry["ino"] = read("I")

                # 4-bit object type, 3-bit unused, 9-bit unix permission
                entry["mode"] = read("I")
                if pretty:
                    entry["mode"] = "%06o" % entry["mode"]

                entry["uid"] = read("I")
                entry["gid"] = read("I")
                entry["size"] = read("I")

                entry["sha1"] = binascii.hexlify(f.read(20)).decode("ascii")
                entry["flags"] = flags = read("H")

                # 1-bit assume-valid
                entry["assume-valid"] = bool(flags & 0x8000)
                # 1-bit extended, must be 0 in version 2
                entry["extended"] = bool(flags & 0x4000)
                # 2-bit stage, reported as a (high bit, low bit) pair
                entry["stage"] = bool(flags & 0x2000), bool(flags & 0x1000)
                # 12-bit name length, if the length is less than 0xFFF
                # (else, 0xFFF)
                namelen = flags & 0xFFF

                # 62 bytes so far
                entrylen = 62

                if entry["extended"] and (index["version"] == 3):
                    entry["extra-flags"] = extra = read("H")
                    # 1-bit reserved
                    entry["reserved"] = bool(extra & 0x8000)
                    # 1-bit skip-worktree
                    entry["skip-worktree"] = bool(extra & 0x4000)
                    # 1-bit intent-to-add
                    entry["intent-to-add"] = bool(extra & 0x2000)
                    # remaining 13 bits are unused
                    entrylen += 2

                if namelen < 0xFFF:
                    raw_name = f.read(namelen)
                else:
                    # The length field is saturated: the real name runs up
                    # to the first NUL.  The NUL itself is left unread; it
                    # is the first byte of the padding handled below.
                    start = f.tell()
                    end = f.find(b"\x00", start)
                    check(end != -1, "unterminated entry name")
                    raw_name = f.read(end - start)
                entry["name"] = raw_name.decode("utf-8", "replace")
                entrylen += len(raw_name)

                # 1-8 NUL bytes pad the entry to a multiple of eight bytes
                # while keeping the name NUL-terminated.
                padlen = 8 - (entrylen % 8)
                nuls = f.read(padlen)
                check(nuls == b"\x00" * padlen, "padding contained non-NUL")

                yield entry
        finally:
            f.close()
| 131 | + |
| 132 | + |
| 133 | + |
| 134 | + |
| 135 | + |
0 commit comments