Skip to content

Commit

Permalink
Totally rewrote recovery script to use an entirely different approach
Browse files Browse the repository at this point in the history
based on:

  - transaction iteration/copy

  - scanning for transactions after bad data

This should allow recovery of data when:

  - only data records are damaged and when

  - multiple parts of a file are damaged

The interface has changed to not modify in place.

Other features:

  - Progress indicator

  - Verbose output

  - optional packing

  - index creation
  • Loading branch information
Jim Fulton committed Nov 7, 2001
1 parent 43ff28e commit 2d0d8c0
Showing 1 changed file with 311 additions and 4 deletions.
315 changes: 311 additions & 4 deletions src/ZODB/fsrecover.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,320 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################


"""Simple script for repairing damaged FileStorage files.
Usage: %s [-f] input output
Recover data from a FileStorage data file, skipping over damaged
data. Any damaged data will be lost. This could lead to useless output
if critical data were lost.
Options:
-f
Force output to output file even if it exists
-v level
Set the verbosity level:
0 -- Show progress indicator (default)
1 -- Show transaction times and sizes
2 -- Show transaction times and sizes, and
show object (record) ids, versions, and sizes.
-p
Copy partial transactions. If a data record in the middle of a
transaction is bad, the data up to the bad data are packed. The
output record is marked as packed. If this option is not used,
transactions with any bad data are skipped.
-P t
Pack data to t seconds in the past. Note that if the "-p"
option is used, then t should be 0.
Important note: The ZODB package must be importable. You may need
to adjust the Python path accordingly.
"""

import sys
# Algorithm:
#
# position to start of input
# while 1:
# if end of file: break
# try: copy_transaction
# except:
# scan for transaction
# continue

import sys, os

# When run as a script with fewer than two command-line arguments, show
# the usage text (the module docstring with the script name filled in).
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print(__doc__ % sys.argv[0])

def die(mess=''):
    """Print an error message followed by a blank line, then exit with 1.

    When mess is empty, the message is built from the type and value of
    the exception reported by sys.exc_info().
    """
    if mess:
        text = mess
    else:
        text = "%s: %s" % sys.exc_info()[:2]
    print(text + '\n')
    sys.exit(1)

try:
    import ZODB
except ImportError:
    # ZODB is not importable as-is.  Extend sys.path according to what the
    # current directory contains (first match wins), then retry; a second
    # ImportError is allowed to propagate.
    for marker, entry in (('ZODB', '.'), ('FileStorage.py', '..')):
        if os.path.exists(marker):
            sys.path.append(entry)
            break
    import ZODB


import getopt, ZODB.FileStorage, struct, time
from struct import unpack
from ZODB.utils import t32, p64, U64
from ZODB.TimeStamp import TimeStamp
from cPickle import loads
from ZODB.FileStorage import RecordIterator

class EOF(Exception):
    """Signals that no further transaction can be read from the input."""
class ErrorFound(Exception):
    """Signals that damaged or inconsistent data was found in the input."""

def error(mess, *args):
    """Raise ErrorFound whose message is mess %-formatted with args."""
    text = mess % args
    raise ErrorFound(text)

def read_transaction_header(file, pos, file_size):
    """Read and validate the transaction record starting at offset pos.

    file is a seekable binary file object, pos is the offset of the
    23-byte transaction header and file_size is the total size of the file.

    Returns a (next_pos, transaction) pair.  next_pos is the offset just
    past the redundant 8-byte length that trails the transaction.
    transaction is a RecordIterator over the transaction's data records,
    or None when the transaction has status 'u' (undone) and is skipped.

    Raises EOF when fewer than 23 header bytes are available or when the
    status is 'c', and ErrorFound (through error()) when the lengths or
    the status stored in the record are inconsistent.
    """
    seek = file.seek
    read = file.read

    seek(pos)
    h = read(23)
    if len(h) < 23:
        raise EOF

    # Header layout: 8-byte transaction id, 8-byte transaction length,
    # 1-char status, then the lengths of the user, description and
    # extension fields.  The three lengths use the unsigned 'H' format,
    # so they are never negative and need no sign fix-up.
    tid, stl, status, ul, dl, el = unpack(">8s8scHHH", h)

    tl = U64(stl)

    if status == 'c':
        raise EOF

    # The record is followed by an 8-byte copy of its length; both must
    # fit inside the file.
    if pos + (tl + 8) > file_size:
        error("bad transaction length at %s", pos)

    if status not in ' up':
        error('invalid status, %s, at %s', status, pos)

    header_len = 23 + ul + dl + el
    if tl < header_len:
        error('invalid transaction length, %s, at %s', tl, pos)

    tpos = pos
    tend = tpos + tl

    if status == 'u':
        # Undone transaction: verify the trailing length and skip it.
        seek(tend)
        h = read(8)
        if h != stl:
            error('inconsistent transaction length at %s', pos)
        pos = tend + 8
        return pos, None

    # The file is positioned just past the fixed header, so the variable
    # length fields can be read sequentially.
    pos = tpos + header_len
    user = read(ul)
    description = read(dl)
    if el:
        # NOTE(review): this unpickles bytes taken from the input file;
        # only run the script on files from a trusted source.
        # Best effort: an unreadable extension is replaced by an empty one.
        try:
            e = loads(read(el))
        except:
            e = {}
    else:
        e = {}

    result = RecordIterator(
        tid, status, user, description, e,
        pos, (tend, file, seek, read,
              tpos,
              )
        )

    pos = tend

    # Read the (intentionally redundant) transaction length
    seek(pos)
    h = read(8)
    if h != stl:
        error("redundant transaction length check failed at %s", pos)
    pos = pos + 8

    return pos, result

def scan(file, pos, file_size):
    """Search forward from pos for a plausible end of a transaction.

    The input is read in 8096-byte chunks.  For every '.' found
    (presumably the byte that ends a pickled data record -- confirm), the
    eight bytes that follow are decoded with U64.  When the decoded value
    is smaller than the offset of the chunk being examined, the '.' and
    the length field are taken to close a transaction.

    Returns the offset of the first byte after that 8-byte length field,
    or 0 when the end of the file is reached without finding a candidate.
    The file_size argument is not used.
    """
    chunk_size = 8096
    # A '.' found beyond this index is re-examined at the start of a fresh
    # chunk so that the 8 bytes after it are available.
    reread_after = 8080

    seek = file.seek
    read = file.read
    while 1:
        seek(pos)
        data = read(chunk_size)
        if not data:
            return 0

        s = 0
        while 1:
            l = data.find('.', s)
            if l < 0:
                pos = pos + chunk_size
                break
            if l > reread_after:
                pos = pos + l
                break
            s = l + 1
            if s + 8 > len(data):
                # Only a short (final) chunk can end this close to the
                # '.': no complete length field follows it or any later
                # '.', so there is nothing left to find.
                return 0
            tl = U64(data[s:s+8])
            if tl < pos:
                return pos + s + 8

def iprogress(i):
if i%2: print '.',
else: print (i/2)%10,
sys.stdout.flush()

def progress(p):
    """Print the progress marks for steps 0 through p-1, in order."""
    step = 0
    while step < p:
        iprogress(step)
        step = step + 1

def recover(argv=sys.argv):

try:
opts, (inp, outp) = getopt.getopt(argv[1:], 'fv:pP:')
force = partial = verbose = 0
pack = None
for opt, v in opts:
if opt == '-v': verbose = int(v)
elif opt == '-p': partial=1
elif opt == '-f': force=1
elif opt == '-P': pack=time.time()-float(v)


force = filter(lambda opt: opt[0]=='-f', opts)
partial = filter(lambda opt: opt[0]=='-p', opts)
verbose = filter(lambda opt: opt[0]=='-v', opts)
verbose = verbose and int(verbose[0][1]) or 0
print 'Recovering', inp, 'into', outp
except:
die()
print __doc__ % argv[0]


if os.path.exists(outp) and not force:
die("%s exists" % outp)

file=open(inp, "rb")
seek=file.seek
read=file.read
if read(4) != ZODB.FileStorage.packed_version:
die("input is not a file storage")

seek(0,2)
file_size=file.tell()

ofs=ZODB.FileStorage.FileStorage(outp, create=1)
_ts=None
ok=1
prog1=0
preindex={}; preget=preindex.get # waaaa
undone=0

pos=4
while pos:

try:
npos, transaction = read_transaction_header(file, pos, file_size)
except EOF:
break
except:
print "\n%s: %s\n" % sys.exc_info()[:2]
if not verbose: progress(prog1)
pos = scan(file, pos, file_size)
continue

if transaction is None:
undone = undone + npos - pos
pos=npos
continue
else:
pos=npos

tid=transaction.tid

if _ts is None:
_ts=TimeStamp(tid)
else:
t=TimeStamp(tid)
if t <= _ts:
if ok: print ('Time stamps out of order %s, %s' % (_ts, t))
ok=0
_ts=t.laterThan(_ts)
tid=`_ts`
else:
_ts = t
if not ok:
print ('Time stamps back in order %s' % (t))
ok=1

if verbose:
print 'begin',
if verbose > 1: print
sys.stdout.flush()

ofs.tpc_begin(transaction, tid, transaction.status)

if verbose:
print 'begin', pos, _ts,
if verbose > 1: print
sys.stdout.flush()

nrec=0
try:
for r in transaction:
oid=r.oid
if verbose > 1: print U64(oid), r.version, len(r.data)
pre=preget(oid, None)
s=ofs.store(oid, pre, r.data, r.version, transaction)
preindex[oid]=s
nrec=nrec+1
except:
if partial and nrec:
ofs._status='p'
ofs.tpc_vote(transaction)
ofs.tpc_finish(transaction)
if verbose: print 'partial'
else:
ofs.tpc_abort(transaction)
print "\n%s: %s\n" % sys.exc_info()[:2]
if not verbose: progress(prog1)
pos = scan(file, pos, file_size)
else:
ofs.tpc_vote(transaction)
ofs.tpc_finish(transaction)
if verbose:
print 'finish'
sys.stdout.flush()

if not verbose:
prog = pos * 20l / file_size
while prog > prog1:
prog1 = prog1 + 1
iprogress(prog1)


bad = file_size - undone - ofs._pos

print "\n%s bytes removed during recovery" % bad
if undone:
print "%s bytes of undone transaction data were skipped" % undone

if pack is not None:
print "Packing ..."
from ZODB.referencesf import referencesf
ofs.pack(pack, referencesf)

sys.path.append('.')
ofs.close()


# Run the recovery only when this file is executed as a script.
if __name__ == '__main__':
    recover()

0 comments on commit 2d0d8c0

Please sign in to comment.