Various adjustments and bug fixes
Fix: force integers to strs in Popen arguments
Refactored verify_file out of unlink_worker
If a file already exists on S3 and is identical, don't re-upload
Add some delays, etc, to allow things to settle
Put a STOP in the gpg_queue for each process
parent 7bda017245
commit 871613a6ea
1 changed file with 68 additions and 48 deletions
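The last item in the commit message ("Put a STOP in the gpg_queue for each process") is the standard multiprocessing sentinel pattern: each worker loops with iter(queue.get, 'STOP') and exits when it pulls a sentinel, so the producer must enqueue one 'STOP' per worker process. A minimal sketch of that pattern, using hypothetical names (worker, q) rather than this script's own functions:

    from multiprocessing import Process, Queue, cpu_count

    def worker(in_q):
        # iter(get, 'STOP') yields items until the sentinel arrives, so each
        # process exits after consuming exactly one 'STOP'.
        for item in iter(in_q.get, 'STOP'):
            print('processing %s' % item)

    if __name__ == '__main__':
        q = Queue()
        procs = [Process(target=worker, args=(q,)) for _ in range(cpu_count())]
        for p in procs:
            p.start()
        for item in ['file1', 'file2', 'file3']:
            q.put(item)
        for _ in procs:  # one sentinel per worker process
            q.put('STOP')
        for p in procs:
            p.join()

A single sentinel would stop only the first worker to dequeue it and leave the rest blocked on get(), which is why the diff below replaces the lone gpg_queue.put('STOP') with a loop over range(process_count).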
@@ -92,17 +92,34 @@ def open_s3(accesskey, sharedkey, host):
     return bucket
 
 def handle_progress(transmitted, pending):
-    logger.debug('send_file: %i of %i bytes transmitted (%.2f%%)' % (transmitted, pending, (transmitted/float(pending))*100))
+    logger.debug("send_file: %i of %i bytes transmitted (%.2f%%)", transmitted, pending, (transmitted/float(pending))*100)
 
+def verify_file(bucket, filename):
+    "Returns True if the file size and md5sum match, False otherwise"
+    basefilename = os.path.basename(filename)
+    key = bucket.get_key(basefilename)
+    stat = os.stat(filename)
+    if key:
+        if key.size == stat[6]:
+            fp = open(filename)
+            local_md5 = hashlib.md5(fp.read())
+            fp.close()
+            if '"%s"' % local_md5.hexdigest() == key.etag:
+                return True
+    return False
+
 def send_file(bucket, filename):
     basefilename = os.path.basename(filename)
 
     k = Key(bucket)
     k.key = basefilename
-    if k.exists():
-        logger.warning('send_file: %s already exists on S3, overwriting' % basefilename)
-    k.set_contents_from_filename(filename, cb=handle_progress, reduced_redundancy=True)
-
+    if k.exists():
+        if verify_file(bucket, filename):
+            logger.warning("send_file: %s already exists and is identical, not overwriting", basefilename)
+            return k
+        logger.warning("send_file: %s already exists on S3, overwriting", basefilename)
+
+    k.set_contents_from_filename(filename, cb=handle_progress, reduced_redundancy=True)
     return k
 
 def encryption_worker(in_q, out_q):
@@ -117,6 +134,7 @@ def encryption_worker(in_q, out_q):
         out_q.put(result)
         logger.debug("encryption_worker: encrypted %s in %i seconds", filename, time.time()-cryptstart_time)
     logger.debug("encryption_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time()-start_time)
+    time.sleep(5) # settle
 
 def sending_worker(in_q, out_q, accesskey, sharedkey, host):
     "Sends things from the in_q using the send_file method"
@@ -145,7 +163,7 @@ def sending_worker(in_q, out_q, accesskey, sharedkey, host):
 
         if not done:
             # trip out
-            logger.error('sending_worker: could not upload %s in %i retries')
+            logger.error('sending_worker: could not upload %s in %i retries', filename, retry_count)
         else:
             size = os.path.getsize(filename)
             sending_seconds = time.time() - sending_start
@@ -154,7 +172,7 @@ def sending_worker(in_q, out_q, accesskey, sharedkey, host):
             out_q.put(filename)
 
     logger.debug("sending_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time() - start_time)
-    out_q.put('STOP')
+    time.sleep(5) # settle
 
 def unlink_worker(in_q, accesskey, sharedkey, host):
     start_time = time.time()
@@ -162,25 +180,25 @@ def unlink_worker(in_q, accesskey, sharedkey, host):
     bucket = open_s3(accesskey, sharedkey, host)
     for filename in iter(in_q.get, 'STOP'):
         counter += 1
-        basefilename = os.path.basename(filename)
-        key = bucket.get_key(basefilename)
-        stat = os.stat(filename)
-        if key:
-            if key.size == stat[6]:
-                fp = open(filename)
-                local_md5 = hashlib.md5(fp.read())
-                fp.close()
-                if '"%s"' % local_md5.hexdigest() == key.etag:
-                    logger.debug("unlink_worker: deleting %s", basefilename)
+        retry_count = 0
+        max_retries = 3
+        done = False
+        while retry_count <= max_retries and not done:
+            if verify_file(bucket, filename):
+                logger.debug("unlink_worker: deleting %s", filename)
                 os.unlink(filename)
+                done = True
             else:
-                logger.error("unlink_worker: md5sum for %s did not match: %s != %s", basefilename, '"%s"' % local_md5.hexdigest(), key.etag)
-            else:
-                logger.error("unlink_worker: size mismatch for %s: %i != %i", basefilename, stat[6], key.size)
-        else:
-            logger.error("unlink_worker: key does not exist: %s", basefilename)
+                retry_count += 1
+                sleeptime = 2**retry_count
+                logger.error("unlink_worker: verify_file on %s returned false, retrying in %i seconds (%i/%i)", filename, sleeptime, retry_count, max_retries)
+                time.sleep(sleeptime)
+
+        if not done:
+            logger.error("unlink_worker: could not verify remote %s in %i retries", filename, retry_count)
 
     logger.debug("unlink_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time() - start_time)
+    time.sleep(5) # settle
 
 if __name__ == '__main__':
     # Read in arguments, verify that they match the BackupPC standard exactly
@@ -207,8 +225,16 @@ if __name__ == '__main__':
 
     beginning = time.time()
 
-    if share == '*':
-        share = '\*'
+    # Create queues for workers
+    gpg_queue = Queue()
+    send_queue = Queue()
+    unlink_queue = Queue()
+
+    queues = {
+        'gpg_queue': gpg_queue,
+        'send_queue': send_queue,
+        'unlink_queue': unlink_queue,
+    }
 
     # Is there already evidence of this having been done before?
     if glob.glob('%s/%s.*.tar.*' % (outLoc, host)):
@@ -239,7 +265,7 @@ if __name__ == '__main__':
 
     tarcmd = [tarCreate, '-t']
     tarcmd.extend(['-h', host])
-    tarcmd.extend(['-n', bkupNum])
+    tarcmd.extend(['-n', str(bkupNum)])
    tarcmd.extend(['-s', share])
     tarcmd.extend(['.'])
 
@@ -249,7 +275,7 @@ if __name__ == '__main__':
     if splitSize > 0 and is_exe(splitPath):
         filehead = outfile + '.'
         fileglob = filehead + '*'
-        splitcmd = [splitPath, '-b', splitSize, '-', filehead]
+        splitcmd = [splitPath, '-b', str(splitSize), '-', filehead]
         mesg += ', split into %i byte chunks' % splitSize
     else:
         fileglob = outfile
@@ -261,31 +287,21 @@ if __name__ == '__main__':
     logger.info("main: %s", mesg)
     logger.debug("main: executing tarcmd: %s > %s", ' '.join(tarcmd), outfile)
 
-    outfp = open(outfile, 'wb')
-    proc = Popen(tarcmd, preexec_fn=lambda : os.nice(10), stdout=outfile)
+    tarfp = open(outfile, 'wb')
+    proc = Popen(tarcmd, preexec_fn=lambda : os.nice(10), stdout=tarfp)
     proc.communicate()
-    outfp.close()
+    tarfp.close()
 
     if splitcmd:
         logger.debug("main: executing splitcmd: %s", ' '.join(splitcmd))
-        infp = open(outfile, 'rb')
-        proc = Popen(splitcmd, preexec_fn=lambda : os.nice(10), stdin=infp)
+        tarfp = open(outfile, 'rb')
+        proc = Popen(splitcmd, preexec_fn=lambda : os.nice(10), stdin=tarfp)
         proc.communicate()
-        infp.close()
+        tarfp.close()
+        os.unlink(outfile)
 
     logger.info("main: dumped %i files from %s #%i" % (len(glob.glob(fileglob)), host, bkupNum))
 
-    # Create queues for handling encryption and file transfers
-    gpg_queue = Queue()
-    send_queue = Queue()
-    unlink_queue = Queue()
-
-    queues = {
-        'gpg_queue': gpg_queue,
-        'send_queue': send_queue,
-        'unlink_queue': unlink_queue,
-    }
-
     # Pre-run to check for artifacts
     for i in glob.glob(fileglob):
         if not i.endswith('.gpg') and os.path.exists(i + '.gpg'):
@@ -303,10 +319,6 @@ if __name__ == '__main__':
             logger.debug("main: adding %s to send_queue", i)
             send_queue.put(i)
 
-    # Put a STOP command at the end of the GPG queue.
-    gpg_queue.put('STOP')
-    gpg_queue_closed = True
-
     # Start some handlers, wait until everything is done
     try:
         process_count = cpu_count()
@@ -331,6 +343,11 @@
         send_queue_closed = False
         unlink_queue_closed = False
 
+        # Put STOP command(s) at the end of the GPG queue.
+        gpg_queue_closed = True
+        for i in range(process_count):
+            gpg_queue.put('STOP')
+
         for i in procs:
             i.join()
             crypto_running = 0
@@ -348,9 +365,12 @@
             logger.debug("main: process terminated: %s", i.name)
 
         for qname, q in queues.items():
+            time.sleep(5) # settle
             if not q.empty():
                 logger.critical("main: queue %s not empty!", qname)
                 raise Exception("queue not empty: %s" % qname)
+            else:
+                logger.debug("main: queue %s is empty", qname)
 
         logger.debug("main: finalizing backup")
 