e4fd5ca8ac
This is still under some live development, for sure :-) Among other minor (yet significant) fixes, I've added another worker process to handle unlinking. This verifies that the file was transmitted properly (or at least with the right md5sum) before unlinking it.
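In essence, the check is: only delete the local file once its size and md5sum match what S3 reports for the uploaded key. A minimal sketch of that idea follows (condensed from the unlink_worker code below; the helper name and arguments are illustrative, and the md5/ETag comparison only holds for single-part uploads):

import hashlib
import os

def verify_then_unlink(key, filename):
    # key: a boto Key already fetched from the bucket; filename: the local copy.
    if key is not None and key.size == os.path.getsize(filename):
        fp = open(filename, 'rb')
        local_md5 = hashlib.md5(fp.read()).hexdigest()
        fp.close()
        # For single-part uploads, S3 reports the object's md5 as the ETag, wrapped in quotes.
        if '"%s"' % local_md5 == key.etag:
            os.unlink(filename)
            return True
    return False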
366 lines
13 KiB
Python
Executable file
#!/usr/bin/python -W ignore::DeprecationWarning
# A BackupPC script to archive a host's files to Amazon S3.
#
# Point $Conf{ArchiveClientCmd} at me.
# Requires python-boto
#
# Usage: BackupPC_archiveHost tarCreatePath splitPath parPath host bkupNum \
#        compPath fileExt splitSize outLoc parFile share
#
# Create secrets.py such that it has:
#   accesskey = 'amazon aws access key'
#   sharedkey = 'amazon aws shared key'
#   gpgsymmetrickey = 'gpg symmetric key -- make it good, but do not lose it'

import glob
import hashlib
import os
import socket
import sys
import time

from multiprocessing import Process, Queue, cpu_count
from subprocess import *

from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto.exception

import logging
import logging.handlers

import secrets

logger = logging.getLogger(__name__)

sysloghandler = logging.handlers.SysLogHandler('/dev/log',
                                               facility=logging.handlers.SysLogHandler.LOG_DAEMON)
syslogformatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
sysloghandler.setFormatter(syslogformatter)
logger.addHandler(sysloghandler)

consolehandler = logging.StreamHandler(sys.stdout)
consoleformatter = logging.Formatter('%(asctime)s: %(levelname)s: %(message)s')
consolehandler.setFormatter(consoleformatter)
logger.addHandler(consolehandler)

logger.setLevel(logging.DEBUG)

def is_exe(fpath):
    return os.path.exists(fpath) and os.access(fpath, os.X_OK)

def encrypt_file(filename, key, compress='/bin/cat'):
    """Encrypt filename with the given symmetric key and return the new filename."""
    compressmap = {'cat': 'none', 'gzip': 'ZLIB', 'bzip2': 'BZIP2'}
    if os.path.basename(compress) in compressmap.keys():
        compress_algo = compressmap[os.path.basename(compress)]
    else:
        compress_algo = 'none'

    cmd = ['/usr/bin/gpg', '--batch', '--no-tty']
    cmd.extend(['--compress-algo', compress_algo])
    cmd.extend(['--output', '%s.gpg' % filename])
    cmd.extend(['--passphrase-fd', '0'])
    cmd.extend(['--symmetric', filename])

    if is_exe(cmd[0]):
        logger.info('Encrypting %s (compression: %s)' % (filename, compress_algo))
        logger.debug(repr(cmd))
    else:
        logger.error('%s is not an executable file!' % cmd[0])

    proc = Popen(cmd, preexec_fn=lambda: os.nice(10), stdin=PIPE, stdout=PIPE)
    proc.communicate(key)

    if os.path.exists(filename + '.gpg'):
        oldfilesize = os.path.getsize(filename)
        newfilesize = os.path.getsize(filename + '.gpg')
        compressed = ((oldfilesize - newfilesize) / float(oldfilesize)) * 100
        logger.info('%s shrunk by %.2f%% (%i -> %i bytes)' % (filename, compressed, oldfilesize, newfilesize))
        os.unlink(filename)
        return filename + '.gpg'
    else:
        logger.error('%s.gpg does not exist' % filename)
        raise Exception('%s.gpg does not exist' % filename)

def open_s3(accesskey, sharedkey, host):
    """Return the S3 bucket for this host, creating it if necessary."""
    conn = S3Connection(accesskey, sharedkey, is_secure=True)
    mybucketname = (accesskey + '-bkup-' + host).lower()
    try:
        bucket = conn.get_bucket(mybucketname)
    except boto.exception.S3ResponseError:
        logger.info('Creating bucket %s' % mybucketname)
        bucket = conn.create_bucket(mybucketname)
    bucket.set_acl('private')
    return bucket

def handle_progress(transmitted, pending):
    logger.info('%i of %i bytes transmitted (%.2f%%)' % (transmitted, pending, (transmitted/float(pending))*100))

def send_file(bucket, filename):
    """Upload filename to the given bucket and return the boto Key."""
    basefilename = os.path.basename(filename)
    logger.info('Uploading %s...' % basefilename)

    k = Key(bucket)
    k.key = basefilename
    if k.exists():
        logger.warning('Duplicate filename %s! I hope that is OK.' % basefilename)
    k.set_contents_from_filename(filename, cb=handle_progress, reduced_redundancy=True)

    return k

def encryption_worker(in_q, out_q):
    "Encrypts things from the in_q, puts them in the out_q"
    start_time = time.time()
    counter = 0
    for filename, gpgkey, comppath in iter(in_q.get, 'STOP'):
        counter += 1
        cryptstart_time = time.time()
        logger.info("Beginning encryption of %s.", filename)
        result = encrypt_file(filename, gpgkey, comppath)
        out_q.put(result)
        logger.debug("encryption_worker: encrypted %s in %i seconds", filename, time.time()-cryptstart_time)
    logger.debug("encryption_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time()-start_time)

def sending_worker(in_q, out_q, accesskey, sharedkey, host):
    "Sends things from the in_q using the send_file method"
    start_time = time.time()
    counter = 0
    for filename in iter(in_q.get, 'STOP'):
        sending_start = time.time()
        counter += 1
        retry_count = 0
        max_retries = 10
        done = False

        while retry_count <= max_retries and not done:
            try:
                logger.info("sending_worker: sending %s", filename)
                bucket = open_s3(accesskey, sharedkey, host)
                key = send_file(bucket, filename)
                key.set_acl('private')
                key.close()
                done = True
            except (boto.exception.S3ResponseError, socket.error), e:
                retry_count += 1
                sleeptime = 2**retry_count
                logger.error('Encountered exception %s, retrying in %i seconds (%i/%i)', e, sleeptime, retry_count, max_retries)
                time.sleep(sleeptime)

        if not done:
            # Give up on this file; it stays on disk so a later run can retry it.
            logger.error('sending_worker: could not upload %s in %i retries', filename, max_retries)
        else:
            size = os.path.getsize(filename)
            sending_seconds = time.time() - sending_start
            bytespersecond = size / sending_seconds
            logger.debug("sending_worker: sent %s in %i seconds at %i bytes/second.", filename, sending_seconds, bytespersecond)
            out_q.put(filename)

    logger.debug("sending_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time() - start_time)
    out_q.put('STOP')

def unlink_worker(in_q, accesskey, sharedkey, host):
    """Verify each uploaded file against S3 (size and md5/ETag) before deleting the local copy."""
    start_time = time.time()
    counter = 0
    bucket = open_s3(accesskey, sharedkey, host)
    for filename in iter(in_q.get, 'STOP'):
        counter += 1
        basefilename = os.path.basename(filename)
        key = bucket.get_key(basefilename)
        stat = os.stat(filename)
        if key:
            if key.size == stat[6]:
                fp = open(filename, 'rb')
                local_md5 = hashlib.md5(fp.read())
                fp.close()
                if '"%s"' % local_md5.hexdigest() == key.etag:
                    logger.debug("unlink_worker: deleting %s", basefilename)
                    os.unlink(filename)
                else:
                    logger.error("unlink_worker: md5sum for %s did not match: %s != %s", basefilename, '"%s"' % local_md5.hexdigest(), key.etag)
            else:
                logger.error("unlink_worker: size mismatch for %s: %i != %i", basefilename, stat[6], key.size)
        else:
            logger.error("unlink_worker: key does not exist: %s", basefilename)

    logger.debug("unlink_worker: queue is empty, terminating after %i items in %i seconds", counter, time.time() - start_time)

if __name__ == '__main__':
    # Read in arguments and verify that they match the BackupPC standard exactly
    if len(sys.argv) != 12:
        sys.stderr.write("Usage: %s tarCreatePath splitPath parPath host bkupNum compPath fileExt splitSize outLoc parFile share\n" % sys.argv[0])
        sys.exit(1)
    else:
        tarCreate = sys.argv[1]
        splitPath = sys.argv[2]
        parPath = sys.argv[3]
        host = sys.argv[4]
        bkupNum = int(sys.argv[5])
        compPath = sys.argv[6]
        fileExt = sys.argv[7]
        splitSize = int(sys.argv[8])
        outLoc = sys.argv[9]
        parfile = sys.argv[10]
        share = sys.argv[11]

    for i in [tarCreate, compPath, splitPath, parPath]:
        if i != '' and not is_exe(i):
            sys.stderr.write('Error: %s is not an executable program\n' % i)
            sys.exit(1)

    beginning = time.time()

    if share == '*':
        share = '\*'

    # Is there already evidence of this having been done before?
    if glob.glob('%s/%s.*.tar.*' % (outLoc, host)):
        logger.info('Found evidence of a prior failed run! Finishing it.')
        somefile = os.path.basename(glob.glob('%s/%s.*.tar.*' % (outLoc, host))[0])
        keyparts = somefile.split('.')
        encrypted = split = tarred = final = False
        if keyparts[-1] == 'gpg':
            keyparts.pop()
        if keyparts[-1] != 'tar' and len(keyparts[-1]) == 2:
            keyparts.pop()
        if keyparts[-1] == 'tar':
            keyparts.pop()

        bkupNum = int(keyparts.pop())

        filehead = '%s/%s.%i.tar.' % (outLoc, host, bkupNum)
        fileglob = filehead + '*'

        mesg = "Continuing upload for host %s, backup #%i" % (host, bkupNum)
        if splitSize > 0 and is_exe(splitPath):
            mesg += ', split into %i byte chunks' % splitSize
        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'
        logger.info(mesg)
    else:
        mesg = "Writing archive for host %s, backup #%i" % (host, bkupNum)

        tarcmd = [tarCreate, '-t']
        tarcmd.extend(['-h', host])
        tarcmd.extend(['-n', str(bkupNum)])
        tarcmd.extend(['-s', share])
        tarcmd.extend(['.'])

        splitcmd = None
        outfile = '%s/%s.%i.tar' % (outLoc, host, bkupNum)

        if splitSize > 0 and is_exe(splitPath):
            filehead = outfile + '.'
            fileglob = filehead + '*'
            splitcmd = [splitPath, '-b', str(splitSize), '-', filehead]
            mesg += ', split into %i byte chunks' % splitSize
        else:
            fileglob = outfile
            filehead = fileglob + '.'

        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'

        logger.info(mesg)
        logger.debug('Executing tarcmd: %s > %s', ' '.join(tarcmd), outfile)

        outfp = open(outfile, 'wb')
        proc = Popen(tarcmd, preexec_fn=lambda: os.nice(10), stdout=outfp)
        proc.communicate()
        outfp.close()

        if splitcmd:
            logger.debug('Splitting file using splitcmd: %s', ' '.join(splitcmd))
            infp = open(outfile, 'rb')
            proc = Popen(splitcmd, preexec_fn=lambda: os.nice(10), stdin=infp)
            proc.communicate()
            infp.close()

    logger.info('Beginning post-processing of %i files from %s #%i' % (len(glob.glob(fileglob)), host, bkupNum))

    # Create queues for handling encryption and file transfers
    gpg_queue = Queue()
    send_queue = Queue()
    unlink_queue = Queue()

    queues = {
        'gpg_queue': gpg_queue,
        'send_queue': send_queue,
        'unlink_queue': unlink_queue,
    }

    # Pre-run to check for artifacts
    for i in glob.glob(fileglob):
        if not i.endswith('.gpg') and os.path.exists(i + '.gpg'):
            logger.info("Orphaned GPG file exists: %s", i + '.gpg')
            os.unlink(i + '.gpg')

    # Run again to send files to the relevant queue
    for i in sorted(glob.glob(fileglob)):
        if secrets.gpgsymmetrickey and not i.endswith('.gpg'):
            # An unencrypted tar file that still needs to be encrypted.
            logger.debug("Adding %s to gpg_queue", i)
            gpg_queue.put([i, secrets.gpgsymmetrickey, compPath])
        else:
            # Either encryption is off, or the file is already encrypted.
            logger.debug("Adding %s to send_queue", i)
            send_queue.put(i)

    # Put a STOP command at the end of the GPG queue.
    gpg_queue.put('STOP')
    gpg_queue_closed = True

    # Start some handlers, wait until everything is done
    try:
        process_count = cpu_count()
    except NotImplementedError:
        process_count = 1

    procs = []

    for i in range(process_count):
        p = Process(name="encryption_worker_%i" % i, target=encryption_worker, args=(gpg_queue, send_queue,))
        p.start()
        procs.append(p)

    send_p = Process(name="send_worker", target=sending_worker, args=(send_queue, unlink_queue, secrets.accesskey, secrets.sharedkey, host))
    send_p.start()
    procs.append(send_p)

    unlink_p = Process(name="unlink_worker", target=unlink_worker, args=(unlink_queue, secrets.accesskey, secrets.sharedkey, host))
    unlink_p.start()
    procs.append(unlink_p)

    send_queue_closed = False
    unlink_queue_closed = False

    for i in procs:
        i.join()
        crypto_running = 0
        for j in procs:
            if j.name.startswith("encryption_worker") and j.is_alive():
                crypto_running += 1
        if crypto_running == 0 and not send_queue_closed:
            send_queue.put('STOP')
            send_queue_closed = True
            logger.debug("main: queuing stop sentinel for send_queue")
        if not send_p.is_alive() and not unlink_queue_closed:
            unlink_queue.put('STOP')
            unlink_queue_closed = True
            logger.debug("main: queuing stop sentinel for unlink_queue")
        logger.debug("main: process terminated: %s", i.name)

    for qname, q in queues.items():
        if not q.empty():
            logger.critical("main: queue %s not empty!", qname)
            raise Exception("queue not empty: %s" % qname)

    logger.debug("main: finalizing backup")

    # Finalize the backup by writing a COMPLETE marker to the bucket.
    bucket = open_s3(secrets.accesskey, secrets.sharedkey, host)
    key = Key(bucket)
    key.key = '%sCOMPLETE' % os.path.basename(filehead)
    key.set_contents_from_string('%s %s "%s"' % (beginning, time.time(), mesg))
    key.set_acl('private')
    key.close()