#!/usr/bin/python -W ignore::DeprecationWarning
# A BackupPC script to archive a host's files to Amazon S3.
#
# Point $Conf{ArchiveClientCmd} at me.
# Requires python-boto
#
# Usage: BackupPC_archiveHost tarCreatePath splitPath parPath host bkupNum \
#        compPath fileExt splitSize outLoc parFile share
#
# Create secrets.py such that it has:
#   accesskey = 'amazon aws access key'
#   sharedkey = 'amazon aws shared key'
#   gpgsymmetrickey = 'gpg symmetric key -- make it good, but do not lose it'
#   speedfile = '/path/to/a/speed-limit-file'

import glob
import os
import secrets
import socket
import sys
import time

from multiprocessing import Process, Queue, cpu_count
from subprocess import PIPE, Popen

from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto.exception

import logging
import logging.handlers

logger = logging.getLogger('')
loghandler = logging.handlers.SysLogHandler(
    '/dev/log', facility=logging.handlers.SysLogHandler.LOG_DAEMON)
logformatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
loghandler.setFormatter(logformatter)
logger.addHandler(loghandler)
logger.setLevel(logging.DEBUG)


def is_exe(fpath):
    """True if fpath exists and is executable."""
    return os.path.exists(fpath) and os.access(fpath, os.X_OK)


def encrypt_file(filename, key, compress='/bin/cat'):
    """Symmetrically encrypt filename with gpg, deleting the plaintext.

    The compression program BackupPC used is mapped onto the matching gpg
    --compress-algo so the data is not compressed twice.
    """
    compressmap = {'cat': 'none', 'gzip': 'ZLIB', 'bzip2': 'BZIP2'}
    if os.path.basename(compress) in compressmap:
        compress_algo = compressmap[os.path.basename(compress)]
    else:
        compress_algo = 'none'

    cmd = ['/usr/bin/gpg', '--batch', '--no-tty']
    cmd.extend(['--compress-algo', compress_algo])
    cmd.extend(['--output', '%s.gpg' % filename])
    cmd.extend(['--passphrase-fd', '0'])
    cmd.extend(['--symmetric', filename])

    if not is_exe(cmd[0]):
        logger.error('%s is not an executable file!' % cmd[0])
        raise RuntimeError('%s is not an executable file' % cmd[0])

    logger.info('Encrypting %s (compression: %s)' % (filename, compress_algo))
    logger.debug(repr(cmd))

    # The passphrase is fed to gpg on stdin (--passphrase-fd 0) so that it
    # never appears on a command line.
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE)
    proc.communicate(key)

    if not os.path.exists(filename + '.gpg'):
        logger.error('%s.gpg does not exist' % filename)
        raise RuntimeError('%s.gpg does not exist' % filename)

    oldfilesize = os.path.getsize(filename)
    newfilesize = os.path.getsize(filename + '.gpg')
    compressed = ((oldfilesize - newfilesize) / float(oldfilesize)) * 100
    logger.info('%s shrunk by %.2f%% (%i -> %i bytes)'
                % (filename, compressed, oldfilesize, newfilesize))
    os.unlink(filename)
    return filename + '.gpg'


def open_s3(accesskey, sharedkey, host):
    """Return the per-host backup bucket, creating it if necessary."""
    conn = S3Connection(accesskey, sharedkey, is_secure=False)
    mybucketname = (accesskey + '-bkup-' + host).lower()
    try:
        bucket = conn.get_bucket(mybucketname)
    except boto.exception.S3ResponseError:
        logger.info('Creating bucket %s' % mybucketname)
        bucket = conn.create_bucket(mybucketname)
    bucket.set_acl('private')
    return bucket


def handle_progress(transmitted, total):
    logger.debug('%i of %i bytes transmitted (%.2f%%)'
                 % (transmitted, total, (transmitted / float(total)) * 100))


def send_file(bucket, filename):
    basefilename = os.path.basename(filename)
    if bucket.get_key(basefilename):
        logger.error('Duplicate filename %s! I hope that is OK.' % basefilename)

    k = Key(bucket)
    k.key = basefilename
    logger.info('Uploading %s...' % basefilename)
    fd = open(filename, 'rb')
    putHeaders = {'x-amz-storage-class': 'REDUCED_REDUNDANCY'}
    k.set_contents_from_file(fd, headers=putHeaders, cb=handle_progress)
    fd.close()
    return k
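
# Restoring by hand, for reference: each uploaded piece is an individually
# gpg-encrypted chunk of the (optionally split) tar stream produced below, so
# recovery looks roughly like this -- file names are hypothetical, adjust the
# host and backup number to match yours:
#
#   for f in myhost.0.tar.??.gpg; do
#       gpg --output "${f%.gpg}" --decrypt "$f"   # prompts for the key
#   done
#   cat myhost.0.tar.?? > myhost.0.tar && tar -tf myhost.0.tar
#
# (An unsplit backup is a single myhost.0.tar.gpg; skip the cat step.)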
def encryption_worker(in_q, out_q):
    """Encrypts things from the in_q, puts them in the out_q."""
    start_time = time.time()
    counter = 0
    for filename, gpgkey, comppath in iter(in_q.get, 'STOP'):
        counter += 1
        cryptstart_time = time.time()
        logger.info("Beginning encryption of %s.", filename)
        result = encrypt_file(filename, gpgkey, comppath)
        out_q.put(result)
        logger.info("Finished encryption of %s in %i seconds.",
                    filename, time.time() - cryptstart_time)
    logger.info("Encryption worker dying after %i tasks in %i seconds.",
                counter, time.time() - start_time)
    # Tell the sending worker that this producer is done.
    out_q.put('STOP')


def sending_worker(in_q, accesskey, sharedkey, host, num_producers=1):
    """Sends things from the in_q using the send_file method."""
    start_time = time.time()
    counter = 0
    stops_seen = 0
    bucket = open_s3(accesskey, sharedkey, host)

    while stops_seen < num_producers:
        filename = in_q.get()
        if filename == 'STOP':
            # Each encryption worker posts one STOP; only quit once every
            # producer has finished, or files could be left behind.
            stops_seen += 1
            continue

        sending_start = time.time()
        counter += 1
        retry_count = 0
        max_retries = 10

        while retry_count <= max_retries:
            try:
                key = send_file(bucket, filename)
                key.set_acl('private')
                key.close()
                break
            except (boto.exception.S3ResponseError, socket.error) as e:
                retry_count += 1
                sleeptime = 2 ** retry_count
                logger.error('Encountered exception %s, retrying in %i seconds (%i/%i)',
                             e, sleeptime, retry_count, max_retries)
                time.sleep(sleeptime)
        else:
            # Out of retries: leave the file on disk so a later run can pick
            # it up, rather than deleting something that was never sent.
            logger.error('Gave up on %s after %i retries.', filename, max_retries)
            continue

        size = os.path.getsize(filename)
        os.unlink(filename)
        sending_seconds = time.time() - sending_start
        bytespersecond = size / sending_seconds
        logger.info("Finished sending of %s in %i seconds, transfer speed %i bytes/second.",
                    filename, sending_seconds, bytespersecond)

    logger.info("Sending worker dying after %i tasks in %i seconds.",
                counter, time.time() - start_time)
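
# The moving parts below, in sketch form: BackupPC_tarCreate writes (and
# optionally splits) the archive into outLoc, then two multiprocessing queues
# feed the workers:
#
#   gpg_queue --> encryption_worker (xN) --> send_queue --> sending_worker --> S3
#
# A minimal smoke test of one worker from an interactive session (hypothetical
# paths; requires an importable secrets.py):
#
#   >>> from multiprocessing import Queue
#   >>> gpg_q, send_q = Queue(), Queue()
#   >>> gpg_q.put(['/tmp/test.tar', 'passphrase', '/bin/cat'])
#   >>> gpg_q.put('STOP')
#   >>> encryption_worker(gpg_q, send_q)  # leaves '/tmp/test.tar.gpg' on send_q
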
if __name__ == '__main__':
    # Read in arguments
    if len(sys.argv) != 12:
        sys.stderr.write("Usage: %s tarCreatePath splitPath parPath host "
                         "bkupNum compPath fileExt splitSize outLoc parFile "
                         "share\n" % sys.argv[0])
        sys.exit(1)
    else:
        tarCreate = sys.argv[1]
        splitPath = sys.argv[2]
        parPath = sys.argv[3]
        host = sys.argv[4]
        bkupNum = int(sys.argv[5])
        compPath = sys.argv[6]
        fileExt = sys.argv[7]
        splitSize = int(sys.argv[8])
        outLoc = sys.argv[9]
        parfile = sys.argv[10]
        share = sys.argv[11]

    for i in [tarCreate, compPath, splitPath, parPath]:
        if i != '' and not is_exe(i):
            sys.stderr.write('Error: %s is not an executable program\n' % i)
            sys.exit(1)

    beginning = time.time()

    mesg = "Writing archive for host %s, backup #%i" % (host, bkupNum)
    if splitSize > 0 and is_exe(splitPath):
        mesg += ', split into %i byte chunks' % splitSize
    if secrets.gpgsymmetrickey:
        mesg += ', encrypted with secret key'

    logger.info(mesg)
    sys.stdout.write(time.strftime('%d-%H:%M:%S') + ": " + mesg + '\n')
    sys.stdout.flush()

    # Prepare the pipeline
    if share == '*':
        share = '\\*'  # quote the glob so the shell passes it through

    cmd = '%s -t -h %s -n %i -s %s . ' % (tarCreate, host, bkupNum, share)

    if splitSize > 0 and is_exe(splitPath):
        filehead = '%s/%s.%i.tar.' % (outLoc, host, bkupNum)
        fileglob = filehead + '*'
        cmd += '| %s -b %i - %s' % (splitPath, splitSize, filehead)
    else:
        fileglob = '%s/%s.%i.tar' % (outLoc, host, bkupNum)
        cmd += '> %s' % fileglob
        filehead = fileglob + '.'

    # Is there already evidence of this having been done before?
    if glob.glob('%s/%s.*.tar.*' % (outLoc, host)):
        logger.info('Found files from a previous failed run; finishing it.')
        somefile = os.path.basename(glob.glob('%s/%s.*.tar.*' % (outLoc, host))[0])
        keyparts = somefile.split('.')
        # Strip the suffixes (.gpg, two-letter split suffix, .tar) to recover
        # the backup number of the interrupted run.
        if keyparts[-1] == 'gpg':
            keyparts.pop()
        if keyparts[-1] != 'tar' and len(keyparts[-1]) == 2:
            keyparts.pop()
        if keyparts[-1] == 'tar':
            keyparts.pop()
        bkupNum = int(keyparts.pop())
        filehead = '%s/%s.%i.tar.' % (outLoc, host, bkupNum)
        fileglob = filehead + '*'

        mesg = "Continuing upload for host %s, backup #%i" % (host, bkupNum)
        if splitSize > 0 and is_exe(splitPath):
            mesg += ', split into %i byte chunks' % splitSize
        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'

        logger.info(mesg)
        sys.stdout.write(time.strftime('%d-%H:%M:%S') + ": " + mesg + '\n')
        sys.stdout.flush()
    else:
        logger.debug('Executing %s' % cmd)
        returncode = os.system(cmd)
        if returncode != 0:
            logger.error('%s died with exit status %i' % (cmd, returncode))
            sys.exit(1)

    logger.info('Beginning post-processing of %i files from %s #%i'
                % (len(glob.glob(fileglob)), host, bkupNum))

    # Create queues for handling encryption and file transfers
    gpg_queue = Queue()
    send_queue = Queue()

    for i in sorted(glob.glob(fileglob)):
        if secrets.gpgsymmetrickey and not i.endswith('.gpg'):
            logger.debug("Adding %s to gpg_queue", i)
            gpg_queue.put([i, secrets.gpgsymmetrickey, compPath])
        else:
            # Either encryption is off, or the file is already encrypted.
            logger.debug("Adding %s to send_queue", i)
            send_queue.put(i)

    # Start some handlers, wait until everything is done
    try:
        process_count = cpu_count()
    except NotImplementedError:
        process_count = 1

    # Put a STOP command at the end of the GPG queue -- one per worker, so
    # that every worker sees a sentinel and shuts down.
    for i in range(process_count):
        gpg_queue.put('STOP')

    for i in range(process_count):
        Process(target=encryption_worker, args=(gpg_queue, send_queue)).start()

    send_p = Process(target=sending_worker,
                     args=(send_queue, secrets.accesskey, secrets.sharedkey,
                           host, process_count))
    send_p.start()

    while send_p.is_alive():
        # Generate some output so that BackupPC doesn't time out
        checkpoint = ("Checkpoint: %i task(s) in GPG queue, %i task(s) in S3 queue."
                      % (gpg_queue.qsize(), send_queue.qsize()))
        logger.info(checkpoint)
        sys.stdout.write(checkpoint + '\n')
        sys.stdout.flush()
        send_p.join(300)

    if not gpg_queue.empty():
        raise Exception("GPG queue not empty")
    if not send_queue.empty():
        raise Exception("Send queue not empty")

    sys.stdout.write("Reached end of queue! Finalizing backup.\n")
    sys.stdout.flush()

    # Finalize the backup: drop a marker object recording the start time, end
    # time, and the archive description.
    bucket = open_s3(secrets.accesskey, secrets.sharedkey, host)
    key = Key(bucket)
    key.key = '%sCOMPLETE' % os.path.basename(filehead)
    key.set_contents_from_string('%s %s "%s"' % (beginning, time.time(), mesg))
    key.close()
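
# Wiring this into BackupPC: point $Conf{ArchiveClientCmd} at this script in
# config.pl. Something along these lines should work -- the install path is
# hypothetical, and the $-variables are BackupPC's standard ArchiveClientCmd
# substitutions (check the documentation for your version):
#
#   $Conf{ArchiveClientCmd} = '/usr/local/bin/BackupPC_archiveHost_s3'
#       . ' $tarCreatePath $splitpath $parpath $host $backupnumber'
#       . ' $compression $compext $splitsize $archiveloc $parfile *';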