#!/usr/bin/python -W ignore::DeprecationWarning
# A BackupPC script to archive a host's files to Amazon S3.
#
# Point $Conf{ArchiveClientCmd} at me.
# Requires python-boto
#
# Usage: BackupPC_archiveHost tarCreatePath splitPath parPath host bkupNum \
#             compPath fileExt splitSize outLoc parFile share
#
# Create secrets.py such that it has:
# accesskey = 'amazon aws access key'
# sharedkey = 'amazon aws shared key'
# gpgsymmetrickey = 'gpg symmetric key -- make it good, but do not lose it'

import glob
import hashlib
import logging
import logging.handlers
import os
import socket
import sys
import time
from multiprocessing import Process, Queue, cpu_count
from subprocess import *

import boto.exception
from boto.s3.connection import S3Connection
from boto.s3.key import Key

import secrets

logger = logging.getLogger(__name__)

sysloghandler = logging.handlers.SysLogHandler(
    '/dev/log', facility=logging.handlers.SysLogHandler.LOG_DAEMON)
syslogformatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
sysloghandler.setFormatter(syslogformatter)
logger.addHandler(sysloghandler)

consolehandler = logging.StreamHandler(sys.stdout)
consoleformatter = logging.Formatter('%(asctime)s: %(levelname)s: %(message)s')
consolehandler.setFormatter(consoleformatter)
logger.addHandler(consolehandler)

logger.setLevel(logging.DEBUG)

# Sentinel placed on a queue to tell its consumer that a producer is done.
STOP = 'STOP'


def _lower_priority():
    """Run in the child before exec so external tools are niced by 10."""
    os.nice(10)


def is_exe(fpath):
    """Return True if fpath is a regular file that we may execute."""
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)


def encrypt_file(filename, key, compress='/bin/cat'):
    """Symmetrically encrypt filename with gpg and delete the original.

    filename -- path of the file to encrypt
    key      -- passphrase, fed to gpg on its standard input
    compress -- path of the compressor BackupPC was configured with; only
                its basename is used, to pick gpg's --compress-algo

    Returns the path of the encrypted file (filename + '.gpg').
    Raises Exception if gpg fails or does not produce the output file; in
    that case the original file is left untouched.
    """
    compressmap = {'cat': 'none', 'gzip': 'ZLIB', 'bzip2': 'BZIP2'}
    compress_algo = compressmap.get(os.path.basename(compress), 'none')
    encrypted = filename + '.gpg'

    cmd = ['/usr/bin/gpg', '--batch', '--no-tty',
           '--compress-algo', compress_algo,
           '--output', encrypted,
           '--passphrase-fd', '0',
           '--symmetric', filename]

    if is_exe(cmd[0]):
        logger.info('Encrypting %s (compression: %s)' % (filename, compress_algo))
        logger.debug(repr(cmd))
    else:
        logger.error('%s is not an executable file!' % cmd[0])

    proc = Popen(cmd, preexec_fn=_lower_priority, stdin=PIPE, stdout=PIPE)
    proc.communicate(key)

    # Never delete the plaintext unless gpg reported success: a partial
    # .gpg file may exist even though the encryption failed.
    if proc.returncode != 0:
        logger.error('gpg exited with status %i while encrypting %s'
                     % (proc.returncode, filename))
        raise Exception('gpg exited with status %i while encrypting %s'
                        % (proc.returncode, filename))

    if not os.path.exists(encrypted):
        logger.error('%s.gpg does not exist' % filename)
        raise Exception('%s.gpg does not exist' % filename)

    oldfilesize = os.path.getsize(filename)
    newfilesize = os.path.getsize(encrypted)
    if oldfilesize:
        compressed = ((oldfilesize - newfilesize) / float(oldfilesize)) * 100
    else:
        # An empty input has nothing to shrink; avoid dividing by zero.
        compressed = 0.0
    logger.info('%s shrunk by %.2f%% (%i -> %i bytes)'
                % (filename, compressed, oldfilesize, newfilesize))
    os.unlink(filename)
    return encrypted


def open_s3(accesskey, sharedkey, host):
    """Return the S3 bucket for host, creating it (private) if needed.

    The bucket name is '<accesskey>-bkup-<host>', lower-cased.
    """
    conn = S3Connection(accesskey, sharedkey, is_secure=True)
    mybucketname = (accesskey + '-bkup-' + host).lower()
    try:
        bucket = conn.get_bucket(mybucketname)
    except boto.exception.S3ResponseError:
        # NOTE(review): any S3 error response is treated as "bucket
        # missing", including permission errors -- confirm this is wanted.
        logger.info('Creating bucket %s' % mybucketname)
        bucket = conn.create_bucket(mybucketname)
        bucket.set_acl('private')
    return bucket


def handle_progress(transmitted, pending):
    """Upload progress callback: log bytes sent so far out of the total."""
    if pending:
        percent = (transmitted / float(pending)) * 100
    else:
        # Zero-byte upload: report it as complete rather than divide by 0.
        percent = 100.0
    logger.info('%i of %i bytes transmitted (%.2f%%)'
                % (transmitted, pending, percent))


def send_file(bucket, filename):
    """Upload filename to bucket under its basename and return the Key.

    An existing key of the same name is overwritten after a warning.
    """
    basefilename = os.path.basename(filename)
    logger.info('Uploading %s...' % basefilename)

    k = Key(bucket)
    k.key = basefilename
    if k.exists():
        logger.warning('Duplicate filename %s! I hope that is OK.' % basefilename)
    k.set_contents_from_filename(filename, cb=handle_progress,
                                 reduced_redundancy=True)
    return k


def encryption_worker(in_q, out_q):
    """Encrypt items from in_q and put the resulting paths on out_q.

    Each item on in_q is (filename, gpgkey, comppath); the worker stops at
    the first 'STOP' it reads.  Exactly one 'STOP' is always forwarded to
    out_q when the worker ends, even if encryption raised, so that the
    consumer of out_q never waits for a producer that has died.
    """
    start_time = time.time()
    counter = 0
    try:
        for filename, gpgkey, comppath in iter(in_q.get, STOP):
            counter += 1
            cryptstart_time = time.time()
            logger.info("Beginning encryption of %s.", filename)
            result = encrypt_file(filename, gpgkey, comppath)
            out_q.put(result)
            logger.info("Finished encryption of %s in %i seconds.",
                        filename, time.time() - cryptstart_time)
        logger.info("Finished processing %i items in encryption queue in %i seconds.",
                    counter, time.time() - start_time)
    finally:
        out_q.put(STOP)


def _send_with_retries(bucket, filename, max_retries=10):
    """Upload filename, retrying with exponential backoff; True on success."""
    for attempt in range(1, max_retries + 2):
        try:
            key = send_file(bucket, filename)
            key.set_acl('private')
            key.close()
            return True
        except (boto.exception.S3ResponseError, socket.error) as e:
            if attempt > max_retries:
                logger.error('Encountered exception %s, giving up on %s after %i attempts',
                             e, filename, attempt)
                return False
            sleeptime = 2 ** attempt
            logger.error('Encountered exception %s, retrying in %i seconds (%i/%i)',
                         e, sleeptime, attempt, max_retries)
            time.sleep(sleeptime)
    return False


def sending_worker(in_q, accesskey, sharedkey, host, stop_count=1):
    """Upload every filename read from in_q to the host's S3 bucket.

    in_q       -- queue of file paths, terminated by 'STOP' sentinels
    stop_count -- number of 'STOP' sentinels to wait for before quitting;
                  pass the number of producers feeding in_q

    A file is deleted locally only after it was uploaded successfully.
    Files that could not be uploaded are kept on disk so a later run can
    resume, and an Exception is raised once the queue is drained so the
    process exits with a non-zero status.
    """
    start_time = time.time()
    counter = 0
    failures = 0
    stops_seen = 0
    bucket = open_s3(accesskey, sharedkey, host)

    while stops_seen < stop_count:
        filename = in_q.get()
        if filename == STOP:
            stops_seen += 1
            continue

        sending_start = time.time()
        counter += 1

        if not _send_with_retries(bucket, filename):
            failures += 1
            continue

        size = os.path.getsize(filename)
        os.unlink(filename)
        sending_seconds = time.time() - sending_start
        if sending_seconds > 0:
            bytespersecond = size / sending_seconds
        else:
            bytespersecond = size
        logger.info("Finished sending of %s in %i seconds, transfer speed %i bytes/second.",
                    filename, sending_seconds, bytespersecond)

    logger.info("Finished processing %i items in sending queue in %i seconds.",
                counter, time.time() - start_time)
    if failures:
        raise Exception('%i file(s) could not be uploaded' % failures)


def main(argv):
    """Archive one BackupPC backup to S3; argv follows the BackupPC spec."""
    # Read in arguments, verify that they match the BackupPC standard exactly
    if len(argv) != 12:
        sys.stderr.write("Usage: %s tarCreatePath splitPath parPath host bkupNum compPath fileExt splitSize outLoc parFile share\n" % argv[0])
        sys.exit(1)

    tarCreate = argv[1]
    splitPath = argv[2]
    parPath = argv[3]
    host = argv[4]
    bkupNum = int(argv[5])
    compPath = argv[6]
    fileExt = argv[7]
    splitSize = int(argv[8])
    outLoc = argv[9]
    parfile = argv[10]
    share = argv[11]

    for i in [tarCreate, compPath, splitPath, parPath]:
        if i != '' and not is_exe(i):
            sys.stderr.write('Error: %s is not an executable program\n' % i)
            sys.exit(1)

    beginning = time.time()

    if share == '*':
        share = '\\*'

    # Is there already evidence of this having been done before?
    leftovers = glob.glob('%s/%s.*.tar.*' % (outLoc, host))
    if leftovers:
        logger.info('Evidence of failed execution run prior! Finishing it.')
        somefile = os.path.basename(leftovers[0])
        # Strip the optional '.gpg', the optional two-letter split suffix
        # and the '.tar' to get at the backup number.
        keyparts = somefile.split('.')
        if keyparts[-1] == 'gpg':
            keyparts.pop()
        if keyparts[-1] != 'tar' and len(keyparts[-1]) == 2:
            keyparts.pop()
        if keyparts[-1] == 'tar':
            keyparts.pop()

        bkupNum = int(keyparts.pop())

        filehead = '%s/%s.%i.tar.' % (outLoc, host, bkupNum)
        fileglob = filehead + '*'

        mesg = "Continuing upload for host %s, backup #%i" % (host, bkupNum)
        if splitSize > 0 and is_exe(splitPath):
            mesg += ', split into %i byte chunks' % splitSize
        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'
        logger.info(mesg)
    else:
        mesg = "Writing archive for host %s, backup #%i" % (host, bkupNum)

        # Every argv element must be a string, hence the str() calls.
        tarcmd = [tarCreate, '-t',
                  '-h', host,
                  '-n', str(bkupNum),
                  '-s', share,
                  '.']

        splitcmd = None
        outfile = '%s/%s.%i.tar' % (outLoc, host, bkupNum)

        if splitSize > 0 and is_exe(splitPath):
            filehead = outfile + '.'
            fileglob = filehead + '*'
            splitcmd = [splitPath, '-b', str(splitSize), '-', filehead]
            mesg += ', split into %i byte chunks' % splitSize
        else:
            fileglob = outfile
            filehead = fileglob + '.'

        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'

        logger.info(mesg)
        logger.debug('Executing tarcmd: %s > %s', ' '.join(tarcmd), outfile)

        with open(outfile, 'wb') as outfp:
            proc = Popen(tarcmd, preexec_fn=_lower_priority, stdout=outfp)
            proc.communicate()
        if proc.returncode != 0:
            # Do not upload (and mark COMPLETE) an archive tar failed on.
            logger.error('%s exited with status %i', tarCreate, proc.returncode)
            sys.exit(1)

        if splitcmd:
            logger.debug('Splitting file using splitcmd: %s', ' '.join(splitcmd))
            with open(outfile, 'rb') as infp:
                proc = Popen(splitcmd, preexec_fn=_lower_priority, stdin=infp)
                proc.communicate()
            if proc.returncode != 0:
                logger.error('%s exited with status %i', splitPath, proc.returncode)
                sys.exit(1)
            # Only the chunks are uploaded; the unsplit tar is not matched
            # by fileglob and would otherwise stay on disk forever.
            os.unlink(outfile)

    logger.info('Beginning post-processing of %i files from %s #%i'
                % (len(glob.glob(fileglob)), host, bkupNum))

    # Create queues for handling encryption and file transfers
    gpg_queue = Queue()
    send_queue = Queue()

    # Pre-run to check for artifacts
    for i in glob.glob(fileglob):
        if not i.endswith('.gpg') and os.path.exists(i + '.gpg'):
            logger.info("Orphaned GPG file exists: %s", i + '.gpg')
            os.unlink(i + '.gpg')

    # Run again to send files to the relevant queue
    for i in glob.glob(fileglob):
        if secrets.gpgsymmetrickey and not i.endswith('.gpg'):
            # A tar file, unencrypted, needs encrypted.
            logger.debug("Adding %s to gpg_queue", i)
            gpg_queue.put([i, secrets.gpgsymmetrickey, compPath])
        else:
            # either encryption is off, or the file is already encrypted
            logger.debug("Adding %s to send_queue", i)
            send_queue.put(i)

    try:
        process_count = cpu_count()
    except NotImplementedError:
        process_count = 1

    # One STOP per encryption worker: each worker quits at the first STOP
    # it reads, so a single sentinel would leave the others blocked forever.
    for i in range(process_count):
        gpg_queue.put(STOP)

    # Start some handlers, wait until everything is done
    workers = []
    for i in range(process_count):
        worker = Process(target=encryption_worker, args=(gpg_queue, send_queue,))
        worker.start()
        workers.append(worker)

    # The sender must see a STOP from every encryption worker before it
    # may quit, otherwise files still being encrypted would be skipped.
    send_p = Process(target=sending_worker,
                     args=(send_queue, secrets.accesskey, secrets.sharedkey,
                           host, process_count))
    send_p.start()

    while send_p.is_alive():
        # Generate some output so that BackupPC doesn't time out
        checkpoint = "Checkpoint: %i task(s) in GPG queue, %i task(s) in S3 queue.\n" % (gpg_queue.qsize(), send_queue.qsize())
        logger.info(checkpoint)
        sys.stdout.write(checkpoint + '\n')
        sys.stdout.flush()
        send_p.join(300)

    if send_p.exitcode != 0:
        for worker in workers:
            if worker.is_alive():
                worker.terminate()
        raise Exception("Sending worker exited with status %s" % send_p.exitcode)

    for worker in workers:
        worker.join()
        if worker.exitcode != 0:
            raise Exception("Encryption worker exited with status %s" % worker.exitcode)

    if not gpg_queue.empty():
        raise Exception("GPG queue not empty")
    if not send_queue.empty():
        raise Exception("Send queue not empty")

    logger.info("Finalizing backup.")

    # finalize the backup: the marker records start time, end time and the
    # description of what was archived.
    bucket = open_s3(secrets.accesskey, secrets.sharedkey, host)
    key = Key(bucket)
    key.key = '%sCOMPLETE' % os.path.basename(filehead)
    key.set_contents_from_string('%s %s "%s"' % (beginning, time.time(), mesg))
    key.close()


if __name__ == '__main__':
    main(sys.argv)