#!/usr/bin/python -W ignore::DeprecationWarning
# A BackupPC script to archive a host's files to Amazon S3.
#
# Point $Conf{ArchiveClientCmd} at me.
# Requires python-boto.
#
# Usage: BackupPC_archiveHost tarCreatePath splitPath parPath host bkupNum \
#            compPath fileExt splitSize outLoc parFile share
#
# Create secrets.py such that it has:
#   accesskey = 'amazon aws access key'
#   sharedkey = 'amazon aws shared key'
#   gpgsymmetrickey = 'gpg symmetric key -- make it good, but do not lose it'

import glob
import hashlib
import os
import socket
import sys
import time

from multiprocessing import Process, Queue, cpu_count
from subprocess import Popen, PIPE

from boto.s3.connection import S3Connection
from boto.s3.key import Key
import boto.exception

import logging
import logging.handlers

import secrets

logger = logging.getLogger(__name__)

sysloghandler = logging.handlers.SysLogHandler('/dev/log',
        facility=logging.handlers.SysLogHandler.LOG_DAEMON)
syslogformatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
sysloghandler.setFormatter(syslogformatter)
logger.addHandler(sysloghandler)

consolehandler = logging.StreamHandler(sys.stdout)
consoleformatter = logging.Formatter('%(asctime)s: %(levelname)s: %(message)s')
consolehandler.setFormatter(consoleformatter)
logger.addHandler(consolehandler)

logger.setLevel(logging.DEBUG)

def is_exe(fpath):
    return os.path.exists(fpath) and os.access(fpath, os.X_OK)

def encrypt_file(filename, key, compress='/bin/cat'):
    # Map BackupPC's compression helper to the matching gpg compress-algo,
    # so gpg does not waste time recompressing already-compressed data.
    compressmap = {'cat': 'none', 'gzip': 'ZLIB', 'bzip2': 'BZIP2'}
    if os.path.basename(compress) in compressmap:
        compress_algo = compressmap[os.path.basename(compress)]
    else:
        compress_algo = 'none'

    cmd = ['/usr/bin/gpg', '--batch', '--no-tty']
    cmd.extend(['--compress-algo', compress_algo])
    cmd.extend(['--output', '%s.gpg' % filename])
    cmd.extend(['--passphrase-fd', '0'])
    cmd.extend(['--symmetric', filename])

    if is_exe(cmd[0]):
        logger.info('Encrypting %s (compression: %s)' % (filename, compress_algo))
        logger.debug(repr(cmd))
    else:
        logger.error('%s is not an executable file!' % cmd[0])

    proc = Popen(cmd, preexec_fn=lambda: os.nice(10), stdin=PIPE, stdout=PIPE)
    # gpg reads the passphrase from stdin (--passphrase-fd 0).
    proc.communicate(key)

    if os.path.exists(filename + '.gpg'):
        oldfilesize = os.path.getsize(filename)
        newfilesize = os.path.getsize(filename + '.gpg')
        compressed = ((oldfilesize - newfilesize) / float(oldfilesize)) * 100
        logger.info('%s shrunk by %.2f%% (%i -> %i bytes)' %
            (filename, compressed, oldfilesize, newfilesize))
        os.unlink(filename)
        return filename + '.gpg'
    else:
        logger.error('%s.gpg does not exist' % filename)
        raise Exception('%s.gpg does not exist' % filename)

def open_s3(accesskey, sharedkey, host):
    conn = S3Connection(accesskey, sharedkey, is_secure=True)
    mybucketname = (accesskey + '-bkup-' + host).lower()
    try:
        bucket = conn.get_bucket(mybucketname)
    except boto.exception.S3ResponseError:
        logger.info('Creating bucket %s' % mybucketname)
        bucket = conn.create_bucket(mybucketname)
    bucket.set_acl('private')
    return bucket

def handle_progress(transmitted, total):
    logger.info('%i of %i bytes transmitted (%.2f%%)' % (transmitted, total,
        (transmitted / float(total)) * 100))

def send_file(bucket, filename):
    basefilename = os.path.basename(filename)
    logger.info('Uploading %s...' % basefilename)
    k = Key(bucket)
    k.key = basefilename
    if k.exists():
        logger.warning('Duplicate filename %s! I hope that is OK.' % basefilename)
    k.set_contents_from_filename(filename, cb=handle_progress, reduced_redundancy=True)
    return k
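# The post-processing pipeline below is three stages connected by queues:
#   gpg_queue -> encryption_worker(s) -> send_queue -> sending_worker
#                                     -> unlink_queue -> unlink_worker
# Each worker runs in its own process and terminates when it reads a 'STOP'
# sentinel from its input queue.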
def encryption_worker(in_q, out_q):
    "Encrypts things from the in_q, puts them in the out_q"
    start_time = time.time()
    counter = 0
    for filename, gpgkey, comppath in iter(in_q.get, 'STOP'):
        counter += 1
        cryptstart_time = time.time()
        logger.info("Beginning encryption of %s.", filename)
        result = encrypt_file(filename, gpgkey, comppath)
        out_q.put(result)
        logger.debug("encryption_worker: encrypted %s in %i seconds",
            filename, time.time() - cryptstart_time)
    logger.debug("encryption_worker: queue is empty, terminating after %i items in %i seconds",
        counter, time.time() - start_time)

def sending_worker(in_q, out_q, accesskey, sharedkey, host):
    "Sends things from the in_q using the send_file method"
    start_time = time.time()
    counter = 0
    for filename in iter(in_q.get, 'STOP'):
        sending_start = time.time()
        counter += 1
        retry_count = 0
        max_retries = 10
        done = False

        while retry_count <= max_retries and not done:
            try:
                logger.info("sending_worker: sending %s", filename)
                bucket = open_s3(accesskey, sharedkey, host)
                key = send_file(bucket, filename)
                key.set_acl('private')
                key.close()
                done = True
            except (boto.exception.S3ResponseError, socket.error), e:
                # Exponential backoff: wait 2**retry_count seconds between tries.
                retry_count += 1
                sleeptime = 2 ** retry_count
                logger.error('Encountered exception %s, retrying in %i seconds (%i/%i)',
                    e, sleeptime, retry_count, max_retries)
                time.sleep(sleeptime)

        if not done:
            # Give up on this file; it stays on disk for a later run to retry.
            logger.error('sending_worker: could not upload %s in %i retries',
                filename, max_retries)
        else:
            size = os.path.getsize(filename)
            sending_seconds = time.time() - sending_start
            bytespersecond = size / sending_seconds
            logger.debug("sending_worker: sent %s in %i seconds at %i bytes/second.",
                filename, sending_seconds, bytespersecond)
            out_q.put(filename)

    # The main process queues the 'STOP' sentinel for the unlink queue once
    # this worker exits, so we do not queue one here.
    logger.debug("sending_worker: queue is empty, terminating after %i items in %i seconds",
        counter, time.time() - start_time)
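# unlink_worker verifies each upload before deleting the local copy: the key
# must exist in S3, the sizes must match, and the local MD5 must equal the
# key's ETag. (The ETag is only a plain MD5 for single-part uploads, which is
# what send_file performs.)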
def unlink_worker(in_q, accesskey, sharedkey, host):
    start_time = time.time()
    counter = 0
    bucket = open_s3(accesskey, sharedkey, host)
    for filename in iter(in_q.get, 'STOP'):
        counter += 1
        basefilename = os.path.basename(filename)
        key = bucket.get_key(basefilename)
        stat = os.stat(filename)
        if key:
            if key.size == stat[6]:
                fp = open(filename)
                local_md5 = hashlib.md5(fp.read())
                fp.close()
                if '"%s"' % local_md5.hexdigest() == key.etag:
                    logger.debug("unlink_worker: deleting %s", basefilename)
                    os.unlink(filename)
                else:
                    logger.error("unlink_worker: md5sum for %s did not match: %s != %s",
                        basefilename, '"%s"' % local_md5.hexdigest(), key.etag)
            else:
                logger.error("unlink_worker: size mismatch for %s: %i != %i",
                    basefilename, stat[6], key.size)
        else:
            logger.error("unlink_worker: key does not exist: %s", basefilename)
    logger.debug("unlink_worker: queue is empty, terminating after %i items in %i seconds",
        counter, time.time() - start_time)

if __name__ == '__main__':
    # Read in arguments; verify that they match the BackupPC standard exactly.
    if len(sys.argv) != 12:
        sys.stderr.write("Usage: %s tarCreatePath splitPath parPath host bkupNum compPath fileExt splitSize outLoc parFile share\n" % sys.argv[0])
        sys.exit(1)
    else:
        tarCreate = sys.argv[1]
        splitPath = sys.argv[2]
        parPath = sys.argv[3]
        host = sys.argv[4]
        bkupNum = int(sys.argv[5])
        compPath = sys.argv[6]
        fileExt = sys.argv[7]
        splitSize = int(sys.argv[8])
        outLoc = sys.argv[9]
        parfile = sys.argv[10]
        share = sys.argv[11]

    for i in [tarCreate, compPath, splitPath, parPath]:
        if i != '' and not is_exe(i):
            sys.stderr.write('Error: %s is not an executable program\n' % i)
            sys.exit(1)

    beginning = time.time()

    if share == '*':
        share = '\*'

    # Is there already evidence of this having been done before?
    if glob.glob('%s/%s.*.tar.*' % (outLoc, host)):
        logger.info('Found evidence of a prior failed run! Finishing it.')
        somefile = os.path.basename(glob.glob('%s/%s.*.tar.*' % (outLoc, host))[0])
        keyparts = somefile.split('.')
        encrypted = split = tarred = final = False
        if keyparts[-1] == 'gpg':
            keyparts.pop()
        # A two-character trailer that is not 'tar' is a split suffix (aa, ab, ...).
        if keyparts[-1] != 'tar' and len(keyparts[-1]) == 2:
            keyparts.pop()
        if keyparts[-1] == 'tar':
            keyparts.pop()
        bkupNum = int(keyparts.pop())
        filehead = '%s/%s.%i.tar.' % (outLoc, host, bkupNum)
        fileglob = filehead + '*'
        mesg = "Continuing upload for host %s, backup #%i" % (host, bkupNum)
        if splitSize > 0 and is_exe(splitPath):
            mesg += ', split into %i byte chunks' % splitSize
        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'
        logger.info(mesg)
    else:
        mesg = "Writing archive for host %s, backup #%i" % (host, bkupNum)

        tarcmd = [tarCreate, '-t']
        tarcmd.extend(['-h', host])
        tarcmd.extend(['-n', str(bkupNum)])
        tarcmd.extend(['-s', share])
        tarcmd.extend(['.'])

        splitcmd = None
        outfile = '%s/%s.%i.tar' % (outLoc, host, bkupNum)

        if splitSize > 0 and is_exe(splitPath):
            filehead = outfile + '.'
            fileglob = filehead + '*'
            splitcmd = [splitPath, '-b', str(splitSize), '-', filehead]
            mesg += ', split into %i byte chunks' % splitSize
        else:
            fileglob = outfile
            filehead = fileglob + '.'

        if secrets.gpgsymmetrickey:
            mesg += ', encrypted with secret key'

        logger.info(mesg)
        logger.debug('Executing tarcmd: %s > %s', ' '.join(tarcmd), outfile)

        outfp = open(outfile, 'wb')
        proc = Popen(tarcmd, preexec_fn=lambda: os.nice(10), stdout=outfp)
        proc.communicate()
        outfp.close()

        if splitcmd:
            logger.debug('Splitting file using splitcmd: %s', ' '.join(splitcmd))
            infp = open(outfile, 'rb')
            proc = Popen(splitcmd, preexec_fn=lambda: os.nice(10), stdin=infp)
            proc.communicate()
            infp.close()

    logger.info('Beginning post-processing of %i files from %s #%i' %
        (len(glob.glob(fileglob)), host, bkupNum))

    # Create queues for handling encryption and file transfers
    gpg_queue = Queue()
    send_queue = Queue()
    unlink_queue = Queue()

    queues = {
        'gpg_queue': gpg_queue,
        'send_queue': send_queue,
        'unlink_queue': unlink_queue,
    }

    # Pre-run to check for artifacts: an orphaned .gpg next to its source tar
    # means a previous encryption was interrupted, so redo it from scratch.
    for i in glob.glob(fileglob):
        if not i.endswith('.gpg') and os.path.exists(i + '.gpg'):
            logger.info("Orphaned GPG file exists: %s", i + '.gpg')
            os.unlink(i + '.gpg')

    # Run again to send files to the relevant queue
    for i in sorted(glob.glob(fileglob)):
        if secrets.gpgsymmetrickey and not i.endswith('.gpg'):
            # An unencrypted tar file that still needs to be encrypted.
            logger.debug("Adding %s to gpg_queue", i)
            gpg_queue.put([i, secrets.gpgsymmetrickey, compPath])
        else:
            # Either encryption is off, or the file is already encrypted.
            logger.debug("Adding %s to send_queue", i)
            send_queue.put(i)
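    # Note: iter(q.get, 'STOP') blocks on q.get() and stops iterating when it
    # returns the 'STOP' sentinel, so each worker consumes exactly one sentinel
    # and every consumer needs a sentinel of its own.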
    # Figure out how many encryption workers we can use, then put one STOP
    # sentinel per worker at the end of the GPG queue.
    try:
        process_count = cpu_count()
    except NotImplementedError:
        process_count = 1

    for i in range(process_count):
        gpg_queue.put('STOP')
    gpg_queue_closed = True

    # Start some handlers, wait until everything is done
    procs = []

    for i in range(process_count):
        p = Process(name="encryption_worker_%i" % i,
                    target=encryption_worker,
                    args=(gpg_queue, send_queue,))
        p.start()
        procs.append(p)

    send_p = Process(name="send_worker",
                     target=sending_worker,
                     args=(send_queue, unlink_queue,
                           secrets.accesskey, secrets.sharedkey, host))
    send_p.start()
    procs.append(send_p)

    unlink_p = Process(name="unlink_worker",
                       target=unlink_worker,
                       args=(unlink_queue, secrets.accesskey,
                             secrets.sharedkey, host))
    unlink_p.start()
    procs.append(unlink_p)

    send_queue_closed = False
    unlink_queue_closed = False

    for i in procs:
        i.join()
        crypto_running = 0
        for j in procs:
            if j.name.startswith("encryption_worker") and j.is_alive():
                crypto_running += 1
        if crypto_running == 0 and not send_queue_closed:
            send_queue.put('STOP')
            send_queue_closed = True
            logger.debug("main: queuing stop sentinel for send_queue")
        if not send_p.is_alive() and not unlink_queue_closed:
            unlink_queue.put('STOP')
            unlink_queue_closed = True
            logger.debug("main: queuing stop sentinel for unlink_queue")
        logger.debug("main: process terminated: %s", i.name)

    for qname, q in queues.items():
        if not q.empty():
            logger.critical("main: queue %s not empty!", qname)
            raise Exception("queue not empty: %s" % qname)

    logger.debug("main: finalizing backup")

    # Finalize the backup: drop a COMPLETE sentinel key into the bucket
    # recording the start time, end time, and what was done.
    bucket = open_s3(secrets.accesskey, secrets.sharedkey, host)
    key = Key(bucket)
    key.key = '%sCOMPLETE' % os.path.basename(filehead)
    key.set_contents_from_string('%s %s "%s"' % (beginning, time.time(), mesg))
    key.set_acl('private')
    key.close()
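# Example $Conf{ArchiveClientCmd} setting (a sketch only -- the install path
# is hypothetical; the $-variables are the ones BackupPC substitutes in its
# stock ArchiveClientCmd, in the argument order this script expects):
#
#   $Conf{ArchiveClientCmd} = '/usr/local/bin/BackupPC_archiveHost_s3'
#           . ' $tarCreatePath $splitpath $parpath $host $backupnumber'
#           . ' $compression $compext $splitsize $archiveloc $parfile *';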