BackupPC_archive_host_B2/backup-manager.py

476 lines
20 KiB
Python
Raw Normal View History

2010-01-03 01:32:27 +01:00
#!/usr/bin/python -W ignore::DeprecationWarning
2011-10-02 17:36:20 +02:00
#
2010-01-03 01:32:27 +01:00
# Script to manage S3-stored backups
2011-10-02 17:36:20 +02:00
#
# Copyright (c) 2009-2011 Ryan S. Tucker
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
2010-01-03 01:32:27 +01:00
import calendar
import optparse
import os
import pwd
import sys
import time
from collections import defaultdict
from math import log10
from subprocess import *

import boto.exception
from boto.s3.connection import S3Connection
from boto.s3.key import Key

# This script reads secrets.accesskey and secrets.sharedkey, which the
# standard-library module of the same name does not define, so a local
# secrets.py providing them is expected to be importable.
import secrets
2010-01-03 01:32:27 +01:00
def open_s3(accesskey, sharedkey):
    """Create and return a boto S3Connection for the given credentials.

    accesskey and sharedkey are handed to the S3Connection constructor
    positionally, in that order.
    """
    connection = S3Connection(accesskey, sharedkey)
    return connection
def iter_backup_buckets(conn, name=None):
    """Generate the buckets that look like they hold backups.

    A bucket qualifies when its name starts with the lowercased
    secrets.accesskey followed by '-bkup-' and, when name is truthy, by
    name as well.  conn must offer get_all_buckets().
    """
    suffix = name if name else ''
    wanted_prefix = secrets.accesskey.lower() + '-bkup-' + suffix
    for candidate in conn.get_all_buckets():
        if not candidate.name.startswith(wanted_prefix):
            continue
        yield candidate
def _parse_backup_key(keyname):
    """Break an archive key name into (hostname, backupnum, final).

    Recognised layouts (hostname may itself contain dots):
        hostname.backupnum.tar.COMPLETE      finalization marker
        hostname.backupnum[.tar][.XX][.gpg]  data file, XX being a
                                             two-character split suffix
    For a COMPLETE marker the component just before it is dropped without
    being checked.  Returns None when no integer backup number sits where
    one is expected.
    """
    parts = keyname.split('.')
    final = parts[-1] == 'COMPLETE'
    if final:
        # Drop the marker and the component before it (normally 'tar').
        del parts[-2:]
    else:
        if parts[-1] == 'gpg':
            parts.pop()
        if parts and parts[-1] != 'tar' and len(parts[-1]) == 2:
            parts.pop()
        if parts and parts[-1] == 'tar':
            parts.pop()
    if not parts:
        return None
    try:
        backupnum = int(parts.pop())
    except ValueError:
        return None
    return '.'.join(parts), backupnum, final


def list_backups(bucket):
    """Returns a dict of backups in a bucket, with dicts of:
    {hostname (str):
        {Backup number (int):
            {'date': time.struct_time, initialised from the first key seen
                     for the backup and lowered whenever a data key has an
                     earlier last-modified time,
             'keys': A list of the data keys comprising the backup,
             'hostname': Hostname (str),
             'backupnum': Backup number (int),
             'finalized': 0, or the time.struct_time of the COMPLETE marker,
             'finalkey': None, or the COMPLETE marker key,
             'finalized_age': -1, or seconds elapsed since 'finalized'
            }
        }
    }

    bucket must offer list(), yielding objects with a .key name and a
    .last_modified string of the form 'YYYY-MM-DDTHH:MM:SS.000Z'.  Keys
    whose names do not parse are reported on stdout as stray files and
    skipped.
    """
    backups = {}
    for key in bucket.list():
        parsed = _parse_backup_key(key.key)
        if parsed is None:
            print("Stray file: %s" % key.key)
            continue
        hostname, backupnum, final = parsed

        lastmod = time.strptime(key.last_modified, '%Y-%m-%dT%H:%M:%S.000Z')

        host_backups = backups.setdefault(hostname, {})
        if backupnum not in host_backups:
            host_backups[backupnum] = {'date': lastmod, 'hostname': hostname, 'backupnum': backupnum, 'finalized': 0, 'keys': [], 'finalkey': None, 'finalized_age': -1}
        record = host_backups[backupnum]

        if final:
            record['finalized'] = lastmod
            record['finalkey'] = key
            # lastmod is UTC (the string ends in 'Z'); timegm converts it to
            # an epoch value without any local-timezone or DST adjustment.
            record['finalized_age'] = int(time.time() - calendar.timegm(lastmod))
        else:
            if lastmod < record['date']:
                record['date'] = lastmod
            record['keys'].append(key)
    return backups
def backups_by_age(conn, name=None):
    """Collect {hostname: [(backupnum, finalized_age), ...]}.

    Walks every bucket produced by iter_backup_buckets(conn, name=name)
    and records, for each backup found by list_backups, its number and
    its 'finalized_age' value.  The result is a defaultdict(list).
    """
    ages = defaultdict(list)
    for bucket in iter_backup_buckets(conn, name=name):
        per_host = list_backups(bucket)
        for hostname, host_backups in per_host.items():
            for backupnum, info in host_backups.items():
                entry = (backupnum, info['finalized_age'])
                ages[hostname].append(entry)
    return ages
def choose_host_to_backup(agedict, target_count=2):
    """Rank hosts as candidates for a new archive run.

    agedict is a dict from backups_by_age: hostname -> list of
    (backupnum, age) tuples, where an age of -1 marks an unfinalized
    backup.  Yields (hostname, score) tuples, highest score first.

    Scoring per host, with the backups sorted by ascending age:
      * +200 if the first entry is unfinalized (that entry is then dropped)
      * -100 if at least target_count entries remain
      * -1 for every remaining entry
      * +log10 of the smallest remaining age
    Ages below 1 (a zero or negative age, or a further unfinalized -1
    entry) are clamped to 1 so log10 cannot raise a math domain error.
    """
    host_scores = defaultdict(int)

    for hostname, backuplist in agedict.items():
        by_age = sorted(backuplist, key=lambda entry: entry[1])
        if by_age and by_age[0][1] == -1:
            # unfinalized backup alert
            host_scores[hostname] += 200
            by_age.pop(0)
        if len(by_age) >= target_count:
            host_scores[hostname] -= 100
        host_scores[hostname] -= len(by_age)
        if by_age:
            # Ascending sort: the first entry is the most recent backup and
            # the last entry is the oldest one.
            youngest_age = by_age[0][1]
            eldest_age = by_age[-1][1]
            host_scores[hostname] += log10(max(1, youngest_age))
            # NOTE(review): youngest_age - eldest_age is never positive, so
            # this term is always log10(1) == 0.  Kept as-is because the
            # intended formula is unclear -- confirm before changing it.
            host_scores[hostname] -= log10(max(1, youngest_age - eldest_age))

    ranked = sorted(host_scores.items(), key=lambda item: item[1], reverse=True)
    for candidate, score in ranked:
        yield (candidate, score)
2010-01-03 01:32:27 +01:00
def iter_urls(keyset, expire=86400):
    """Generate one URL per key in keyset, in order.

    Each URL comes from the key's generate_url(expires_in=expire), with
    expire given in seconds; fetching all of them yields the pieces
    needed to reassemble the backup.
    """
    for s3key in keyset:
        signed_url = s3key.generate_url(expires_in=expire)
        yield signed_url
def make_restore_script(backup, expire=86400):
    """Build a shell script that downloads, decrypts and unpacks a backup.

    backup is a dict with 'hostname', 'backupnum', 'date' (a
    time.struct_time) and 'keys' (objects offering .name and
    .generate_url(expires_in=...)).  expire is the lifetime in seconds of
    both the download URLs and the script itself.

    Returns a list of strings suitable for file.writelines().

    The generated script quotes its target directory argument and, once it
    has changed into that directory, refers to the scratch area by a path
    relative to it, so relative targets and targets containing spaces work.
    """
    hostname = backup['hostname']
    backupnum = backup['backupnum']
    created = time.strftime('%Y-%m-%d at %H:%M GMT', backup['date'])
    # One clock reading, so the banner and the runtime check always agree.
    expires_at = time.time() + expire
    expires_text = time.strftime('%Y-%m-%d at %H:%M GMT', time.gmtime(expires_at))

    output = [
        '#!/bin/sh\n',
        '# Restoration script for %s backup %s,\n' % (hostname, backupnum),
        '# a backup created on %s.\n' % (created),
        '# To use: bash scriptname /path/to/put/the/files\n\n',
        '# WARNING: THIS FILE EXPIRES AFTER %s\n' % (expires_text),
        'if [ "`date +%%s`" -gt "%i" ];\n' % (expires_at),
        ' then echo "Sorry, but this restore script is too old.";\n',
        ' exit 1;\n',
        'fi\n\n',
        'if [ -z "$1" ];\n',
        ' then echo "Usage: ./scriptname /path/to/restore/to";\n',
        ' exit 1;\n',
        'fi\n\n',
        '# Check the destination\n',
        'if [ ! -d "$1" ];\n',
        ' then echo "Target $1 does not exist!";\n',
        ' exit 1;\n',
        'fi\n\n',
        'if [ -n "$(ls --almost-all "$1")" ];\n',
        ' then echo "Target $1 is not empty!";\n',
        ' exit 1;\n',
        'fi\n\n',
        '# cd to the destination, create a temporary workspace\n',
        # Stop if the cd fails so nothing is unpacked into the wrong place.
        'cd "$1" || exit 1\n',
        'mkdir .restorescript-scratch\n\n',
        '# retrieve files\n',
    ]

    scratch_files = []
    for key in backup['keys']:
        # Relative to the target directory we just changed into.
        scratch_path = '.restorescript-scratch/' + key.name
        output.append('wget -O %s "%s"\n' % (scratch_path, key.generate_url(expires_in=expire)))
        scratch_files.append(scratch_path)
    scratch_files.sort()

    output.extend([
        '\n# decrypt files\n',
        'gpg --decrypt-files << EOF\n',
        '\n'.join(scratch_files),
        '\nEOF\n',
        '\n# join and untar files\n',
        'cat .restorescript-scratch/*.tar.?? | tar -xf -\n\n',
        'echo "DONE! Have a nice day."\n##\n',
    ])
    return output
def start_archive(hosts):
    """Run BackupPC_archiveStart for the given hosts and wait for it.

    The program is looked up in the directory of sys.argv[0] and invoked
    with 'archives3', a user name and then the host names.  The user name
    is $LOGNAME when set, otherwise the passwd entry for the current uid,
    otherwise 'nobody'.
    """
    username = os.environ.get('LOGNAME')
    if username is None:
        try:
            username = pwd.getpwuid(os.getuid()).pw_name
        except KeyError:
            username = 'nobody'

    launcher = os.path.join(os.path.dirname(sys.argv[0]), 'BackupPC_archiveStart')
    cmd = [launcher, 'archives3', username] + list(hosts)

    Popen(cmd).communicate()
2010-01-03 01:32:27 +01:00
def _seconds_since(datestruct):
    """Return the whole seconds elapsed since a UTC time.struct_time."""
    return int(time.time() - calendar.timegm(datestruct))


def _pretty_age(delta):
    """Render an age in seconds as 'N min ago', 'N hr ago' or 'N day(s) ago'."""
    if delta < 3600:
        return '%i min ago' % (delta // 60)
    if delta < 86400:
        return '%i hr ago' % (delta // 3600)
    days = delta // 86400
    plural = '' if days == 1 else 's'
    return '%i day%s ago' % (days, plural)


def _delete_backup_keys(backup, test_mode):
    """Delete the data keys and COMPLETE marker of one backup.

    Writes one progress character per key to stdout ('.' and '!' when
    deleting, '*' and 'X' in test mode, where nothing is deleted) and
    finishes the line with a newline.
    """
    for key in backup['keys']:
        if test_mode:
            sys.stdout.write('*')
        else:
            key.delete()
            sys.stdout.write('.')
    if backup['finalkey']:
        if test_mode:
            sys.stdout.write('X')
        else:
            backup['finalkey'].delete()
            sys.stdout.write('!')
    sys.stdout.write('\n')


def main():
    """Command-line entry point: list, delete or script archived backups.

    Parses sys.argv with optparse and runs one command:
      list    (default) print every archived backup with its age
      delete  remove backups older than --age days while keeping --keep
              recent finalized ones, or remove the single backup named by
              --host and --backup-number
      script  write a restore script for one backup of --host
    Usage errors are reported through parser.error(), which exits.
    """
    # check command line options
    parser = optparse.OptionParser(
        usage="usage: %prog [options] [list|delete|script]",
        description=(
            "Companion maintenance script for BackupPC_archiveHost_s3. "
            "By default, it assumes the 'list' command, which displays all "
            "of the backups currently archived on S3. The 'delete' command "
            "is used to delete backups. The 'script' command produces a "
            "script that can be used to download and restore a backup."
        )
    )
    parser.add_option("-H", "--host", dest="host",
                      help="Name of backed-up host")
    # Numeric options are typed so bad input gives a usage error rather
    # than a traceback, and so 0 can be told apart from "not given".
    parser.add_option("-b", "--backup-number", dest="backupnum", type="int",
                      help="Backup number")
    parser.add_option("-a", "--age", dest="age", type="int",
                      help="Delete backups older than AGE days")
    parser.add_option("-k", "--keep", dest="keep", type="int",
                      help="When used with --age, keep this many recent backups (default=1)", default=1)
    parser.add_option("-f", "--filename", dest="filename",
                      help="Output filename for script")
    parser.add_option("-x", "--expire", dest="expire", type="int",
                      help="Maximum age of script, default 86400 seconds")
    parser.add_option("-t", "--test", dest="test", action="store_true",
                      help="Test mode; don't actually delete")
    parser.add_option("-u", "--unfinalized", dest="unfinalized",
                      action="store_true", help="Consider unfinalized backups")
    parser.add_option("-s", "--start-backups", dest="start",
                      action="store_true", help="When used with --age, start backups for hosts with fewer than keep+1 backups")
    (options, args) = parser.parse_args()

    # Validate everything that needs no network access first.
    if options.backupnum is not None and not options.host:
        parser.error('Must specify --host when specifying --backup-number')
    if len(args) > 1:
        parser.error('Too many arguments.')
    command = args[0] if args else 'list'
    if command != 'delete' and options.age is not None:
        parser.error('--age only makes sense with delete')
    if options.start and not (command == 'delete' and options.age is not None):
        parser.error('--start-backups only makes sense with delete and --age')
    if command != 'script' and (options.expire is not None or options.filename):
        parser.error('--expire and --filename only make sense with script')
    if command not in ('list', 'script', 'delete'):
        parser.error('Invalid option: %s' % command)
    if command == 'script' and not options.host:
        parser.error('Must specify --host to generate a script for')

    conn = open_s3(secrets.accesskey, secrets.sharedkey)

    # Materialise the generator so that emptiness can actually be tested.
    if options.host:
        buckets = list(iter_backup_buckets(conn, name=options.host))
        if not buckets:
            parser.error('No buckets found for host "%s"' % options.host)
    else:
        buckets = list(iter_backup_buckets(conn))
        if not buckets:
            parser.error('No buckets found!')

    if command == 'script':
        # Only the first matching bucket is consulted.
        backups = list_backups(buckets[0])
        if options.host not in backups:
            parser.error('Host %s not found' % options.host)
        host_backups = backups[options.host]

        if options.backupnum is None:
            if options.unfinalized:
                # assuming highest number
                candidates = list(host_backups)
            else:
                # assuming highest finalized number
                candidates = [num for num, info in host_backups.items()
                              if info['finalized']]
            if not candidates:
                parser.error('No finalized backups found! Try --unfinalized if you dare')
            options.backupnum = max(candidates)
        elif options.backupnum not in host_backups:
            parser.error('Backup number %i not found' % options.backupnum)

        backup = host_backups[options.backupnum]
        expire = options.expire if options.expire is not None else 86400
        script_lines = make_restore_script(backup, expire=expire)

        if options.filename:
            with open(options.filename, 'w') as fd:
                fd.writelines(script_lines)
        else:
            sys.stdout.writelines(script_lines)

    elif command == 'list':
        sys.stdout.write('%25s | %5s | %20s | %5s\n' % ("Hostname", "Bkup#", "Age", "Files"))
        sys.stdout.write('-'*72 + '\n')
        for bucket in buckets:
            for hostname, backups in list_backups(bucket).items():
                for backupnum in sorted(backups):
                    info = backups[backupnum]
                    # 'finalized' is 0 or a struct_time, so test truthiness.
                    inprogress = '' if info['finalized'] else '*'
                    prettydelta = _pretty_age(_seconds_since(info['date']))
                    sys.stdout.write('%25s | %5i | %20s | %5i%s\n' % (hostname, backupnum, prettydelta, len(info['keys']), inprogress))
        sys.stdout.write('* == not yet finalized (Age == time of last activity)\n')

    elif command == 'delete':
        if options.age is not None:
            maxage = options.age * 86400
            keep = options.keep

            # Ages used to pick a host for --start-backups are gathered
            # before anything is deleted, and only when they are needed.
            age_snapshot = backups_by_age(conn) if options.start else None

            for bucket in buckets:
                for hostname, backups in list_backups(bucket).items():
                    backuplist = sorted(backups)

                    # remove a number of recent backups from the delete list;
                    # in-progress backups are skipped without counting
                    # against --keep
                    remaining_to_keep = keep
                    while remaining_to_keep > 0 and backuplist:
                        backupnum = backuplist.pop()
                        info = backups[backupnum]
                        if not info['finalized']:
                            sys.stdout.write('Ignoring in-progress backup %s #%i\n' % (hostname, backupnum))
                            continue
                        delta = _seconds_since(info['date'])
                        sys.stdout.write('Keeping recent backup %s #%i (%i files, age %.2f days)\n' % (hostname, backupnum, len(info['keys']), delta/86400.0))
                        remaining_to_keep -= 1

                    for backupnum in backuplist:
                        info = backups[backupnum]
                        filecount = len(info['keys'])
                        # Age counts from finalization when there was one.
                        delta = _seconds_since(info['finalized'] or info['date'])
                        if delta <= maxage:
                            continue
                        if not options.unfinalized and not info['finalized']:
                            sys.stdout.write('Bypassing unfinalized backup %s #%i (%i files, age %.2f days)\n' % (hostname, backupnum, filecount, delta/86400.0))
                        else:
                            sys.stdout.write('Deleting %s #%i (%i files, age %.2f days)...' % (hostname, backupnum, filecount, delta/86400.0))
                            _delete_backup_keys(info, options.test)

            if options.start:
                # Start one archive run, for the best-scoring host, and only
                # if its score is positive.
                for candidate, score in choose_host_to_backup(age_snapshot, target_count=keep):
                    if score > 0:
                        sys.stdout.write('Starting archive operation for host: %s (score=%g)\n' % (candidate, score))
                        start_archive([candidate])
                    break

        elif options.host and options.backupnum is not None:
            found_host = False
            for bucket in buckets:
                hostnames = list_backups(bucket)
                if options.host not in hostnames:
                    # Bucket matching is by prefix, so other hosts' buckets
                    # may show up here; keep looking.
                    continue
                found_host = True
                if options.backupnum not in hostnames[options.host]:
                    parser.error('Backup number %i not found' % options.backupnum)
                toast = hostnames[options.host][options.backupnum]
                filecount = len(toast['keys'])
                delta = _seconds_since(toast['finalized'] or toast['date'])
                if options.unfinalized and toast['finalized']:
                    sys.stdout.write('Bypassing finalized backup %s #%i (%i files, age %.2f days)\n' % (options.host, options.backupnum, filecount, delta/86400.0))
                else:
                    sys.stdout.write('Deleting %s #%i (%i files, age %.2f days)...' % (options.host, options.backupnum, filecount, delta/86400.0))
                    _delete_backup_keys(toast, options.test)
            if not found_host:
                parser.error('Host %s not found' % options.host)
        else:
            parser.error('Need either an age or a host AND backup number.')
2010-01-03 01:32:27 +01:00
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()