Reorganize script to be less hacky

A class for caching, less shady delete logic, etc.
This commit is contained in:
Ryan Tucker 2013-02-06 18:02:07 -05:00
parent 59eedb580b
commit 4f4574b727

View file

@ -37,23 +37,31 @@ from collections import defaultdict
from math import log10 from math import log10
from subprocess import * from subprocess import *
def open_s3(accesskey, sharedkey): class BackupManager:
return S3Connection(accesskey, sharedkey) def __init__(self, accesskey, sharedkey):
self._accesskey = accesskey
self._connection = S3Connection(accesskey, sharedkey)
def iter_backup_buckets(conn, name=None): self._buckets = None
"""Yields an iterator of buckets that probably have backups in them.""" self._bucketbackups = {}
self._backups = None
bucket_prefix = secrets.accesskey.lower() + '-bkup-' def _generate_backup_buckets(self):
if name: bucket_prefix = self._accesskey.lower() + '-bkup-'
bucket_prefix += name buckets = self._connection.get_all_buckets()
self._buckets = []
buckets = conn.get_all_buckets()
for bucket in buckets: for bucket in buckets:
if bucket.name.startswith(bucket_prefix): if bucket.name.startswith(bucket_prefix):
yield bucket self._buckets.append(bucket)
def list_backups(bucket): @property
def backup_buckets(self): # property
if self._buckets is None:
self._generate_backup_buckets()
return self._buckets
def _list_backups(self, bucket):
"""Returns a dict of backups in a bucket, with dicts of: """Returns a dict of backups in a bucket, with dicts of:
{hostname (str): {hostname (str):
{Backup number (int): {Backup number (int):
@ -116,11 +124,43 @@ def list_backups(bucket):
backups[hostname][backupnum]['keys'].append(key) backups[hostname][backupnum]['keys'].append(key)
return backups return backups
def backups_by_age(conn, name=None): def get_backups_by_bucket(self, bucket):
if bucket.name not in self._bucketbackups:
self._bucketbackups[bucket.name] = self._list_backups(bucket)
return self._bucketbackups[bucket.name]
@property
def all_backups(self):
    """Return every known backup, merged across all backup buckets.

    The result is a dict of {hostname: {backupnum: backup info}} built by
    folding together what get_backups_by_bucket() reports for each bucket
    in self.backup_buckets. It is computed on first access and kept in
    self._backups; later accesses return the cached dict until something
    resets self._backups to None.

    Progress is reported on stderr: a heading, one dot per host entry
    processed, then a newline.
    """
    if self._backups is not None:
        return self._backups

    sys.stderr.write("Enumerating backups")
    merged = {}
    # Publish the dict before filling it, exactly as the cache is built
    # in place.
    self._backups = merged
    for bucket in self.backup_buckets:
        per_host = self.get_backups_by_bucket(bucket)
        for host, host_backups in per_host.items():
            sys.stderr.write('.')
            sys.stderr.flush()
            # A host may appear in several buckets; later buckets win on
            # a clashing backup number.
            merged.setdefault(host, {}).update(host_backups)
    sys.stderr.write("\n")
    return merged
def invalidate_host_cache(self, hostname):
    """Drop cached backup listings that mention `hostname`.

    Every entry of self._bucketbackups (keyed by bucket name) whose listing
    contains `hostname` is removed, so that bucket is listed again on next
    use. The merged cache in self._backups is always reset to None, even
    when no bucket matched.
    """
    # Collect first: the dict must not change size while it is iterated.
    stale = [
        name
        for name, listing in self._bucketbackups.items()
        if hostname in listing
    ]
    for name in stale:
        self._bucketbackups.pop(name, None)
    self._backups = None
@property
def backups_by_age(self): # property
"Returns a dict of {hostname: [(backupnum, age), ...]}" "Returns a dict of {hostname: [(backupnum, age), ...]}"
results = defaultdict(list) results = defaultdict(list)
for bucket in iter_backup_buckets(conn, name=name): for hostname, backups in self.all_backups.items():
for hostname, backups in list_backups(bucket).items():
for backupnum, statusdict in backups.items(): for backupnum, statusdict in backups.items():
results[hostname].append((backupnum, statusdict['finalized_age'])) results[hostname].append((backupnum, statusdict['finalized_age']))
return results return results
@ -150,6 +190,24 @@ def choose_host_to_backup(agedict, target_count=2):
for candidate, score in sorted(host_scores.items(), key=lambda x: x[1], reverse=True): for candidate, score in sorted(host_scores.items(), key=lambda x: x[1], reverse=True):
yield (candidate, score) yield (candidate, score)
def choose_backups_to_delete(agedict, target_count=2, max_age=30):
    """Pick, per host, the expired backups that may be deleted.

    Args:
        agedict: dict of {hostname: [(backupnum, age), ...]} as returned by
            backups_by_age. Ages are compared against a limit in seconds.
        target_count: how many of the youngest backups of each host are
            always kept. Values below zero are treated as zero.
        max_age: age limit in days; only backups strictly older than this
            are selected.

    Returns:
        A defaultdict(list) of {hostname: [(backupnum, age), ...]}, oldest
        backup first. Hosts with nothing to delete have no entry.
    """
    max_age_seconds = max_age * 24 * 60 * 60
    # A negative count used to keep popping from an empty list and raise
    # IndexError; clamp it so it simply means "keep none".
    keep = max(target_count, 0)
    decimate = defaultdict(list)

    for hostname, backuplist in agedict.items():
        # Entries with a non-positive age are never candidates
        # (presumably unfinalized backups -- verify against backups_by_age).
        candidates = sorted(
            (backup for backup in backuplist if backup[1] > 0),
            key=lambda backup: backup[1]
        )
        # Skip the `keep` youngest backups, then walk the rest oldest first.
        for backup in reversed(candidates[keep:]):
            if backup[1] > max_age_seconds:
                decimate[hostname].append(backup)

    return decimate
def iter_urls(keyset, expire=86400): def iter_urls(keyset, expire=86400):
"""Given a list of keys and an optional expiration time (in seconds), """Given a list of keys and an optional expiration time (in seconds),
returns an iterator of URLs to fetch to reassemble the backup.""" returns an iterator of URLs to fetch to reassemble the backup."""
@ -263,10 +321,11 @@ def main():
action="store_true", help="Consider unfinalized backups") action="store_true", help="Consider unfinalized backups")
parser.add_option("-s", "--start-backups", dest="start", parser.add_option("-s", "--start-backups", dest="start",
action="store_true", help="When used with --age, start backups for hosts with fewer than keep+1 backups") action="store_true", help="When used with --age, start backups for hosts with fewer than keep+1 backups")
parser.add_option("-l", "--list", dest="list", action="store_true", help="List stored backups after completing operations")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
conn = open_s3(secrets.accesskey, secrets.sharedkey) bmgr = BackupManager(secrets.accesskey, secrets.sharedkey)
if options.backupnum and not options.host: if options.backupnum and not options.host:
parser.error('Must specify --host when specifying --backup-number') parser.error('Must specify --host when specifying --backup-number')
@ -274,10 +333,6 @@ def main():
if options.backupnum: if options.backupnum:
options.backupnum = int(options.backupnum) options.backupnum = int(options.backupnum)
# TODO: refactor this somewhere better
backups_by_age_list = backups_by_age(conn)
score_iter = choose_host_to_backup(backups_by_age_list, target_count=int(options.keep))
if len(args) == 0: if len(args) == 0:
args.append('list') args.append('list')
@ -295,12 +350,10 @@ def main():
if args[0] in ['list', 'script', 'delete']: if args[0] in ['list', 'script', 'delete']:
if options.host: if options.host:
buckets = iter_backup_buckets(conn, name=options.host) if options.host not in bmgr.all_backups:
if not buckets: parser.error('No backups found for host "%s"' % options.host)
parser.error('No buckets found for host "%s"' % options.host)
else: else:
buckets = iter_backup_buckets(conn) if len(bmgr.all_backups) == 0:
if not buckets:
parser.error('No buckets found!') parser.error('No buckets found!')
else: else:
parser.error('Invalid option: %s' + args[0]) parser.error('Invalid option: %s' + args[0])
@ -309,21 +362,19 @@ def main():
if not options.host: if not options.host:
parser.error('Must specify --host to generate a script for') parser.error('Must specify --host to generate a script for')
backups = list_backups(buckets.next())
if not options.backupnum and options.unfinalized: if not options.backupnum and options.unfinalized:
# assuming highest number # assuming highest number
options.backupnum = max(backups[options.host].keys()) options.backupnum = max(bmgr.all_backups[options.host].keys())
elif not options.backupnum: elif not options.backupnum:
# assuming highest finalized number # assuming highest finalized number
options.backupnum = 0 options.backupnum = 0
for backup in backups[options.host].keys(): for backup in bmgr.all_backups[options.host].keys():
if backups[options.host][backup]['finalized'] > 0: if bmgr.all_backups[options.host][backup]['finalized'] > 0:
options.backupnum = max(options.backupnum, backup) options.backupnum = max(options.backupnum, backup)
if options.backupnum == 0: if options.backupnum == 0:
parser.error('No finalized backups found! Try --unfinalized if you dare') parser.error('No finalized backups found! Try --unfinalized if you dare')
backup = backups[options.host][options.backupnum] backup = bmgr.all_backups[options.host][options.backupnum]
if not options.expire: if not options.expire:
options.expire = "86400" options.expire = "86400"
@ -333,13 +384,59 @@ def main():
fd.writelines(make_restore_script(backup, expire=int(options.expire))) fd.writelines(make_restore_script(backup, expire=int(options.expire)))
else: else:
sys.stdout.writelines(make_restore_script(backup, expire=int(options.expire))) sys.stdout.writelines(make_restore_script(backup, expire=int(options.expire)))
elif args[0] == 'list': elif args[0] == 'delete':
to_ignore = int(options.keep)
to_delete = []
if options.host and options.backupnum:
print("Will delete backup: %s %i (forced)" % (options.host, options.backupnum))
to_delete.append((options.host, options.backupnum))
elif options.age:
to_delete_dict = choose_backups_to_delete(bmgr.backups_by_age, target_count=to_ignore, max_age=int(options.age))
for hostname, backuplist in to_delete_dict.items():
for backupstat in backuplist:
print("Will delete backup: %s %i (expired, age=%g days)" % (hostname, backupstat[0], backupstat[1]/86400.0))
to_delete.append((hostname, backupstat[0]))
else:
parser.error('Need either an age or a host AND backup number.')
if len(to_delete) > 0:
for deletehost, deletebackupnum in to_delete:
hostbackups = bmgr.all_backups.get(deletehost, {})
deletebackup = hostbackups.get(deletebackupnum, {})
deletekeys = deletebackup.get('keys', [])
finalkey = deletebackup.get('finalkey', None)
if len(deletekeys) > 0:
sys.stdout.write("Deleting backup: %s %d (%d keys)" % (deletehost, deletebackupnum, len(deletekeys)))
for key in deletekeys:
if options.test:
sys.stdout.write('_')
else:
key.delete()
sys.stdout.write('.')
sys.stdout.flush()
if finalkey is not None:
if options.test:
sys.stdout.write('+')
else:
finalkey.delete()
sys.stdout.write('!')
sys.stdout.flush()
sys.stdout.write('\n')
if options.start:
for deletehost, deletebackupnum in to_delete:
bmgr.invalidate_host_cache(deletehost)
score_iter = choose_host_to_backup(bmgr.backups_by_age, target_count=int(options.keep)+1)
for candidate, score in score_iter:
if score > 0:
sys.stdout.write('Starting archive operation for host: %s (score=%g)\n' % (candidate, score))
start_archive([candidate])
break
if args[0] == 'list' or options.list:
sys.stdout.write('%25s | %5s | %20s | %5s\n' % ("Hostname", "Bkup#", "Age", "Files")) sys.stdout.write('%25s | %5s | %20s | %5s\n' % ("Hostname", "Bkup#", "Age", "Files"))
sys.stdout.write('-'*72 + '\n') sys.stdout.write('-'*72 + '\n')
for bucket in buckets: for hostname, backups in bmgr.all_backups.items():
hostnames = list_backups(bucket)
for hostname in hostnames.keys():
backups = hostnames[hostname]
for backupnum in sorted(backups.keys()): for backupnum in sorted(backups.keys()):
filecount = len(backups[backupnum]['keys']) filecount = len(backups[backupnum]['keys'])
datestruct = backups[backupnum]['date'] datestruct = backups[backupnum]['date']
@ -363,113 +460,6 @@ def main():
sys.stdout.write('%25s | %5i | %20s | %5i%s\n' % (hostname, backupnum, prettydelta, filecount, inprogress)) sys.stdout.write('%25s | %5i | %20s | %5i%s\n' % (hostname, backupnum, prettydelta, filecount, inprogress))
sys.stdout.write('* == not yet finalized (Age == time of last activity)\n') sys.stdout.write('* == not yet finalized (Age == time of last activity)\n')
elif args[0] == 'delete':
if options.age:
maxage = int(options.age)*86400
needs_backup = []
for bucket in buckets:
hostnames = list_backups(bucket)
for hostname in hostnames.keys():
backups = hostnames[hostname]
backuplist = sorted(backups.keys())
oldest_timestamp = -1
# remove a number of recent backups from the delete list
to_ignore = int(options.keep)
while to_ignore > 0:
if len(backuplist) > 0:
backupnum = backuplist.pop()
filecount = len(backups[backupnum]['keys'])
datestruct = backups[backupnum]['date']
timestamp = time.mktime(datestruct)
delta = int(time.time() - timestamp + time.timezone)
if backups[backupnum]['finalized'] == 0:
sys.stdout.write('Ignoring in-progress backup %s #%i\n' % (hostname, backupnum))
else:
sys.stdout.write('Keeping recent backup %s #%i (%i files, age %.2f days)\n' % (hostname, backupnum, filecount, delta/86400.0))
if timestamp < oldest_timestamp:
oldest_timestamp = timestamp
to_ignore -= 1
else:
to_ignore = 0
deletes = 0
for backupnum in backuplist:
filecount = len(backups[backupnum]['keys'])
if backups[backupnum]['finalized'] > 0:
datestruct = backups[backupnum]['finalized']
else:
datestruct = backups[backupnum]['date']
timestamp = time.mktime(datestruct)
delta = int(time.time() - timestamp + time.timezone)
if delta > maxage:
if not options.unfinalized and backups[backupnum]['finalized'] == 0:
sys.stdout.write('Bypassing unfinalized backup %s #%i (%i files, age %.2f days)\n' % (hostname, backupnum, filecount, delta/86400.0))
else:
sys.stdout.write('Deleting %s #%i (%i files, age %.2f days)...' % (hostname, backupnum, filecount, delta/86400.0))
for key in backups[backupnum]['keys']:
if options.test:
sys.stdout.write('*')
else:
key.delete()
sys.stdout.write('.')
if backups[backupnum]['finalkey']:
if options.test:
sys.stdout.write('X')
else:
backups[backupnum]['finalkey'].delete()
sys.stdout.write('!')
sys.stdout.write('\n')
deletes += 1
if (len(backuplist)-deletes) < int(options.keep):
needs_backup.append((oldest_timestamp, hostname))
#if options.start and len(needs_backup) > 0:
# sys.stdout.write('Starting archive operation for host: %s\n' % sorted(needs_backup)[0][1])
# start_archive([sorted(needs_backup)[0][1]])
if options.start:
for candidate, score in score_iter:
if score > 0:
sys.stdout.write('Starting archive operation for host: %s (score=%g)\n' % (candidate, score))
start_archive([candidate])
break
elif options.host and options.backupnum:
for bucket in buckets:
hostnames = list_backups(bucket)
if options.host in hostnames.keys():
if options.backupnum not in hostnames[options.host].keys():
parser.error('Backup number %i not found' % options.backupnum)
toast = hostnames[options.host][options.backupnum]
filecount = len(toast['keys'])
if toast['finalized'] > 0:
datestruct = toast['finalized']
else:
datestruct = toast['date']
datestruct = toast['date']
timestamp = time.mktime(datestruct)
delta = int(time.time() - timestamp + time.timezone)
if options.unfinalized and toast['finalized'] > 0:
sys.stdout.write('Bypassing finalized backup %s #%i (%i files, age %.2f days)\n' % (hostname, backupnum, filecount, delta/86400.0))
else:
sys.stdout.write('Deleting %s #%i (%i files, age %.2f days)...' % (options.host, options.backupnum, filecount, delta/86400.0))
for key in toast['keys']:
if options.test:
sys.stdout.write('*')
else:
key.delete()
sys.stdout.write('.')
if toast['finalkey']:
if options.test:
sys.stdout.write('X')
else:
toast['finalkey'].delete()
sys.stdout.write('!')
sys.stdout.write('\n')
else:
parser.error('Host %s not found' % options.host)
else:
parser.error('Need either an age or a host AND backup number.')
if __name__ == '__main__': if __name__ == '__main__':
main() main()