BackupPC_archive_host_B2/backup-manager.py

476 lines
18 KiB
Python
Raw Normal View History

2022-10-18 00:02:58 +02:00
#!/usr/bin/python3
2011-10-02 17:36:20 +02:00
#
2022-10-18 00:02:58 +02:00
# Script to manage B2-stored backups
2011-10-02 17:36:20 +02:00
#
2013-02-07 01:00:32 +01:00
# Copyright (c) 2009-2013 Ryan S. Tucker
2011-10-02 17:36:20 +02:00
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
2010-01-03 01:32:27 +01:00
2022-10-18 00:02:58 +02:00
from argparse import ArgumentParser
from collections import defaultdict
2022-10-18 00:02:58 +02:00
from datetime import datetime, timedelta
from os import environ, getuid
from pathlib import Path
from pwd import getpwuid
2013-02-07 01:00:32 +01:00
from subprocess import Popen
2022-10-18 00:02:58 +02:00
from sys import argv
2013-02-07 01:00:32 +01:00
2022-10-18 00:02:58 +02:00
from b2sdk.v2 import FileVersion
from b2sdk.v2 import InMemoryAccountInfo, B2Api, Bucket
from math import log10
from progress.bar import ChargingBar
2022-10-18 00:02:58 +02:00
import secrets
2013-02-07 01:00:32 +01:00
2022-10-18 00:02:58 +02:00
class BackupManager:
    """
    Cached view of the BackupPC archives stored in B2.

    Buckets and their backup listings are fetched lazily on first access and
    kept until invalidate_host_cache() drops them.
    """

    def __init__(self, access_key: str, shared_key: str):
        """
        Authorize against B2 (realm "production") with the given key pair.
        """
        self._access_key = access_key
        b2_info = InMemoryAccountInfo()
        self._connection = B2Api(b2_info)
        self._connection.authorize_account("production", self._access_key, shared_key)

        self._buckets = None        # list of backup buckets, filled on demand
        self._bucket_backups = {}   # bucket name -> result of _list_backups()
        self._backups = None        # merged per-host view, rebuilt on demand

    def _generate_backup_buckets(self) -> "list[Bucket]":
        """
        (Re)build the list of buckets whose name starts with
        "<access key>-bckpc-" (lower-cased) and return it.
        """
        bucket_prefix = f"{self._access_key}-bckpc-".lower()
        self._buckets = [
            bucket
            for bucket in self._connection.list_buckets()
            if bucket.name.startswith(bucket_prefix)
        ]
        return self._buckets

    @property
    def backup_buckets(self) -> "list[Bucket]":
        """The backup buckets of this account (listed once, then cached)."""
        if self._buckets is None:
            self._generate_backup_buckets()
        return self._buckets

    @staticmethod
    def _parse_backup_file_name(file_name: str):
        """
        Split an archive object name into (hostname, backup_num, final).

        Recognised shapes are "<host>.<num>.tar[.<xx>][.gpg]" for data files
        (<xx> being a two-character split suffix) and
        "<host>.<num>.<ext>.COMPLETE" for the finalization marker.
        Returns None when no backup number can be extracted.
        """
        parts = file_name.split(".")
        final = parts[-1] == "COMPLETE"
        if final:
            # drop "COMPLETE" and the extension before it -> back to the number
            del parts[-2:]
        else:
            if parts and parts[-1] == "gpg":
                parts.pop()
            if parts and parts[-1] != "tar" and len(parts[-1]) == 2:
                parts.pop()
            if parts and parts[-1] == "tar":
                parts.pop()
        if not parts:
            return None

        number = parts.pop()
        try:
            backup_num = int(number)
        except ValueError:
            # includes the "<something>.COMPLETE.tar" style stray files
            return None
        return ".".join(parts), backup_num, final

    @staticmethod
    def _seconds_since(timestamp: int, now: float = None) -> int:
        """
        Whole seconds elapsed between the epoch value `timestamp` and `now`
        (default: the current time), never negative.

        The result is clamped at 0 so that clock skew cannot produce -1,
        which is reserved as the "not finalized" marker of "finalized_age".
        """
        if now is None:
            now = datetime.now().timestamp()
        return max(0, int(now - timestamp))

    @staticmethod
    def _list_backups(bucket: "Bucket") -> dict:
        """
        Returns a dict of backups in a bucket, with dicts of:
        {hostname (str):
            {Backup number (int):
                {
                    "date": naive UTC datetime of the earliest upload seen,
                    "files": A list of files comprising the backup,
                    "hostname": Hostname (str),
                    "backup_num": Backup number (int),
                    "finalized": 0, or the timestamp (epoch seconds) the
                                 backup was finalized,
                    "final_file": None, or the COMPLETE marker file,
                    "finalized_age": -1, or seconds since finalization,
                    "bucket": the bucket of the backup
                }
            }
        }

        Files whose name cannot be parsed are reported as stray and skipped.
        """
        backups = {}

        for entry in bucket.ls():
            file = entry[0]
            if not isinstance(file, FileVersion):
                continue

            parsed = BackupManager._parse_backup_file_name(file.file_name)
            if parsed is None:
                print(f"Stray file: {file.file_name}")
                continue
            hostname, backup_num, final = parsed

            # B2 reports milliseconds; everything below works in seconds
            upload_timestamp = file.upload_timestamp // 1000
            lastmod = datetime.utcfromtimestamp(upload_timestamp)

            host_backups = backups.setdefault(hostname, {})
            if backup_num not in host_backups:
                host_backups[backup_num] = {
                    "date": lastmod,
                    "hostname": hostname,
                    "backup_num": backup_num,
                    "finalized": 0,
                    "files": [],
                    "final_file": None,
                    "finalized_age": -1,
                    "bucket": bucket,
                }
            record = host_backups[backup_num]

            if final:
                record["finalized"] = upload_timestamp
                record["final_file"] = file
                # positive age in seconds, as expected by the age-based
                # selection helpers that consume backups_by_age
                record["finalized_age"] = BackupManager._seconds_since(upload_timestamp)
            else:
                if lastmod < record["date"]:
                    record["date"] = lastmod
                record["files"].append(file)

        return backups

    def get_backups_by_bucket(self, bucket: "Bucket") -> dict:
        """Return the (cached) _list_backups() result for one bucket."""
        if bucket.name not in self._bucket_backups:
            self._bucket_backups[bucket.name] = self._list_backups(bucket)
        return self._bucket_backups[bucket.name]

    @property
    def all_backups(self) -> dict:
        """
        All backups of all backup buckets, as {hostname: {backup_num: info}}.

        When the same host/backup number shows up in several buckets the
        bucket listed last wins.
        """
        if self._backups is None:
            self._backups = {}
            for bucket in self.backup_buckets:
                backups_dict = self.get_backups_by_bucket(bucket)
                for hostname, backups in backups_dict.items():
                    self._backups.setdefault(hostname, {}).update(backups)
        return self._backups

    def invalidate_host_cache(self, hostname):
        """
        Forget the cached listing of every bucket that holds backups of
        `hostname`, and the merged view, so the next access re-reads them.
        """
        stale = [
            bucket_name
            for bucket_name, hosts in self._bucket_backups.items()
            if hostname in hosts
        ]
        for bucket_name in stale:
            del self._bucket_backups[bucket_name]
        self._backups = None

    @property
    def backups_by_age(self):
        """
        Returns a dict of {hostname: [(backup_num, age), ...]}

        `age` is the "finalized_age" of the backup: seconds since it was
        finalized, or -1 when it is not finalized.
        """
        results = defaultdict(list)
        for hostname, backups in self.all_backups.items():
            for backup_num, statusdict in backups.items():
                results[hostname].append((backup_num, statusdict["finalized_age"]))
        return results
2013-02-07 01:00:32 +01:00
2022-10-18 00:02:58 +02:00
def choose_host_to_backup(age_dict, target_count=2):
    """
    Takes a dict from backups_by_age and yields (hostname, score) pairs,
    best candidate for a new backup first.

    Scoring per host:
      * +200 when the entry with the smallest age is an unfinalized
        backup (age -1); that entry is then left out of the rest
      * -100 when at least `target_count` entries remain
      * -1 per remaining entry
      * + log10 of the age of the first remaining entry
      * - log10 of (first age - last age), floored at log10(1) == 0
    """
    host_scores = defaultdict(int)

    for hostname, backup_list in age_dict.items():
        bl = sorted(backup_list, key=lambda x: x[1])
        if bl and bl[0][1] == -1:
            # unfinalized backup alert
            host_scores[hostname] += 200
            bl.pop(0)
        if len(bl) >= target_count:
            host_scores[hostname] -= 100
        host_scores[hostname] -= len(bl)
        if bl:
            # NOTE(review): bl is sorted by ascending age, so bl[0] is the
            # most recently finalized backup and bl[-1] the oldest one; the
            # difference below is therefore never positive and the second
            # term is always 0. Kept as is to not change the ranking.
            first = bl[0]
            last = bl[-1]
            # Ages of 0 (just finalized) or -1 (a second unfinalized entry)
            # are outside log10's domain; clamp them to 1 (log10(1) == 0).
            host_scores[hostname] += log10(max(1, first[1]))
            host_scores[hostname] -= log10(max(1, (first[1] - last[1])))

    for candidate, score in sorted(host_scores.items(),
                                   key=lambda x: x[1], reverse=True):
        yield candidate, score
2013-02-07 01:00:32 +01:00
def choose_backups_to_delete(agedict, target_count=2, max_age=30):
    """
    Takes a dict from backups_by_age, returns a dict of
    {hostname: [(backup_num, age), ...]} naming the backups to delete.

    Only entries with a positive age are considered. For every host the
    `target_count` youngest of them are always kept; of the remaining ones,
    those older than `max_age` days are selected, oldest first.
    """
    decimate = defaultdict(list)

    for hostname, backuplist in agedict.items():
        by_age = sorted(backuplist, key=lambda entry: entry[1])
        eligible = [entry for entry in by_age if entry[1] > 0]

        # peel the oldest entries off the end until only the keepers remain
        while len(eligible) > target_count:
            candidate = eligible.pop()
            if candidate[1] > (max_age * 24 * 60 * 60):
                decimate[hostname].append(candidate)

    return decimate
2013-02-07 01:00:32 +01:00
2022-10-18 00:02:58 +02:00
def make_restore_script(backup: dict, expire=86400) -> str:
    """
    Returns a quick and easy restoration script to restore the given system.

    backup: one backup info dict; the keys "hostname", "backup_num",
            "bucket", "date", "files" and "final_file" are read.
            "final_file" may be None (unfinalized backup).
    expire: lifetime in seconds of both the download authorization
            requested from the bucket and the script's own expiry check.

    The returned bash script downloads every file of the backup into a
    scratch directory below the (empty) target directory, decrypts them
    with gpg and unpacks the joined tar parts.
    """
    # Local import: the rest of the file only imports datetime/timedelta.
    from datetime import timezone

    hostname = backup["hostname"]
    backup_num = backup["backup_num"]
    bucket = backup["bucket"]
    friendly_time = backup["date"].strftime("%Y-%m-%d at %H:%M GMT")

    # Aware UTC time, so that the text labelled "GMT" really is GMT.
    expire_time = datetime.now(timezone.utc) + timedelta(seconds=expire)
    friendly_expiry = expire_time.strftime("%Y-%m-%d at %H:%M GMT")
    # The script compares against `date +%s`, i.e. whole epoch seconds.
    expire_epoch = int(expire_time.timestamp())

    members = list(backup["files"])
    if backup["final_file"] is not None:
        members.append(backup["final_file"])
    urls = " ".join(f"'{bucket.get_download_url(member.file_name)}'" for member in members)
    token = bucket.get_download_authorization(f"{hostname}.{backup_num}", expire)

    output = f"""#!/bin/bash
# Restoration script for {hostname} backup {backup_num},
# a backup created on {friendly_time}.
# To use: bash scriptname /path/to/put/the/files
# WARNING: THIS FILE EXPIRES AFTER {friendly_expiry}
if (( "$(date +%s)" > "{expire_epoch}" )); then
    echo "Sorry, this restore script is too old."
    exit 1
elif [ -z "$1" ]; then
    echo "Usage: ./scriptname /path/to/restore/to"
    exit 1
elif [ ! -d "$1" ]; then
    echo "Target $1 does not exist!"
    exit 1
elif [ -n "$(ls --almost-all "$1")" ]; then
    echo "Target $1 is not empty!"
    exit 1
fi
# cd to the destination, create a temporary workspace
cd "$1"
tmp_dir="$PWD/.restorescript-scratch"
mkdir "$tmp_dir"
files=({urls})
token='{token}'
declare -a out_files
for i in "${{files[@]}}"; do
    filename="$(echo "$i" | cut -d/ -f6)"
    curl --fail "$i" -o "$tmp_dir/$filename" -H "Authorization: $token"
    if (( $? != 0 )); then
        echo "Error during download !"
        exit 1
    fi
    out_files+=("$tmp_dir/$filename")
done
# decrypt files
gpg --decrypt-files "${{out_files[@]}}"
# join and untar files
cat "$tmp_dir"/*.tar.?? | tar -xf -
echo "DONE! Have a nice day."
"""
    return output
2013-02-07 01:00:32 +01:00
def start_archive(hosts):
    """
    Starts an archive operation for a list of hosts.

    Runs the BackupPC_archiveStart program located next to this script for
    the archive host "archives3" and waits for it to finish. The requesting
    user is $LOGNAME, else the name of the current uid, else "nobody".
    """
    username = environ.get("LOGNAME")
    if username is None:
        try:
            username = getpwuid(getuid()).pw_name
        except KeyError:
            username = "nobody"

    archive_start = Path(argv[0]).parents[0] / "BackupPC_archiveStart"
    command = [archive_start, "archives3", username, *hosts]

    Popen(command).communicate()
2013-02-07 01:00:32 +01:00
2022-10-18 00:02:58 +02:00
def script(parser: ArgumentParser, bmgr: "BackupManager", host: str, unfinalized: bool, backup_num: int = None,
           expire: int = 86400, filename: str = None):
    """
    Write (or print) a restore script for one backup of `host`.

    backup_num:  backup to restore; None selects the highest-numbered
                 backup, restricted to finalized ones unless `unfinalized`
                 is set. 0 is a valid backup number.
    expire:      lifetime of the script in seconds.
    filename:    output file; the script goes to stdout when not given.

    Problems are reported through parser.error().
    """
    host_backups = bmgr.all_backups[host]

    if backup_num is None:
        if unfinalized:
            # assuming highest number
            candidates = list(host_backups)
            if not candidates:
                parser.error(f"No backups found for host \"{host}\"")
        else:
            # assuming highest finalized number
            candidates = [num for num, info in host_backups.items() if info["finalized"] > 0]
            if not candidates:
                parser.error("No finalized backups found! Try --unfinalized if you dare")
        backup_num = max(candidates)
    elif backup_num not in host_backups:
        parser.error(f"No backup number {backup_num} found for host \"{host}\"")

    # Build the text first so a failure does not leave a truncated file.
    restore_script = make_restore_script(host_backups[backup_num], expire=expire)

    if filename:
        with open(filename, "w") as fd:
            fd.write(restore_script)
    else:
        print(restore_script)


def delete(bm: "BackupManager", keep: int, host: str, backup_num: int, age: int, test: bool,
           start: bool):
    """
    Delete one explicitly named backup (host + backup_num) or, when `age`
    is given, every backup older than `age` days beyond the `keep` most
    recent ones per host.

    test:   only report; nothing is deleted.
    start:  afterwards start an archive run for the best-scoring host, if
            any host has a positive score.
    """
    to_delete = []
    if host and backup_num is not None:
        # "is not None": 0 is a valid backup number
        print(f"Will delete backup: {host} {backup_num} (forced)")
        to_delete.append((host, backup_num))
    elif age:
        to_delete_dict = choose_backups_to_delete(bm.backups_by_age, target_count=keep, max_age=age)
        for hostname, backup_list in to_delete_dict.items():
            for backup_stat in backup_list:
                print(f"Will delete backup: {hostname} {backup_stat[0]} (expired at {backup_stat[1] / 86400.0} days)")
                to_delete.append((hostname, backup_stat[0]))
    else:
        return

    for delete_host, delete_backup_num in to_delete:
        host_backups = bm.all_backups.get(delete_host, {})
        delete_backup = host_backups.get(delete_backup_num, {})
        delete_files = delete_backup.get("files", [])
        final_file = delete_backup.get("final_file", None)
        if delete_files:
            progress = ChargingBar(f"Deleting backup {delete_host} #{delete_backup_num}:", max=len(delete_files))
            for file in progress.iter(delete_files):
                if not test:
                    file.delete()
        # Also done when there are no data files, so that a leftover
        # COMPLETE marker does not keep an empty backup listed forever.
        if final_file and not test:
            final_file.delete()

    # The cached listings still contain what was just deleted.
    for delete_host, delete_backup_num in to_delete:
        bm.invalidate_host_cache(delete_host)

    if start:
        score_iter = choose_host_to_backup(bm.backups_by_age, target_count=int(keep) + 1)
        for candidate, score in score_iter:
            if score > 0:
                print(f"Starting archive operation for host: {candidate} (score={score})")
                start_archive([candidate])
                break


def list_backups(bm: "BackupManager"):
    """
    Print a table of all stored backups: host, backup number, age and
    number of files; unfinalized backups are flagged with "*".
    """
    # Local import: the rest of the file only imports datetime/timedelta.
    from datetime import timezone

    print(f"{'Hostname':>25} | {'Bkup#':>5} | {'Age':>30} | {'Files':>5}")
    print(("-" * 72))

    # Backup dates are naive UTC datetimes, so "now" must be naive UTC too;
    # a local-time now() would skew every age by the UTC offset.
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    for hostname, backups in bm.all_backups.items():
        for backup_num in sorted(backups.keys()):
            info = backups[backup_num]
            filecount = len(info["files"])
            in_progress = "" if info["finalized"] > 0 else "*"
            print(f"{hostname:>25} | {backup_num:>5} | {str(now - info['date']):>30} | {filecount:>5}{in_progress}")
    print("* = not yet finalized (Age = time of last activity)")


def main():
    """
    Command line entry point: parse the arguments, connect to B2 with the
    keys from the `secrets` module and run the list/delete/script action.
    """
    parser = ArgumentParser(description="Companion maintenance script for BackupPC_archiveHost_s3. " +
                                        "By default, it assumes the 'list' command, which displays all " +
                                        "of the backups currently archived on B2. The 'delete' command " +
                                        "is used to delete backups. The 'script' command produces a " +
                                        "script that can be used to download and restore a backup.")
    parser.add_argument("-l", "--list", dest="list", action="store_true",
                        help="List stored backups after completing operations")

    subparsers = parser.add_subparsers(required=True, dest="action")
    subparsers.add_parser("list")

    # Numeric options carry type=int: without it argparse hands over
    # strings, which break the age/count arithmetic further down.
    delete_parser = subparsers.add_parser("delete")
    delete_parser.add_argument("-s", "--start-backups", dest="start", action="store_true",
                               help="When used with --age, start backups for hosts with fewer than keep+1 backups")
    delete_parser.add_argument("-k", "--keep", dest="keep", help="When used with --age, keep this many recent backups",
                               type=int, default=1)
    delete_parser.add_argument("-t", "--test", dest="test", action="store_true",
                               help="Test mode; don't actually delete")
    delete_parser.add_argument("-H", "--host", dest="host", help="Name of backed-up host")
    delete_parser.add_argument("-b", "--backup-number", dest="backup_num", type=int, help="Backup number")
    delete_parser.add_argument("-a", "--age", dest="age", type=int, help="Delete backups older than AGE days")

    script_parser = subparsers.add_parser("script")
    script_parser.add_argument("-H", "--host", dest="host", required=True, help="Name of backed-up host")
    script_parser.add_argument("-b", "--backup-number", dest="backup_num", type=int, help="Backup number")
    script_parser.add_argument("-f", "--filename", dest="filename", help="Output filename for script")
    script_parser.add_argument("-x", "--expire", dest="expire", type=int, default=86400, help="Maximum age of script")
    script_parser.add_argument("-u", "--unfinalized", dest="unfinalized", action="store_true",
                               help="Consider unfinalized backups")

    args = parser.parse_args()

    bm = BackupManager(secrets.access_key, secrets.shared_key)

    if args.action in ("script", "delete"):
        # "is not None" throughout: 0 is a valid backup number
        has_backup_num = args.backup_num is not None
        if has_backup_num and not args.host:
            parser.error("Must specify --host when specifying --backup-number")

        if args.host:
            if args.host not in bm.all_backups:
                parser.error(f"No backups found for host \"{args.host}\"")
        else:
            if len(bm.all_backups) == 0:
                parser.error("No buckets found!")

        if args.action == "script":
            # keyword arguments: script() takes `unfinalized` before
            # `backup_num`, which is easy to get wrong positionally
            script(parser, bm, args.host, unfinalized=args.unfinalized, backup_num=args.backup_num,
                   expire=args.expire, filename=args.filename)
        elif args.action == "delete":
            if not (args.age or args.host or has_backup_num):
                parser.error("--age or --host and --backup-number are required")
            elif args.host and not has_backup_num:
                parser.error("--backup-number required with --host")
            elif args.age and (args.host or has_backup_num):
                parser.error("--age can't be combined with --host or --backup-number")
            elif args.start and not args.age:
                parser.error("--start-backups only makes sense with --age")

            delete(bm, args.keep, args.host, args.backup_num, args.age, args.test, args.start)

    if args.action == "list" or args.list:
        list_backups(bm)


if __name__ == "__main__":
    main()