# Mini Shell  -- NOTE(review): web-shell viewer residue, not part of this module; remove this line.
"""Backup Crons"""
from argparse import ArgumentParser
import os
import logging
import random
import traceback
from subprocess import check_call
import sys
import time
from typing import TYPE_CHECKING, List
from bakmgr.errors import TaskError
from .configs import setup_logging, Conf, TaskConf, FilesConf
from .api.quotas import check_quota
from .api.bakauth import BakAuthError, MonError, add_problem
from .api.bakauth import clear_problem, mon_update, get_reg_details
from .api.restic import Restic, ResticError
if TYPE_CHECKING:
from .api.restic.data import Snapshot
# mysqldump invocation for the MySQL backup task: every database in one
# consistent pass (--single-transaction) without locking tables, keeping
# going past per-table errors (--force).
MYSQLDUMP = [
    'mysqldump',
    '--all-databases',
    '--skip-lock-tables',
    '--hex-blob',
    '--single-transaction',
    '--force',
]
# pg_dumpall with --clean so a restore drops objects before recreating them
PG_DUMPALL = ['pg_dumpall', '--clean']
# Seconds per day; used to convert a task's interval (days) into seconds
ONE_DAY = 86400
# FIXME: add a cron lock
def main():
    """Cron entry point: best-effort self-update, then the backup scheduler.

    Every failure mode is translated into a monitoring "problem" and the
    monitoring state is always pushed at the end via mon_update().
    """
    # Self-update is best-effort; record the outcome either way.
    try:
        check_updates()
    except Exception as exc:
        add_problem(MonError.UPDATES, str(exc))
    else:
        clear_problem(MonError.UPDATES)

    try:
        sched()
    except KeyboardInterrupt:
        sys.exit("Received SIGINT")
    except Exception as exc:
        # Map the failure onto the most specific monitoring problem;
        # order mirrors the original except-clause precedence.
        if isinstance(exc, BakAuthError):
            add_problem(MonError.BAKAUTH, str(exc))
        elif isinstance(exc, ResticError):
            add_problem(MonError.RESTIC, str(exc))
        else:
            add_problem(MonError.CRON_CRASH, traceback.format_exc())
    else:
        # TODO: review each clear_problem usage
        clear_problem(MonError.CRON_CRASH, MonError.RESTIC, MonError.BAKAUTH)
    finally:
        mon_update()
def check_updates():
    """Run the self-updater, rate-limited to once per ONE_DAY seconds.

    The mtime of a marker file acts as the rate limit.  Raises
    subprocess.CalledProcessError if either update command fails.
    """
    # FIXME: this is a temp solution
    marker = '/opt/bakmgr/var/.last_update'
    try:
        last = os.stat(marker).st_mtime
    except OSError:
        # Marker missing (first run) or unreadable: update now.
        last = 0
    # Consistency fix: use the module's ONE_DAY constant, not a bare 86400.
    if time.time() - last < ONE_DAY:
        return
    # Touch the marker *before* running the updater so a crashing updater
    # is not retried on every cron run.
    with open(marker, 'wb'):
        pass
    os.utime(marker)
    print("Running bakmgr-update")
    check_call(['/opt/bakmgr/bin/bakmgr-update'])
    print("Running bakmgr-update --deps")
    check_call(['/opt/bakmgr/bin/bakmgr-update', '--deps'])
def sched():
    """Run every due backup task, then prune if all of them ran."""
    if os.getuid() != 0:
        sys.exit('This tool must run as root')
    if parse_args().splay:
        delay = random.randint(0, 240)  # spread starts over 0-4 minutes
        logging.debug('sleeping %d seconds to splay the cron', delay)
        time.sleep(delay)
    conf = Conf()
    setup_logging(conf, 'backup cron')
    reg = get_reg_details()
    restic = Restic(conf, reg)
    backups = restic.get_backups()
    try:
        retain = check_quota(conf)
    except BakAuthError as exc:
        add_problem(MonError.BAKAUTH, str(exc))
        mon_update()
        return
    # The list literal evaluates in order, so each task is constructed and
    # run before the next one is created — and none is short-circuited.
    results = [
        MySQLBackupTask(conf.mysql, restic, backups['mysql'], retain).run(),
        PgSQLBackupTask(conf.pgsql, restic, backups['pgsql'], retain).run(),
        FileBackupTask(conf.files, restic, backups['files'], retain).run(),
    ]
    if all(results):
        restic.prune()
def parse_args():
    """Build the command-line parser and parse ``sys.argv``."""
    splay_help = (
        'When running on cron, this is used to splay the cron and ease '
        'work on remote servers'
    )
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--splay', action='store_true', help=splay_help)
    return parser.parse_args()
class BackupTask:
    """Base class for one scheduled backup (files, MySQL or PgSQL).

    Subclasses implement ``task()``, which performs the actual backup via
    ``self.restic``.  ``run()`` handles scheduling (interval in days),
    load throttling, snapshot rotation and monitoring bookkeeping.
    """

    def __init__(
        self,
        task_conf: TaskConf,
        restic: Restic,
        mon_error: MonError,
        completed: List['Snapshot'],
        retain: int,
    ) -> None:
        # Most recent completed snapshot for this task, or None
        self.last_completed = completed[0] if completed else None
        self.retain = retain
        self.restic = restic
        self.task_conf = task_conf
        self.main_conf = task_conf.conf
        self.mon_error = mon_error
        if not task_conf.enabled:
            logging.debug("%s: task is disabled", task_conf.label)
            # A disabled task has no problems worth reporting
            clear_problem(self.mon_error)

    def run(self) -> bool:
        """Run the backup if due.

        Returns False when the task was skipped because it completed
        recently; True otherwise (even if the backup itself failed, since
        rotation may still have happened).
        """
        # BUG FIX: only __init__ checked the enabled flag, so a disabled
        # task was still executed.  Skip it here.  Return True so a
        # disabled task does not block the scheduler's prune decision
        # (matching the previous observable prune behavior).
        if not self.task_conf.enabled:
            return True
        if self.last_completed:
            last_time = self.last_completed.timestamp
            # Oldest acceptable timestamp; interval is in days
            cutoff = time.time() - (ONE_DAY * self.task_conf.interval)
            if last_time > cutoff:
                logging.debug("%s completed recently", self.task_conf.label)
                return False
        self.wait_for_load()
        logging.info("%s: starting", self.task_conf.label)
        try:
            self.restic.rotate(self.retain)  # in case previous rotation failed
            self.task()
            self.restic.rotate(self.retain)
        except (ResticError, TaskError) as exc:
            add_problem(self.mon_error, f"{type(exc).__name__}: {exc}")
        except Exception:
            add_problem(self.mon_error, traceback.format_exc())
        else:
            clear_problem(self.mon_error)
            logging.info('Completed backup run')
        finally:
            mon_update()
        return True

    def task(self) -> None:
        """Perform the actual backup; implemented by subclasses."""
        raise NotImplementedError

    def wait_for_load(self):
        """Wait until server load is acceptable (at most 60 minutes).

        A ``max_load`` of zero or less disables the wait entirely.
        """
        limit = self.main_conf.max_load
        if limit <= 0.0:  # don't wait
            return
        mins_waited = 0
        while max(os.getloadavg()) > limit and mins_waited < 60:
            # NOTE(review): uses print, not logging — presumably for cron
            # mail visibility; confirm before changing.
            print('Load is too high. Waiting 60s...')
            time.sleep(60)
            mins_waited += 1
        if mins_waited:
            logging.warning(
                'backup start was delayed %d minute(s) due to high server load',
                mins_waited,
            )
class FileBackupTask(BackupTask):
    """Backs up the configured filesystem paths."""

    def __init__(
        self,
        task_conf: TaskConf,
        restic: Restic,
        completed: List['Snapshot'],
        retain: int,
    ) -> None:
        super().__init__(task_conf, restic, MonError.FILES, completed, retain)

    def task(self) -> None:
        """Snapshot the include paths, honouring the exclude list."""
        files_conf: FilesConf = self.task_conf
        self.restic.backup_paths(files_conf.include, files_conf.exclude)
class MySQLBackupTask(BackupTask):
    """Backs up all MySQL databases via mysqldump."""

    def __init__(
        self,
        task_conf: TaskConf,
        restic: Restic,
        completed: List['Snapshot'],
        retain: int,
    ) -> None:
        super().__init__(task_conf, restic, MonError.MYSQL, completed, retain)

    def task(self) -> None:
        """Stream a full mysqldump into the 'mysql' snapshot."""
        self.restic.backup_sql('mysql', MYSQLDUMP)
class PgSQLBackupTask(BackupTask):
    """Backs up all PostgreSQL databases via pg_dumpall."""

    def __init__(
        self,
        task_conf: TaskConf,
        restic: Restic,
        completed: List['Snapshot'],
        retain: int,
    ) -> None:
        super().__init__(task_conf, restic, MonError.PGSQL, completed, retain)

    def task(self) -> None:
        """Stream a full pg_dumpall into the 'pgsql' snapshot."""
        self.restic.backup_sql('pgsql', PG_DUMPALL)
# Zerion Mini Shell 1.0  -- NOTE(review): web-shell viewer residue, not part of this module; remove this line.