Mini Shell
"""Restic object definition"""
import time
import os
import logging
import functools
import json
from subprocess import Popen, PIPE, CalledProcessError
from typing import List, Dict, Union, TYPE_CHECKING
from itertools import chain
from botocore.client import Config as BotoConfig
from botocore.exceptions import ClientError
from boto3.resources.base import ServiceResource as S3Bucket
import boto3
from bakmgr.api.bakauth import BAKAUTH1, post, BakAuthError, get_reg_details
from bakmgr.server_info import find_bin
from bakmgr.errors import TaskError
from .data import Snapshot
from .procs import ResticProc
from .errors import ResticError, ResticLockedError, ResticBadIndexError
if TYPE_CHECKING:
from bakmgr.configs import Conf
# path of the restic executable used for every restic command
RESTIC = '/usr/bin/restic'
# seconds to sleep between retries while the repo is locked (1 min)
LOCK_SLEEP = 60
# seconds to keep waiting on a locked repo before giving up (30 mins)
MAX_LOCK_WAIT = 30 * LOCK_SLEEP
def auto_retry(func):
    """Decorator to automatically recover from
    ResticLockedError and ResticBadIndexError

    Meant for methods: the wrapper calls ``self.unlock()`` on the object the
    decorated method is bound to.

    * ResticLockedError: when ``exc.unlock_ok()`` is true, ``self.unlock()``
      is called and the method is retried right away. Otherwise the wrapper
      sleeps LOCK_SLEEP seconds between retries and re-raises the error once
      MAX_LOCK_WAIT seconds have been spent sleeping.
    * ResticBadIndexError: ``exc.restic.repair()`` is run and the method is
      retried. Repair is attempted at most once per call; if the error shows
      up again it is re-raised.

    Any other exception propagates untouched.
    """

    @functools.wraps(func)
    def _auto_retry(self, *args, **kwargs):
        waited = 0  # seconds spent sleeping on a locked repo so far
        repaired = False  # whether repair() already ran during this call
        while True:
            try:
                return func(self, *args, **kwargs)
            except ResticLockedError as exc:
                if exc.unlock_ok():
                    # NOTE(review): this path retries with no delay and no
                    # bound; confirm unlock_ok() cannot stay true forever
                    self.unlock()
                    continue
                # >= so that the total sleep never exceeds MAX_LOCK_WAIT
                if waited >= MAX_LOCK_WAIT:
                    raise  # waited as long as we can
                # wait, then retry the func() above
                waited += LOCK_SLEEP
                time.sleep(LOCK_SLEEP)
            except ResticBadIndexError as exc:
                if repaired:
                    raise  # repair did not help; don't loop on it
                exc.restic.repair()
                repaired = True
                # loop again (rather than calling func() from inside this
                # handler) so a locked repo after the repair is still handled

    return _auto_retry
class Restic:
    """Handles restic commands

    Runs the restic binary at ``RESTIC`` against one S3 repository
    (``s3:<endpoint>/<bucket>``). The repository credentials come from the
    registration details dict; creating an instance makes sure the bucket
    exists and that the restic repo inside it is initialized.
    """

    def __init__(self, conf: 'Conf', reg: Union[dict, None] = None):
        """Set up repo access and initialize the repo if needed

        Args:
            conf: config object; ``max_load`` is passed to monitored restic
                processes as their limit and ``max_cpus`` becomes GOMAXPROCS
                when greater than zero
            reg: registration details with the keys ``endpoint``,
                ``restic_pass``, ``access_key``, ``secret_key``, ``bucket``
                and ``svr_class``; looked up with get_reg_details() if None

        Raises:
            BotoCoreError, ClientError: from botocore.exceptions, while
                checking for or creating the bucket
            ResticError: if ``restic init`` did not create the repository
        """
        self.limit = conf.max_load
        self._gomaxprocs = conf.max_cpus
        if reg is None:
            reg = get_reg_details()
        self._endpoint = reg['endpoint']
        self._restic_pass = reg['restic_pass']
        self._access_key = reg['access_key']
        self._secret_key = reg['secret_key']
        self._bucket = reg['bucket']
        self._svr_class = reg['svr_class']
        # prepare access to the repo if needed
        # can raise either BotoCoreError or ClientError from botocore.exceptions
        self._init_repo()

    @property
    def env(self) -> Dict[str, str]:
        """Restic environment variable dict"""
        ret = {
            'GOGC': '1',
            'RESTIC_CACHE_DIR': '/opt/bakmgr/var/.cache',
            'RESTIC_REPOSITORY': f's3:{self._endpoint}/{self._bucket}',
            'RESTIC_PASSWORD': self._restic_pass,
            'AWS_ACCESS_KEY_ID': self._access_key,
            'AWS_SECRET_ACCESS_KEY': self._secret_key,
        }
        # only set GOMAXPROCS when a positive CPU count was configured
        if self._gomaxprocs > 0:
            ret['GOMAXPROCS'] = str(self._gomaxprocs)
        return ret

    def proc(self, *cmd_args, mon: bool, **kwargs) -> ResticProc:
        """Returns a running ResticProc()

        Args:
            *cmd_args: arguments placed after the restic binary
            mon: if True the process gets ``self.limit`` as its limit,
                otherwise 0
            **kwargs: passed through to ResticProc
        """
        return ResticProc(
            [RESTIC, *cmd_args],
            restic=self,
            limit=self.limit if mon else 0,
            env=self.env,
            **kwargs,
        )

    def execv(self, *cmd_args):
        """os.execv restic rather than run as a subprocess

        The restic environment is merged into ``os.environ`` first. On
        success this replaces the current process and does not return.
        """
        os.environ.update(self.env)
        os.execv(RESTIC, [RESTIC, *cmd_args])

    def unlock(self):
        """Run ``restic unlock``

        Raises:
            ResticError: if the unlock command fails
        """
        try:
            self.proc('unlock', mon=False).complete(check=True)
        except CalledProcessError as exc:
            raise ResticError(exc, self) from exc

    @auto_retry
    def backup_paths(self, include: List[str], exclude: List[str]):
        """Run restic backup

        Args:
            include: paths to back up
            exclude: patterns, each passed to restic as ``--exclude``
        """
        excl = chain.from_iterable(('--exclude', str(x)) for x in exclude)
        incl = [str(x) for x in include]
        proc = self.proc('backup', '--tag', 'files', *excl, *incl, mon=True)
        # exit code 3 is accepted as well; per the restic docs it means an
        # incomplete snapshot was made because some source data was unreadable
        logging.debug(proc.complete(check=True, ok_codes=[0, 3]).stdout)

    @auto_retry
    def backup_sql(self, dbtype: str, dump_cmd: List[str]):
        """Backup a sql dump

        The dump command's stdout is piped to ``restic backup --stdin`` and
        stored as ``/root/<dbtype>_dump.sql`` with the tag ``dbtype``.

        Args:
            dbtype: used as the snapshot tag and in the stored filename
            dump_cmd: dump command and its arguments

        Raises:
            TaskError: if the dump binary cannot be found, or the dump
                command exits non-zero
        """
        if not find_bin(dump_cmd[0]):
            raise TaskError(f"Could not find {dump_cmd[0]}")
        args = [
            'backup',
            '--tag',
            dbtype,
            '--stdin',
            '--stdin-filename',
            f'/root/{dbtype}_dump.sql',
        ]
        with Popen(dump_cmd, stdout=PIPE) as dump:
            proc = self.proc(*args, mon=True, stdin=dump.stdout)
            logging.debug(proc.complete(check=True).stdout)
            dump_status = dump.wait()
        # restic only sees a byte stream, so a dump that died part way would
        # otherwise be stored and reported as a good backup
        if dump_status != 0:
            raise TaskError(
                f"{dump_cmd[0]} exited with status {dump_status}; "
                f"the {dbtype} dump may be incomplete"
            )

    @auto_retry
    def rotate(self, retain: int):
        """Rotate out old backups

        For each backup type, keeps the ``retain`` newest snapshots and
        forgets the rest.

        Args:
            retain: snapshots to keep per backup type; nothing is removed
                when this is less than 1
        """
        if retain < 1:
            logging.debug('retain is %d; not rotating', retain)
            return
        remove = []
        for snaps in self.get_backups().values():
            # get_backups() sorts newest first, so everything past the
            # first ``retain`` entries is what has aged out
            remove.extend(snaps[retain:])
        if not remove:
            return
        logging.debug('rotating out %d old backups', len(remove))
        proc = self.proc('forget', *[x.id for x in remove], mon=False)
        logging.debug(proc.complete(check=True).stdout)

    @auto_retry
    def prune(self):
        """Prune the restic repo"""
        logging.debug('pruning repo')
        proc = self.proc('prune', '--cleanup-cache', mon=False)
        logging.debug(proc.complete(check=True).stdout)

    def get_backups(self) -> Dict[str, List[Snapshot]]:
        """Get snapshots by type and sort newest first

        Returns:
            dict with the keys ``files``, ``mysql`` and ``pgsql``. A snapshot
            is listed under the first of those keys found in its tags;
            snapshots with none of them are left out.
        """
        baks = {'files': [], 'mysql': [], 'pgsql': []}
        for snapshot in self.snapshots():
            for key, snaps in baks.items():
                if key in snapshot.tags:
                    snaps.append(snapshot)
                    break
        for snaps in baks.values():
            snaps.sort(key=lambda x: x.timestamp, reverse=True)  # newest first
        return baks

    def repair(self):
        """Attempts data repair

        Requests a bucket index check first; a BakAuthError from that step is
        logged and does not stop the restic index rebuild which follows.

        Raises:
            ResticError: if rebuilding the restic index fails
        """
        logging.info('Checking cloud data indexes...')
        try:
            self._bucket_repair()
        except BakAuthError as exc:
            logging.error(exc)
        logging.info('Checking backup data indexes...')
        self._rebuild_index()

    def _bucket_repair(self):
        """Queue a bucket index repair and wait for it to finish

        Polls the task state every 10 seconds and stops waiting after 20
        minutes, or as soon as a status lookup fails.

        Raises:
            BakAuthError: if the repair could not be queued
        """
        # request for ceph to check the bucket's index
        # this initial post may raise BakAuthError
        task_id = post(
            # TODO: see if this can be bakauth3
            bakauths=[BAKAUTH1],
            uri='/buckets/queue_repair',
            data={'user': 'root', 'bucket': self._bucket},
        )
        # wait until it's finished
        state_msg = 'QUEUED'
        # monotonic clock: the timeout must not be skewed by clock changes
        start = time.monotonic()
        while state_msg in ('QUEUED', 'STARTED'):
            if time.monotonic() - start > 1200:
                # detach if we've been waiting > 20 mins; that isn't normal
                logging.warning('repair task still running after 20 mins')
                break
            time.sleep(10.0)
            try:
                state_msg = post(
                    bakauths=[BAKAUTH1],
                    uri='/lookup/check_task',
                    data={'task_id': task_id},
                )
            except BakAuthError as exc:
                logging.warning(exc)
                break

    def _rebuild_index(self):
        """Run ``restic rebuild-index --read-all-packs``

        Raises:
            ResticError: if the command fails
        """
        proc = self.proc('rebuild-index', '--read-all-packs', mon=False)
        try:
            proc.complete(check=True)
        except CalledProcessError as exc:
            raise ResticError(exc, self) from exc

    @auto_retry
    def snapshots(self, tag: Union[str, None] = None) -> List[Snapshot]:
        """Get a list of snapshots; optionally filter to a tag

        Raises:
            ResticError: if the snapshots command fails
        """
        args = ['snapshots', '--json']
        if tag:
            args.extend(['--tag', tag])
        proc = self.proc(*args, mon=False)
        try:
            ret = proc.complete(check=True)
        except CalledProcessError as exc:
            raise ResticError(exc, self) from exc
        return [Snapshot(restic=self, data=x) for x in json.loads(ret.stdout)]

    def s3_bucket(self) -> S3Bucket:
        """Gets a boto3 s3.Bucket for this repo"""
        config = BotoConfig(
            connect_timeout=30,
            retries={'max_attempts': 2},
            read_timeout=30,
        )
        return boto3.resource(
            's3',
            endpoint_url=self._endpoint,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            config=config,
        ).Bucket(self._bucket)

    @auto_retry
    def _init_repo(self) -> None:
        """Initializes a restic repo if it hasn't been done already

        Creates the bucket first if it does not exist yet.

        Raises:
            ClientError: for any S3 error other than a missing bucket
            ResticError: if ``restic init`` did not report a new repository
        """
        try:
            # if /keys/ has items in it
            if list(self.s3_bucket().objects.filter(Prefix='keys/')):
                return  # restic is already initialized
        except ClientError as exc:
            if 'NoSuchBucket' not in str(exc):
                raise
            bucket = self.s3_bucket()
            bucket.create()
            bucket.wait_until_exists()
        # check=False: success is judged by restic's output, not its exit code
        ret = self.proc('init', mon=False).complete(check=False)
        if 'created restic repository' not in ret.stdout:
            raise ResticError(ret, self)
Zerion Mini Shell 1.0