# NOTE(review): removed stray "Mini Shell" banner text injected above the
# module docstring -- it was not valid Python and broke the module
"""Main Restic objects and functions"""
from typing import Literal, TextIO, Union, Optional, overload
from collections.abc import Generator
from pathlib import PurePath
import json
import functools
from subprocess import CalledProcessError, PIPE
import logging
import boto3
from botocore.client import Config as BotoConfig
from botocore.exceptions import BotoCoreError, ClientError
from boto3.resources.base import ServiceResource as S3Bucket
import cproc
from .data import (
ResticRepo,
Snapshot,
ResticCmd,
SnapPath,
SnapFile,
SnapDir,
Backup,
SQLBackupGroup,
SQLBackupItem,
UserBackupDicts,
UserBackups,
)
from .exc import (
ResticError,
ResticBadIndexError,
ResticLockedError,
)
RESTIC_PATH = '/usr/bin/restic'
def locked_retry(func):
    """Decorator: retry a method once after unlocking the repo.

    If the wrapped method raises ResticLockedError and the error reports
    that an automatic unlock is safe, run ``self.unlock()`` and call the
    method a second time; otherwise the original error propagates.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except ResticLockedError as exc:
            if not exc.unlock_ok():
                raise
            # Stale lock that is safe to clear: unlock and retry once
            self.unlock()
            return func(self, *args, **kwargs)
    return wrapper
class Restic:
"""Handles restic commands
Args:
endpoint (str): S3 server base url with https:// (excluding s3: prefix)
cluster (str | None): Optional cluster name. Only used by repr()
repo (ResticRepo): dataclass containing bucket name and key info
lim (int, float, ProcLimit, None): max load limit. This can be a static
limit if you set it to a float or int, or for more flexibility,
use a ``ProcLimit`` instance. If unset, processes will not be
ratelimited
tmp_dir (Path | str): TMPDIR to use. Defaults to '/var/tmp/restic'.
cache_dir (Path, str | None): RESTIC_CACHE_DIR to use. Set this to
None to disable cache. Defaults to '/var/cache/restic/%(bucket)s'
gomaxprocs (int, None): max CPU cores to allow restic to use
"""
__module__ = 'restic'
def __init__(
self,
*,
endpoint: str,
cluster: Union[str, None] = None,
repo: ResticRepo,
lim: Union[int, float, cproc.ProcLimit, None] = None,
tmp_dir: Union[PurePath, str] = '/var/tmp/restic',
cache_dir: Union[PurePath, str, None] = '/var/cache/restic/%(bucket)s',
gomaxprocs: Union[int, None] = 2,
):
self.endpoint = endpoint
self.cluster = cluster
self.repo = repo
self.lim = lim
self.tmp_dir = str(tmp_dir)
self.gomaxprocs = gomaxprocs
if cache_dir is None:
self.cache_dir = None
else:
self.cache_dir = str(cache_dir) % {'bucket': repo.bucket}
self.rebuild_index = self.repair_index
def __repr__(self):
if self.cluster:
return f'Restic<{self.cluster}:{self.repo.bucket}>'
return f'Restic<{self.repo.bucket}>'
@property
def env(self) -> dict[str, str]:
"""Restic environment variable dict"""
ret = {
'TMPDIR': self.tmp_dir,
'GOGC': '1',
'RESTIC_PASSWORD': self.repo.restic_pass,
'AWS_ACCESS_KEY_ID': self.repo.access_key,
'AWS_SECRET_ACCESS_KEY': self.repo.secret_key,
}
if self.cache_dir:
ret['RESTIC_CACHE_DIR'] = self.cache_dir
if self.gomaxprocs is not None and self.gomaxprocs > 0:
ret['GOMAXPROCS'] = str(self.gomaxprocs)
return ret
def s3_bucket(self, timeout=30, retries=2) -> S3Bucket:
"""Gets a boto3 s3.Bucket for this repo
Args:
timeout (int): network timeout in secs
retries (int): times to retry on timeout
Returns:
s3.Bucket: boto3 resource for this bucket
"""
config = BotoConfig(
connect_timeout=timeout,
retries={'max_attempts': retries},
read_timeout=timeout,
)
return boto3.resource(
's3',
endpoint_url=self.endpoint,
aws_access_key_id=self.repo.access_key,
aws_secret_access_key=self.repo.secret_key,
config=config,
).Bucket(self.repo.bucket)
def init_key_exists(self) -> Union[bool, None]:
"""Check if the /keys/ path has items in it, meaning that restic
has been initialized
Raises:
botocore.exceptions.ClientError: connection problem
botocore.exceptions.BotoCoreError: boto3 error
Returns:
(bool | None): whether the keys path existed in the bucket, or
None if the bucket did not exist at all
"""
try:
return (
len(list(self.s3_bucket().objects.filter(Prefix='keys/'))) > 0
)
except ClientError as exc:
if 'NoSuchBucket' in str(exc):
return None
raise
    def prepare(self) -> bool:
        """Prepare access to this repo if needed and preemptively fix
        issues connecting to it. Plugins should probably run this first

        Runs ``init_repo()`` and, on known-recoverable failures (a stale
        lock or a damaged index), attempts one automatic repair before a
        single final retry of ``init_repo()``.

        Returns:
            bool: whether this repo is ready for access
        """
        try:
            self.init_repo()
        except (BotoCoreError, ClientError) as exc:
            # Connectivity / credential problems: nothing to repair locally
            logging.error('%s %s: %s', self, type(exc).__name__, exc)
            return False
        except ResticLockedError as exc:
            if not exc.unlock_ok():
                # Lock is not safe to clear automatically; give up
                logging.error(
                    '%s: could not automatically unlock\n%s', exc.name, exc
                )
                return False
            try:
                self.unlock()
            except ResticError:
                pass  # we'll try init_repo() one more time
        except ResticBadIndexError as exc:
            # Index damage can often be fixed by a rebuild; best-effort
            try:
                self.rebuild_index()
            except ResticError:
                pass  # we'll try init_repo() one more time
        except ResticError as exc:
            logging.error('%s: %s', exc.name, exc)
            return False
        # Second (final) attempt after any automatic repair above; note
        # this also runs when the first attempt succeeded, in which case
        # init_repo() is a cheap no-op returning False
        try:
            self.init_repo()
        except (BotoCoreError, ClientError) as exc:
            logging.error('%s %s: %s', self, type(exc).__name__, exc)
            return False
        except ResticError as exc:
            logging.error('%s: %s', exc.name, exc)
            return False
        return True
def init_repo(self) -> bool:
"""Initializes a restic repo if it hasn't been done already
Raises:
botocore.exceptions.ClientError: connection problem
botocore.exceptions.BotoCoreError: boto3 error
ResticError: if the restic init command fails with an error
Returns:
bool: True if initialized or False if already done
"""
if exists := self.init_key_exists():
return False
if exists is None: # bucket was missing
bucket = self.s3_bucket()
bucket.create()
bucket.wait_until_exists()
ret = self.build('init').run(stdout=PIPE, no_lim=True)
if 'already initialized' in ret.stderr:
return False
if 'created restic repository' not in ret.stdout:
raise ResticError(ret)
return True
def unlock(self):
"""Run restic unlock
Raises:
ResticError: if the restic unlock command fails
"""
try:
self.build('unlock').run(check=True, no_lim=True)
except CalledProcessError as exc:
raise ResticError(exc) from exc
@locked_retry
def repair_index(self):
"""Run restic repair index
Raises:
ResticError: if the command fails
"""
# There's also a 'repair packs' and 'repair snapshots' command, but
# automating that is tricky and dangerous, so we're leaving that to
# manual troubleshooting only.
try:
self.build('repair', 'index', '--read-all-packs').run(
check=True, no_lim=True
)
except CalledProcessError as exc:
raise ResticError(exc) from exc
@locked_retry
def prune(self, no_lim: bool = True):
"""Run restic prune
Args:
no_lim (bool): do not CPU limit the command as it runs regardless
of the lim arg in Restic
Raises:
ResticError: if the command fails
"""
if self.cache_dir:
args = ('prune', '--cleanup-cache')
else:
args = ('prune',)
try:
self.build(*args).run(check=True, no_lim=no_lim)
except CalledProcessError as exc:
raise ResticError(exc) from exc
def build(self, *args) -> ResticCmd:
"""Build a ResticCmd object that can be used to execute the requested
restic command
Args:
*args (tuple[str]): restic subcommand and args
Returns:
ResticCmd: a restic command executor
"""
cmd = self._basecmd
cmd.extend(args)
return ResticCmd(cmd, restic=self)
@property
def _basecmd(self) -> list[str]:
cmd = [
RESTIC_PATH,
f'--repo=s3:{self.endpoint}/{self.repo.bucket}',
]
if not self.cache_dir:
cmd.append('--no-cache')
return cmd
@locked_retry
def snapshots(
self,
*,
tags: Union[list[str], None] = None,
timeout: Union[int, float, None] = None,
) -> list[Snapshot]:
"""Get a list of restic snapshots in this repo. If this server is a
Backup Manager client, see the ``get_backups()`` function instead
args:
tags (list[str], optional): only consider snapshots which include
this tag list. To check with AND logic, specify a single tag as
``["tag1,tag2"]``. To check for either tag, specify
independently as ``["tag1", "tag2"]``
timeout (float | int | None): optional command timeout
Raises:
ResticError: if the restic snapshots command failed
subprocess.TimeoutExpired: if timeout was specified and exceeded
Returns:
list[Snapshot]: Snapshots found in the repo
"""
args = ['snapshots', '--json']
if tags:
for tag in tags:
args.extend(['--tag', tag])
try:
ret = self.build(*args).run(
stdout=PIPE, check=True, no_lim=True, timeout=timeout
)
except CalledProcessError as exc:
raise ResticError(exc) from exc
return [Snapshot(restic=self, data=x) for x in json.loads(ret.stdout)]
def backup(
self,
paths: list[Union[str, PurePath]],
*,
tags: list[str],
bwlimit: Union[int, None] = None,
excludes: Optional[list[Union[str, PurePath]]] = None,
exclude_files: Optional[list[Union[str, PurePath]]] = None,
quiet: bool = True,
) -> ResticCmd:
"""Crafts a ResticCmd to backup a list of paths.
Warning:
return codes 0 and 3 should be considered success.
See https://github.com/restic/restic/pull/2546
Args:
paths (list[str | PurePath]): list of paths to backup
tags (list[str]): list of labels for the snapshot
bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
to unlimited
excludes (list[str | PurePath], optional): list of paths to exclude
exclude_files (list[str | PurePath], optional): list of paths of
files containing glob patterns to exclude
quiet (bool): add the --quiet flag. Defaults True
Returns:
ResticCmd: a restic command executor
"""
args = self._backup_args(tags, bwlimit, quiet)
if excludes is not None:
for exclude in excludes:
args.extend(['--exclude', str(exclude)])
if exclude_files is not None:
for exclude_file in exclude_files:
args.extend(['--exclude-file', str(exclude_file)])
args.extend([str(x) for x in paths])
return self.build(*args)
def upload(
self,
stream: Union[str, TextIO],
/,
path: Union[str, PurePath],
tags: list[str],
bwlimit: Union[int, None] = None,
quiet: bool = True,
) -> None:
"""Uploads a stream or str to the restic repo
Args:
stream (str | TextIO): data source to upload, such as the stdout of
a mysqldump process
path (str | PurePath): the --stdin-filename to use. This isn't
necessarily where the data came from, but is where restic will
say it did in the snapshot's metadata
tags (list[str]): list of labels for the snapshot
bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
to unlimited
quiet (bool): add the --quiet flag. Defaults True
Raises:
ResticError: if the restic command failed
"""
cmd = self.upload_cmd(path, tags, bwlimit, quiet)
try:
if isinstance(stream, str):
cmd.run(input=stream, check=True)
else:
cmd.run(stdin=stream, check=True)
except CalledProcessError as exc:
raise ResticError(exc) from exc
def upload_cmd(
self,
path: Union[str, PurePath],
tags: list[str],
bwlimit: Union[int, None] = None,
quiet: bool = True,
) -> ResticCmd:
"""Like `upload` but returns a ResticCmd rather than running directly
Args:
path (str | PurePath): the --stdin-filename to use. This isn't
necessarily where the data came from, but is where restic will
say it did in the snapshot's metadata
tags (list[str]): list of labels for the snapshot
bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
to unlimited
quiet (bool): add the --quiet flag. Defaults True
Returns:
ResticCmd: restic command executor
"""
args = self._backup_args(tags, bwlimit, quiet)
args.extend(['--stdin', '--stdin-filename', str(path)])
return self.build(*args)
def dump(
self, snap: Union[Snapshot, str], filename: Union[str, PurePath]
) -> ResticCmd:
"""Crafts a ResticCmd to dump a file from the specified snapshot ID
Args:
snap (Snapshot | str): snapshot or snapshot ID to dump data from
filename (str | PurePath): filename to retrieve
Returns:
ResticCmd: restic command executor
"""
snap_id = snap.id if isinstance(snap, Snapshot) else snap
return self.build('dump', snap_id, str(filename))
def restore(
self,
snap: Union[Snapshot, str],
*,
includes: Optional[list[Union[str, PurePath]]] = None,
excludes: Optional[list[Union[str, PurePath]]] = None,
target: Union[str, PurePath] = '/',
) -> ResticCmd:
"""Crafts a ResticCmd to restore a snapshot
Args:
snap (str): snapshot or snapshot ID to restore from
includes (list[str | PurePath], optional): --include paths
excludes (list[str | PurePath], optional): --exclude paths
target (str | PurePath): base directory prefix to restore to.
Defaults to '/', which restores to the original path
Returns:
ResticCmd: restic command executor
"""
snap_id = snap.id if isinstance(snap, Snapshot) else snap
args = ['restore', '--target', str(target)]
if includes is not None:
for include in includes:
args.extend(['--include', str(include)])
if excludes is not None:
for exclude in excludes:
args.extend(['--exclude', str(exclude)])
args.append(snap_id)
return self.build(*args)
@staticmethod
def _backup_args(
tags: list[str], bwlimit: Union[int, None], quiet: bool
) -> list[str]:
"""Builds the base backup subcommand"""
args = ['backup']
if quiet:
args.append('--quiet')
if bwlimit is not None and bwlimit > 0:
args.extend(['--limit-upload', str(bwlimit)])
for tag in tags:
args.extend(['--tag', tag])
return args
@locked_retry
def forget(self, *ids, prune: bool = False, no_lim: bool = True):
"""Run restic forget
Args:
*ids (Snapshot | str): snapshots to remove, specified by
either a Snapshot object or snapshot ID str
prune (bool): whether to automatically run prune if at
least one snapshot was removed. Defaults to False
no_lim (bool): do not CPU limit the command as it runs regardless
of the lim arg in Restic
Raises:
ResticError: if the restic forget command fails
"""
args = ['forget']
if prune:
if self.cache_dir:
args.extend(['--prune', '--cleanup-cache'])
else:
args.append('--prune')
args.extend([x.id if isinstance(x, Snapshot) else x for x in ids])
try:
self.build(*args).run(check=True, no_lim=no_lim)
except CalledProcessError as exc:
raise ResticError(exc) from exc
def listdir(
self, snap: Union[Snapshot, str], path: Union[str, PurePath]
) -> list[Union[SnapFile, SnapDir]]:
"""Like ``scandir`` but return a list instead of a Generator.
Returns items found in a requested path in a restic.Snapshot
Args:
snap (Snapshot | str): snapshot to list the contents of, supplied
either by its Snapshot instance of snapshot ID
path (str | PurePath): full path inside the snapshot to list the
contents of
Raises:
ValueError: requested path was not a full path
ResticError: Any error listing snapshot contents from restic
Returns:
list[SnapFile | SnapDir]: files or directories
"""
return list(self.scandir(snap=snap, path=path))
@locked_retry
def scandir(
self, snap: Union[Snapshot, str], path: Union[str, PurePath]
) -> Generator[Union[SnapFile, SnapDir], None, None]:
"""Iterates over items found in a requested path in a restic.Snapshot
Args:
snap (Snapshot | str): snapshot to list the contents of, supplied
either by its Snapshot instance of snapshot ID
path (str | PurePath): full path inside the snapshot to list the
contents of
Raises:
ValueError: requested path was not a full path
ResticError: Any error listing snapshot contents from restic
Yields:
Generator[SnapFile | SnapDir, None, None]: files or directories
"""
snap_id = snap.id if isinstance(snap, Snapshot) else snap
path = str(path)
if not path.startswith('/'):
raise ValueError('path must be a full path')
path = path.rstrip('/')
if path == '':
path = '/'
cmd = self.build('ls', '--json', '--long', snap_id, path)
try:
lines = cmd.run(
stdout=PIPE, check=True, no_lim=True
).stdout.splitlines()
except CalledProcessError as exc:
raise ResticError(exc) from exc
# stdout is formatted as line-deliminted JSON dicts
if len(lines) < 2:
return
snapshot = Snapshot(restic=self, data=json.loads(lines.pop(0)))
while lines:
data = json.loads(lines.pop(0))
yield SnapPath(
snapshot=snapshot,
restic=self,
name=data['name'],
type=data['type'],
path=data['path'],
uid=data['uid'],
gid=data['gid'],
mode=data.get('mode', None),
permissions=data.get('permissions', None),
)
@overload
def get_backups(
self,
user: Optional[str] = None,
*,
timeout: Union[int, float, None] = None,
serialize: Literal[True],
snapshots: Optional[list[Snapshot]] = None,
) -> dict[str, UserBackupDicts]:
...
@overload
def get_backups(
self,
user: Optional[str] = None,
*,
timeout: Union[int, float, None] = None,
serialize: Literal[False],
snapshots: Optional[list[Snapshot]] = None,
) -> dict[str, UserBackups]:
...
def get_backups(
self,
user: Optional[str] = None,
*,
timeout: Union[int, float, None] = None,
serialize: bool = True,
snapshots: Optional[list[Snapshot]] = None,
) -> dict[str, dict[str, list]]:
"""Get backups for a backups 3.x Backup Manager client
Args:
user (str, optional): only list backups found for this user
timeout (int | float | None): timeout for the underlying
restic snapshots command
serialize (bool): if True, return as a json-serializable dict. If
False, return as Backup, SQLBackupGroup, and SQLBackupItem
objects
snapshots (list[Snapshot], optional): if provided, scan the backups
found in this list rather than executing ``restic snapshots``
Returns:
dict[str, dict[str, list]]: Top-level keys are usernames.
Second-level are the type of backup. Values are a list of backups
found which may be dicts or objects depending on the serialize arg
"""
tag = None if user is None else f'user:{user}'
if snapshots:
all_snaps = snapshots.copy()
if tag:
all_snaps = [x for x in all_snaps if tag in x.tags]
else:
all_snaps = self.snapshots(
tags=None if user is None else [tag],
timeout=timeout,
)
out = {}
sqls = []
sql_map = {}
while all_snaps:
try:
bak = Backup(all_snaps.pop())
except (ValueError, KeyError):
continue # snapshot did not contain a valid backup
if isinstance(bak, SQLBackupItem):
sqls.append(bak)
else:
if bak.user not in out:
out[bak.user] = {}
if bak.type not in out[bak.user]:
out[bak.user][bak.type] = []
out[bak.user][bak.type].append(bak)
if isinstance(bak, SQLBackupGroup):
sql_map[(bak.type, bak.user, bak.time)] = bak
for bak in sqls:
key = (bak.type, bak.user, bak.time)
if key in sql_map:
sql_map[key].dbs[bak.dbname] = bak
# else the backup run was interrupted; backup-runner will delete
# this snapshot on its next run
if serialize:
self.serialize(out)
return out
@staticmethod
def serialize(backups):
"""Converts the return of get_backups(serialize=False) to a
json-serializable dict"""
for user in backups:
for bak_type, objs in backups[user].items():
backups[user][bak_type] = [x.serialize() for x in objs]
class S3Tool:
    """Wrapper object for moving items around in an S3 Bucket
    Args:
        s3_bucket (S3Bucket): service resource from ``Restic.s3_bucket()``
    """
    def __init__(self, s3_bucket: S3Bucket):
        self.bucket = s3_bucket
        self.moved = []  # destination keys of every completed move()
    def copy(self, src: str, dest: str):
        """Copy an item in the S3 bucket"""
        source = {'Bucket': self.bucket.name, 'Key': src}
        return self.bucket.copy(source, dest)
    def delete(self, key: str):
        """Delete an item in the S3 bucket"""
        # NOTE(review): the Bucket= kwarg looks redundant for a Bucket
        # resource action (the resource injects its own name) -- confirm
        # against the boto3 docs before removing it
        return self.bucket.delete_objects(
            Bucket=self.bucket.name, Delete={'Objects': [{'Key': key}]}
        )
    def move(self, src: str, dest: str):
        """Move an item in the S3 bucket (copy, then delete the source)"""
        self.copy(src, dest)
        self.delete(src)
        self.moved.append(dest)
# NOTE(review): removed trailing "Zerion Mini Shell 1.0" banner text
# (web-shell artifact) -- it was not valid Python and broke the module