Mini Shell

Direktori : /proc/self/root/opt/saltstack/salt/extras-3.10/restic/
Upload File :
Current File : //proc/self/root/opt/saltstack/salt/extras-3.10/restic/base.py

"""Main Restic objects and functions"""

from typing import Literal, TextIO, Union, Optional, overload
from collections.abc import Generator
from pathlib import PurePath
import json
import functools
from subprocess import CalledProcessError, PIPE
import logging
import boto3
from botocore.client import Config as BotoConfig
from botocore.exceptions import BotoCoreError, ClientError
from boto3.resources.base import ServiceResource as S3Bucket
import cproc

from .data import (
    ResticRepo,
    Snapshot,
    ResticCmd,
    SnapPath,
    SnapFile,
    SnapDir,
    Backup,
    SQLBackupGroup,
    SQLBackupItem,
    UserBackupDicts,
    UserBackups,
)
from .exc import (
    ResticError,
    ResticBadIndexError,
    ResticLockedError,
)

RESTIC_PATH = '/usr/bin/restic'


def locked_retry(func):
    """Decorator for methods that retries the call once on ResticLockedError

    The retry only happens when the caught error's ``unlock_ok()`` returns a
    truthy value; otherwise the original error is re-raised. Before retrying,
    ``self.unlock()`` is called, so the decorated method's instance must
    provide an ``unlock`` method.
    """

    @functools.wraps(func)
    def _locked_retry(self, *args, **kwargs):
        try:
            outcome = func(self, *args, **kwargs)
        except ResticLockedError as lock_err:
            may_unlock = lock_err.unlock_ok()
            if not may_unlock:
                raise
        else:
            return outcome
        # Only reached after a lock error that allowed unlocking. The retry is
        # deliberately outside the except block and is not guarded, so a
        # second failure propagates to the caller as-is.
        self.unlock()
        return func(self, *args, **kwargs)

    return _locked_retry


class Restic:
    """Handles restic commands

    Args:
        endpoint (str): S3 server base url with https:// (excluding s3: prefix)
        cluster (str | None): Optional cluster name. Only used by repr()
        repo (ResticRepo): dataclass containing bucket name and key info
        lim (int, float, ProcLimit, None): max load limit. This can be a static
            limit if you set it to a float or int, or for more flexibility,
            use a ``ProcLimit`` instance. If unset, processes will not be
            ratelimited
        tmp_dir (Path | str): TMPDIR to use. Defaults to '/var/tmp/restic'.
        cache_dir (Path, str | None): RESTIC_CACHE_DIR to use. Set this to
            None to disable cache. Defaults to '/var/cache/restic/%(bucket)s'
        gomaxprocs (int, None): max CPU cores to allow restic to use
    """

    __module__ = 'restic'

    def __init__(
        self,
        *,
        endpoint: str,
        cluster: Union[str, None] = None,
        repo: ResticRepo,
        lim: Union[int, float, cproc.ProcLimit, None] = None,
        tmp_dir: Union[PurePath, str] = '/var/tmp/restic',
        cache_dir: Union[PurePath, str, None] = '/var/cache/restic/%(bucket)s',
        gomaxprocs: Union[int, None] = 2,
    ):
        self.endpoint = endpoint
        self.cluster = cluster
        self.repo = repo
        self.lim = lim
        self.tmp_dir = str(tmp_dir)
        self.gomaxprocs = gomaxprocs
        if cache_dir is None:
            self.cache_dir = None
        else:
            self.cache_dir = str(cache_dir) % {'bucket': repo.bucket}
        self.rebuild_index = self.repair_index

    def __repr__(self):
        if self.cluster:
            return f'Restic<{self.cluster}:{self.repo.bucket}>'
        return f'Restic<{self.repo.bucket}>'

    @property
    def env(self) -> dict[str, str]:
        """Restic environment variable dict"""
        ret = {
            'TMPDIR': self.tmp_dir,
            'GOGC': '1',
            'RESTIC_PASSWORD': self.repo.restic_pass,
            'AWS_ACCESS_KEY_ID': self.repo.access_key,
            'AWS_SECRET_ACCESS_KEY': self.repo.secret_key,
        }
        if self.cache_dir:
            ret['RESTIC_CACHE_DIR'] = self.cache_dir
        if self.gomaxprocs is not None and self.gomaxprocs > 0:
            ret['GOMAXPROCS'] = str(self.gomaxprocs)
        return ret

    def s3_bucket(self, timeout=30, retries=2) -> S3Bucket:
        """Creates a boto3 s3.Bucket resource for this repo's bucket, using
        this instance's endpoint and the repo's access/secret keys

        Args:
            timeout (int): used as both the connect and read timeout, in secs
            retries (int): passed to botocore as ``max_attempts``

        Returns:
            s3.Bucket: boto3 resource for this bucket
        """
        boto_cfg = BotoConfig(
            connect_timeout=timeout,
            read_timeout=timeout,
            retries={'max_attempts': retries},
        )
        s3_resource = boto3.resource(
            's3',
            endpoint_url=self.endpoint,
            aws_access_key_id=self.repo.access_key,
            aws_secret_access_key=self.repo.secret_key,
            config=boto_cfg,
        )
        return s3_resource.Bucket(self.repo.bucket)

    def init_key_exists(self) -> Union[bool, None]:
        """Check if the /keys/ path has items in it, meaning that restic
        has been initialized

        Raises:
            botocore.exceptions.ClientError: connection problem
            botocore.exceptions.BotoCoreError: boto3 error

        Returns:
            (bool | None): whether the keys path existed in the bucket, or
            None if the bucket did not exist at all
        """
        try:
            return (
                len(list(self.s3_bucket().objects.filter(Prefix='keys/'))) > 0
            )
        except ClientError as exc:
            if 'NoSuchBucket' in str(exc):
                return None
            raise

    def prepare(self) -> bool:
        """Prepare access to this repo if needed and preemptively fix
        issues connecting to it. Plugins should probably run this first

        Returns:
            bool: whether this repo is ready for access
        """
        try:
            self.init_repo()
        except (BotoCoreError, ClientError) as exc:
            logging.error('%s %s: %s', self, type(exc).__name__, exc)
            return False
        except ResticLockedError as exc:
            if not exc.unlock_ok():
                logging.error(
                    '%s: could not automatically unlock\n%s', exc.name, exc
                )
                return False
            try:
                self.unlock()
            except ResticError:
                pass  # we'll try init_repo() one more time
        except ResticBadIndexError as exc:
            try:
                self.rebuild_index()
            except ResticError:
                pass  # we'll try init_repo() one more time
        except ResticError as exc:
            logging.error('%s: %s', exc.name, exc)
            return False
        try:
            self.init_repo()
        except (BotoCoreError, ClientError) as exc:
            logging.error('%s %s: %s', self, type(exc).__name__, exc)
            return False
        except ResticError as exc:
            logging.error('%s: %s', exc.name, exc)
            return False
        return True

    def init_repo(self) -> bool:
        """Initializes a restic repo if it hasn't been done already

        Raises:
            botocore.exceptions.ClientError: connection problem
            botocore.exceptions.BotoCoreError: boto3 error
            ResticError: if the restic init command fails with an error

        Returns:
            bool: True if initialized or False if already done
        """
        if exists := self.init_key_exists():
            return False
        if exists is None:  # bucket was missing
            bucket = self.s3_bucket()
            bucket.create()
            bucket.wait_until_exists()
        ret = self.build('init').run(stdout=PIPE, no_lim=True)
        if 'already initialized' in ret.stderr:
            return False
        if 'created restic repository' not in ret.stdout:
            raise ResticError(ret)
        return True

    def unlock(self):
        """Run restic unlock

        Raises:
            ResticError: if the restic unlock command fails
        """
        try:
            self.build('unlock').run(check=True, no_lim=True)
        except CalledProcessError as exc:
            raise ResticError(exc) from exc

    @locked_retry
    def repair_index(self):
        """Executes ``restic repair index --read-all-packs`` without a CPU
        limit

        Raises:
            ResticError: if the command fails
        """
        # Only the index is repaired automatically. restic also has
        # 'repair packs' and 'repair snapshots', but automating those is
        # tricky and dangerous, so they are left to manual troubleshooting.
        try:
            repair_cmd = self.build('repair', 'index', '--read-all-packs')
            repair_cmd.run(check=True, no_lim=True)
        except CalledProcessError as err:
            raise ResticError(err) from err

    @locked_retry
    def prune(self, no_lim: bool = True):
        """Executes ``restic prune``, adding ``--cleanup-cache`` when a cache
        dir is configured

        Args:
            no_lim (bool): do not CPU limit the command as it runs regardless
                of the lim arg in Restic

        Raises:
            ResticError: if the command fails
        """
        prune_args = ['prune']
        if self.cache_dir:
            prune_args.append('--cleanup-cache')
        try:
            self.build(*prune_args).run(check=True, no_lim=no_lim)
        except CalledProcessError as err:
            raise ResticError(err) from err

    def build(self, *args) -> ResticCmd:
        """Create a ResticCmd for a restic subcommand. The command line is
        the base command (restic path and global options) followed by the
        supplied arguments

        Args:
            *args (tuple[str]): restic subcommand and args

        Returns:
            ResticCmd: a restic command executor
        """
        return ResticCmd([*self._basecmd, *args], restic=self)

    @property
    def _basecmd(self) -> list[str]:
        """Start of every restic command line: the restic path, the --repo
        option, and --no-cache if no cache dir is configured. A new list is
        built on each access"""
        repo_url = f's3:{self.endpoint}/{self.repo.bucket}'
        base = [RESTIC_PATH, f'--repo={repo_url}']
        if self.cache_dir:
            return base
        base.append('--no-cache')
        return base

    @locked_retry
    def snapshots(
        self,
        *,
        tags: Union[list[str], None] = None,
        timeout: Union[int, float, None] = None,
    ) -> list[Snapshot]:
        """List the restic snapshots in this repo by running
        ``restic snapshots --json``. If this server is a Backup Manager
        client, see the ``get_backups()`` function instead

        Args:
            tags (list[str], optional): only consider snapshots which include
                this tag list. Each item is sent as its own ``--tag`` option.
                To check with AND logic, specify a single tag as
                ``["tag1,tag2"]``. To check for either tag, specify
                independently as ``["tag1", "tag2"]``
            timeout (float | int | None): optional command timeout

        Raises:
            ResticError: if the restic snapshots command failed
            subprocess.TimeoutExpired: if timeout was specified and exceeded

        Returns:
            list[Snapshot]: Snapshots found in the repo
        """
        cmd_args = ['snapshots', '--json']
        if tags:
            cmd_args += [opt for tag in tags for opt in ('--tag', tag)]
        try:
            proc = self.build(*cmd_args).run(
                stdout=PIPE, check=True, no_lim=True, timeout=timeout
            )
        except CalledProcessError as err:
            raise ResticError(err) from err
        found = []
        for snap_data in json.loads(proc.stdout):
            found.append(Snapshot(restic=self, data=snap_data))
        return found

    def backup(
        self,
        paths: list[Union[str, PurePath]],
        *,
        tags: list[str],
        bwlimit: Union[int, None] = None,
        excludes: Optional[list[Union[str, PurePath]]] = None,
        exclude_files: Optional[list[Union[str, PurePath]]] = None,
        quiet: bool = True,
    ) -> ResticCmd:
        """Crafts a ResticCmd to backup a list of paths.

        Warning:
            return codes 0 and 3 should be considered success.
            See https://github.com/restic/restic/pull/2546

        Args:
            paths (list[str | PurePath]): list of paths to backup
            tags (list[str]): list of labels for the snapshot
            bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
                to unlimited
            excludes (list[str | PurePath], optional): list of paths to exclude
            exclude_files (list[str | PurePath], optional): list of paths of
                files containing glob patterns to exclude
            quiet (bool): add the --quiet flag. Defaults True

        Returns:
            ResticCmd: a restic command executor
        """
        args = self._backup_args(tags, bwlimit, quiet)
        if excludes is not None:
            for exclude in excludes:
                args.extend(['--exclude', str(exclude)])
        if exclude_files is not None:
            for exclude_file in exclude_files:
                args.extend(['--exclude-file', str(exclude_file)])
        args.extend([str(x) for x in paths])
        return self.build(*args)

    def upload(
        self,
        stream: Union[str, TextIO],
        /,
        path: Union[str, PurePath],
        tags: list[str],
        bwlimit: Union[int, None] = None,
        quiet: bool = True,
    ) -> None:
        """Uploads a stream or str to the restic repo

        Args:
            stream (str | TextIO): data source to upload, such as the stdout of
                a mysqldump process
            path (str | PurePath): the --stdin-filename to use. This isn't
                necessarily where the data came from, but is where restic will
                say it did in the snapshot's metadata
            tags (list[str]): list of labels for the snapshot
            bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
                to unlimited
            quiet (bool): add the --quiet flag. Defaults True

        Raises:
            ResticError: if the restic command failed
        """
        cmd = self.upload_cmd(path, tags, bwlimit, quiet)
        try:
            if isinstance(stream, str):
                cmd.run(input=stream, check=True)
            else:
                cmd.run(stdin=stream, check=True)
        except CalledProcessError as exc:
            raise ResticError(exc) from exc

    def upload_cmd(
        self,
        path: Union[str, PurePath],
        tags: list[str],
        bwlimit: Union[int, None] = None,
        quiet: bool = True,
    ) -> ResticCmd:
        """Like `upload` but returns a ResticCmd rather than running directly

        Args:
            path (str | PurePath): the --stdin-filename to use. This isn't
                necessarily where the data came from, but is where restic will
                say it did in the snapshot's metadata
            tags (list[str]): list of labels for the snapshot
            bwlimit (int | None): limits uploads to a max in KiB/s. Defaults
                to unlimited
            quiet (bool): add the --quiet flag. Defaults True

        Returns:
            ResticCmd: restic command executor
        """
        args = self._backup_args(tags, bwlimit, quiet)
        args.extend(['--stdin', '--stdin-filename', str(path)])
        return self.build(*args)

    def dump(
        self, snap: Union[Snapshot, str], filename: Union[str, PurePath]
    ) -> ResticCmd:
        """Builds (but does not run) a ResticCmd for ``restic dump``, which
        dumps one file from a snapshot

        Args:
            snap (Snapshot | str): snapshot or snapshot ID to dump data from
            filename (str | PurePath): filename to retrieve

        Returns:
            ResticCmd: restic command executor
        """
        if isinstance(snap, Snapshot):
            snap = snap.id
        return self.build('dump', snap, str(filename))

    def restore(
        self,
        snap: Union[Snapshot, str],
        *,
        includes: Optional[list[Union[str, PurePath]]] = None,
        excludes: Optional[list[Union[str, PurePath]]] = None,
        target: Union[str, PurePath] = '/',
    ) -> ResticCmd:
        """Builds (but does not run) a ResticCmd for ``restic restore``

        Args:
            snap (Snapshot | str): snapshot or snapshot ID to restore from
            includes (list[str | PurePath], optional): --include paths
            excludes (list[str | PurePath], optional): --exclude paths
            target (str | PurePath): base directory prefix to restore to.
                Defaults to '/', which restores to the original path

        Returns:
            ResticCmd: restic command executor
        """
        if isinstance(snap, Snapshot):
            snap = snap.id
        cmd_args = ['restore', '--target', str(target)]
        if includes is not None:
            cmd_args += [
                opt for item in includes for opt in ('--include', str(item))
            ]
        if excludes is not None:
            cmd_args += [
                opt for item in excludes for opt in ('--exclude', str(item))
            ]
        # the snapshot ID goes last, after all options
        return self.build(*cmd_args, snap)

    @staticmethod
    def _backup_args(
        tags: list[str], bwlimit: Union[int, None], quiet: bool
    ) -> list[str]:
        """Builds the base backup subcommand"""
        args = ['backup']
        if quiet:
            args.append('--quiet')
        if bwlimit is not None and bwlimit > 0:
            args.extend(['--limit-upload', str(bwlimit)])
        for tag in tags:
            args.extend(['--tag', tag])
        return args

    @locked_retry
    def forget(self, *ids, prune: bool = False, no_lim: bool = True):
        """Executes ``restic forget`` for the given snapshots

        Args:
            *ids (Snapshot | str): snapshots to remove, specified by
                either a Snapshot object or snapshot ID str
            prune (bool): whether to add the ``--prune`` flag (plus
                ``--cleanup-cache`` if a cache dir is configured).
                Defaults to False
            no_lim (bool): do not CPU limit the command as it runs regardless
                of the lim arg in Restic

        Raises:
            ResticError: if the restic forget command fails
        """
        cmd_args = ['forget']
        if prune:
            cmd_args.append('--prune')
            if self.cache_dir:
                cmd_args.append('--cleanup-cache')
        for item in ids:
            cmd_args.append(item.id if isinstance(item, Snapshot) else item)
        try:
            self.build(*cmd_args).run(check=True, no_lim=no_lim)
        except CalledProcessError as err:
            raise ResticError(err) from err

    def listdir(
        self, snap: Union[Snapshot, str], path: Union[str, PurePath]
    ) -> list[Union[SnapFile, SnapDir]]:
        """Like ``scandir`` but return a list instead of a Generator.
        Returns items found in a requested path in a restic.Snapshot

        Args:
            snap (Snapshot | str): snapshot to list the contents of, supplied
                either by its Snapshot instance of snapshot ID
            path (str | PurePath): full path inside the snapshot to list the
                contents of

        Raises:
            ValueError: requested path was not a full path
            ResticError: Any error listing snapshot contents from restic

        Returns:
            list[SnapFile | SnapDir]: files or directories
        """
        return list(self.scandir(snap=snap, path=path))

    @locked_retry
    def _ls_lines(self, snap_id: str, path: str) -> list[str]:
        """Runs ``restic ls --json --long`` and returns its stdout lines

        This is a regular (non-generator) method so that ``locked_retry`` can
        catch a lock error from the command and retry it.

        Raises:
            ResticError: Any error listing snapshot contents from restic
        """
        cmd = self.build('ls', '--json', '--long', snap_id, path)
        try:
            proc = cmd.run(stdout=PIPE, check=True, no_lim=True)
        except CalledProcessError as exc:
            raise ResticError(exc) from exc
        return proc.stdout.splitlines()

    def scandir(
        self, snap: Union[Snapshot, str], path: Union[str, PurePath]
    ) -> Generator[Union[SnapFile, SnapDir], None, None]:
        """Iterates over items found in a requested path in a restic.Snapshot

        Nothing is executed until the returned generator is first iterated,
        which is also when the exceptions below are raised. The restic
        command itself is retried once if the repo is locked and can be
        unlocked.

        Args:
            snap (Snapshot | str): snapshot to list the contents of, supplied
                either by its Snapshot instance or snapshot ID
            path (str | PurePath): full path inside the snapshot to list the
                contents of

        Raises:
            ValueError: requested path was not a full path
            ResticError: Any error listing snapshot contents from restic

        Yields:
            Generator[SnapFile | SnapDir, None, None]: files or directories
        """
        snap_id = snap.id if isinstance(snap, Snapshot) else snap
        path = str(path)
        if not path.startswith('/'):
            raise ValueError('path must be a full path')
        # drop trailing slashes, but keep the root path as '/'
        path = path.rstrip('/') or '/'
        lines = self._ls_lines(snap_id, path)
        # stdout is formatted as line-delimited JSON dicts. The first line
        # holds the snapshot's data and each line after it is one item.
        if len(lines) < 2:
            return
        snapshot = Snapshot(restic=self, data=json.loads(lines[0]))
        for line in lines[1:]:
            data = json.loads(line)
            yield SnapPath(
                snapshot=snapshot,
                restic=self,
                name=data['name'],
                type=data['type'],
                path=data['path'],
                uid=data['uid'],
                gid=data['gid'],
                mode=data.get('mode', None),
                permissions=data.get('permissions', None),
            )

    @overload
    def get_backups(
        self,
        user: Optional[str] = None,
        *,
        timeout: Union[int, float, None] = None,
        serialize: Literal[True],
        snapshots: Optional[list[Snapshot]] = None,
    ) -> dict[str, UserBackupDicts]:
        ...

    @overload
    def get_backups(
        self,
        user: Optional[str] = None,
        *,
        timeout: Union[int, float, None] = None,
        serialize: Literal[False],
        snapshots: Optional[list[Snapshot]] = None,
    ) -> dict[str, UserBackups]:
        ...

    def get_backups(
        self,
        user: Optional[str] = None,
        *,
        timeout: Union[int, float, None] = None,
        serialize: bool = True,
        snapshots: Optional[list[Snapshot]] = None,
    ) -> dict[str, dict[str, list]]:
        """Get backups for a backups 3.x Backup Manager client

        Args:
            user (str, optional): only list backups found for this user
            timeout (int | float | None): timeout for the underlying
                restic snapshots command
            serialize (bool): if True, return as a json-serializable dict. If
                False, return as Backup, SQLBackupGroup, and SQLBackupItem
                objects
            snapshots (list[Snapshot], optional): if provided, scan the backups
                found in this list rather than executing ``restic snapshots``

        Returns:
            dict[str, dict[str, list]]: Top-level keys are usernames.
            Second-level are the type of backup. Values are a list of backups
            found which may be dicts or objects depending on the serialize arg
        """
        tag = None if user is None else f'user:{user}'
        if snapshots:
            all_snaps = snapshots.copy()
            if tag:
                all_snaps = [x for x in all_snaps if tag in x.tags]
        else:
            all_snaps = self.snapshots(
                tags=None if user is None else [tag],
                timeout=timeout,
            )
        out = {}
        sqls = []
        sql_map = {}
        while all_snaps:
            try:
                bak = Backup(all_snaps.pop())
            except (ValueError, KeyError):
                continue  # snapshot did not contain a valid backup
            if isinstance(bak, SQLBackupItem):
                sqls.append(bak)
            else:
                if bak.user not in out:
                    out[bak.user] = {}
                if bak.type not in out[bak.user]:
                    out[bak.user][bak.type] = []
                out[bak.user][bak.type].append(bak)
                if isinstance(bak, SQLBackupGroup):
                    sql_map[(bak.type, bak.user, bak.time)] = bak
        for bak in sqls:
            key = (bak.type, bak.user, bak.time)
            if key in sql_map:
                sql_map[key].dbs[bak.dbname] = bak
            # else the backup run was interrupted; backup-runner will delete
            # this snapshot on its next run
        if serialize:
            self.serialize(out)
        return out

    @staticmethod
    def serialize(backups):
        """Converts the return of get_backups(serialize=False) to a
        json-serializable dict"""
        for user in backups:
            for bak_type, objs in backups[user].items():
                backups[user][bak_type] = [x.serialize() for x in objs]


class S3Tool:
    """Helper for copying, deleting, and moving items within one S3 Bucket

    Args:
        s3_bucket (S3Bucket): service resource from ``Restic.s3_bucket()``

    Attributes:
        bucket: the supplied bucket resource
        moved (list): destination keys of every ``move()`` that completed
    """

    def __init__(self, s3_bucket: S3Bucket):
        self.bucket = s3_bucket
        self.moved = []

    def copy(self, src: str, dest: str):
        """Copy the item at key ``src`` to key ``dest`` in the same bucket"""
        source = {'Bucket': self.bucket.name, 'Key': src}
        return self.bucket.copy(source, dest)

    def delete(self, key: str):
        """Delete the item at ``key`` from the bucket"""
        targets = {'Objects': [{'Key': key}]}
        return self.bucket.delete_objects(
            Bucket=self.bucket.name, Delete=targets
        )

    def move(self, src: str, dest: str):
        """Move an item in the bucket by copying it and then deleting the
        original. ``dest`` is added to ``self.moved`` only after both steps
        have returned"""
        self.copy(src, dest)
        self.delete(src)
        self.moved.append(dest)

Zerion Mini Shell 1.0