# Mini Shell
#
# Direktori : /proc/self/root/opt/imh-scan/
# Upload File :
# Current File : //proc/self/root/opt/imh-scan/clamlib.py

"""Clamscan and freshclam classes"""
from contextlib import ExitStack
import getpass
import pwd
import logging
from pathlib import Path
import shlex
import shutil
import sys
import os
import threading
from dataclasses import dataclass
import time
import re
from typing import IO, Union
from subprocess import Popen, CalledProcessError, TimeoutExpired
from subprocess import run, DEVNULL, PIPE
from datetime import timedelta
import tempfile
import psutil
import requests
import rads
import rads.color as c
from cproc import Proc

# True when the process runs with UID 0
IS_ROOT = os.getuid() == 0
# Base URL that the custom definition file names are appended to
DEFS_SERVER = 'https://repo.imhadmin.net/open/shellscan/v3/'
# Definition files downloaded into DEFS_DIR
DEF_FILES = ('imh.yara',)
# Heuristic definition files downloaded into HEUR_DIR
HEUR_FILES = ('heuristic.yara',)
HEUR_DIR = '/opt/imh-scan/sigs/heuri/'
DEFS_DIR = '/opt/imh-scan/sigs/yara/'
# File holding the epoch timestamp of the last successful freshclam run
FRESH_CACHE = '/opt/imh-scan/sigs/last_freshclam'
# Extra yara database passed to clamscan with -d when the file exists
DUMMY = '/opt/imh-scan/sigs/new/new.yara'
# Longest time to wait for clamscan: one week, expressed in seconds
COMMAND_TIMEOUT = int(timedelta(weeks=1).total_seconds())
# Splits "<path>: <rule> FOUND"; DOTALL lets the path span several lines
FOUND_RE = re.compile(r'(.*)\: (.*) FOUND$', re.DOTALL)
# Rule names containing "heuristic"/"Heuristic", optionally after "YARA" + one char
HEURISTIC_RE = re.compile(r'(?:YARA.)?[Hh]euristic')
# Home directory of the user reported by getpass.getuser(), per the passwd db
CUR_USER_HOME = Path(pwd.getpwnam(getpass.getuser()).pw_dir)
# Captures "/home" or "/home<digit>" plus a 1-16 char alphanumeric directory
# name; the directory must be followed by a slash
HOME_RE = re.compile(r'(/home[0-9]?/[a-zA-Z0-9]{1,16})/')


@dataclass
class ScanResult:
    """Outcome of one clamscan run"""

    rcode: int
    command: str
    hits_found: dict[Path, str]
    heur_found: dict[Path, str]
    summary: str

    @property
    def __dict__(self) -> dict:
        """Plain-dict view of the result with every path key turned into str"""
        hits = {str(path): rule for path, rule in self.hits_found.items()}
        heur = {str(path): rule for path, rule in self.heur_found.items()}
        return dict(
            rcode=self.rcode,
            command=self.command,
            hits_found=hits,
            heur_found=heur,
            summary=self.summary,
        )

    @property
    def all_found(self) -> dict[Path, str]:
        """Heuristic and regular hits merged into one mapping

        When a path appears in both, the rule from ``hits_found`` wins.
        """
        return {**self.heur_found, **self.hits_found}


class ClamParser(threading.Thread):
    """Background thread that consumes and interprets clamscan's stdout"""

    def __init__(
        self,
        proc: Popen[str],
        scanner: 'Scanner',
        open_logfiles: list[IO],
        print_items: bool,
    ):
        """Store the collaborators and immediately start the thread"""
        super().__init__(target=self.parse, daemon=True)
        self.proc = proc
        self.scanner = scanner
        self.open_logfiles = open_logfiles
        self._print_items = print_items
        self._scanning_summary = False
        self.hits_found: dict[Path, str] = {}
        self.heur_found: dict[Path, str] = {}
        self.summary_lines: list[str] = []
        # every attribute the thread reads is set before it starts running
        self.start()

    def parse(self):
        """Thread body: route each item to hit handling or to the summary"""
        for chunk in self._iter_found():
            if not self._scanning_summary:
                self._handle_found(chunk)
                continue
            if self._print_items:
                print(chunk, end='')
            self.summary_lines.append(chunk)

    def _iter_found(self):
        """Yield complete FOUND records, then the summary lines verbatim

        Every raw line is copied to the open log files first. Lines that do
        not end in ``FOUND`` are buffered and prefixed to the next FOUND line,
        so a record may span several lines.
        """
        pending = []
        for line in self.proc.stdout:
            for log_file in self.open_logfiles:
                log_file.write(line)
            entering_summary = not self._scanning_summary and line.endswith(
                'SCAN SUMMARY -----------\n'
            )
            if entering_summary:
                self._scanning_summary = True
                pending.clear()
            if self._scanning_summary:
                yield line
            elif line.endswith('FOUND\n'):
                pending.append(line)
                yield ''.join(pending).rstrip()
                pending.clear()
            else:
                pending.append(line)

    def _handle_found(self, data: str):
        """Record one FOUND record under heuristic or regular hits"""
        match = FOUND_RE.match(data)
        if match is None:
            self.scanner.log.error("imh-scan bug: Regex failed on %r", data)
            return
        path = Path(match.group(1))
        rule = match.group(2)
        if not path.is_file():
            self.scanner.log.error('imh-scan bug? %r is not a file', path)
            return
        heuristic = HEURISTIC_RE.search(rule) is not None
        bucket = self.heur_found if heuristic else self.hits_found
        bucket[path] = rule
        self._print_found(path, rule, heuristic)

    def _print_found(self, path: str, rule: str, is_heur: bool) -> None:
        """Print a colored hit line when item printing is enabled"""
        if self._print_items:
            paint = c.yellow if is_heur else c.red
            print(
                shlex.quote(str(path)),
                paint(f"{rule} FOUND"),
                sep=': ',
                flush=True,
            )


class Scanner:
    """Handles executing clamscan and freshclam"""

    def __init__(
        self,
        *,
        exclude: list[str],
        verbose: bool,
        extra_heuri: bool,
        install: bool,
        update: bool,
        heuristic: bool,
        phishing: bool,
        disable_media: bool,
        disable_excludes: bool,
        disable_default: bool,
        disable_freshclam: bool,
        disable_maldetect: bool,
        disable_new_yara: bool,
    ):
        """Set up logging, resolve dependency paths and build the base command"""
        self.verbose, self.install, self.update = verbose, install, update
        log_level = logging.INFO
        if verbose:
            log_level = logging.DEBUG
        self.log = rads.setup_logging(
            path=None,
            name='imh_scan.Scanner',
            loglevel=log_level,
            print_out=sys.stdout,
            fmt='%(message)s',
        )
        # the logger must exist first: _check_deps reports through self.log
        dep_flags = {
            'disable_maldetect': disable_maldetect,
            'disable_freshclam': disable_freshclam,
            'disable_default': disable_default,
        }
        self.cmd_paths = self._check_deps(**dep_flags)
        # _make_command reads self.cmd_paths, so it runs last
        command_flags = {
            'disable_media': disable_media,
            'phishing': phishing,
            'heuristic': heuristic,
            'exclude': exclude,
            'disable_new_yara': disable_new_yara,
            'disable_excludes': disable_excludes,
            'extra_heuri': extra_heuri,
            'disable_maldetect': disable_maldetect,
            'disable_default': disable_default,
        }
        self.command = self._make_command(**command_flags)

    def _check_deps(
        self,
        disable_maldetect: bool,
        disable_freshclam: bool,
        disable_default: bool,
    ) -> dict[str, str]:
        """Locate the required ClamAV/maldet paths, installing what is missing.

        Any ClamAV package layout is accepted when the paths already exist;
        installing is only attempted for the packages named in the table
        below, and only when running as root on a non-shared server.

        Args:
            disable_maldetect: skip the maldet signature directory.
            disable_freshclam: skip freshclam and its config file.
            disable_default: skip the stock ClamAV database (which also
                makes freshclam unnecessary).

        Returns:
            Mapping of dependency name to the path that was found, or to the
            first candidate path when the dependency is about to be installed.

        Exits with status 1 when a needed dependency is missing and may not
        be installed, or when installing fails.
        """
        shared_permit = os.getuid() == 0 and rads.IMH_ROLE != 'shared'
        clamdb_need = not disable_default
        freshclam_need = not disable_default and not disable_freshclam
        maldet_need = not disable_maldetect
        deps_d = {
            'clamscan': {
                'need': True,
                'permit': shared_permit,
                'pkg': 'imh-clamav',
                'paths': [
                    '/opt/imh-clamav/usr/bin/clamscan',
                    '/usr/bin/clamscan',
                    '/bin/clamscan',
                    '/usr/local/cpanel/3rdparty/bin/clamscan',
                ],
            },
            'clamav-db': {
                'need': clamdb_need,
                'permit': shared_permit,
                'pkg': 'imh-clamav',
                'paths': [
                    '/opt/imh-clamav/var/clamav',
                    '/var/clamav',
                    '/var/lib/clamav',
                ],
            },
            'freshclam': {
                'need': freshclam_need,
                'permit': shared_permit,
                'pkg': 'imh-clamav',
                'paths': [
                    '/opt/imh-clamav/usr/bin/freshclam',
                    '/usr/bin/freshclam',
                    '/usr/local/cpanel/3rdparty/bin/freshclam',
                    '/bin/freshclam',
                ],
            },
            'freshconf': {
                'need': freshclam_need,
                'permit': shared_permit,
                'pkg': 'imh-clamav',
                'paths': [
                    '/opt/imh-clamav/etc/freshclam.conf',
                    '/etc/freshclam.conf',
                ],
            },
            'maldet': {
                'need': maldet_need,
                'permit': shared_permit,
                'pkg': 'maldet-imh',
                'paths': [
                    '/opt/maldetect/sigs',
                    '/usr/local/maldetect/sigs',
                ],
            },
        }
        install_list = []
        failed_list = []
        paths = {}
        for dep, opts in deps_d.items():
            # skips if not needed
            if not opts['need']:
                continue
            # checks if missing
            found_path = _find_binary(opts['paths'])
            if found_path:
                self.log.debug('found path %s', found_path)
                paths[dep] = found_path
                continue
            self.log.error('missing dependancy %s', dep)
            # checks if allowed to install
            if not opts['permit']:
                self.log.error('not permitted to install missing %s', dep)
                failed_list.append(dep)
                continue
            if opts['pkg'] not in install_list:
                install_list.append(opts['pkg'])
            # adds imh's path expecting the install to work
            paths[dep] = opts['paths'][0]
        # fails if not allowed to install
        if failed_list:
            self.log.error('missing pkgs: %s', failed_list)
            self.log.error('allowed to install: %s', install_list)
            sys.exit(1)
        # already installed
        if not install_list:
            return paths
        # install missing deps
        if not self._install_deps(install_list):
            self.log.error("Failed to install dependancies %s", install_list)
            # a failed install is an error, so do not report success
            sys.exit(1)
        return paths

    def _install_deps(self, dep):
        """Install the packages listed in ``dep`` with yum.

        Asks for confirmation unless ``self.install`` is set. Exits with
        status 0 when the user declines and with status 1 when yum is missing
        or fails. Returns True once yum has succeeded.
        """
        if not self.install:
            answer = ask_prompt(
                f'Would you like to install {dep}? (y|n)',
                chars=('y', 'n'),
            )
            if answer == 'n':
                self.log.warning('exiting')
                sys.exit(0)
        yum_cmd = ['yum', '-y', 'install'] + dep
        # yum's output is only shown in verbose mode
        if self.verbose:
            yum_stdout = None
        else:
            yum_stdout = DEVNULL
        try:
            run(yum_cmd, stdout=yum_stdout, check=True)
        except FileNotFoundError as exc:
            self.log.error(exc)
            sys.exit(1)
        except CalledProcessError:
            self.log.fatal('error running yum, exiting')
            sys.exit(1)
        return True

    def cpu_wait(self):
        """Block until the 1-minute load average is at or below the limit.

        The limit is the CPU count minus one, but never below 1: on a
        single-core host a limit of 0 would keep this waiting until the load
        average is exactly zero. ``psutil.cpu_count()`` may return None when
        the count cannot be determined; that is treated as one CPU.
        """
        cpu_limit = max(1, (psutil.cpu_count() or 1) - 1)
        while (loadavg := os.getloadavg()[0]) > cpu_limit:
            self.log.warning(
                'Load too high to start clamscan (%s/%s), sleeping 30s',
                loadavg,
                cpu_limit,
            )
            time.sleep(30)

    def update_defs(self, disable_freshclam: bool, disable_default: bool):
        """Download the custom definitions, then refresh the ClamAV database.

        When ``self.update`` is set, freshclam is always run and the process
        exits with status 0 afterwards. Otherwise freshclam runs unless
        either ``disable_default`` or ``disable_freshclam`` is set.
        """
        for sig_dir in (DEFS_DIR, HEUR_DIR):
            Path(sig_dir).mkdir(mode=0o755, parents=True, exist_ok=True)
        self.log.debug('Definitions to get: %s', DEF_FILES + HEUR_FILES)
        # regular definitions first, heuristic ones second
        targets = [(DEFS_DIR, name) for name in DEF_FILES]
        targets += [(HEUR_DIR, name) for name in HEUR_FILES]
        for dest_dir, name in targets:
            self._download_file(f'{DEFS_SERVER}{name}', f'{dest_dir}{name}')
        if self.update:
            self.log.warning('Just updating defintions')
            self._freshclam()
            sys.exit(0)
        if not (disable_default or disable_freshclam):
            self._freshclam()

    def _download_file(self, url: str, dest: str):
        """Download ``url`` to ``dest`` through a temporary ``.tmp`` file.

        The data is streamed to ``<dest>.tmp`` and renamed over ``dest`` only
        after the whole download succeeded. On a request error the failure is
        logged, any partial temporary file is removed and the existing
        ``dest`` is left untouched.
        """
        self.log.debug('Downloading %s to %s', url, dest)
        tmp_path = f'{dest}.tmp'
        try:
            with requests.get(url, stream=True, timeout=30) as req:
                req.raise_for_status()
                with open(tmp_path, 'wb') as file:
                    for chunk in req.iter_content(chunk_size=8192):
                        file.write(chunk)
        except requests.RequestException as exc:
            self.log.error("Unable to retrieve %s, skipping\n%s", url, exc)
            # never install a partial or stale download over good definitions
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass  # the failure happened before the temp file was created
            return
        os.rename(tmp_path, dest)

    def _freshclam(self):
        """Run freshclam unless it already ran within the last 24 hours.

        The time of the last successful run is kept in FRESH_CACHE. The cache
        is ignored when ``self.update`` is set. A cache file that cannot be
        read or parsed is logged and treated as "no previous run". Failures
        of freshclam itself are logged and leave the cache untouched.
        """
        now = int(time.time())
        if not self.update and os.path.exists(FRESH_CACHE):
            try:
                with open(FRESH_CACHE, encoding='ascii') as file:
                    last_run = int(file.read().strip())
            except (OSError, ValueError) as exc:
                # ValueError also covers a cache that is not valid ASCII
                self.log.error('error reading %s\n%s', FRESH_CACHE, exc)
            else:
                if last_run + 86400 > now:
                    self.log.warning(
                        'freshclam ran less than a day ago, skipping'
                    )
                    return

        freshclam_conf = f"--config-file={self.cmd_paths['freshconf']}"
        fresh_cmd = [self.cmd_paths['freshclam'], freshclam_conf]
        self.log.debug('freshclam command: %s', fresh_cmd)
        try:
            # the context manager closes stdout and waits for the process
            with Popen(fresh_cmd, stdout=PIPE, encoding='utf-8') as proc:
                for line in proc.stdout:
                    if not self.verbose:
                        continue
                    self._freshclam_print(line)
        except OSError as exc:
            self.log.error("ERROR: freshclam failed: %s", exc)
            return
        if proc.returncode:
            self.log.error(
                'ERROR: freshclam failed, database.clamav.net '
                'probably offline'
            )
            return
        with open(FRESH_CACHE, 'w', encoding='ascii') as file:
            file.write(str(now))

    def write_ok(self, path):
        try:
            testfile = tempfile.TemporaryFile(dir=path)
            testfile.close()
        except OSError:
            self.log.debug('%s is not writeable', path)
            return False
        self.log.debug('%s is writeable', path)
        return True

    def _init_logs(
        self,
        stack: ExitStack,
        log_tuples: list[tuple[Path, Union[pwd.struct_passwd, None]]],
    ) -> list[IO]:
        files = []
        for log_path, owner in log_tuples:
            try:
                log_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
                if owner is not None:
                    os.chown(log_path.parent, owner.pw_uid, owner.pw_gid)
            except OSError as exc:
                self.log.fatal('%s\nerror in _init_logs', exc)
                sys.exit(2)
            files.append(
                stack.enter_context(log_path.open('a', encoding='utf-8'))
            )
            if owner is not None:
                os.chown(log_path, owner.pw_uid, owner.pw_gid)
            log_path.chmod(mode=0o600)
        return files

    def _make_command(
        self,
        disable_media: bool,
        phishing: bool,
        heuristic: bool,
        disable_new_yara: bool,
        disable_excludes: bool,
        extra_heuri: bool,
        disable_maldetect: bool,
        disable_default: bool,
        exclude: list[str],
    ) -> list[str]:
        """Build the clamscan argument list, without the paths to scan.

        Args:
            disable_media: when true, exclude common image/video extensions.
            phishing: enable phishing signatures.
            heuristic: also load the databases in HEUR_DIR.
            disable_new_yara: do not load the DUMMY database even if present.
            disable_excludes: do not add the built-in /home exclusions.
            extra_heuri: enable clamscan's heuristic alerts.
            disable_maldetect: do not load the maldet signature directory.
            disable_default: do not load the stock ClamAV database.
            exclude: extra patterns, each passed to ``--exclude``; may be
                empty or None.

        Returns:
            The command as a list ending in ``--``, ready to have the scan
            paths appended.
        """
        cmd = [
            self.cmd_paths['clamscan'],
            '-r',
            '--normalize=no',
            '--cross-fs=yes',
            '-i',
        ]
        if extra_heuri:
            cmd.append('--heuristic-alerts=yes')
        if phishing:
            cmd.append('--phishing-sigs=yes')
        if not disable_default:
            cmd.extend(['-d', self.cmd_paths['clamav-db']])
        cmd.extend(['-d', DEFS_DIR])  # custom imh dbs and maldetect
        if not disable_maldetect:
            cmd.extend(['-d', self.cmd_paths['maldet']])
        if not disable_new_yara and os.path.exists(DUMMY):
            cmd.extend(['-d', f'{DUMMY}'])
        if heuristic:
            cmd.extend(['-d', HEUR_DIR])
        # media files are skipped only when disable_media is set
        if disable_media:
            cmd.append(
                r'--exclude=\.(jpe?g|png|gif|mp(eg|4|g)|mov|avi|wmv|flv)$'
            )
        # excludes common false positive/time wasting dirs for cPanel
        if not disable_excludes:
            cmd.append(
                r'--exclude=/home[0-9]?/[^/]*/'
                r'(quarantine*|mail/|etc/|logs/.*(\.tar)?\.gz'
                r'|tmp/awstats/.*.txt|tmp/webalizer/'
                r'(.*usage_.*.html|webalizer\.current))'
            )
        for pattern in exclude or ():
            cmd.extend(['--exclude', pattern])
        cmd.append('--')
        return cmd

    def scan(
        self,
        scan_paths: list[str],
        log_tuples: list[tuple[Path, Union[pwd.struct_passwd, None]]],
        print_items: bool = False,
    ) -> ScanResult:
        """Run clamscan over ``scan_paths`` and collect what it reports.

        Args:
            scan_paths: paths to scan; each is converted with ``str`` and
                appended to the prepared command. Must not be empty.
            log_tuples: ``(log file path, owner or None)`` pairs; raw clamscan
                output is appended to every log file.
            print_items: passed to ClamParser; when true, hits and summary
                lines are also printed.

        Returns:
            A ScanResult with clamscan's return code, the command line, the
            regular and heuristic hits and the summary text.

        Exits with status 1 if an unexpected exception occurs while scanning.
        A KeyboardInterrupt or a kill signal does not abort: whatever was
        parsed up to that point is still returned.
        """
        scan_path_strs = list(map(str, scan_paths))
        assert scan_paths
        # work on a copy so self.command stays reusable for later scans
        cmd = self.command.copy()
        cmd.extend(scan_path_strs)
        self.log.warning('Scan command: %s', c.cyan(shlex.join(cmd)))
        try:
            # the ExitStack closes every opened log file when the block ends
            with ExitStack() as stack:
                open_logfiles = self._init_logs(stack, log_tuples)
                try:
                    # NOTE(review): Proc comes from cproc; the meaning of
                    # ``lim`` is not visible in this file - confirm there
                    with Proc(
                        cmd,
                        lim=os.cpu_count(),
                        stdout=PIPE,
                        stderr=None,
                        encoding='utf-8',
                        errors='surrogateescape',
                    ) as proc:
                        # the parser thread starts itself in its constructor
                        parser = ClamParser(
                            proc=proc,
                            scanner=self,
                            open_logfiles=open_logfiles,
                            print_items=print_items,
                        )
                        try:
                            proc.wait(timeout=COMMAND_TIMEOUT)
                        except TimeoutExpired:
                            proc.kill()
                        # wait until the parser has consumed all of stdout
                        parser.join()
                except KeyboardInterrupt:
                    proc.kill()
                    self.log.error(
                        "Scan interrupted; continuing with what it found so "
                        "far, if any"
                    )
                else:
                    # a negative return code is the negated signal number
                    if proc.returncode < 0:
                        self.log.error(
                            "The clamscan process was killed with signal %d; "
                            "continuing with what it found so far, if any",
                            -proc.returncode,
                        )
        except Exception as exc:
            self.log.fatal('Error: clamav fatal error:\n %s', exc)
            sys.exit(1)
        if proc.returncode < 0:
            # record the kill signal at the end of every log file
            for log_path, _ in log_tuples:
                with open(log_path, 'a', encoding='utf-8') as file:
                    file.write(
                        'Scan was interrupted with kill signal '
                        f'{-proc.returncode}\n'
                    )
                    if proc.returncode == -9:
                        file.write(
                            "This usually means an out-of-memory condition\n"
                        )
        else:  # not killed
            if not parser.hits_found and not parser.heur_found:
                # but still nothing found
                # mode 'w' replaces everything logged so far with this line
                for log_path, _ in log_tuples:
                    with open(log_path, 'w', encoding='utf-8') as file:
                        file.write('No malware detected\n')
        parser.join()  # to be safe; should already be joined
        return ScanResult(
            rcode=proc.returncode,
            command=shlex.join(cmd),
            hits_found=parser.hits_found,
            heur_found=parser.heur_found,
            summary=''.join(parser.summary_lines),
        )

    def _freshclam_print(self, line: str):
        """Colorize one line of freshclam output and emit it via the logger"""
        # applied in this order; each pair is (pattern, colored replacement)
        substitutions = (
            (r'(ClamAV update process started)', c.bold(r'\1')),
            (r'(WARNING|(?:YARA.)?[Hh]euristic):', c.yellow(r'\1') + ':'),
            (r'(ERROR):', c.red(r'\1') + ':'),
            (r'((?:main|daily|bytecode)\.c[lv]d)', c.cyan(r'\1')),
        )
        for pattern, replacement in substitutions:
            line = re.sub(pattern, replacement, line)
        self.log.warning(line)


def ask_prompt(*lines: str, chars: tuple[str]) -> str:
    """Keep asking the question until the reply is one of ``chars``.

    The lines are joined with newlines and shown in bold; the reply is
    stripped and lower-cased before it is compared and returned.
    """
    reply = ''
    text = '\n'.join(lines)
    while reply not in chars:
        raw = input(c.bold(text) + '\n')
        reply = raw.strip().lower()
    return reply


def _find_binary(paths_to_check: list[str]) -> Union[str, None]:
    """Return the first candidate path that exists, or None if none do"""
    existing = (path for path in paths_to_check if os.path.exists(path))
    return next(existing, None)


def jail_files(paths: list[Path], *, time_str: Union[str, None] = None):
    """Pick a quarantine directory for each file and move the file there.

    Files under a ``/home*/<user>/`` directory go to that user's
    ``quarantine/quarantine_<time_str>``. Other files go to
    ``/home/t1bin/quarantine/...`` when running as root on a shared server,
    and to the current user's home otherwise. A failure on one file is
    reported on stderr and does not stop the remaining files.

    Args:
        paths: files to quarantine.
        time_str: suffix for the quarantine directory name; defaults to the
            current epoch time in seconds.
    """
    if time_str is None:
        time_str = str(int(time.time()))
    if not paths:
        print('Nothing to quarantine')
        return
    printed_roots = set()
    for file_path in paths:
        if match := HOME_RE.match(str(file_path)):
            user_home = Path(match.group(1))
            jail_root = user_home / f'quarantine/quarantine_{time_str}'
        elif rads.IMH_ROLE == 'shared' and IS_ROOT:
            jail_root = Path(f'/home/t1bin/quarantine/quarantine_{time_str}')
        else:
            jail_root = CUR_USER_HOME / f'quarantine/quarantine_{time_str}'
        # announce each quarantine root only once
        if jail_root not in printed_roots:
            # print() does no %-formatting, so the path is interpolated here
            print(f'Quarantine root: {jail_root}')
            printed_roots.add(jail_root)
        try:
            jail_mv(file_path, jail_root)
        except Exception as exc:
            print(
                exc,
                f'Error: quarantine failed for {file_path}',
                sep='\n',
                file=sys.stderr,
            )


def jail_mv(file_path: Path, jail_root: Path):
    """Move ``file_path`` into ``jail_root``, keeping its directory layout.

    The quarantine root is opened (mode 0700) only for the duration of the
    move and is always set back to mode 0000 afterwards, even when the copy
    fails, so files quarantined earlier do not stay accessible.

    Raises:
        OSError: creating directories, copying or removing the file failed.
    """
    jail_root.mkdir(mode=0o700, parents=True, exist_ok=True)
    os.chmod(jail_root, 0o700)
    try:
        # recreate the file's absolute parent path below the quarantine root
        dest_dir = jail_root / str(file_path.parent).lstrip('/')
        dest_dir.mkdir(parents=True, exist_ok=True)
        shutil.copystat(file_path.parent, dest_dir)
        copy_uid_guid(file_path.parent, dest_dir)
        dest_path = dest_dir / file_path.name
        shutil.copy2(file_path, dest_path)
        copy_uid_guid(file_path, dest_path)
        # the original is removed only after the copy has succeeded
        os.remove(file_path)
    finally:
        os.chmod(jail_root, 0o000)


def copy_uid_guid(src, dst):
    """Give ``dst`` the same owner and group as ``src``, best effort.

    Nothing is done when both already match; a failing chown is ignored.
    """
    wanted = os.stat(src)
    current = os.stat(dst)
    if (wanted.st_uid, wanted.st_gid) != (current.st_uid, current.st_gid):
        try:
            os.chown(dst, wanted.st_uid, wanted.st_gid)
        except OSError:
            # deliberately best effort: keep the current ownership
            pass

# Zerion Mini Shell 1.0