# /opt/sharedrads/nlp_scripts/nlp_funcs.py

from typing import Union, IO, TypedDict
import time
import os
from pathlib import Path
from collections import defaultdict
from subprocess import Popen, PIPE
import sys
from dns import resolver, reversename, exception as dns_exc
import rads


def ptr_lookup(ipaddr: str) -> str:
    """Gets a PTR record for a given IP"""
    try:
        myresolver = resolver.Resolver()
        myresolver.lifetime = 1.0
        myresolver.timeout = 1.0
        return str(myresolver.query(reversename.from_address(ipaddr), "PTR")[0])
    except resolver.NXDOMAIN:
        return "No Record Found"
    except dns_exc.Timeout:
        return "Query Timed Out"
    except Exception:
        return "Resolver Error"


def open_log(target: Union[str, None]) -> IO:
    """Returns an open handle to the log to parse: the file or cPanel user
    named on the command line, data piped over stdin, or a domlog guessed
    from the current working directory"""
    if target is None:
        # no domlog was specified via CLI args
        if not sys.stdin.isatty():
            # stdin is not a terminal, so a log is being piped in; use it
            return sys.stdin
        # try to guess cPanel user based on cwd
        cwd = Path(os.getenv('PWD', os.getcwd()))
        if cwd != Path('/home') and cwd.is_relative_to('/home'):
            user = cwd.parts[2]
            if rads.is_cpuser(user):
                return choose_domlog(user).open('rb')
        sys.exit("No domlog found to parse")
    path = Path(target)
    if path.is_file():
        return path.open('rb')
    if rads.is_cpuser(target):
        return choose_domlog(target).open('rb')
    sys.exit("No valid file specified.")


def choose_domlog(user: str) -> Path:
    """Lists the users' domlogs folder and finds the largest file modified
    within the last 24 hours that is not an ftp log"""
    if Path('/etc/cpanel/ea4/is_ea4').is_file():
        domlog_dir = Path('/var/log/apache2/domlogs', user)
    else:
        domlog_dir = Path('/usr/local/apache/domlogs', user)
    logs: dict[Path, int] = {}  # path -> size
    last_24h = time.time() - 86400
    if not domlog_dir.is_dir():
        sys.exit(f"No recent domlogs found for {user}")
    for entry in domlog_dir.iterdir():
        if entry.name.endswith('-ftp_log') or not entry.is_file():
            continue
        try:
            stat = entry.stat()
        except OSError:
            continue
        if stat.st_mtime < last_24h:
            continue
        logs[entry] = stat.st_size
    if not logs:
        sys.exit(f"No recent domlogs found for {user}")
    # pick the largest of the recently-modified logs
    path = max(logs.items(), key=lambda item: item[1])[0]
    print(
        f"Information Parsed from: {path} for the last 24 hours",
        file=sys.stdout,
    )
    return path


def start_nlp(script: str, date: Union[None, str], stdin: IO) -> Popen:
    """Launches the NLP bash script with the log data on its stdin; the
    literal 'stdin' argument tells the script to read from the pipe, and an
    optional date string is appended to the command"""
    cmd = ['bash', script, 'stdin']
    if date:
        cmd.append(date)
    return Popen(
        cmd,
        stdin=stdin,
        stdout=PIPE,
        encoding='utf-8',
        errors='ignore',
    )


class NLPData(TypedDict):
    """Return format of get_input()"""

    hourly_hits: dict[str, dict[int, int]]  # {date: {hour: hits}}
    http_codes: dict[int, int]  # {code: hits}
    src_ips: list[list[str]]  # [[count, ipaddr], ...]
    user_agents: list[list[str]]  # [[count, UAstring], ...]
    requests: list[list[str]]  # [[count, httpcode, method uri], ...]
    dynamic: list[list[str]]  # [[count, httpcode, method uri], ...]
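
# A hand-written, illustrative example of that shape (all values invented):
#   {
#       'hourly_hits': {'01/Jan/2024': {0: 120, 1: 95}},
#       'http_codes': {200: 1500, 404: 37},
#       'src_ips': [['610', '203.0.113.9'], ['182', '198.51.100.4']],
#       'user_agents': [['900', 'Mozilla/5.0 (compatible; ...)']],
#       'requests': [['75', '200', 'GET /wp-login.php']],
#       'dynamic': [['75', '200', 'GET /wp-login.php']],
#   }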


def get_input(stdout: IO[str], nlines: int) -> NLPData:
    """Sorts output from the awk script into a data structure, reading until
    every per-line metric has collected nlines entries"""
    hourly_hits = defaultdict(dict)
    data: list[list[str]] = [[], [], [], [], []]
    for line in stdout:
        line = line.rstrip()
        # the first character in each line will be:
        # '0' - Number of hits by hour
        # '1' - HTTP response codes
        # '2' - Source IPs
        # '3' - User Agents
        # '4' - Requests
        # '5' - Requests for non-static content, query strings stripped off
        if char := int(line[0]):  # if > 0
            data[char - 1].append(line[2:])
        else:
            date_str = line[11:22]
            hour, hits = line[23:].split('|')
            hourly_hits[date_str][int(hour)] = int(hits)
        if min(map(len, data[1:])) == nlines:
            break  # if all metrics have hit max lines, stop parsing stdout
    # dict order is preserved in python 3.7+
    hourly_hits = dict(
        sorted(
            hourly_hits.items(), key=lambda x: time.strptime(x[0], '%d/%b/%Y')
        )
    )
    return {
        'hourly_hits': hourly_hits,
        'http_codes': dict(x[9:].split('|') for x in data[0]),
        'src_ips': [x.split('|') for x in data[1]],
        'user_agents': [x.split('|') for x in data[2]],
        'requests': [x.split('|') for x in data[3]],
        'dynamic': [x.split('|') for x in data[4]],
    }
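

# A minimal usage sketch (not part of the original module) showing roughly how
# these helpers chain together.  The wrapper script name and the line count of
# 10 below are hypothetical placeholders, not values taken from this codebase.
if __name__ == '__main__':
    log_fh = open_log(sys.argv[1] if len(sys.argv) > 1 else None)
    proc = start_nlp('/opt/sharedrads/nlp_scripts/nlp.sh', None, log_fh)  # hypothetical script name
    nlp_data = get_input(proc.stdout, 10)
    proc.wait()
    print(nlp_data['http_codes'])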
