Mini Shell

Direktori : /opt/sharedrads/python/send_customer_str/
Upload File :
Current File : //opt/sharedrads/python/send_customer_str/nlp.py

#!/opt/imh-python/bin/python3
"""New Apache Log Parser - customer html version"""
from typing import Union
import sys
from pathlib import Path
from argparse import ArgumentParser
import re
import yaml
from prettytable import PrettyTable
import rads

# Directory that holds the nlp_funcs module and the awk script. It is put at
# the front of sys.path so the import on the next line can find nlp_funcs.
NLP_SCRIPTS = Path('/opt/sharedrads/nlp_scripts')
sys.path.insert(0, str(NLP_SCRIPTS))
import nlp_funcs as nlp  # pylint: disable=wrong-import-position

# Path of the awk script that main() hands to nlp.start_nlp()
AWK_SCRIPT = str(NLP_SCRIPTS / 'nlp-awk.sh')

# Hex colors passed to colorize() when building report cells
ERROR = '#FF0000'
OKAY = '#008000'
WARN = '#FF8C00'
BLUE = '#0000FF'
# Matched against the lower-cased user agent to pick out crawler-like agents
BOT_RE = re.compile('bot|spider|crawl')

# HTTP status code (int) -> color used for that code in the report tables.
# Codes that are not listed here are printed without any coloring.
# fmt: off
CODE_COLORS = {
    200: OKAY, 206: WARN, 301: OKAY, 302: OKAY, 303: WARN,
    304: OKAY, 400: ERROR, 401: ERROR, 403: ERROR, 404: ERROR,
    405: ERROR, 406: ERROR, 413: WARN, 500: ERROR, 501: ERROR,
}
# fmt: on


def read_urls() -> dict[str, str]:
    """Load the knowledge-base URL mapping used for the report links.

    Parses ``/opt/sharedrads/etc/kb_urls.yaml`` with the YAML safe loader and
    returns the mapping stored under ``hub`` when ``rads.IMH_CLASS`` equals
    ``'hub'``, otherwise the mapping stored under ``imh``.
    """
    url_file = Path('/opt/sharedrads/etc/kb_urls.yaml')
    with url_file.open(encoding='ascii') as handle:
        all_urls = yaml.load(handle, yaml.SafeLoader)
    if rads.IMH_CLASS == 'hub':
        return all_urls['hub']
    return all_urls['imh']


# Row limit: passed to nlp.get_input() and used to slice each report section
NLINES = 10
# Knowledge-base links, loaded once when the module is imported
KB_URLS = read_urls()


def linkify(link: str, text: str) -> str:
    """Return an HTML anchor whose href is ``link`` and whose label is ``text``.

    Both values are interpolated as given; nothing is escaped here.
    """
    return '<a href="{}">{}</a>'.format(link, text)


def print_header(text: str):
    """Print ``text`` wrapped in a span that carries the ``header`` class."""
    markup = '<span class="header">{}</span>'.format(text)
    print(markup)


def colorize(text: Union[str, int], color: str) -> str:
    """Return ``text`` inside a span with an inline ``color`` style.

    Args:
        text: value to display; it is formatted into the markup as-is.
        color: CSS color value placed in the span's ``style`` attribute.
    """
    return '<span style="color: {}">{}</span>'.format(color, text)


def print_hourly_hits(data: dict[str, dict[int, int]]):
    """Print an HTML table (class ``hits``) with one row per hour/hits pair.

    The keys of the outer mapping are ignored; every inner mapping contributes
    its ``hour -> hits`` items as rows, in iteration order.
    """
    report = PrettyTable(["Hour", "Hits"])
    rows = (
        [hour, hits]
        for per_key in data.values()
        for hour, hits in per_key.items()
    )
    for row in rows:
        report.add_row(row)
    print(report.get_html_string(attributes={"class": "hits"}))


def print_http_codes(data: dict[int, int]):
    """Print an HTML table (class ``codes``) of response codes and hit counts.

    A code found in ``CODE_COLORS`` is shown as a colored link to the matching
    anchor of the ``http_codes`` knowledge-base page, and its hit count gets
    the same color. Any other code is printed plain.
    """
    report = PrettyTable(["Response Code", 'Hits'])
    for status, count in data.items():
        shade = CODE_COLORS.get(status)
        if not shade:
            report.add_row([status, count])
            continue
        anchor = linkify(f"{KB_URLS['http_codes']}#{status}", status)
        report.add_row([colorize(anchor, shade), colorize(count, shade)])
    # The cells hold our own markup, so restore it after the table is rendered
    print(unescape(report.get_html_string(attributes={"class": "codes"})))


def print_weighted_ips(data: list[list[str]]):
    """Print an HTML table (class ``agent``) of source IPs with suggestions.

    Each row of ``data`` is a ``(hits, address)`` pair. An address with more
    than 40% of the hits summed over ``data`` is colored ``ERROR`` and one
    with more than 20% is colored ``WARN``; both get the deny-manager link.
    Below those thresholds, a PTR containing ``googlebot`` or ``msnbot`` gets
    the corresponding rate-limit link and everything else gets no suggestion.
    """
    grand_total = sum(int(count) for count, _ in data)
    report = PrettyTable(['Hits', 'IP', 'Reverse DNS', 'Potential Solution'])
    advise_block = linkify(KB_URLS['deny_manager'], 'Evaluate blocking the IP')
    advise_bots = linkify(KB_URLS['robots'], 'Rate Limit Bots')
    advise_google = linkify(KB_URLS['google_bot'], 'Rate Limit Google Bots')
    for raw_hits, addr in data:
        hits = int(raw_hits)
        ptr = nlp.ptr_lookup(addr)
        shade = None
        if hits > grand_total * 0.40:
            shade, advice = ERROR, advise_block
        elif hits > grand_total * 0.20:
            shade, advice = WARN, advise_block
        elif 'googlebot' in ptr:
            advice = advise_google
        elif 'msnbot' in ptr:
            advice = advise_bots
        else:
            advice = ''
        if shade is None:
            report.add_row([hits, addr, ptr, advice])
        else:
            report.add_row(
                [colorize(hits, shade), colorize(addr, shade), ptr, advice]
            )
    # NOTE(review): unescape() is applied to the whole table, so markup
    # characters inside ``ptr`` or ``addr`` would also reach the output
    # unescaped - confirm those values cannot carry HTML.
    print(unescape(report.get_html_string(attributes={"class": "agent"})))


def print_weighted_user_agents(data: list[list[str]]):
    """Print an HTML table (class ``agent``) of user agents with suggestions.

    Each row of ``data`` is a ``(hits, agent)`` pair; agents are cut to 90
    characters. An agent with more than 30% of the hits summed over ``data``
    is colored ``ERROR`` and one with more than 20% is colored ``WARN``; both
    get the htaccess guide. Otherwise an agent matching ``BOT_RE`` is colored
    ``BLUE`` with the robots guide, and anything else is printed plain.
    """
    block_guide = linkify(KB_URLS['htaccess'], 'Block Unwanted Users')
    crawl_guide = linkify(KB_URLS['robots'], 'Set Crawl Delay')
    report = PrettyTable(['Hits', 'Agent', 'Potential Solution'])
    report.align['Agent'] = 'l'
    grand_total = sum(int(count) for count, _ in data)
    for raw_hits, full_agent in data:
        hits = int(raw_hits)
        agent = full_agent[:90]
        if hits > grand_total * 0.3:
            shade, advice = ERROR, block_guide
        elif hits > grand_total * 0.2:
            shade, advice = WARN, block_guide
        elif BOT_RE.search(agent.lower()):
            shade, advice = BLUE, crawl_guide
        else:
            report.add_row([hits, agent, ''])
            continue
        report.add_row([colorize(hits, shade), colorize(agent, shade), advice])
    # NOTE(review): unescape() is applied to the whole table, so markup
    # characters inside the log-supplied agent string would also reach the
    # output unescaped - confirm whether the agent needs sanitizing first.
    print(unescape(report.get_html_string(attributes={"class": "agent"})))


def unescape(html: str) -> str:
    """Turn ``&lt;``, ``&gt;`` and ``&quot;`` back into ``<``, ``>`` and ``"``.

    No other entity is touched; ``&amp;`` in particular is left alone.
    """
    restored = html
    for entity, char in (('&lt;', '<'), ('&gt;', '>'), ('&quot;', '"')):
        restored = restored.replace(entity, char)
    return restored


def print_non_static_duplicates(data: list[list[str]]):
    """Print an HTML table (class ``agent``) of non-static request counts.

    Each row of ``data`` is a ``(hits, response code, uri)`` triple. The
    response code is colored when it appears in ``CODE_COLORS``. The last
    column holds a knowledge-base link chosen from the first marker found in
    the URI, or is empty when no marker matches.
    """
    # (URI substring, KB_URLS key, link text), checked in order; first hit wins
    remedies = (
        ('wp-login', 'wp_login', 'WordPress Brute Prevention'),
        ('xmlrpc', 'wp_xmlrpc', 'Disable XMLRPC'),
        ('wp-comments-post', 'wp_spam', 'Comment Spam Prevention'),
        ('wp-cron', 'wp_cron', 'Add wp-cron scheduling'),
        ('admin-ajax', 'wp_ajax', 'Disable Heartbeat'),
    )
    tbl = PrettyTable(
        ["Hits", "Response", "Access Location", 'Possible Solution']
    )
    tbl.align['Access Location'] = 'l'
    for count, httpcode, uri in data:
        # CODE_COLORS is keyed by int while the rows are annotated as strings,
        # so look the code up by its integer value; a str key never matches.
        try:
            color = CODE_COLORS.get(int(httpcode))
        except (TypeError, ValueError):
            color = None
        code = colorize(httpcode, color) if color else httpcode
        link = ''
        for marker, kb_key, label in remedies:
            if marker in uri:
                link = linkify(KB_URLS[kb_key], label)
                break
        tbl.add_row([count, code, uri, link])
    # NOTE(review): unescape() is applied to the whole table, so markup
    # characters inside the log-supplied URI would also reach the output
    # unescaped - confirm whether the URI needs sanitizing first.
    table = tbl.get_html_string(attributes={"class": "agent"})
    print(unescape(table))


def print_duplicate_requests(data: list[list[str]]):
    """Print an HTML table (class ``agent``) of duplicate request counts.

    Each row of ``data`` is a ``(hits, response code, uri)`` triple. The
    response code is colored when it appears in ``CODE_COLORS``.
    """
    tbl = PrettyTable(["Hits", "Response", "Access Location"])
    tbl.align['Access Location'] = 'l'
    for count, httpcode, uri in data:
        # CODE_COLORS is keyed by int while the rows are annotated as strings,
        # so look the code up by its integer value; a str key never matches.
        try:
            color = CODE_COLORS.get(int(httpcode))
        except (TypeError, ValueError):
            color = None
        code = colorize(httpcode, color) if color else httpcode
        tbl.add_row([count, code, uri])
    # NOTE(review): unescape() is applied to the whole table, so markup
    # characters inside the log-supplied URI would also reach the output
    # unescaped - confirm whether the URI needs sanitizing first.
    table = tbl.get_html_string(attributes={"class": "agent"})
    print(unescape(table))


def print_out(data: nlp.NLPData):
    """Print every report section, each one under its own header.

    The hourly hits and response code tables share the first header. The
    remaining sections each print at most ``NLINES`` entries.
    """
    print_header('Hourly hits and response codes')
    print_hourly_hits(data['hourly_hits'])
    print_http_codes(data['http_codes'])
    # (header text, rendering function, key into ``data``), in output order
    sections = (
        ("Duplicate requests", print_duplicate_requests, 'requests'),
        (
            "Requests for non-static content",
            print_non_static_duplicates,
            'dynamic',
        ),
        ('Top User Agents', print_weighted_user_agents, 'user_agents'),
        ('Top IPs', print_weighted_ips, 'src_ips'),
    )
    for title, render, key in sections:
        print_header(title)
        render(data[key][:NLINES])


def parse_args():
    """Parse sys.argv and return the namespace holding ``target``.

    ``target`` is the single positional argument, shown as ``cPanel user``
    in the usage text.
    """
    cli = ArgumentParser(description=__doc__)
    cli.add_argument('target', metavar='cPanel user')
    return cli.parse_args()


def main():
    """Run the awk parser over the target's log and print the HTML report.

    The log and the parser process are both closed by their context managers
    before the report is printed.
    """
    target = parse_args().target
    with nlp.open_log(target) as log, \
            nlp.start_nlp(AWK_SCRIPT, None, log) as proc:
        data = nlp.get_input(proc.stdout, NLINES)
    print_out(data)


# Script entry point: only runs when the file is executed directly
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C: stop quietly instead of printing a traceback
        pass

Zerion Mini Shell 1.0