#!/opt/imh-python/bin/python3
"""New Apache Log Parser - customer html version"""
from typing import Union
import sys
from pathlib import Path
from argparse import ArgumentParser
import re
import yaml
from prettytable import PrettyTable
import rads
NLP_SCRIPTS = Path('/opt/sharedrads/nlp_scripts')
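# nlp_funcs lives outside the normal import path, so add its directory first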
sys.path.insert(0, str(NLP_SCRIPTS))
import nlp_funcs as nlp # pylint: disable=wrong-import-position
AWK_SCRIPT = str(NLP_SCRIPTS / 'nlp-awk.sh')
ERROR = '#FF0000'
OKAY = '#008000'
WARN = '#FF8C00'
BLUE = '#0000FF'
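# Substrings that identify crawler user agents (matched against lowercased strings)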
BOT_RE = re.compile('bot|spider|crawl')
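# HTTP response codes mapped to their display colors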
# fmt: off
CODE_COLORS = {
200: OKAY, 206: WARN, 301: OKAY, 302: OKAY, 303: WARN,
304: OKAY, 400: ERROR, 401: ERROR, 403: ERROR, 404: ERROR,
405: ERROR, 406: ERROR, 413: WARN, 500: ERROR, 501: ERROR,
}
# fmt: on
def read_urls() -> dict[str, str]:
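    """Load knowledgebase article URLs for this brand from the shared YAML map"""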
path = Path('/opt/sharedrads/etc/kb_urls.yaml')
with open(path, encoding='ascii') as kbs:
kb_urls = yaml.load(kbs, yaml.SafeLoader)
key = 'hub' if rads.IMH_CLASS == 'hub' else 'imh'
return kb_urls[key]
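# Number of top entries to show in each table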
NLINES = 10
KB_URLS = read_urls()
def linkify(link: str, text: str) -> str:
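    """Wrap text in an HTML anchor pointing at link"""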
return f'<a href="{link}">{text}</a>'
def print_header(text: str):
"""Prints a pretty header"""
print(f'<span class="header">{text}</span>')
def colorize(text: Union[str, int], color: str) -> str:
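    """Wrap text in a span styled with the given color"""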
return f'<span style="color: {color}">{text}</span>'
def print_hourly_hits(data: dict[str, dict[int, int]]):
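    """Print an HTML table of hits per hour"""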
tbl = PrettyTable(["Hour", "Hits"])
for hourly_hits in data.values():
for hour, hits in hourly_hits.items():
tbl.add_row([hour, hits])
table = tbl.get_html_string(attributes={"class": "hits"})
print(table)
def print_http_codes(data: dict[int, int]):
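    """Print hits per response code, color-coded and linked to the HTTP codes article"""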
tbl = PrettyTable(["Response Code", 'Hits'])
for hitcode, hits in data.items():
if color := CODE_COLORS.get(hitcode, None):
guide = f"{KB_URLS['http_codes']}#{hitcode}"
link = colorize(linkify(guide, hitcode), color)
tbl.add_row([link, colorize(hits, color)])
else:
tbl.add_row([hitcode, hits])
table = tbl.get_html_string(attributes={"class": "codes"})
print(unescape(table))
def print_weighted_ips(data: list[list[str]]):
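    """Print top source IPs with reverse DNS; IPs generating an outsized share
    of traffic, and known bots, get a suggested solution link"""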
total_hits = sum(int(x) for x, _ in data)
tbl = PrettyTable(['Hits', 'IP', 'Reverse DNS', 'Potential Solution'])
block_link = linkify(KB_URLS['deny_manager'], 'Evaluate blocking the IP')
bot_link = linkify(KB_URLS['robots'], 'Rate Limit Bots')
googbot_link = linkify(KB_URLS['google_bot'], 'Rate Limit Google Bots')
for hits, addr in data:
hits = int(hits)
ptr = nlp.ptr_lookup(addr)
if hits > total_hits * 0.40:
tbl.add_row(
[colorize(hits, ERROR), colorize(addr, ERROR), ptr, block_link]
)
elif hits > total_hits * 0.20:
tbl.add_row(
[colorize(hits, WARN), colorize(addr, WARN), ptr, block_link]
)
elif 'googlebot' in ptr:
tbl.add_row([hits, addr, ptr, googbot_link])
elif 'msnbot' in ptr:
tbl.add_row([hits, addr, ptr, bot_link])
else:
tbl.add_row([hits, addr, ptr, ''])
table = tbl.get_html_string(attributes={"class": "agent"})
print(unescape(table))
def print_weighted_user_agents(data: list[list[str]]):
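    """Print top user agents; heavy hitters and crawlers are highlighted with
    suggested solution links"""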
guide = linkify(KB_URLS['htaccess'], 'Block Unwanted Users')
botguide = linkify(KB_URLS['robots'], 'Set Crawl Delay')
tbl = PrettyTable(['Hits', 'Agent', 'Potential Solution'])
tbl.align['Agent'] = 'l'
total_hits = sum(int(x) for x, _ in data)
for count, agent in data:
hits = int(count)
agent = agent[:90]
if hits > total_hits * 0.3:
tbl.add_row([colorize(hits, ERROR), colorize(agent, ERROR), guide])
elif hits > total_hits * 0.2:
tbl.add_row([colorize(hits, WARN), colorize(agent, WARN), guide])
elif BOT_RE.search(agent.lower()):
tbl.add_row([colorize(hits, BLUE), colorize(agent, BLUE), botguide])
else:
tbl.add_row([hits, agent, ''])
table = tbl.get_html_string(attributes={"class": "agent"})
print(unescape(table))
def unescape(html: str) -> str:
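    """Undo the HTML escaping applied by PrettyTable so the injected links and
    spans render as markup"""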
    return html.replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')
def print_non_static_duplicates(data: list[list[str]]):
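    """Print duplicate requests for dynamic content, linking common WordPress
    endpoints to their remediation articles"""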
tbl = PrettyTable(
["Hits", "Response", "Access Location", 'Possible Solution']
)
tbl.align['Access Location'] = 'l'
for count, httpcode, uri in data:
        # response codes arrive as strings; CODE_COLORS is keyed by int
        if color := CODE_COLORS.get(int(httpcode), None):
code = colorize(httpcode, color)
else:
code = httpcode
if 'wp-login' in uri:
link = linkify(KB_URLS['wp_login'], 'WordPress Brute Prevention')
elif 'xmlrpc' in uri:
link = linkify(KB_URLS['wp_xmlrpc'], 'Disable XMLRPC')
elif 'wp-comments-post' in uri:
link = linkify(KB_URLS['wp_spam'], 'Comment Spam Prevention')
elif 'wp-cron' in uri:
link = linkify(KB_URLS['wp_cron'], 'Add wp-cron scheduling')
elif 'admin-ajax' in uri:
link = linkify(KB_URLS['wp_ajax'], 'Disable Heartbeat')
else:
link = ''
tbl.add_row([count, code, uri, link])
table = tbl.get_html_string(attributes={"class": "agent"})
print(unescape(table))
def print_duplicate_requests(data: list[list[str]]):
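    """Print the most frequently repeated requests with color-coded response codes"""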
tbl = PrettyTable(["Hits", "Response", "Access Location"])
tbl.align['Access Location'] = 'l'
for count, httpcode, uri in data:
        # as above, cast the string response code to match CODE_COLORS keys
        if color := CODE_COLORS.get(int(httpcode), None):
code = colorize(httpcode, color)
else:
code = httpcode
tbl.add_row([count, code, uri])
table = tbl.get_html_string(attributes={"class": "agent"})
print(unescape(table))
def print_out(data: nlp.NLPData):
"""Prints all the things"""
print_header('Hourly hits and response codes')
print_hourly_hits(data['hourly_hits'])
print_http_codes(data['http_codes'])
print_header("Duplicate requests")
print_duplicate_requests(data['requests'][:NLINES])
print_header("Requests for non-static content")
print_non_static_duplicates(data['dynamic'][:NLINES])
print_header('Top User Agents')
print_weighted_user_agents(data['user_agents'][:NLINES])
print_header('Top IPs')
print_weighted_ips(data['src_ips'][:NLINES])
def parse_args():
"""parse sys.argv"""
parser = ArgumentParser(description=__doc__)
parser.add_argument('target', metavar='cPanel user')
args = parser.parse_args()
return args
def main():
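    """Open the target user's Apache log, run it through the awk parser, and
    print the HTML report"""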
args = parse_args()
with nlp.open_log(args.target) as log:
with nlp.start_nlp(AWK_SCRIPT, None, log) as proc:
data = nlp.get_input(proc.stdout, NLINES)
print_out(data)
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
pass