Mini Shell

Direktori : /opt/imh-python/lib/python3.9/site-packages/ngxutil/
Upload File :
Current File : //opt/imh-python/lib/python3.9/site-packages/ngxutil/report.py

# vim: set ts=4 sw=4 expandtab syntax=python:
"""

ngxutil.report
ANSI & HTML report generator

@author J. Hipps <jacobh@inmotionhosting.com>

"""

import os
import sys
import logging
import re
from glob import glob

import arrow
from prettytable import PrettyTable

from ngxutil import default_cache_base
from ngxutil import logparse, influx
from ngxutil.vts import parse_vts
from ngxutil.cache import read_cache_file
from ngxutil.util import *

# Inline CSS attributes handed to PrettyTable.get_html_string() for every
# HTML table in the report (one property per line for readability)
tab_attribs = {
    'style': "border: 3px solid black; "
             "border-collapse: collapse; "
             "width: 650px; "
             "font-size: 16px; "
             "font-family: Helvetica,sans-serif; "
             "cell-padding: 10px",
}

# Package-wide logger shared by the report generators
logger = logging.getLogger('ngxutil')


def repgen_header(vdat):
    """
    Generate server status header

    @vdat is the status mapping to summarize; the keys read here are
    'nowMsec', 'loadMsec', 'hostName' and 'nginxVersion'.

    This is best-effort: if the uptime cannot be computed it is shown as
    "???", and if the data row cannot be built the table is returned with
    its column headings only.

    Returns: (plain_text, html)
    """
    # calc uptime (msec timestamps -> seconds); missing keys make the
    # subtraction fail, which we treat as "unknown" rather than an error
    try:
        uptime = format_uptime((vdat.get('nowMsec') - vdat.get('loadMsec')) / 1000.0)
    except Exception:
        # `except Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit are never swallowed here
        uptime = "???"

    pt = PrettyTable()
    pt.field_names = ('Server', 'Version', 'Uptime')
    try:
        pt.add_row((vdat.get('hostName'), vdat.get('nginxVersion'), uptime))
    except Exception as e:
        # Still best-effort, but leave a trace instead of failing silently
        logger.warning("Unable to build server status header row: %s", str(e))
    return (pt.get_string(), pt.get_html_string() + "<br/>")

def repgen_server(span=None, tail=None, noflux=False, infile='/var/log/nginx/access.log'):
    """
    Generate a server-wide report

    Combines the server status header with the report for the wildcard
    domain '*' (rendered without its own header).

    Returns: (plain_text, html), or None if the domain report is unavailable
    """
    status = parse_vts()
    head_text, head_html = repgen_header(status)

    body = repgen_domain('*', status, noheader=True, span=span, tail=tail,
                         noflux=noflux, infile=infile)
    if body is None:
        return None

    # Plain text gets blank lines around the body; HTML only needs newlines
    full_text = head_text + "\n\n" + body[0] + "\n\n"
    full_html = head_html + "\n" + body[1] + "\n"
    return (full_text, full_html)

def repgen_domain(domain, vdata=None, noheader=False, span=None, tail=None, noflux=False, infile='/var/log/nginx/access.log'):
    """
    Generate a report for @domain

    @vdata     status data; fetched via parse_vts() when None
    @noheader  when True, omit the domain title and the email header
    @span      when set, totals are computed from the log/series data
               instead of the status counters, and the reported period is
               format_uptime(int(span) * 3600)
    @tail      number of trailing log lines to parse; overrides the default
    @noflux    when True, skip InfluxDB and parse @infile instead
    @infile    access log parsed when InfluxDB is not used

    Returns: (plain_text, html), or None when the domain owner, the
    domain's status zone, or the InfluxDB data cannot be retrieved.
    Calls sys.exit(100) if @infile cannot be stat'ed.
    """
    if vdata is None:
        vdata = parse_vts()

    ro = ""; rh = ""

    duser = get_domain_owner(domain)
    if not duser:
        return None

    # Only lookup failures mean "no data yet"; anything else should surface
    try:
        tvd = vdata['serverZones'][duser['vhost']]
    except (KeyError, TypeError):
        logger.error("Failed to retrieve status data for domain %s. Data not yet available.", domain)
        return None

    # determine loglines to read
    if tail is not None:
        llines = tail
    elif span:
        llines = None
    else:
        llines = vdata['connections']['requests']

    logdata = None

    # Use data from InfluxDB series, if available
    if not noflux:
        logdata = influx.fetch_all(duser['vhost'].lower(), span)
        if logdata is None:
            logger.warning("Failed to retrieve data from InfluxDB. Aborting. Use --noflux to force logparse input.")
            return None

    if logdata is None:
        # Get access.log data, then filter it based on the vhost field
        try:
            logger.info("Parsing log '%s' (%s)...", infile, format_size(os.stat(infile).st_size))
        except Exception as e:
            logger.error("Unable to stat input file: %s", str(e))
            sys.exit(100)
        aclog = logparse.parse_log(infile, span=span, lastlines=llines)
        logdata = logparse.filter_log(aclog, 'vhost', duser['vhost'].lower())

    xpc = logparse.get_cache_hitrate(logdata)

    # Domain header table
    if span:
        rqtot = len(logdata)
        rtimes = logparse.get_reqtime(logdata)
        rx_fmt = "---"
        tx_fmt = format_size(logparse.get_bytes(logdata))
        avgtime_fmt = "Min/Avg/Max"
        avgtime_str = "{min:01.03f} / {avg:01.03f} / {max:01.03f}".format(**rtimes)
    else:
        rqtot = tvd['requestCounter']
        rx_fmt = format_size(tvd['inBytes'])
        tx_fmt = format_size(tvd['outBytes'])
        avgtime_fmt = "Avg"
        msecs = tvd['requestMsecs']['msecs']
        if msecs:
            # msec samples -> average in seconds
            avgtime_str = "{:01.03f}".format(float(sum(msecs)) / float(len(msecs)) / 1000.0)
        else:
            # No samples recorded: show the same placeholder used for
            # unavailable values instead of dividing by zero
            avgtime_str = "---"
    pt = PrettyTable()
    pt.field_names = ('RX bytes', 'TX bytes', 'Requests', 'Req Time (%s)' % (avgtime_fmt))
    pt.add_row((rx_fmt, tx_fmt, rqtot, avgtime_str))

    if not noheader:
        # The period is only shown in the email header, so it is only
        # computed when that header is actually emitted
        if span:
            rduration = format_uptime(int(span) * 3600)
        else:
            rduration = format_uptime((vdata.get('nowMsec') - vdata.get('loadMsec')) / 1000.0)
        ro = strcolor('white', "<< {} / {} >>\n".format(domain, duser['vhost']))
        rh = "<h1 style=\"font: 48px Helvetica,sans-serif\">" + domain + " / " + duser['vhost'] + "</h1>\n" + \
             gen_email_header(rduration)
    ro += pt.get_string() + "\n\n"
    rh += strip_colors(pt.get_html_string(attributes=tab_attribs)) + "<br/>\n"

    # Add in cache stats
    cinfo = repgen_cache_info(duser['owner'], duser['vhost'], domain)
    ro += cinfo[0]; rh += cinfo[1]

    # Build response code table
    rclasses = ('1xx', '2xx', '3xx', '4xx', '5xx')
    rcolors = ('magenta', 'green', 'blue', 'yellow', 'red')
    if span:
        # Log-derived tallies may lack a class entirely, hence .get()
        rpc = logparse.get_status_hitrate(logdata)
        rcounts = [rpc.get(rclass, 0) for rclass in rclasses]
    else:
        rcounts = [tvd['responses'][rclass] for rclass in rclasses]
    rrow = [strcolor(rcolor, mkpct(rcount, rqtot)) for rcolor, rcount in zip(rcolors, rcounts)]
    rrow.append(strcolor('white', mkpct(rqtot, rqtot)))
    pt = PrettyTable()
    pt.field_names = rclasses + ('TOTAL',)
    pt.add_row(rrow)
    ro += strcolor('white', "HTTP RESPONSE CODES\n") + pt.get_string() + "\n\n"
    rh += "<h2 style=\"font: 24px Helvetica,sans-serif\">HTTP Response Codes</h2>\n" + \
          strip_colors(pt.get_html_string(attributes=tab_attribs)) + "<br/>\n"

    # Build UCS tables
    # Each summary column uses its own denominator: the request total minus
    # the statuses that are excluded from that column
    hit_dtot = rqtot - xpc.get('STATIC/TYPE', 0) - xpc.get('STATIC/PATH', 0) - xpc.get('DISABLED', 0) - xpc.get('BYPASS', 0)
    hit_stot = rqtot - xpc.get('DISABLED', 0) - xpc.get('BYPASS', 0) - xpc.get('HIT', 0)
    hit_btot = rqtot - xpc.get('STATIC/TYPE', 0) - xpc.get('STATIC/PATH', 0)

    ptx = PrettyTable()
    ptx.field_names = ('HIT (Dynamic)', 'HIT (Static)', 'DISABLED+BYPASS')
    ptx.add_row((strcolor('green', mkpct(xpc.get('HIT', 0), hit_dtot)),
                strcolor('yellow', mkpct(xpc.get('STATIC/TYPE', 0) + xpc.get('STATIC/PATH', 0), hit_stot)),
                strcolor('red', mkpct(xpc.get('BYPASS', 0) + xpc.get('DISABLED', 0), hit_btot))))

    pt = PrettyTable()
    pt.field_names = ('HIT', 'MISS', 'BYPASS', 'EXPIRED', 'UPDATING')
    pt.add_row((strcolor('green', mkpct(xpc.get('HIT', 0), rqtot)),
                strcolor('red', mkpct(xpc.get('MISS', 0), rqtot)),
                strcolor('magenta', mkpct(xpc.get('BYPASS', 0), rqtot)),
                strcolor('yellow', mkpct(xpc.get('EXPIRED', 0), rqtot)),
                strcolor('yellow', mkpct(xpc.get('UPDATING', 0), rqtot))))

    pt2 = PrettyTable()
    pt2.field_names = ('STATIC/TYPE', 'STATIC/PATH', 'DISABLED', 'TOTAL')
    pt2.add_row((strcolor('green', mkpct(xpc.get('STATIC/TYPE', 0), rqtot)),
                 strcolor('green', mkpct(xpc.get('STATIC/PATH', 0), rqtot)),
                 strcolor('magenta', mkpct(xpc.get('DISABLED', 0), rqtot)),
                 strcolor('white', mkpct(rqtot, rqtot))))

    ro += strcolor('white', "CACHE STATUS\n") + ptx.get_string() + "\n" + \
          pt.get_string() + "\n" + pt2.get_string() + "\n\n"
    rh += "<h2 style=\"font: 24px Helvetica,sans-serif\">Cache Status</h2>\n" + \
          strip_colors(ptx.get_html_string(attributes=tab_attribs)) + "<br/>\n" + \
          strip_colors(pt.get_html_string(attributes=tab_attribs)) + "<br/>\n" + \
          strip_colors(pt2.get_html_string(attributes=tab_attribs)) + "<br/>\n"

    # Show NLP datas
    # The wildcard domain spans multiple hosts, so show host + URI there
    fullurl = (domain == '*')
    cinfo = repgen_nlp(logdata, user=duser['owner'], fullurl=fullurl)
    ro += cinfo[0]; rh += cinfo[1]

    rh += gen_email_footer()
    return (ro, rh)

def repgen_cache_info(zone, vhost=None, domain=None, cbase=default_cache_base):
    """
    Return stats about currently cached pages

    Scans the cache files matching <cbase>/<zone>/*/* and tallies their
    count and total size.

    @zone    cache zone directory name under @cbase
    @vhost   optional vhost name accepted as the host part of a cache key
    @domain  when set, only files whose cache key is for @domain (or
             @vhost) are counted; when unset, every readable file counts
    @cbase   base directory of the cache

    Returns: (plain_text, html)
    """
    # Compile the key filter once, up front. The host names are escaped so
    # that they match literally: '.' must not match any character, and a
    # wildcard domain such as '*' must not be parsed as a regex quantifier
    # (which raises re.error).
    key_rx = None
    if domain:
        hosts = [re.escape(h.lower()) for h in (domain, vhost) if h is not None]
        key_rx = re.compile(r'^https?(GET|HEAD|OPTIONS)(' + r'|'.join(hosts) + r')/?.*$', re.I)

    gpath = os.path.join(cbase, zone, '*', '*')
    tally = {'files': 0, 'size': 0}
    for tfile in glob(gpath):
        cdata = read_cache_file(tfile)
        if not cdata:
            # Unreadable/unparseable cache file; skip it
            continue
        if key_rx is not None and not key_rx.match(cdata['key']):
            continue
        tally['files'] += 1
        tally['size'] += cdata['stat'].st_size

    # Build cache stats info
    pt = PrettyTable()
    pt.field_names = ('Total Files', 'Total Size')
    pt.add_row((strcolor('white', tally['files']),
               strcolor('white', format_size(tally['size']))))
    ro = strcolor('white', "CACHE ZONE STATS\n") + pt.get_string() + "\n\n"
    rh = "<h2 style=\"font: 24px Helvetica,sans-serif\">Cache Zone Status</h2>\n" + \
         strip_colors(pt.get_html_string(attributes=tab_attribs)) + "<br/>\n"
    return (ro, rh)

def _nlp_section(ansi_title, html_title, table, ansi_tail="\n\n"):
    """
    Render one titled table as an (ansi_text, html) pair
    """
    ansi = strcolor('white', ansi_title + "\n") + table.get_string() + ansi_tail
    html = "<h2 style=\"font: 24px Helvetica,sans-serif\">" + html_title + "</h2>\n" + \
           strip_colors(table.get_html_string(attributes=tab_attribs)) + "<br/>\n"
    return (ansi, html)


def repgen_nlp(logdata=None, user=None, lastlines=10000, topcount=10, rresolve=True, maxwidth=65, fullurl=False):
    """
    Generate an ANSI and HTML report of top hits
    If @logdata is not supplied (None), then a fresh read of the log is done,
    pulling the last @lastlines lines from the file

    @user      passed to the top-URI tally as its cache zone
    @topcount  number of entries per table
    @rresolve  passed through to the top-IP tally
    @maxwidth  URIs and user-agents are truncated to this many characters
    @fullurl   show host + URI instead of the URI alone

    Returns: (plain_text, html)
    """
    # Only fall back to the raw log when no data was passed at all. An
    # empty list is a valid (already filtered) result; re-reading the log
    # for it would report hits that do not belong to the caller's filter.
    if logdata is None:
        logdata = logparse.parse_log('/var/log/nginx/access.log', lastlines=lastlines)

    sections = []

    # Top Hosts
    utop = list(logparse.get_top_hosts(logdata, topcount=topcount).values())
    pt = PrettyTable()
    pt.add_column('HITS', [x['hits'] for x in utop])
    pt.add_column('HOST', [x['host'] for x in utop])
    pt.align['HITS'] = 'r'
    pt.align['HOST'] = 'l'
    sections.append(_nlp_section("TOP HOSTS", "Top Hosts", pt))

    # Top URIs
    utop = list(logparse.get_top_uris(logdata, czone=user, topcount=topcount).values())
    pt = PrettyTable()
    pt.add_column('HITS', [x['hits'] for x in utop])
    pt.add_column('METHOD', [x['method'] for x in utop])
    pt.add_column('STATUS', [x['status'] for x in utop])
    pt.add_column('CACHE', [x['cache'] for x in utop])
    if fullurl:
        pt.add_column('HOST/URI', [(x['host'] + x['uri'])[:maxwidth] for x in utop])
        pt.align['HOST/URI'] = 'l'
    else:
        pt.add_column('URI', [x['uri'][:maxwidth] for x in utop])
        pt.align['URI'] = 'l'
    pt.align['HITS'] = 'r'
    sections.append(_nlp_section("TOP URIs", "Top URIs", pt))

    # Top User-Agents
    utop = logparse.get_top_agents(logdata, topcount=topcount)
    pt = PrettyTable()
    pt.add_column('COUNT', list(utop.values()))
    pt.add_column('USER-AGENT', [x[:maxwidth] for x in utop])
    pt.align['COUNT'] = 'r'
    pt.align['USER-AGENT'] = 'l'
    sections.append(_nlp_section("TOP USER-AGENTS", "Top User-Agents", pt))

    # Top Locations (only when the first entry carries a 'loc' value)
    if logdata and logdata[0].get('loc') is not None:
        utop = logparse.get_top_locations(logdata, topcount=topcount)
        pt = PrettyTable()
        pt.add_column('COUNT', list(utop.values()))
        pt.add_column('LOCATION', list(utop))
        pt.align['COUNT'] = 'r'
        pt.align['LOCATION'] = 'l'
        sections.append(_nlp_section("TOP LOCATIONS", "Top Locations", pt))

    # Top Client IPs
    utop = logparse.get_top_ips(logdata, rresolve=rresolve, topcount=topcount)
    pt = PrettyTable()
    pt.add_column('HITS', [x['hits'] for x in utop.values()])
    pt.add_column('IP', list(utop))
    pt.add_column('HOST/PTR', [x['reverse'] for x in utop.values()])
    pt.align['HITS'] = 'r'
    pt.align['HOST/PTR'] = 'l'
    # Last section: no trailing blank lines in the plain-text output
    sections.append(_nlp_section("TOP CLIENT IPs", "Top Client IPs", pt, ansi_tail=""))

    ro = "".join(ansi for ansi, _ in sections)
    rh = "".join(html for _, html in sections)
    return (ro, rh)

def gen_email_header(tperiod):
    """
    Return email stats header

    A small HTML table showing the current timestamp and the reporting
    period @tperiod.
    """
    template = """
    <table style="border: 1px solid #444; border-collapse: collapse; width: 650px; font-size: 12px; font-family: Helvetica,sans-serif">
    <tr>
    <td>Generated on <b>%s</b></td>
    <td>Period: <b>%s</b></td>
    </tr>
    </table>
    """
    generated_on = arrow.now().format("YYYY-MM-DD HH:mm:ss ZZ")
    return template % (generated_on, tperiod)

def gen_email_footer():
    """
    Return email stats footer

    An HTML glossary explaining each cache status shown in the report.
    """
    # (status, explanation) pairs, in display order
    glossary = (
        ("HIT", "A valid (not expired) page was served from the cache"),
        ("MISS", "No valid cached page was available. A request was made to the application, and possibly cached for later use"),
        ("BYPASS", "The cache was bypassed due to meeting certain criteria (such as being logged in, using admin or user portal, using a checkout page in an ecommerce store, etc.)"),
        ("EXPIRED", "An expired page was served from the cache. This can happen if the origin server/application threw an error. Instead of showing the user an error page, an expired page from the cache is shown."),
        ("UPDATING", "An expired page was served from the cache. Another request is in progress (by another client), which will update the cached page. This status typically only happens during very high traffic periods"),
        ("STATIC/TYPE", "Static content that was served directly by NGINX. The file extension (type) was used to serve the file directly from NGINX, rather than requesting the resource from Apache"),
        ("STATIC/PATH", "Static content that was served directly by NGINX. A specifically-defined path was used to make this determination"),
        ("DISABLED", "Caching has been explicitly disabled by the user"),
        ("TOTAL", "Total number of all requests. All constituent statuses may not add up to 100%, since pages without a specific cache status are not reported"),
    )
    entries = "".join("    <li><b>%s</b> - %s</li>\n" % pair for pair in glossary)

    opening = "\n" \
              "    <h3 style=\"font: 18px Helvetica,sans-serif\">Glossary</h3>\n" \
              "    <table style=\"border: 1px solid #aaa; border-collapse: collapse; width: 650px; font-size: 12px; font-family: Helvetica,sans-serif\">\n" \
              "    <tr><td>\n"
    closing = "    </td></tr>\n" \
              "    </table>\n" \
              "    "
    return opening + entries + closing

Zerion Mini Shell 1.0