# NOTE(review): the following lines are residue from a web-shell file dump
# and are not Python; commented out so the module can be imported.
# Mini Shell
# Direktori : /opt/imh-python/lib/python3.9/site-packages/ngxutil/
# Upload File :
# Current File : //opt/imh-python/lib/python3.9/site-packages/ngxutil/logparse.py

# vim: set ts=4 sw=4 expandtab syntax=python:
"""

ngxutil.logparse
Nginx access.log parser

@author J. Hipps <jacobh@inmotionhosting.com>

"""

import logging
import re
import socket
import math
from time import time
from collections import OrderedDict

import arrow
from tailer import Tailer

from ngxutil.cache import find_cache_item_url
from ngxutil.util import *

logger = logging.getLogger('ngxutil')

# Regex for one nginx access.log line in the custom ngxutil log format.
# Named groups capture the combined-log base fields (clientip, ident, user,
# timestamp, method, uri, httpversion, status, bytes, referrer, agent),
# then bracketed extension fields ([rt=], [proto=], [args=], [ucs=], [xpc=],
# [uct=], [gz=]) plus several optional groups ([loc=], [br=], the TLS
# triple [tls=]/[cipher=]/[scheme=], and [eorigin=]), and finally vhost,
# server_host, server_hostname and connection_id.
# NOTE(review): 'bytes' may match a literal '-' and the optional groups
# yield None when absent -- downstream consumers must handle both cases.
LOGRGX = r'(?P<clientip>[0-9a-fA-F\.\:]{7,32}) (?P<ident>[^ ]+) (?P<user>[^ ]+) ' \
         r'\[(?P<timestamp>[^\]]+)\] "(?P<method>[A-Za-z0-9]+) (?P<uri>.+) ' \
         r'HTTP/(?P<httpversion>[0-9\.]{1,4})" (?P<status>[0-9]{3}) (?P<bytes>[0-9]+|\-) ' \
         r'"(?P<referrer>.+)" "(?P<agent>[^\"]+)" ' \
         r'(?:\[loc=(?P<loc>[^\]]+)\] )?' \
         r'\[rt=(?P<request_time>[^\]]+)\] \[proto=(?P<protocol>[^\]]+)\] \[args=(?P<args>[^\]]*)\] ' \
         r'\[ucs=(?P<upstream_cache_status>[^\]]*)\] \[xpc=(?P<proxy_cache>[^\]]+)\] ' \
         r'\[uct=(?P<upstream_connect_time>[^\]]*)\] \[gz=(?P<gzip_ratio>[^\]]*)\]' \
         r'(?: \[br=(?P<br_ratio>[^\]]*)\])?' \
         r'(?: \[tls=(?P<tls_version>[^\]]*)\] \[cipher=(?P<tls_ciphers>[^\]]*)\] \[scheme=(?P<scheme>[^\]]*)\])?' \
         r'(?: \[eorigin=(?P<eorigin>[^\]]*)\])? ' \
         r'(?P<vhost>[a-zA-Z\._:\-0-9]+) (?P<server_host>[a-zA-Z\._:\-0-9]+) ' \
         r'(?P<server_hostname>[a-zA-Z\._:\-0-9]+) (?P<connection_id>[0-9]+)'


class NgxTailer(Tailer):
    """
    NGINX access log reader
    Reads chunks of the logfile in reverse, parses each line,
    then stops when conditions are met.
    Extends tailer.Tailer from the PyTailer module
    """
    # compiled access-log regex (shared class-wide)
    rgx = re.compile(LOGRGX)
    # cutoff timestamp; parsed lines older than this are dropped
    # (epoch 0 keeps everything)
    span_start = arrow.get(0)
    # requested line limit for read_log(); None means no limit
    lastlines = None

    def up_next_lines(self, lines=100):
        """
        Return the next @lines of the file in reverse

        Seeks backwards up to @lines line-starts from the current file
        position, reads that span as one chunk, then restores the file
        position to the chunk start so a subsequent call continues
        walking toward the beginning of the file.

        @lines: maximum number of lines to step back
        Returns a list of raw line strings (empty at start of file)
        """
        __pstart = time()
        end_pos = self.file.tell()

        # seek_line() (inherited from Tailer) moves the position to the
        # previous line start; it returns falsy at beginning-of-file
        for i in range(lines):
            if not self.seek_line():
                break

        f_pos = self.file.tell()
        # read everything between the new position and where we started,
        # minus one byte -- presumably dropping the trailing newline
        # (TODO confirm against Tailer's seek_line() semantics)
        data = self.file.read(end_pos - f_pos - 1)
        self.seek(f_pos)
        if data:
            sl = self.splitlines(data)
        else:
            sl = []
        __pdelta = time() - __pstart
        logger.debug("*** NgxTailer prof: up_next_lines() d-t = %f", __pdelta)
        return sl


    def read_log(self, lastlines=None, span=None):
        """
        Read @lastlines and/or @span from log

        @lastlines: stop once at least this many lines have been parsed
        @span: hours back from now to include (sets self.span_start)
        Returns a list of parsed-line dicts, or None if @span is invalid
        """
        __rstart = time()
        self.lastlines = lastlines
        if span:
            try:
                self.span_start = arrow.now().shift(hours=int(span) * -1)
            except Exception as e:
                logger.error("Failed to parse span start time: %s", str(e))
                return None
        else:
            # no span given: epoch 0 start keeps every line
            self.span_start = arrow.get(0)

        logger.debug("Using span=%s (span_start=%s) / lastlines=%s", span, self.span_start, lastlines)
        self.seek_end()

        logset = []
        while True:
            __pstart = time()
            # walk backwards through the file in 1000-line chunks
            tset = self.parse_lineset(self.up_next_lines(1000))
            # NOTE(review): a chunk where every line is unparseable or
            # older than span_start also terminates the loop here
            if not len(tset):
                break
            else:
                logset += tset
            # NOTE(review): may overshoot @lastlines by up to one chunk;
            # the result is not trimmed to exactly @lastlines entries
            if self.lastlines and len(logset) >= self.lastlines:
                break
            __pdelta = time() - __pstart
            logger.debug("*** NgxTailer prof: read_log() loop d-t = %f [len(tset) = %d / len(logset) = %d]", __pdelta, len(tset), len(logset))

        __rdelta = time() - __rstart
        logger.debug("NgxTailer: Parsed %d lines in %f seconds", len(logset), __rdelta)
        return logset

    def parse_lineset(self, lines):
        """
        Parse set of @lines, return list

        Unparseable or out-of-span lines are silently dropped
        (parse_logline() returns None for those).
        """
        __pstart = time()
        lset = []
        for tline in lines:
            pline = self.parse_logline(tline)
            if pline:
                lset.append(pline)
        __pdelta = time() - __pstart
        logger.debug("*** NgxTailer prof: parse_lineset() d-t = %f", __pdelta)
        return lset

    def parse_logline(self, line):
        """
        Parse a single log @line, return string

        Matches @line against the compiled LOGRGX pattern and returns
        the named capture groups as a dict; returns None when the line
        does not match or its timestamp is older than span_start.
        """
        try:
            pline = self.rgx.match(line.strip()).groupdict()
            # timestamp uses the nginx $time_local layout, eg. 02/Jan/2006:15:04:05 -0700
            pdate = arrow.get(pline['timestamp'], 'DD/MMM/YYYY:HH:mm:ss Z')
            if pdate < self.span_start:
                return None
            return pline
        except Exception as e:
            # broad catch is deliberate: covers both regex non-match
            # (AttributeError on None.groupdict) and arrow parse errors
            logger.debug("Failed to parse line ('%s'): %s", line.strip(), str(e))
            return None


def get_cache_hitrate(logdata):
    """
    Tally the proxy_cache status of every entry in @logdata

    Returns a dict mapping each cache status to its number of hits
    """
    counts = {}
    for entry in logdata:
        status = entry.get('proxy_cache')
        counts[status] = counts.get(status, 0) + 1
    return counts

def get_status_hitrate(logdata):
    """
    Return a dict of response statuses and corresponding hitrates, ignores
    5xx hitrates if eorigin is cpanel.

    The result contains one pre-seeded bucket per status family
    ('1xx'..'5xx') plus one key per exact status code seen.
    """
    # pre-seed family buckets so they always appear in the output
    odict = {'1xx': 0, '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0}
    for tline in logdata:
        tpc = tline.get('status')
        eorigin = tline.get('eorigin')
        # cPanel-origin 5xx responses are excluded from the tallies
        if tpc and eorigin == 'cpanel' and tpc[0] == '5':
            continue
        # per-status tally (a missing status counts under key None,
        # matching the original behavior)
        odict[tpc] = odict.get(tpc, 0) + 1
        # family tally; statuses outside the pre-seeded 1xx-5xx range
        # (and missing statuses) are intentionally not family-counted
        if tpc:
            family = tpc[0] + 'xx'
            if family in odict:
                odict[family] += 1
    return odict

def get_reqtime(logdata):
    """
    Return min/avg/max request time (floats)

    Entries with a missing or non-numeric request_time are skipped.
    Returns zeroes for all three values when @logdata is empty or no
    entry has a parseable request time (original raised ZeroDivisionError).
    """
    rtimes = []
    for x in logdata:
        try:
            rtimes.append(float(x.get('request_time', 0.0)))
        except (TypeError, ValueError):
            # e.g. rt field of '-' or None; ignore this entry
            pass
    if not rtimes:
        return {'avg': 0.0, 'min': 0.0, 'max': 0.0}
    return {
        # fsum for an accurate float total
        'avg': math.fsum(rtimes) / len(rtimes),
        'min': min(rtimes),
        'max': max(rtimes),
    }

def get_bytes(logdata):
    """
    Return TX bytes (float total across @logdata)

    nginx logs '-' for responses with no body, which the log regex
    passes through verbatim; such entries (and any other non-numeric
    values) count as zero instead of raising ValueError.
    """
    vals = []
    for x in logdata:
        try:
            vals.append(float(x.get('bytes', 0.0)))
        except (TypeError, ValueError):
            # '-' or None byte count; contributes nothing to the total
            pass
    return math.fsum(vals)

def get_top_uris(logdata, rmethod=None, czone=None, topcount=10, colorize=True):
    """
    Return list of top URIs and number of hits, up to a max of @topcount
    If @rmethod is set, only methods that match will be returned
    (eg. POST, GET, etc.)
    If @czone is set to the cache zone, add a 'cache' key to show
    current cache status
    """
    hits = {}
    for entry in logdata:
        if rmethod and entry['method'] != rmethod:
            continue
        # composite key: method/status/host/uri joined by single spaces
        key = "{method} {status} {server_host} {uri}".format(**entry)
        hits[key] = hits.get(key, 0) + 1

    ranked = sorted(hits.items(), key=lambda kv: kv[1], reverse=True)

    result = OrderedDict()
    for key, count in ranked[:topcount]:
        # maxsplit=3 keeps any spaces inside the URI intact
        method, status, host, uri = key.split(' ', 3)
        if not czone:
            cstat = '-'
        else:
            # probe the cache zone for the item under both schemes
            citem = find_cache_item_url(czone, 'http://{}{}'.format(host, uri), method)
            if not citem:
                citem = find_cache_item_url(czone, 'https://{}{}'.format(host, uri), method)
            if not citem:
                cstat = strcolor('red', 'NO') if colorize else 'no'
            elif citem['expired']:
                cstat = strcolor('yellow', 'EXPIRED') if colorize else 'expired'
            else:
                cstat = strcolor('green', 'VALID') if colorize else 'valid'
        result[key] = {'method': method, 'status': status, 'host': host,
                       'uri': uri, 'hits': count, 'cache': cstat}
    return dict(result)

def get_top_hosts(logdata, topcount=10):
    """
    Return list of top Hosts and number of hits, up to a max of @topcount
    """
    counts = {}
    for entry in logdata:
        host = "{server_host}".format(**entry)
        counts[host] = counts.get(host, 0) + 1

    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:topcount]
    # plain dicts preserve insertion order (py3.7+), so the result keeps
    # the hit-count ordering just like the original OrderedDict did
    return {host: {'host': host, 'hits': hitcount} for host, hitcount in ranked}

def get_top_locations(logdata, topcount=10):
    """
    Return list of top locations and number of hits, up to a max of @topcount
    """
    counts = {}
    for entry in logdata:
        loc = entry['loc']
        counts[loc] = counts.get(loc, 0) + 1

    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
    return dict(ranked[:topcount])

def get_top_agents(logdata, topcount=10):
    """
    Return list of top User-Agents and number of hits, up to a max of @topcount
    """
    counts = {}
    for entry in logdata:
        agent = entry['agent']
        counts[agent] = counts.get(agent, 0) + 1

    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
    return dict(ranked[:topcount])

def get_top_ips(logdata, rresolve=True, rtimeout=2.0, topcount=10):
    """
    Return list of top client IPs and number of hits, up to a max of @topcount
    If @rresolve is True, reverse lookups will be performed
    @rtimeout sets the socket default timeout used for the lookups

    Each value is a dict with 'hits' and 'reverse' (resolved hostname,
    or None when resolution is disabled or fails).
    """
    tally = {}
    for tline in logdata:
        cip = tline['clientip']
        tally[cip] = tally.get(cip, 0) + 1

    # bound the reverse-DNS lookups so a dead resolver can't hang us
    socket.setdefaulttimeout(rtimeout)
    odict = OrderedDict()

    for tip, thit in sorted(tally.items(), key=lambda x: x[1], reverse=True)[:topcount]:
        tres = None
        if rresolve:
            try:
                tres = socket.gethostbyaddr(tip)[0]
            except OSError:
                # covers socket.herror/gaierror/timeout (all OSError
                # subclasses in py3); was a bare except that also hid
                # unrelated bugs like KeyboardInterrupt
                tres = None
        odict[tip] = {'hits': thit, 'reverse': tres}
    return dict(odict)

def get_totals(logdata):
    """
    Calculate totals/averages for request time, size, etc.

    Returns a dict with 'rt_tot': the summed request_time across
    @logdata. Entries with a missing or non-numeric request_time
    contribute zero.
    """
    # original referenced an undefined name `request_time` and never
    # initialized the accumulator, so it raised on any non-empty input
    odict = {'rt_tot': 0.0}
    for treq in logdata:
        try:
            odict['rt_tot'] += float(treq.get('request_time', 0.0))
        except (TypeError, ValueError):
            # rt of '-' or None; skip this entry
            pass
    return odict

def filter_log(logdata, field, match):
    """
    Filter log lines by @field, matching @match

    @match of '*' returns @logdata unfiltered. The comparison lowercases
    the field value, so @match should be passed in lowercase.
    """
    if match == '*':
        return logdata
    # optional regex groups (eg. loc, eorigin, gzip_ratio) parse to None;
    # coerce to '' so .lower() cannot raise AttributeError
    return [x for x in logdata if (x.get(field) or '').lower() == match]

def parse_log(logpath, lastlines=None, span=None):
    """
    Parse access log from @logpath; only parse last @lastlines, if defined

    @span optionally limits parsing to the last N hours (see
    NgxTailer.read_log). Returns a list of parsed-line dicts, or None
    on failure.
    """
    try:
        with open(logpath, 'r') as f:
            xtail = NgxTailer(f)
            glines = xtail.read_log(lastlines, span)
    except Exception as e:
        logger.error("Failed to read log [%s]: %s", logpath, str(e))
        return None
    # read_log() returns None when @span cannot be parsed; previously
    # len(None) raised inside the try and was reported as a read failure
    if glines is None:
        logger.error("Failed to parse log [%s]: invalid span value '%s'", logpath, span)
        return None
    logger.debug("Read %d lines from %s (requested %s)", len(glines), logpath, lastlines)
    return glines

# NOTE(review): web-shell dump footer, not Python; commented out.
# Zerion Mini Shell 1.0