# vim: set ts=4 sw=4 expandtab syntax=python:
"""

ngxstats.postgres
Realtime Nginx stats aggregation tool
Postgres writer process

Copyright (c) 2019-2020 InMotion Hosting, Inc.
https://www.inmotionhosting.com/

@author J. Hipps <jacobh@inmotionhosting.com>

See schema.sql for a description of Postgres table schemas

"""

import os
import multiprocessing
import logging
import logging.handlers
import queue
from time import time

import arrow
import psycopg2
from psycopg2.extras import RealDictCursor, Inet, register_hstore
from setproctitle import setproctitle
from netaddr import IPAddress

from ngxstats.util import gconf, check_alive

logger = logging.getLogger('ngxstats')
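
# A minimal sketch of how this writer might be launched (assumed; the actual
# supervisor lives elsewhere in ngxstats):
#
#   from multiprocessing import Process, Queue
#   inq = Queue()
#   Process(target=start, args=(inq,)).start()
#   inq.put(parsed_requests)  # each queue item is a list of request dicts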


def start(inq):
    """
    Postgres writer
    """
    ppid = os.getppid()
    logger.info(
        "postgres: subservice started. pid = %d / ppid = %d",
        multiprocessing.current_process().pid,
        ppid,
    )
    try:
        setproctitle("ngxstats: postgres")
    except Exception:
        pass

    # Setup initial connection
    con, cur = connect()

    if not con:
        logger.critical("postgres: aborting due to failed Postgres connection")
        os._exit(1)  # pylint: disable=protected-access

    # Main processing loop
    while True:
        # Check if master is still alive
        if not check_alive(ppid):
            logger.info("postgres: master %d has died. aborting.", ppid)
            os._exit(0)  # pylint: disable=protected-access

        # Grab the next batch, hot off the queue.
        # Block for up to 1 second; on timeout, loop back around to check
        # that the parent is still alive, then block again until a
        # measurement arrives
        try:
            tbatch = inq.get(timeout=1)
            logger.debug(
                "postgres: received new batch of %d items", len(tbatch)
            )
        except queue.Empty:
            continue

        qbatch = []
        qt_start = time()
        for treq in tbatch:
            # Parse timestamp
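            # nginx $time_local is formatted like "02/Jan/2020:13:37:00 -0500",
            # which the Arrow pattern below matches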
            try:
                ttstamp = arrow.get(
                    treq.get('time_local'), 'DD/MMM/YYYY:HH:mm:ss Z'
                ).datetime
            except Exception:
                logger.warning(
                    "postgres: failed to convert to timestamp: '%s'",
                    treq.get('time_local'),
                )
                ttstamp = arrow.now().datetime

            # For future use: this dict is cast into an HSTORE type during insertion.
            # Can be used to store additional attributes that don't need indexing
            extra_attrs = {}

            # Build query string and perform data conversions
            qstr = (
                "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
                "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            )
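            # (26 placeholders, one per element of the qdata tuple below)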
            qdata = (
                ttstamp,
                safeinet(treq.get('remote_addr')),
                safestr(treq.get('remote_user'), 64),
                treq.get('request'),
                safecast(treq.get('status'), int, 0),
                safecast(treq.get('body_bytes_sent'), int, 0),
                treq.get('referer'),
                safestr(treq.get('user_agent')),
                safestr(treq.get('geoip_country'), 64),
                safestr(treq.get('geoip_city'), 64),
                safecast(treq.get('request_time'), float, 0.0),
                safestr(treq.get('proto'), 16),
                safestr(treq.get('ucs'), 20),
                safestr(treq.get('xpc'), 20),
                safecast(treq.get('uct'), float, 0.0),
                safecast(treq.get('gz'), float),
                safecast(treq.get('brotli'), float),
                safestr(treq.get('tls'), 10),
                safestr(treq.get('cipher'), 64),
                safestr(treq.get('scheme'), 8),
                safestr(treq.get('eorigin'), 8),
                safestr(treq.get('server_name')),
                safestr(treq.get('http_host')),
                safestr(
                    treq.get('hostname', '').strip().split('.', maxsplit=1)[0]
                ),
                safecast(treq.get('connection_id'), int),
                extra_attrs,
            )

            # Use mogrify to get an interpolated query string for this entry
            qstrlog = cur.mogrify(qstr, qdata).decode('utf8', errors='ignore')
            # logger.debug("postgres: process 1 entry: %s", qstrlog)
            qbatch.append(qstrlog)
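            # mogrify() renders the placeholders into a literal SQL tuple;
            # roughly (illustrative values, not real log data):
            #   ('2020-01-02T13:37:00+00:00'::timestamptz, '203.0.113.5'::inet, ...)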

        # Batch INSERT all queued rows in a single statement; this is much
        # faster than running the inserts one-by-one or via executemany
        if qbatch:
            qt_time = time() - qt_start
            try:
                cur.execute("INSERT INTO access_log VALUES " + ','.join(qbatch))
                con.commit()
                logger.debug(
                    "postgres: commit ok: wrote %d entries in %01.2fs",
                    len(qbatch),
                    qt_time,
                )
            except Exception as e:
                logger.error(
                    "postgres: failed to commit writes to Postgres: %s", str(e)
                )
                # XXX-TODO: figure out a better way to determine the failed entry without
                # discarding the whole batch
                # inq.put(tbatch)
                # logger.warning("postgres: pushed failed batch back onto the queue")
                logger.warning(
                    "postgres: discarding batch with %d requests", len(tbatch)
                )
                logger.warning("postgres: reconnecting...")
                con, cur = connect()


def connect():
    """
    Connect/reconnect to Postgres, register types, and setup cursor
    """
    try:
        con = psycopg2.connect(gconf.pg_socket)
        logger.info("postgres: connected OK")
    except Exception as e:
        logger.error(
            "postgres: failed to connect to Postgres server: %s", str(e)
        )
        return (None, None)

    try:
        register_hstore(con, globally=True)
    except Exception as e:
        logger.error("postgres: failed to register types: %s", str(e))
        return (None, None)

    try:
        cur = con.cursor(cursor_factory=RealDictCursor)
    except Exception as e:
        logger.error("postgres: failed to create cursor: %s", str(e))
        return (None, None)

    return (con, cur)


def safestr(ival, maxlen=256, nullok=False):
    """
    Truncates string @ival to at most @maxlen characters
    Returns "" (or None if @nullok is set) if @ival is not a string
    """
    try:
        if len(ival) > maxlen:
            logger.warning(
                "postgres: safestr: value '%s' was truncated to %d chars",
                ival,
                maxlen,
            )
        ostr = ival[:maxlen]
    except Exception:
        ostr = None if nullok else ""
    return ostr
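
# Illustrative safestr behavior (examples assumed, not from the original source):
#   safestr("abcdefgh", 4)      -> "abcd"  (and logs a truncation warning)
#   safestr(None)               -> ""      (non-strings fall back to "")
#   safestr(None, nullok=True)  -> None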


def safecast(ival, itype, dval=None):
    """
    Safely cast @ival to @itype
    Returns @dval if cast fails
    """
    try:
        return itype(ival)
    except Exception as e:  # pylint: disable=unused-variable
        # logger.warning("postgres: cast '%s' to type %s failed: %s", ival, str(itype), str(e))
        return dval
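
# Illustrative safecast behavior (examples assumed, not from the original source):
#   safecast("42", int, 0)   -> 42
#   safecast("abc", int, 0)  -> 0     (cast fails, default returned)
#   safecast(None, float)    -> None  (dval defaults to None)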


def safeinet(ipaddr):
    """
    Returns Inet(ipaddr) if @ipaddr is valid
    Otherwise returns None
    """
    try:
        # IPAddress() raises an AddrFormatError if ipaddr is invalid
        IPAddress(ipaddr)
        return Inet(ipaddr)
    except Exception:
        pass
    return None
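
# Illustrative safeinet behavior (examples assumed, not from the original source):
#   safeinet("203.0.113.5")  -> Inet('203.0.113.5')
#   safeinet("not-an-ip")    -> None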
