# vim: set ts=4 sw=4 expandtab syntax=python:
"""
ngxstats.postgres
Realtime Nginx stats aggregation tool
Postgres writer process
Copyright (c) 2019-2020 InMotion Hosting, Inc.
https://www.inmotionhosting.com/
@author J. Hipps <jacobh@inmotionhosting.com>
See schema.sql for a description of Postgres table schemas
"""
import os
import multiprocessing
import queue
import logging
import logging.handlers
from time import time
import arrow
import psycopg2
from psycopg2.extras import RealDictCursor, Inet, register_hstore
from setproctitle import setproctitle
from netaddr import IPAddress
from ngxstats.util import gconf, check_alive
logger = logging.getLogger('ngxstats')
def start(inq):
"""
    Postgres writer: consumes batches of parsed requests from @inq and
    bulk-inserts them into the access_log table
"""
ppid = os.getppid()
logger.info(
"postgres: subservice started. pid = %d / ppid = %d",
multiprocessing.current_process().pid,
ppid,
)
try:
setproctitle("ngxstats: postgres")
except Exception:
pass
    # Set up the initial connection
con, cur = connect()
if not con:
logger.critical("postgres: aborting due to failed Postgres connection")
os._exit(1) # pylint: disable=protected-access
# Main processing loop
while True:
# Check if master is still alive
if not check_alive(ppid):
logger.info("postgres: master %d has died. aborting.", ppid)
os._exit(0) # pylint: disable=protected-access
        # Grab the next batch, hot off the queue.
        # Block for up to 1 second, then loop so the parent-liveness
        # check above runs again before blocking anew
        try:
            tbatch = inq.get(timeout=1)
            logger.debug(
                "postgres: received new batch of %d items", len(tbatch)
            )
        except queue.Empty:
            continue
qbatch = []
qt_start = time()
for treq in tbatch:
# Parse timestamp
try:
ttstamp = arrow.get(
treq.get('time_local'), 'DD/MMM/YYYY:HH:mm:ss Z'
).datetime
except Exception:
logger.warning(
"postgres: failed to convert to timestamp: '%s'",
treq.get('time_local'),
)
ttstamp = arrow.now().datetime
# For future use: this dict is cast into an HSTORE type during insertion.
# Can be used to store additional attributes that don't need indexing
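            # For illustration, with a hypothetical key:
            # extra_attrs['cache_zone'] = 'static' would be stored as the
            # hstore pair 'cache_zone=>static'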
extra_attrs = {}
# Build query string and perform data conversions
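            # 26 placeholders, one per access_log column; the tuple below
            # must match the column order defined in schema.sql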
qstr = (
"(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
"%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
qdata = (
ttstamp,
safeinet(treq.get('remote_addr')),
safestr(treq.get('remote_user'), 64),
treq.get('request'),
safecast(treq.get('status'), int, 0),
safecast(treq.get('body_bytes_sent'), int, 0),
treq.get('referer'),
safestr(treq.get('user_agent')),
safestr(treq.get('geoip_country'), 64),
safestr(treq.get('geoip_city'), 64),
safecast(treq.get('request_time'), float, 0.0),
safestr(treq.get('proto'), 16),
safestr(treq.get('ucs'), 20),
safestr(treq.get('xpc'), 20),
safecast(treq.get('uct'), float, 0.0),
safecast(treq.get('gz'), float),
safecast(treq.get('brotli'), float),
safestr(treq.get('tls'), 10),
safestr(treq.get('cipher'), 64),
safestr(treq.get('scheme'), 8),
safestr(treq.get('eorigin'), 8),
safestr(treq.get('server_name')),
safestr(treq.get('http_host')),
                # short hostname; tolerate a missing/None value
                safestr((treq.get('hostname') or '').strip().split('.', maxsplit=1)[0]),
safecast(treq.get('connection_id'), int),
extra_attrs,
)
# Use mogrify to get an interpolated query string for this entry
qstrlog = cur.mogrify(qstr, qdata).decode('utf8', errors='ignore')
# logger.debug("postgres: process 1 entry: %s", qstrlog)
qbatch.append(qstrlog)
# Batch INSERT all queued data at once; much faster/more efficient than doing
# queries one-by-one or with executemany
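        # The assembled statement (values already interpolated by mogrify
        # above) has the shape:
        #   INSERT INTO access_log VALUES (...), (...), ..., (...)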
if qbatch:
            try:
                cur.execute("INSERT INTO access_log VALUES " + ','.join(qbatch))
                con.commit()
                # Measure after commit so the logged duration covers the
                # write itself, not just building the batch
                qt_time = time() - qt_start
                logger.debug(
                    "postgres: commit ok: wrote %d entries in %01.2fs",
                    len(qbatch),
                    qt_time,
                )
except Exception as e:
logger.error(
"postgres: failed to commit writes to Postgres: %s", str(e)
)
# XXX-TODO: figure out a better way to determine the failed entry without
# discarding the whole batch
# inq.put(tbatch)
# logger.warning("postgres: pushed failed batch back onto the queue")
logger.warning(
"postgres: discarding batch with %d requests", len(tbatch)
)
logger.warning("postgres: reconnecting...")
con, cur = connect()
def connect():
"""
    Connect/reconnect to Postgres, register types, and set up the cursor
"""
try:
con = psycopg2.connect(gconf.pg_socket)
logger.info("postgres: connected OK")
except Exception as e:
logger.error(
"postgres: failed to connect to Postgres server: %s", str(e)
)
return (None, None)
try:
register_hstore(con, globally=True)
except Exception as e:
logger.error("postgres: failed to register types: %s", str(e))
return (None, None)
try:
cur = con.cursor(cursor_factory=RealDictCursor)
except Exception as e:
logger.error("postgres: failed to create cursor: %s", str(e))
return (None, None)
return (con, cur)
def safestr(ival, maxlen=256, nullok=False):
"""
    Ensures string @ival is no longer than @maxlen chars
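    e.g. safestr("a" * 300) returns "a" * 256 (with a warning);
    safestr(None) returns "" and safestr(None, nullok=True) returns None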
"""
try:
if len(ival) > maxlen:
logger.warning(
"postgres: safestr: value '%s' was truncated to %d chars",
ival,
maxlen,
)
ostr = ival[:maxlen]
except Exception:
ostr = None if nullok else ""
return ostr
def safecast(ival, itype, dval=None):
"""
Safely cast @ival to @itype
Returns @dval if cast fails
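    e.g. safecast("42", int) returns 42; safecast("n/a", int, 0) returns 0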
"""
try:
return itype(ival)
except Exception as e: # pylint: disable=unused-variable
# logger.warning("postgres: cast '%s' to type %s failed: %s", ival, str(itype), str(e))
return dval
def safeinet(ipaddr):
"""
Returns Inet(ipaddr) if @ipaddr is valid
Otherwise returns None
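    e.g. safeinet("203.0.113.7") returns Inet('203.0.113.7');
    safeinet("not-an-ip") returns None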
"""
    try:
        # IPAddress() raises AddrFormatError for an invalid ipaddr, so
        # reaching the Inet() call below implies a valid address
        IPAddress(ipaddr)
        return Inet(ipaddr)
    except Exception:
        pass
return None