Mini Shell
#!/opt/imh-python/bin/python3
"""Parses MySQL general query logs"""
import configparser
import sys
from pathlib import Path
import re
from datetime import datetime, timedelta
import argparse
from typing import IO, Union
from pymysql.optionfile import Parser as PyMySQLParser
def _compile_regex(text: str) -> re.Pattern:
    """argparse ``type=`` callable: compile *text* or reject it cleanly.

    ``re.compile`` raises ``re.error``, which argparse does not translate
    into a usage error, so an invalid pattern would otherwise surface as a
    traceback. Re-raising as ``ArgumentTypeError`` gives the normal
    "usage: ... error: ..." message and exit status 2.
    """
    try:
        return re.compile(text)
    except re.error as exc:
        raise argparse.ArgumentTypeError(
            f"invalid regular expression {text!r}: {exc}"
        ) from exc


def parse_args(argv=None):
    """Parse the command line for the general query log parser.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point
            relies on.

    Returns:
        The ``argparse.Namespace`` with ``verbose``, ``output``, ``regex``
        (a compiled pattern or None), ``user``, ``sort`` and ``filename``.
        A filename of ``-`` is normalised to None so callers treat it the
        same as "no file given".

    Raises:
        SystemExit: on invalid arguments (argparse prints the usage error).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # fmt: off
    parser.add_argument(
        "-q", "--quiet", action="store_false", dest="verbose",
        help="Suppress non-error output",
    )
    parser.add_argument(
        "-o", "--output", metavar="FILE",
        help="Write output to FILE (default: stdout)",
    )
    parser.add_argument(
        "-r", "--regex", type=_compile_regex, metavar="REGEX",
        help="Tally arbitrary REGEX string (slow)",
    )
    # --user switches to per-user output, so a summary sort order makes no
    # sense alongside it; argparse enforces that only one is given.
    display = parser.add_mutually_exclusive_group()
    display.add_argument(
        "-u", "--user", metavar="USER",
        help="Output USER's queries instead of summary",
    )
    display.add_argument(
        '-s', '--sort', default='total',
        choices=['select', 'insert', 'update', 'replace', 'regex', 'total'],
        help='Sort summary by a type of query',
    )
    parser.add_argument(
        'filename', nargs='?',
        help='file to read from. optional - defaults to try stdin',
    )
    # fmt: on
    args = parser.parse_args(argv)
    if args.filename == '-':
        args.filename = None
    return args
class MySQLUser:
    """Per-user tally of the statement kinds seen in the log."""

    num_select: int
    num_insert: int
    num_update: int
    num_replace: int
    num_regex: int

    def __init__(self):
        # every counter starts at zero
        self.num_select = self.num_insert = self.num_update = 0
        self.num_replace = self.num_regex = 0

    @property
    def num_total(self) -> int:
        """Sum of all five counters, regex tally included."""
        return (
            self.num_select
            + self.num_insert
            + self.num_update
            + self.num_replace
            + self.num_regex
        )

    @staticmethod
    def header(qps: bool, reg: bool, file=sys.stdout):
        """Write the summary table's heading row to *file*, without a newline.

        Args:
            qps: also emit a ``<col>/s`` heading after each count heading.
            reg: include the ``Regex`` column.
            file: stream to write to.
        """
        labels = ['Sel', 'Upd', 'Ins', 'Repl'] + (['Regex'] if reg else [])
        cells = ['User'.rjust(16)]
        for label in labels:
            # each cell is preceded by a single separating space
            cells.append(' ' + f"Num{label}".rjust(8))
            if qps:
                cells.append(' ' + f"{label}/s".rjust(8))
        print(''.join(cells), end='', file=file)

    def show(self, total_secs: float, reg: bool, file=sys.stdout):
        """Write this user's counters as one table row, newline-terminated.

        Column order matches ``header``: select, update, insert, replace and
        optionally regex. When *total_secs* is non-zero each count is
        followed by its whole-number queries-per-second figure.
        """
        names = ['select', 'update', 'insert', 'replace']
        if reg:
            names.append('regex')
        cells = []
        for name in names:
            count: int = getattr(self, f'num_{name}')
            cells.append(str(count).rjust(8))
            if total_secs != 0:
                cells.append(f"{int(count / total_secs)}qps".rjust(8))
        print(''.join(f' {cell}' for cell in cells), file=file)
class TimeTracker:
    """Tracks how much log time the timestamps seen so far cover.

    ``first_date`` and ``last_date`` hold the raw timestamp strings of the
    current run of log lines (None until one is seen, or after a reset by
    the caller). ``total_time`` accumulates the span of each run that has
    been folded in with ``add_to_total``.
    """

    def __init__(self):
        self.first_date: Union[str, None] = None
        self.last_date: Union[str, None] = None
        self.total_time = timedelta()

    def add_to_total(self) -> None:
        """Add ``last - first`` of the current run to ``total_time``.

        Does nothing unless both ends of the run are known.
        """
        first = self.first_datetime
        last = self.last_datetime
        if first is not None and last is not None:
            self.total_time += last - first

    @property
    def first_datetime(self) -> Union[datetime, None]:
        """``first_date`` parsed to a datetime, or None when unset."""
        if self.first_date:
            return self.stamp_to_datetime(self.first_date)
        return None

    @property
    def last_datetime(self) -> Union[datetime, None]:
        """``last_date`` parsed to a datetime, or None when unset."""
        if self.last_date:
            return self.stamp_to_datetime(self.last_date)
        return None

    @staticmethod
    def stamp_to_datetime(mysql_stamp: str) -> datetime:
        """Convert a ``YYMMDD HH:MM:SS`` log timestamp to a datetime.

        Raises:
            ValueError: if *mysql_stamp* does not match that layout.
        """
        return datetime.strptime(mysql_stamp, '%y%m%d %H:%M:%S')

    def print_age(self):
        """Report on stderr how long ago the first timestamp was logged.

        Both outcomes are diagnostics, so both go to stderr; stdout is the
        default destination for the report itself and must stay clean.
        """
        first = self.first_datetime
        if first is None:
            print("No timestamps found in log file", file=sys.stderr)
            return
        total_seconds = (datetime.now() - first).total_seconds()
        print(
            f"First timestamp at {self.first_date}",
            f"({int(total_seconds / 3600)} hours,",
            f"{int(total_seconds / 60 % 60)} minutes,",
            f"{int(total_seconds % 60)} seconds ago)",
            file=sys.stderr,
        )
class StateTracker:
    """Running parser state shared across log lines.

    Attributes:
        query_id: connection id taken from the most recent matched line.
        username: user the most recent line was attributed to.
        verbose: whether to print the age of the first timestamp.
        id_table: connection id -> user name, filled from Connect lines.
        user_table: user name -> per-user counters.
        times: timestamp bookkeeping used for the QPS figures.
    """

    # Statement keywords that are tallied, mapped to the counter they bump.
    # Any other keyword on a Query/Execute line is attributed to the user
    # but not counted.
    _COUNTERS = {
        "select": "num_select",
        "update": "num_update",
        "insert": "num_insert",
        "replace": "num_replace",
    }

    def __init__(self, verbose: bool):
        self.query_id = "0"
        self.username = "NO_SUCH_USER"
        self.verbose = verbose
        self.id_table: dict[str, str] = {}
        self.user_table: dict[str, MySQLUser] = {}
        self.times = TimeTracker()

    def _user_for(self, query_id: str) -> str:
        """Return the user who opened connection *query_id*.

        Falls back to ``NO_SUCH_USER`` when the Connect line was never seen,
        and makes sure the returned name has an entry in ``user_table``.
        """
        username = self.id_table.get(query_id, "NO_SUCH_USER")
        if username not in self.user_table:
            self.user_table[username] = MySQLUser()
        return username

    def handle_match(self, line: str, match: re.Match) -> None:
        """Update state from a line that matched the log-entry regex.

        Args:
            line: the raw log line.
            match: the entry match; group 1 is the optional timestamp,
                group 2 the connection id, group 3 the command and group 4
                the user name (Connect) or statement keyword (others).
        """
        parsed_date = match.group(1)
        if parsed_date:  # this line carries a timestamp
            if not self.times.first_date:  # first one of the current run
                self.times.first_date = parsed_date
                if self.verbose:
                    self.times.print_age()
            self.times.last_date = parsed_date

        command = match.group(3)
        # Remember the id for every command, not just Connect/Query/Execute,
        # so that a following regex-only line (handle_user_match with no
        # entry match) is credited to this connection rather than to
        # whichever one happened to be seen before it.
        self.query_id = match.group(2)

        if command == "Connect":
            if self.query_id in self.id_table:
                # A second Connect line for a known id that only reports a
                # denied login (or contains ' as on') changes nothing.
                if 'Access denied for user' in line or ' as on' in line:
                    return
                # Otherwise the id was reused, which likely means MySQL
                # restarted. Bank the time covered so far and drop the
                # id lookup table and the current timestamps. The per-user
                # counts stay: qps = total queries / total time.
                self.times.add_to_total()
                self.id_table.clear()
                self.times.last_date = None
                self.times.first_date = None
            self.username = match.group(4)
            self.id_table[self.query_id] = self.username
            if self.username not in self.user_table:
                self.user_table[self.username] = MySQLUser()
            return

        # Query, Execute, Init DB or Prepare: attribute the line to the
        # user who opened the connection.
        self.username = self._user_for(self.query_id)
        if command in ("Query", "Execute"):
            counter = self._COUNTERS.get(match.group(4).lower())
            if counter is not None:
                tally = self.user_table[self.username]
                setattr(tally, counter, getattr(tally, counter) + 1)

    def handle_user_match(self, match: Union[re.Match, None]) -> None:
        """Count one hit of the user-supplied regex.

        Args:
            match: the log-entry match for the same line, or None when the
                line matched only the user's regex. In that case the last
                connection id seen is reused, which makes the hit land on
                the user of the preceding entry.
        """
        if match is not None:
            try:
                self.query_id = match.group(2)
            except IndexError:
                # match came from a pattern without a connection-id group;
                # keep the last id seen
                pass
        self.username = self._user_for(self.query_id)
        self.user_table[self.username].num_regex += 1
def gen_log_path() -> Union[str, None]:
    """Look up ``general_log_file`` under ``[mysqld]`` in /etc/my.cnf.

    Returns:
        The resolved log path as a string, or None when the config file
        could not be read, the option is missing or unparsable, or the
        log is pointed at /dev/null (a note is printed to stderr then).
    """
    try:
        cnf = PyMySQLParser(strict=False)
        loaded = cnf.read('/etc/my.cnf')
        log_path = (
            Path(cnf.get('mysqld', 'general_log_file')).resolve()
            if loaded
            else None
        )
    except configparser.Error:
        return None
    if log_path is None:
        return None
    if log_path == Path('/dev/null'):
        print("MySQL log points to /dev/null currently", file=sys.stderr)
        return None
    return str(log_path)
def open_log(args) -> IO:
    """Return the stream the query log should be read from.

    With no filename and an interactive stdin, the default log path from
    my.cnf is looked up and stored in ``args.filename``. A filename (given
    or looked up) is opened as UTF-8 text with undecodable bytes replaced;
    otherwise stdin is returned.

    Raises:
        SystemExit: when no default log file can be determined or the file
            cannot be opened.
    """
    use_default = not args.filename and sys.stdin.isatty()
    if use_default:
        args.filename = gen_log_path()
        if args.filename is None:
            sys.exit("Could not get default log file from /etc/my.cnf")
        if args.verbose:
            print(
                f"Reading from the default log file, `{args.filename}'",
                file=sys.stderr,
            )

    if not args.filename:
        # nothing to open: input is being piped in
        if args.verbose:
            print(
                "MySQL general query log parser reading from stdin/pipe...",
                file=sys.stderr,
            )
        return sys.stdin

    try:
        return open(args.filename, encoding='utf-8', errors='replace')
    except OSError as exc:
        sys.exit(f"Failed to open log file `{args.filename}': {exc}")
def parse_log(
query_log: IO,
user_regex: Union[re.Pattern, None],
user: Union[str, None],
state: StateTracker,
out_file: IO,
) -> StateTracker:
# Search entry v2, group(1)=(None|Timestamp), group(2)=(ConnectionID),
# group(3)=(Connect|Query), group(4)=(UserName|QueryType)
search_re = re.compile(
r"([0-9]{6}[\s]+[0-9:]+)*[\s]+([0-9]+)\s"
r"(Connect|Query|Init DB|Prepare|Execute)[\s]+([a-zA-Z0-9]+)"
)
# main parser loop
while line := query_log.readline():
match = search_re.match(line)
user_match = user_regex.search(line) if user_regex else None
if not match and not user_match:
continue
if match:
state.handle_match(line=line, match=match)
if user_match:
state.handle_user_match(match=match)
# --user was supplied and matches this line
if user and state.username == user:
try:
print(line, end='', file=out_file)
except Exception:
sys.exit(0)
return state
def summarize(
state: StateTracker,
sort_by: str,
out_file: IO,
verbose: bool,
user_regex: Union[re.Pattern, None],
user: Union[str, None],
):
if user: # we were in per-user mode. Skip summary page
return
if not state.times.first_date: # no timestamps found at all
sys.exit("Not enough data to parse, please try a longer log file.")
total_secs = state.times.total_time.total_seconds()
show_reg = user_regex is not None
if total_secs == 0:
print('Not enough timestamps logged to display QPS', file=out_file)
sorted_entries = sorted(
state.user_table.items(),
key=lambda x: getattr(x[1], sort_by),
)
if verbose:
MySQLUser.header(qps=total_secs != 0, reg=show_reg, file=out_file)
print(file=out_file)
for username, counts in sorted_entries:
print(username.rjust(16), end='', file=out_file)
counts.show(total_secs=total_secs, reg=show_reg, file=out_file)
def main():
    """Script entry point: parse arguments, read the log, print the result.

    Raises:
        SystemExit: when the output file or the log cannot be opened, or
            when the log holds too little data to summarise.
    """
    args = parse_args()
    # determine where to write output
    if args.output:
        try:
            out_file = open(args.output, "w", encoding='utf-8')
        except OSError as exc:
            # report it the same way a failure to open the log is reported,
            # instead of letting a traceback escape
            sys.exit(f"Failed to open output file `{args.output}': {exc}")
    else:
        out_file = sys.stdout
    with out_file:
        with open_log(args) as query_log:
            state = StateTracker(args.verbose)
            parse_log(query_log, args.regex, args.user, state, out_file)
        # fold the span of the final run of timestamps into the total
        state.times.add_to_total()
        summarize(
            state,
            f"num_{args.sort}",
            out_file,
            args.verbose,
            args.regex,
            args.user,
        )


if __name__ == '__main__':
    main()
Zerion Mini Shell 1.0