Mini Shell

Direktori : /proc/self/root/opt/sharedrads/mysql/
Upload File :
Current File : //proc/self/root/opt/sharedrads/mysql/queryparser.py

#!/opt/imh-python/bin/python3
"""Parses MySQL general query logs"""
import configparser
import sys
from pathlib import Path
import re
from datetime import datetime, timedelta
import argparse
from typing import IO, Union
from pymysql.optionfile import Parser as PyMySQLParser


def parse_args():
    """Define the command-line interface and parse ``sys.argv``.

    ``--user`` and ``--sort`` are mutually exclusive. ``--regex`` is
    compiled by argparse itself (``type=re.compile``).

    Returns:
        The populated ``argparse.Namespace``. A positional ``filename`` of
        ``-`` is replaced with ``None``, the same value it has when omitted.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # fmt: off
    cli.add_argument(
        "-q", "--quiet", dest="verbose", action="store_false",
        help="Suppress non-error output",
    )
    cli.add_argument(
        "-o", "--output", metavar="FILE",
        help="Write output to FILE (default: stdout)",
    )
    cli.add_argument(
        "-r", "--regex", metavar="REGEX", type=re.compile,
        help="Tally arbitrary REGEX string (slow)",
    )
    # Per-user dump and summary sorting cannot be combined.
    exclusive = cli.add_mutually_exclusive_group()
    exclusive.add_argument(
        "-u", "--user", metavar="USER",
        help="Output USER's queries instead of summary",
    )
    exclusive.add_argument(
        '-s', '--sort', default='total',
        choices=['select', 'insert', 'update', 'replace', 'regex', 'total'],
        help='Sort summary by a type of query',
    )
    cli.add_argument(
        'filename', nargs='?',
        help='file to read from. optional - defaults to try stdin',
    )
    # fmt: on
    namespace = cli.parse_args()
    if namespace.filename == '-':
        namespace.filename = None
    return namespace


class MySQLUser:
    """Query counters for a single user, plus table-formatting helpers."""

    num_select: int
    num_insert: int
    num_update: int
    num_replace: int
    num_regex: int

    def __init__(self):
        # Every counter starts at zero.
        self.num_select = self.num_insert = self.num_update = 0
        self.num_replace = self.num_regex = 0

    @property
    def num_total(self) -> int:
        """Sum of all five counters (regex tallies included)."""
        return (
            self.num_select
            + self.num_insert
            + self.num_update
            + self.num_replace
            + self.num_regex
        )

    @staticmethod
    def header(qps: bool, reg: bool, file=sys.stdout):
        """Write the table header to *file* without a trailing newline.

        Args:
            qps: also emit a ``<col>/s`` heading after each count heading.
            reg: append the ``Regex`` column.
            file: destination stream.
        """
        labels = ['Sel', 'Upd', 'Ins', 'Repl'] + (['Regex'] if reg else [])
        pieces = ['User'.rjust(16)]
        for label in labels:
            # each cell is a single separating space plus an 8-wide field
            pieces.append(' ' + f"Num{label}".rjust(8))
            if qps:
                pieces.append(' ' + f"{label}/s".rjust(8))
        print(''.join(pieces), end='', file=file)

    def show(self, total_secs: float, reg: bool, file=sys.stdout):
        """Write this user's counters as one newline-terminated row.

        Args:
            total_secs: elapsed seconds used for the per-second rate; when
                it is 0 the rate cells are left out entirely.
            reg: include the regex counter.
            file: destination stream.
        """
        kinds = ('select', 'update', 'insert', 'replace')
        if reg:
            kinds += ('regex',)
        cells = []
        for kind in kinds:
            count: int = getattr(self, f'num_{kind}')
            cells.append(' ' + str(count).rjust(8))
            if total_secs != 0:
                # rate is truncated to a whole number of queries per second
                cells.append(' ' + f"{int(count / total_secs)}qps".rjust(8))
        print(''.join(cells), file=file)


class TimeTracker:
    """Remembers the first/last timestamps seen and the accumulated span.

    ``first_date`` and ``last_date`` hold the raw timestamp strings taken
    from the log (``YYMMDD HH:MM:SS``); ``total_time`` is the sum of every
    ``last - first`` span that has been folded in via ``add_to_total``.
    """

    def __init__(self):
        self.first_date: Union[str, None] = None
        self.last_date: Union[str, None] = None
        self.total_time = timedelta()

    def add_to_total(self) -> None:
        """Add the current ``last - first`` span to ``total_time``.

        Does nothing unless both timestamps are set. The stored timestamps
        are not cleared here; callers reset them when they need to.
        """
        first = self.first_datetime
        last = self.last_datetime
        if first and last:  # not None
            self.total_time += last - first

    @property
    def first_datetime(self) -> Union[datetime, None]:
        """``first_date`` parsed to a ``datetime``, or None when unset."""
        if self.first_date:
            return self.stamp_to_datetime(self.first_date)
        return None

    @property
    def last_datetime(self) -> Union[datetime, None]:
        """``last_date`` parsed to a ``datetime``, or None when unset."""
        if self.last_date:
            return self.stamp_to_datetime(self.last_date)
        return None

    @staticmethod
    def stamp_to_datetime(mysql_stamp: str) -> datetime:
        """Convert a ``YYMMDD HH:MM:SS`` log timestamp to a naive datetime.

        Raises:
            ValueError: if *mysql_stamp* does not match that format.
        """
        return datetime.strptime(mysql_stamp, '%y%m%d %H:%M:%S')

    def print_age(self):
        """Report on stderr how long ago the first timestamp was logged.

        The age is measured against ``datetime.now()``, so it presumably
        assumes the log was written in this machine's local time.
        """
        if first := self.first_datetime:
            time_delta = datetime.now() - first
            total_seconds = time_delta.total_seconds()
            print(
                f"First timestamp at {self.first_date}",
                f"({int(total_seconds / 3600)} hours,",
                f"{int(total_seconds / 60 % 60)} minutes,",
                f"{int(total_seconds % 60)} seconds ago)",
                file=sys.stderr,
            )
        else:
            # Diagnostics belong on stderr like the branch above, so they
            # never end up mixed into report output written to stdout.
            print("No timestamps found in log file", file=sys.stderr)


class StateTracker:
    """Parser state carried from one log line to the next.

    Maps connection ids to the user that opened them, keeps per-user query
    counters, and records the timestamps seen so far.
    """

    # Query verbs that have a matching ``num_<verb>`` counter on MySQLUser.
    _COUNTED_TYPES = ("select", "update", "insert", "replace")

    def __init__(self, verbose: bool):
        # connection id taken from the last Connect/Query/Execute line
        self.query_id = "0"
        # user the most recently handled line was attributed to
        self.username = "NO_SUCH_USER"
        self.verbose = verbose
        # connection id -> user name, filled in from Connect lines
        self.id_table: dict[str, str] = {}
        # user name -> query counters
        self.user_table: dict[str, MySQLUser] = {}
        self.times = TimeTracker()

    def _set_user_for(self, query_id: str) -> None:
        """Set ``self.username`` to the user that opened *query_id*.

        An id with no recorded Connect is attributed to ``NO_SUCH_USER``,
        whose counter entry is created on first use.
        """
        try:
            self.username = self.id_table[query_id]
        except KeyError:
            self.username = "NO_SUCH_USER"
            if self.username not in self.user_table:
                self.user_table[self.username] = MySQLUser()

    def handle_match(self, line: str, match: re.Match) -> None:
        """Update state from a line that matched the log-entry regex.

        Args:
            line: the raw log line.
            match: its match object; group(1) is the optional timestamp,
                group(2) the connection id, group(3) the command and
                group(4) the user name (Connect) or query verb (others).
        """
        if parsed_date := match.group(1):  # line carries a timestamp
            if not self.times.first_date:  # first timestamp we have seen
                self.times.first_date = parsed_date
                if self.verbose:
                    self.times.print_age()
            self.times.last_date = parsed_date
        command = match.group(3)
        if command == "Connect":
            self.query_id = match.group(2)
            if self.query_id in self.id_table:
                # A connection id being reused likely means that mysql
                # restarted, so the time and id lookup tables are dumped.
                # Lines carrying these markers are skipped instead —
                # presumably they are follow-up Connect entries for a
                # failed or anonymous login on the same id (TODO confirm).
                if 'Access denied for user' in line or ' as  on' in line:
                    return
                self.times.add_to_total()
                # user_table is kept: those counts are still good
                # (qps = total queries / total time)
                self.id_table.clear()
                self.times.last_date = None
                self.times.first_date = None
            self.username = match.group(4)
            # remember which user owns this connection id
            self.id_table[self.query_id] = self.username
            if self.username not in self.user_table:
                self.user_table[self.username] = MySQLUser()
        elif command in ("Query", "Execute"):
            self.query_id = match.group(2)
            self._set_user_for(self.query_id)
            # tally the verb (select, insert, update, replace); any other
            # verb is attributed to the user but not counted
            query_type = match.group(4).lower()
            if query_type in self._COUNTED_TYPES:
                counters = self.user_table[self.username]
                attr = f"num_{query_type}"
                setattr(counters, attr, getattr(counters, attr) + 1)
        else:  # Init DB or Prepare
            # only the user attribution changes; self.query_id is
            # deliberately left at its previous value
            self._set_user_for(match.group(2))

    def handle_user_match(self, match: Union[re.Match, None]) -> None:
        """Count one hit of the user-supplied regex.

        Args:
            match: the log-entry regex match for the same line, or None
                when the line only matched the user-supplied regex. With
                None the previous ``query_id`` is reused, which makes the
                attributed user likely to be the same as for the last
                matching line.
        """
        if match is not None:
            self.query_id = match.group(2)
        self._set_user_for(self.query_id)
        if self.username not in self.user_table:
            self.user_table[self.username] = MySQLUser()
        self.user_table[self.username].num_regex += 1


def gen_log_path() -> Union[str, None]:
    """Look up ``general_log_file`` in the ``[mysqld]`` section of my.cnf.

    Returns:
        The resolved log path as a string, or None when ``/etc/my.cnf``
        was not loaded, the lookup raised a ``configparser.Error``, or the
        path resolves to ``/dev/null`` (a note is printed to stderr in
        that last case).
    """
    try:
        cnf = PyMySQLParser(strict=False)
        loaded = cnf.read('/etc/my.cnf')
        if not loaded:
            return None
        log_path = Path(cnf.get('mysqld', 'general_log_file')).resolve()
    except configparser.Error:
        return None
    if log_path == Path('/dev/null'):
        print("MySQL log points to /dev/null currently", file=sys.stderr)
        return None
    return str(log_path)


def open_log(args) -> IO:
    """Return an open, readable stream for the query log.

    Source selection:
      * ``args.filename`` set: that file is opened.
      * no filename and stdin is a terminal: the default log path from
        ``gen_log_path()`` is stored in ``args.filename`` and opened.
      * otherwise: ``sys.stdin`` is returned.

    Exits the program with a message when no default log path can be
    found or when the file cannot be opened.
    """
    if not args.filename and sys.stdin.isatty():
        # nothing is being piped in, so fall back to the configured log
        default_path = gen_log_path()
        args.filename = default_path
        if default_path is None:
            sys.exit("Could not get default log file from /etc/my.cnf")
        if args.verbose:
            print(
                f"Reading from the default log file, `{args.filename}'",
                file=sys.stderr,
            )
    if not args.filename:
        if args.verbose:
            print(
                "MySQL general query log parser reading from stdin/pipe...",
                file=sys.stderr,
            )
        return sys.stdin
    try:
        # undecodable bytes are replaced rather than aborting the parse
        return open(args.filename, encoding='utf-8', errors='replace')
    except OSError as exc:
        sys.exit(f"Failed to open log file `{args.filename}': {exc}")


def parse_log(
    query_log: IO,
    user_regex: Union[re.Pattern, None],
    user: Union[str, None],
    state: StateTracker,
    out_file: IO,
) -> StateTracker:
    """Feed every line of *query_log* through *state*.

    Args:
        query_log: stream to read lines from until it is exhausted.
        user_regex: optional extra pattern; each line it is found in is
            reported to ``state.handle_user_match``.
        user: when set, every handled line attributed to this user is
            echoed to *out_file*.
        state: tracker that is updated in place.
        out_file: destination for echoed lines.

    Returns:
        The same *state* object that was passed in.
    """
    # Log entry layout: group(1)=(None|Timestamp), group(2)=(ConnectionID),
    # group(3)=(Connect|Query|Init DB|Prepare|Execute),
    # group(4)=(UserName|QueryType)
    entry_re = re.compile(
        r"([0-9]{6}[\s]+[0-9:]+)*[\s]+([0-9]+)\s"
        r"(Connect|Query|Init DB|Prepare|Execute)[\s]+([a-zA-Z0-9]+)"
    )

    while True:
        line = query_log.readline()
        if not line:  # end of input
            break
        entry = entry_re.match(line)
        custom_hit = user_regex.search(line) if user_regex else None
        if entry is None and custom_hit is None:
            continue
        if entry is not None:
            state.handle_match(line=line, match=entry)
        if custom_hit is not None:
            # entry may be None here; the tracker copes with that
            state.handle_user_match(match=entry)
        # only echo when --user was supplied and this line belongs to them
        if not user or state.username != user:
            continue
        try:
            print(line, end='', file=out_file)
        except Exception:
            # any write failure ends the run quietly with status 0
            sys.exit(0)
    return state


def summarize(
    state: StateTracker,
    sort_by: str,
    out_file: IO,
    verbose: bool,
    user_regex: Union[re.Pattern, None],
    user: Union[str, None],
):
    """Write the per-user summary table to *out_file*.

    Args:
        state: tracker holding the collected counters and timings.
        sort_by: counter attribute name to order rows by (ascending).
        out_file: destination stream.
        verbose: print the column header before the rows.
        user_regex: when not None, the regex column is shown.
        user: when set the run was in per-user mode and nothing is printed.

    Exits the program with a message when no timestamp is recorded in
    ``state.times.first_date``.
    """
    if user:
        # per-user mode already printed the matching lines
        return
    if not state.times.first_date:
        sys.exit("Not enough data to parse, please try a longer log file.")

    total_secs = state.times.total_time.total_seconds()
    have_rate = total_secs != 0
    show_reg = user_regex is not None
    if not have_rate:
        print('Not enough timestamps logged to display QPS', file=out_file)

    table = state.user_table
    ordered_names = sorted(table, key=lambda name: getattr(table[name], sort_by))

    if verbose:
        MySQLUser.header(qps=have_rate, reg=show_reg, file=out_file)
        print(file=out_file)  # header() leaves the line unterminated
    for name in ordered_names:
        print(f"{name:>16}", end='', file=out_file)
        table[name].show(total_secs=total_secs, reg=show_reg, file=out_file)

def main():
    """Script entry point: parse arguments, read the log, print results.

    Output goes to the ``--output`` file when given, otherwise to stdout.
    The log is parsed first; the elapsed time is then finalised and the
    summary is written (``summarize`` itself skips it in ``--user`` mode).
    """
    args = parse_args()
    # determine where to write output
    out_file = (
        open(args.output, "w", encoding='utf-8') if args.output else sys.stdout
    )
    with out_file:
        state = StateTracker(args.verbose)
        with open_log(args) as query_log:
            parse_log(
                query_log=query_log,
                user_regex=args.regex,
                user=args.user,
                state=state,
                out_file=out_file,
            )
        # fold the final first/last timestamp span into the total
        state.times.add_to_total()
        summarize(
            state=state,
            sort_by=f"num_{args.sort}",
            out_file=out_file,
            verbose=args.verbose,
            user_regex=args.regex,
            user=args.user,
        )


# Run the parser only when this file is executed as a script.
if __name__ == '__main__':
    main()

Zerion Mini Shell 1.0