Mini Shell
#!/opt/imh-python/bin/python3
"""Submit tickets to the STR queue for corrupt MyISAM tables
after attempting automatic repair"""
from collections.abc import Generator
from pathlib import Path
from argparse import ArgumentParser
from datetime import datetime
import platform
import time
import json
import sys
from subprocess import run, PIPE, DEVNULL
from tabulate import tabulate
import pymysql
from pymysql.cursors import DictCursor
import rads
# Upper bound, in MiB, on a database's on-disk size (as reported by
# db_size_mb) for still_damaged() to attempt an automatic repair.
MAX_MB = 300
# JSON state file read by main(). The code expects a 'crashed' mapping of
# database name -> list of table names and an 'error_times' mapping of
# database name -> timestamp string.
# NOTE(review): presumably written by a separate nagios check - confirm.
SAVE_FILE = Path('/home/nagios/corrupt_myisam.json')
# Module-wide logger configured through rads.
# NOTE(review): print_out='stdout' presumably echoes records to stdout in
# addition to the log file - confirm against rads.setup_logging.
LOGGER = rads.setup_logging(
    name='mysql_corruption_str',
    path='/home/nagios/mysql_corruption_str.log',
    loglevel='INFO',
    print_out='stdout',
    fmt='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
)
def main():
    """Report MyISAM tables that remain corrupt after a repair attempt.

    Loads the saved crash data from SAVE_FILE, keeps only tables whose
    database logged an error since the previous cron run, tries to repair
    each of them, and hands whatever is still broken to send_myisam_str().
    Exits with status 1 if SAVE_FILE cannot be read or is not valid JSON.
    """
    # cutoff is the epoch time of the previous cron run
    cutoff, no_str = parse_args()
    try:
        raw = SAVE_FILE.read_text('utf-8')
        data = json.loads(raw)
    except (ValueError, OSError):
        LOGGER.critical('Cannot read from %s', SAVE_FILE)
        sys.exit(1)
    still_broken = [
        tbl for tbl in iter_crashed(data, cutoff) if still_damaged(tbl)
    ]
    send_myisam_str(still_broken, no_str)
def parse_args() -> tuple[int, bool]:
    """Parse the command line.

    Returns:
        A ``(cutoff, no_str)`` tuple. ``cutoff`` is the epoch time (whole
        seconds) of the previous cron run, i.e. now minus ``--interval``
        hours. ``no_str`` is True when ``--no-str`` was given.

    Exits the program if the interval is smaller than one hour.
    """
    cli = ArgumentParser(description=__doc__)
    cli.add_argument(
        '-i',
        '--interval',
        metavar='INTERVAL',
        type=int,
        required=True,
        help='The interval in hours which the this cron is configured. '
        'This cron will not alert for errors older than this interval.',
    )
    cli.add_argument(
        '--no-str', action='store_true', help='Do not submit an STR ticket'
    )
    opts = cli.parse_args()
    hours = opts.interval
    if hours < 1:
        sys.exit('invalid interval')
    # errors logged at or before this moment belong to an earlier run
    cutoff = int(time.time() - 3600 * hours)
    return cutoff, opts.no_str
def repair_table(db_name: str, tbl_name: str) -> bool:
    """Attempt to repair the supplied table.

    Connects using the credentials in /root/.my.cnf with ``db_name`` as the
    default database and runs ``REPAIR TABLE`` on ``tbl_name``.

    Args:
        db_name: database that contains the table
        tbl_name: table to repair; backticks in the name are escaped

    Returns:
        True if the final result row reports ``OK``, otherwise False.
    """
    quoted = tbl_name.replace('`', '``')
    with pymysql.connect(
        read_default_file='/root/.my.cnf', database=db_name
    ) as conn, conn.cursor(DictCursor) as cur:
        cur.execute(f"REPAIR TABLE `{quoted}`")
        rows = cur.fetchall()
    # REPAIR TABLE may emit several info/warning/error rows per table; only
    # the last row carries the final status, so inspect that one rather than
    # the first. An empty result is treated as "not repaired".
    return bool(rows) and rows[-1]['Msg_text'] == 'OK'
def still_damaged(tbl_tuple: tuple[str, str]) -> bool:
    """Try to repair one table and report whether it still needs attention.

    Args:
        tbl_tuple: ``(db_name, tbl_name)`` pair

    Returns:
        False when the database is larger than MAX_MB or its size cannot be
        determined (no repair is attempted), or when the repair succeeded.
        True when the repair failed or raised.
    """
    database, table = tbl_tuple
    try:
        size_mb = db_size_mb(database)
    except Exception:
        # size unknown: assume too big to auto-repair
        small_enough = False
    else:
        small_enough = size_mb <= MAX_MB
    if not small_enough:
        # If the DB is too large to repair, don't worry about it.
        return False
    try:
        repaired = repair_table(database, table)
    except Exception:
        return True
    return not repaired
def db_size_mb(db_name: str) -> int:
    """Return the disk usage of /var/lib/mysql/<db_name> in MiB.

    Runs ``du -sB 1M`` on the directory and parses the first field of its
    output.

    Raises:
        subprocess.CalledProcessError: ``du`` exited non-zero
        subprocess.TimeoutExpired: ``du`` ran longer than 10 seconds
    """
    datadir = Path('/var/lib/mysql', db_name)
    proc = run(
        ['du', '-sB', '1M', str(datadir)],
        stdout=PIPE,
        stderr=DEVNULL,
        timeout=10,
        check=True,
    )
    # output looks like b"<size>\t<path>\n"; int() accepts the bytes field
    size_field = proc.stdout.split(maxsplit=1)[0]
    return int(size_field)
def iter_crashed(data, timestamp) -> Generator[tuple[str, str], None, None]:
    """Yield ``(db_name, table)`` for every recently crashed table.

    Args:
        data: mapping with a ``'crashed'`` entry (database name -> list of
            table names) and an ``'error_times'`` entry (database name ->
            time string such as ``'2018-11-28 8:59:30'``)
        timestamp: epoch seconds; databases whose error time is at or
            before this value are skipped
    """
    for db_name, tbl_names in data['crashed'].items():
        # %H in strptime also matches a non-padded hour such as '8'
        logged_at = datetime.strptime(
            data['error_times'][db_name], '%Y-%m-%d %H:%M:%S'
        )
        if logged_at.timestamp() > timestamp:
            yield from ((db_name, tbl) for tbl in tbl_names)
def send_myisam_str(still_broken: list[list[str]], no_str: bool):
    """Email an STR ticket listing the tables that are still broken.

    Args:
        still_broken: ``(db_name, tbl_name)`` pairs that could not be repaired
        no_str: when True, only log what would have been sent

    Does nothing beyond a debug log line when ``still_broken`` is empty.
    """
    if not still_broken:
        LOGGER.debug('No broken MyISAM tables - no STR submitted')
        return
    # "db.table, db.table, ..." for the log lines
    names = ', '.join('.'.join(pair) for pair in still_broken)
    if no_str:
        LOGGER.warning(
            "Would have sent STR for broken MyISAM tables: %s",
            names,
        )
    else:
        LOGGER.warning(
            'Sending email for broken MyISAM tables: %s',
            names,
        )
        grid = tabulate(
            still_broken, headers=['Database', 'Table'], tablefmt="simple"
        )
        subject = f'Damaged MyISAM tables on {platform.node()}'
        message = f"""
By tailing the MySQL error logs on this server, the following MyISAM tables were
found to be corrupt.
{grid}
This ticket will be resubmitted until the MySQL no longer logs new errors for
this database.""".lstrip()
        rads.send_email(
            to_addr='str@imhadmin.net',
            subject=subject,
            body=message,
        )
if __name__ == '__main__':
    # Run under a named lock so overlapping cron invocations do not repair
    # and report the same tables at once.
    # NOTE(review): rads.lock presumably raises LockError when the lock is
    # already held - confirm against rads.
    try:
        with rads.lock('mysql_corruption_str'):
            main()
    except rads.LockError:
        sys.exit('Another instance already running. Exiting.')
Zerion Mini Shell 1.0