Mini Shell
Direktori : /opt/sharedrads/ |
|
Current File : //opt/sharedrads/limit_bots |
#!/opt/imh-python/bin/python3
import argparse
import os
import platform
import pwd
import re
import sys
import yaml
from pathlib import Path
from pp_api import PowerPanel
from rads import CpuserError, get_owner, OUR_RESELLERS, UserData
# Shared Power Panel API client. send_pp_email() uses it to fetch the
# notification template, send the notification and insert the account note.
pp = PowerPanel()
class BotBlocker():
    """Block crawlers in .htaccess and rate-limit them via robots.txt.

    The list of bots comes from the ``bots`` keyword argument when it is
    given and non-empty, otherwise from ``DEFAULT_BOTS``. Any bot named in
    ``/home/<user>/.imh/limit_bots.yaml`` is removed from that list.

    Attributes:
        user: cPanel username; newly created files are chowned to it.
        bots: User-Agent fragments that will be blocked.
        htaccess_rule: mod_rewrite snippet denying ``bots``; an empty
            string when ``bots`` is empty.
    """

    # Markers delimiting the section of .htaccess managed by this class
    BLOCK_START = "# BEGIN InMotion Hosting Block Bots"
    BLOCK_END = "# END InMotion Hosting Block Bots"

    # Bots blocked when the caller does not supply its own list
    DEFAULT_BOTS = (
        'AhrefsBot',
        'Amazonbot',
        'Barkrowler',
        'Bytespider',
        'ClaudeBot',
        'DotBot',
        'Googlebot',
        'GPTBot',
        'MJ12bot',
        'PetalBot',
        'SemrushBot',
        'bingbot',
        'facebookexternalhit',
        'meta-externalagent',
    )

    def __init__(self, user: str, **kwargs):
        """Build the bot list and the matching .htaccess rule.

        Args:
            user: cPanel username.
            **kwargs: ``bots`` may carry a list of bot names overriding
                the defaults; a missing or empty value means "defaults".
        """
        # Generate list of bots to exclude from block list
        bot_excludes = self._load_bot_excludes(user)
        # Generate list of bots to block in .htaccess
        requested = kwargs.get('bots') or self.DEFAULT_BOTS
        self.bots = [bot for bot in requested if bot not in bot_excludes]
        # cPanel user
        self.user = user
        # .htaccess rule to Block bots. With no bots the alternation would
        # be "()", which matches EVERY User-Agent and would deny all
        # traffic, so no rule is generated in that case.
        if self.bots:
            alternation = '|'.join(self.bots)
            self.htaccess_rule = (
                "RewriteEngine On\n"
                f"RewriteCond %{{HTTP_USER_AGENT}} ({alternation}) [NC]\n"
                "RewriteRule .* - [F,L]"
            )
        else:
            self.htaccess_rule = ""

    def __repr__(self) -> str:
        return (
            f"BotBlocker(user={self.user!r}, bots={self.bots!r})"
        )

    @staticmethod
    def _load_bot_excludes(user):
        """Return the bot names listed in the user's limit_bots.yaml.

        A missing, unreadable, malformed or empty file yields an empty
        list, so the caller can always use the result with ``in``.
        """
        excludes_path = Path(f'/home/{user}/.imh/limit_bots.yaml')
        try:
            raw = excludes_path.read_text()
        except (OSError, ValueError):
            # No file, no permission, or undecodable content
            return []
        try:
            loaded = yaml.safe_load(raw)
        except yaml.YAMLError:
            return []
        # safe_load() gives None for an empty document and may give a
        # scalar or mapping; normalise everything to a list of strings.
        if isinstance(loaded, str):
            return [loaded]
        if isinstance(loaded, (list, tuple, set, dict)):
            return [item for item in loaded if isinstance(item, str)]
        return []

    def _chown_to_user(self, path):
        """Give ownership of a newly created file to the cPanel user."""
        account = pwd.getpwnam(self.user)
        os.chown(path, account.pw_uid, account.pw_gid)

    def write_htaccess(self, docroot):
        """block bots by adding to .htaccess

        Replaces the managed block when one is present, prepends a new
        block to an existing file otherwise, and creates the file (owned
        by the user) when it does not exist. Does nothing when there are
        no bots to block.
        """
        # NOTE(review): this writes inside a user-controlled directory and
        # chowns the result, so it presumably runs as root - confirm that a
        # symlinked .htaccess is not a concern here.
        htaccess_path = Path(docroot) / ".htaccess"
        if not self.bots:
            print(f"No bots to block, {htaccess_path} left unchanged")
            return
        managed_block = (
            f'{self.BLOCK_START}\n{self.htaccess_rule}\n{self.BLOCK_END}\n'
        )
        if not htaccess_path.exists():
            # Create .htaccess file and write the block snippet
            htaccess_path.write_text(managed_block)
            # Set ownership to user:user
            self._chown_to_user(htaccess_path)
            print(f"{htaccess_path} file created with block bots snippet")
            return
        # Read existing content
        existing_content = htaccess_path.read_text()
        # Match the block between the BEGIN and END tags. The trailing
        # newline is optional so a file ending right at the END tag is
        # still recognised instead of receiving a duplicate block.
        pattern = re.compile(
            rf'{re.escape(self.BLOCK_START)}.*?{re.escape(self.BLOCK_END)}\n?',
            re.DOTALL,
        )
        if pattern.search(existing_content):
            # A callable replacement inserts the text verbatim; a template
            # string would choke on backslashes in user-supplied bot names.
            updated_content = pattern.sub(
                lambda _match: managed_block, existing_content
            )
            htaccess_path.write_text(updated_content)
            print(f"Replaced existing block in {htaccess_path}")
        else:
            # Prepend the new block so it is evaluated before the
            # rules already present in the file
            htaccess_path.write_text(managed_block + existing_content)
            print(f"Added new block to {htaccess_path}")

    def write_robots_txt(self, docroot):
        """ratelimit bots by creating robots.txt

        An existing robots.txt is never modified.
        """
        robots_txt_path = Path(docroot) / "robots.txt"
        # Check if robots.txt already exists
        if robots_txt_path.exists():
            print(f"{robots_txt_path} already exists. No changes made.")
            return
        # Write the robots.txt file if it doesn't exist
        robots_txt_path.write_text("User-agent: *\nCrawl-delay: 30\n")
        # Set ownership to user:user
        self._chown_to_user(robots_txt_path)
        print(f"Rate limit added to {robots_txt_path}")
def get_calling_username():
    """Identify who is running the script.

    Returns ``login:effective`` when the login name of the controlling
    terminal is available, otherwise just the effective user's name.
    """
    effective_name = pwd.getpwuid(os.geteuid()).pw_name
    try:
        login_name = os.getlogin()
    except OSError:
        # os.getlogin() could not determine a login name
        return effective_name
    return f'{login_name}:{effective_name}'
def send_pp_email(user: str, bots: list):
    """Send the bot resource-usage notification and note the account.

    Picks the template from the hostname and the account owner, fills the
    template variables, sends the notification through Power Panel and,
    when that succeeds, inserts an account note. Prints a manual-action
    warning when the template fetch or the send fails.
    """
    # IMH - High Resource Usage from Bots id 1068
    # WHH - High Resource Usage from Bots id 1069
    # IMH Reseller - High Resource Usage from Bots for Sub-Account id 1070
    # variable2 == user
    template_id, send_to = '1068', user
    if 'hub' in platform.node():
        template_id = '1069'
    else:
        # If the customer is the child of a reseller, use the
        # reseller template instead
        owner = get_owner(user)
        if owner not in OUR_RESELLERS:
            template_id, send_to = '1070', owner

    template_info = pp.call("notification.fetch-template", template=template_id)
    if template_info.status != 0:
        print(
            "Could not send Power Panel email or note acct, please do this manually!"
        )
        return

    # Fill in only the template variables this script knows about
    variables = {}
    for variable in template_info.data['variables']:
        description = variable['description']
        if description == "Child User":
            variables[variable['name']] = user
        elif description == "USER_AGENTS":
            variables[variable['name']] = ', '.join(bots)

    response = pp.call(
        "notification.send",
        template=template_id,
        cpanelUser=send_to,
        **variables,
    )
    if response.status != 0:
        print(
            "Could not send Power Panel email or note acct, please do this manually!"
        )
        return

    print("Sent email, review at %s" % response.data['reviewUrl'])
    logged_in_user = get_calling_username().split(':')[0]
    reporter = "auto" if logged_in_user == "root" else logged_in_user
    pp(
        'hosting.insert-note',
        user=user,
        admin_user=reporter,
        flagged=False,
        type='SYS - RA',
        # Prepend user to the note because the hosting.insert-note
        # endpoint doesn't seem to honor the 'admin_user' parameter
        # This issue is tracked in Devel #4775
        # https://trac.imhtech.net/Development/ticket/4775
        note=f'{reporter}: High Resource Usage from Bots',
    )
def parse_args():
    """Define the command-line interface and parse ``sys.argv``."""
    cli = argparse.ArgumentParser(
        description="Limit bots with robots.txt and block them in .htaccess"
    )
    # (name, add_argument options), registered in help-output order
    argument_table = (
        ("user", {"type": str, "help": "cPanel username"}),
        ("--domains", {"nargs": '+', "help": "List of domains to limit/block"}),
        (
            "--all-domains",
            {
                "action": "store_true",
                "help": "Apply changes to all domains for the user",
            },
        ),
        (
            "--bots",
            {"nargs": '+', "help": "List of bots to block (override defaults)"},
        ),
    )
    for name, options in argument_table:
        cli.add_argument(name, **options)
    return cli.parse_args()
def main():
    """Entry point: apply bot limits to the chosen docroots and notify.

    Exits with status 1 when the arguments are incomplete, the cPanel
    user is invalid, no bot is left to block, or none of the requested
    domains resolve to a document root. The customer notification is
    only sent after the docroots have been processed.
    """
    # Parse args
    args = parse_args()
    # Exit if invalid arguments were provided
    if not args.all_domains and not args.domains:
        print("Please provide args to --domains or use --all-domains. Examples:")
        print(" - limit_bots userna5 --domains domain1.com --bots facebookexternalhit Amazonbot")
        print(" - limit_bots userna5 --domains domain1.com domain2.com")
        print(" - limit_bots userna5 --all-domains")
        sys.exit(1)
    # Retrieve UserData for user
    try:
        udata = UserData(args.user)
    except CpuserError:
        print("Invalid cPanel User. exiting")
        # Report the failure to the calling shell instead of exiting 0
        sys.exit(1)
    # Create BotBlocker object
    bot_blocker = BotBlocker(udata.user, bots=args.bots)
    # An empty bot list would produce a rule matching every User-Agent
    # and an email listing no bots, so stop before touching any file.
    if not bot_blocker.bots:
        print("No bots left to block after exclusions. No changes made.")
        sys.exit(1)
    # Create list of docroots to limit
    if args.all_domains:
        candidates = list(udata.all_roots)
    else:
        # Retrieve docroot for each domain provided in args from UserData
        domains = [udata.primary] + udata.addons + udata.subs + udata.parked
        candidates = [
            domain.docroot for domain in domains if domain.domain in args.domains
        ]
        # Tell the operator about requested domains that were not found
        known = {domain.domain for domain in domains}
        for requested in args.domains:
            if requested not in known:
                print(f"{requested} is not a domain of {udata.user}, skipping")
    # Several domains can share one docroot (presumably parked domains do);
    # process each docroot once, keeping the original order.
    docroots = list(dict.fromkeys(candidates))
    if not docroots:
        # Nothing was changed, so the customer must not be emailed
        print("No matching document roots found. No changes made.")
        sys.exit(1)
    # Apply block/limit to docroots
    for docroot in docroots:
        bot_blocker.write_htaccess(docroot)
        bot_blocker.write_robots_txt(docroot)
    # Send Power Panel e-mail to user
    send_pp_email(udata.user, bot_blocker.bots)
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
Zerion Mini Shell 1.0