Mini Shell

Direktori : /opt/sharedrads/
Upload File :
Current File : //opt/sharedrads/limit_bots

#!/opt/imh-python/bin/python3
import argparse
import os
import platform
import pwd
import re
import sys
import yaml
from pathlib import Path

from pp_api import PowerPanel
from rads import CpuserError, get_owner, OUR_RESELLERS, UserData

# Module-level Power Panel API client. send_pp_email() uses it to fetch the
# notification template, send the notification and insert the account note.
pp = PowerPanel()

class BotBlocker():
    """Block crawler bots in ``.htaccess`` and rate-limit them via ``robots.txt``.

    The set of bots is either the ``bots`` keyword argument or
    ``DEFAULT_BOTS``, minus any names listed in the user's
    ``/home/<user>/.imh/limit_bots.yaml`` exclude file.

    Attributes:
        user: cPanel username the files are chowned to when newly created.
        bots: bot names (User-Agent substrings) that will be blocked.
        htaccess_rule: mod_rewrite snippet blocking ``bots``; empty string
            when ``bots`` is empty.

    NOTE(review): files are written with the privileges of the calling
    process inside directories the cPanel user controls, and symlinks are
    followed by ``write_text``/``chown`` -- confirm this is acceptable.
    """

    # Bots blocked when the caller does not supply an explicit list.
    DEFAULT_BOTS = (
        'AhrefsBot',
        'Amazonbot',
        'Barkrowler',
        'Bytespider',
        'ClaudeBot',
        'DotBot',
        'Googlebot',
        'GPTBot',
        'MJ12bot',
        'PetalBot',
        'SemrushBot',
        'bingbot',
        'facebookexternalhit',
        'meta-externalagent',
    )

    # Markers delimiting the managed section inside .htaccess
    BLOCK_START = "# BEGIN InMotion Hosting Block Bots"
    BLOCK_END = "# END InMotion Hosting Block Bots"

    def __init__(self, user: str, **kwargs):
        """Build the bot list and the matching ``.htaccess`` rule.

        Args:
            user: cPanel username.
            **kwargs: ``bots`` may hold a list of bot names that overrides
                ``DEFAULT_BOTS``; a missing or empty value selects the
                defaults.
        """
        # Bots the user asked (via their exclude file) never to block
        bot_excludes = self._load_excludes(
            Path(f'/home/{user}/.imh/limit_bots.yaml')
        )

        # Generate list of bots to block in .htaccess
        requested = kwargs.get('bots') or self.DEFAULT_BOTS
        self.bots = [bot for bot in requested if bot not in bot_excludes]

        # cPanel user
        self.user = user

        # .htaccess rule to block bots
        self.htaccess_rule = self._build_rule(self.bots)

    def __repr__(self) -> str:
        return (
            f"BotBlocker(user={self.user!r}, bots={self.bots!r})"
        )

    @staticmethod
    def _load_excludes(path) -> list:
        """Return the bot names listed in the exclude file at ``path``.

        Loading is best-effort: a missing file silently yields ``[]`` and
        any other failure yields ``[]`` after a warning on stderr. The
        parsed YAML is normalised to a list of strings because
        ``yaml.safe_load`` returns ``None`` for an empty file and may
        return a scalar or mapping for unexpected content.
        """
        try:
            with open(path, 'r') as bots_file:
                loaded = yaml.safe_load(bots_file)
        except FileNotFoundError:
            # No exclude file is the normal case
            return []
        except Exception as exc:
            print(
                f"Warning: ignoring bot exclude file {path}: {exc}",
                file=sys.stderr,
            )
            return []

        if isinstance(loaded, str):
            # A plain scalar such as "Googlebot, bingbot"
            return [name for name in re.split(r'[\s,]+', loaded) if name]
        if isinstance(loaded, (list, tuple, set, dict)):
            # Iterating a mapping yields its keys
            return [name for name in loaded if isinstance(name, str)]
        return []

    @staticmethod
    def _build_rule(bots) -> str:
        """Return the mod_rewrite snippet that answers 403 to ``bots``.

        An empty bot list yields an empty string: the alternation ``()``
        would match every User-Agent and block all traffic.
        """
        if not bots:
            return ""
        return (
            "RewriteEngine On\n"
            f"RewriteCond %{{HTTP_USER_AGENT}} ({'|'.join(bots)}) [NC]\n"
            "RewriteRule .* - [F,L]"
        )

    def _chown_to_user(self, path):
        """Give ``path`` to ``self.user`` (uid and primary gid)."""
        account = pwd.getpwnam(self.user)
        os.chown(path, account.pw_uid, account.pw_gid)

    def write_htaccess(self, docroot):
        """Block bots by adding the managed section to ``docroot/.htaccess``.

        An existing managed section (between the BEGIN/END markers) is
        replaced; otherwise the section is prepended to the existing file,
        or a new file owned by the user is created. Nothing is written when
        there are no bots to block.
        """
        htaccess_path = Path(docroot) / ".htaccess"

        if not self.bots:
            # Never emit a rule with an empty alternation (matches everything)
            print(f"No bots to block. {htaccess_path} not changed.")
            return

        snippet = f'{self.BLOCK_START}\n{self.htaccess_rule}\n{self.BLOCK_END}\n'

        if not htaccess_path.exists():
            # Create .htaccess file and write the block snippet
            htaccess_path.write_text(snippet)

            # Set ownership to user:user
            self._chown_to_user(htaccess_path)

            print(f"{htaccess_path} file created with block bots snippet")
            return

        # Read existing content
        existing_content = htaccess_path.read_text()

        # Match the managed section; the END marker may be the very last
        # thing in the file without a trailing newline.
        pattern = re.compile(
            rf'{re.escape(self.BLOCK_START)}.*?'
            rf'{re.escape(self.BLOCK_END)}(?:\n|\Z)',
            re.DOTALL,
        )

        # A callable replacement keeps backslashes in bot names literal
        # instead of having them parsed as template escapes.
        updated_content, replaced = pattern.subn(
            lambda _match: snippet, existing_content
        )
        if replaced:
            htaccess_path.write_text(updated_content)
            print(f"Replaced existing block in {htaccess_path}")
        else:
            # Prepend new block if it doesn't exist
            htaccess_path.write_text(snippet + existing_content)
            print(f"Added new block to {htaccess_path}")

    def write_robots_txt(self, docroot):
        """Rate-limit crawlers by creating ``docroot/robots.txt``.

        An existing robots.txt is never modified. A new file asks every
        user agent for a 30 second crawl delay and is chowned to the user.
        """
        robots_txt_path = Path(docroot) / "robots.txt"

        # Check if robots.txt already exists
        if robots_txt_path.exists():
            print(f"{robots_txt_path} already exists. No changes made.")
            return

        # Write the robots.txt file if it doesn't exist
        robots_txt_path.write_text("User-agent: *\nCrawl-delay: 30\n")

        # Set ownership to user:user
        self._chown_to_user(robots_txt_path)

        print(f"Rate limit added to {robots_txt_path}")

def get_calling_username():
    """Identify who is running the script.

    Returns ``"<login name>:<effective user name>"`` when a login name is
    available, otherwise just the effective user's name (``os.getlogin``
    raises ``OSError`` when it cannot determine one).
    """
    def _effective_name():
        # Name belonging to the effective uid of this process
        return pwd.getpwuid(os.geteuid()).pw_name

    try:
        login_name = os.getlogin()
        return ':'.join((login_name, _effective_name()))
    except OSError:
        return _effective_name()

def send_pp_email(user: str, bots: list):
    """Send the "High Resource Usage from Bots" Power Panel notification.

    On success the review URL is printed and a 'SYS - RA' note is added to
    the account; if the template fetch or the send reports a non-zero
    status, a message asking for manual follow-up is printed instead.

    Args:
        user: cPanel username the notice is about.
        bots: bot names substituted into the template's USER_AGENTS variable.
    """
    # Template ids:
    #   1068  IMH - High Resource Usage from Bots
    #   1069  WHH - High Resource Usage from Bots
    #   1070  IMH Reseller - High Resource Usage from Bots for Sub-Account
    manual_followup = (
        "Could not send Power Panel email or note acct, please do this manually!"
    )

    # Default to the IMH template addressed to the user, then adjust.
    template_id, send_to = '1068', user
    if 'hub' in platform.node():
        template_id = '1069'
    else:
        # If the customer is the child of a reseller, use the
        # reseller template instead and address the owner
        owner = get_owner(user)
        if owner not in OUR_RESELLERS:
            template_id, send_to = '1070', owner

    template_info = pp.call("notification.fetch-template", template=template_id)
    if template_info.status != 0:
        print(manual_followup)
        return

    # Fill in the template variables, matched by their description
    variables = {}
    for entry in template_info.data['variables']:
        if entry['description'] == "Child User":
            variables[entry['name']] = user
        elif entry['description'] == "USER_AGENTS":
            variables[entry['name']] = ', '.join(bots)

    response = pp.call(
        "notification.send",
        template=template_id,
        cpanelUser=send_to,
        **variables,
    )
    if response.status != 0:
        print(manual_followup)
        return

    print("Sent email, review at %s" % response.data['reviewUrl'])

    # Notes left while running as root are attributed to "auto"
    logged_in_user = get_calling_username().split(':')[0]
    reporter = "auto" if logged_in_user == "root" else logged_in_user
    pp(
        'hosting.insert-note',
        user=user,
        admin_user=reporter,
        flagged=False,
        type='SYS - RA',
        # Prepend user to the note because the hosting.insert-note
        # endpoint doesn't seem to honor the 'admin_user' parameter
        # This issue is tracked in Devel #4775
        # https://trac.imhtech.net/Development/ticket/4775
        note=f'{reporter}: High Resource Usage from Bots',
    )


def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` with ``user``, ``domains``,
    ``all_domains`` and ``bots``.
    """
    cli = argparse.ArgumentParser(
        description="Limit bots with robots.txt and block them in .htaccess",
    )
    # (flags, add_argument options), in the order shown in --help
    argument_specs = (
        (("user",), {"type": str, "help": "cPanel username"}),
        (
            ("--domains",),
            {"nargs": '+', "help": "List of domains to limit/block"},
        ),
        (
            ("--all-domains",),
            {
                "action": "store_true",
                "help": "Apply changes to all domains for the user",
            },
        ),
        (
            ("--bots",),
            {
                "nargs": '+',
                "help": "List of bots to block (override defaults)",
            },
        ),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args()


def main():
    """Command-line entry point.

    Parses the arguments, resolves the document roots to act on, writes the
    ``.htaccess`` block and ``robots.txt`` into each of them and finally
    sends the Power Panel notification. Exits with status 1 when the
    arguments are unusable, the user is invalid, no bots remain to block,
    or no document root matched -- in those cases no email is sent.
    """
    # Parse args
    args = parse_args()

    # Exit if invalid arguments were provided
    if not args.all_domains and not args.domains:
        print("Please provide args to --domains or use --all-domains. Examples:")
        print(" - limit_bots userna5 --domains domain1.com --bots facebookexternalhit Amazonbot")
        print(" - limit_bots userna5 --domains domain1.com domain2.com")
        print(" - limit_bots userna5 --all-domains")
        sys.exit(1)

    # Retrieve UserData for user
    try:
        udata = UserData(args.user)
    except CpuserError:
        print("Invalid cPanel User. exiting")
        sys.exit(1)

    # Create BotBlocker object
    bot_blocker = BotBlocker(udata.user, bots=args.bots)
    if not bot_blocker.bots:
        # An empty list would produce a rule matching every User-Agent
        print("No bots left to block after applying exclusions. exiting")
        sys.exit(1)

    # Create list of docroots to limit
    if args.all_domains:
        docroots = list(udata.all_roots)
    else:
        # Retrieve docroot for each domain provided in args from UserData
        wanted = set(args.domains)
        domains = [udata.primary] + udata.addons + udata.subs + udata.parked
        matched = [domain for domain in domains if domain.domain in wanted]

        # Tell the operator about typos instead of silently ignoring them
        for name in sorted(wanted - {domain.domain for domain in matched}):
            print(f"Skipping {name}: domain not found on this account")

        # Several domains can share a docroot; handle each docroot once,
        # keeping the original order.
        docroots = list(dict.fromkeys(domain.docroot for domain in matched))

    if not docroots:
        # Nothing was changed, so do not notify the customer
        print("No matching document roots found. No changes made.")
        sys.exit(1)

    # Apply block/limit to docroots
    for docroot in docroots:
        bot_blocker.write_htaccess(docroot)
        bot_blocker.write_robots_txt(docroot)

    # Send Power Panel e-mail to user
    send_pp_email(udata.user, bot_blocker.bots)

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Zerion Mini Shell 1.0