Mini Shell

Direktori : /opt/sharedrads/domainchecker/
Current File : //opt/sharedrads/domainchecker/domainchecker.py
#!/opt/imh-python/bin/python3
'''
Checks domains in /etc/trueuserdomains and looks for obvious
fraud or phishing domains on cPanel servers.

For more documentation, please visit:
http://wiki.inmotionhosting.com/index.php?title=Domain_Checker
'''


import smtplib
import sys
import os
import re
import logging
import argparse
from pathlib import Path
from errno import EACCES
from collections import OrderedDict
import rads

# Module metadata.
__version__ = 'v0.1 Fraud Domain Checker'
__author__ = 'RileyL, KolbyH, SeanC, AlexK'

# Fallback values used by parse_arguments() when the matching command-line
# option is not supplied.
DEFAULT = {
    # Recipient of the report email (option -e).
    'email_address': 'abuse@inmotionhosting.com',
    # File the log output is written to (option -o).
    'log_file': '/var/log/domainchecker.log',
    # Plain text list of suspicious terms (option -b).
    'badwords_file': '/opt/sharedrads/domainchecker/badwords',
    # Relative path: DomainChecker.load_whitelists() joins it under
    # /home/<user>/ for each cPanel user (option -w).
    'whitelist_file': '.imh/domainchecker.whitelist',
    # File listing the domains on the server and their owners (option -d).
    'domains_file': '/etc/trueuserdomains',
}


class DomainChecker:
    '''
    Checks for suspicious domain names on cPanel servers
    and sends an email notification if any are found.

    The bad words are stored in one plain text file, one term per line;
    a ``*`` inside a term acts as a wildcard.  Whitelists are plain text
    files as well, one per cPanel user, read from
    ``/home/<user>/<whitelist_file>``.

    Example usage:
    >>> from domainchecker import DomainChecker
    >>> checker = DomainChecker(
            badwords_file='/opt/sharedrads/domainchecker/badwords',
            whitelist_file='.imh/domainchecker.whitelist',
            domains_file='/etc/trueuserdomains',
            email_address='abuse@inmotionhosting.com',
            logger=rads_logger)
    >>> checker.run()
    '''

    def __init__(
        self,
        badwords_file: str,
        whitelist_file: str,
        domains_file: str,
        email_address: str,
        logger: logging.Logger,
    ):
        '''
        Load definitions, domains, users and whitelists, then drop the
        whitelisted domains and the internal accounts from the work lists.

        :param badwords_file: - plain text file of possible phishing terms.
        :param whitelist_file: - path, relative to each user's home, of the
                                 plain text file of known good domains.
        :param domains_file: - plain text file of domains on the server,
                               one "domain: user" pair per line.
        :param email_address: - address the report is sent to.
        :param logger: - logging object to use with functions named:
                         critical() error() warning() info() and debug()
        '''

        # Get logger object
        self.logger = logger

        # Get address
        self.email_address = email_address

        # Load definitions
        self.definitions = []
        self.definitions_file = badwords_file
        self.load_definitions(badwords_file)

        # Load domains
        self.domains = []
        self.domains_file = domains_file
        self.load_domains(domains_file)

        # Load cPanel users in the format: ['userna5', 'example.com']
        self.users = []
        self.load_users(domains_file)

        # Load whitelist (needs self.users to locate the per-user files)
        self.whitelist = []
        self.whitelist_file = whitelist_file
        self.load_whitelists(whitelist_file)

        # Filter the work lists.  Both helpers build new lists instead of
        # calling remove() on a list that is being iterated, which would
        # skip the element following each removal.
        self._drop_whitelisted()
        self._drop_internal_accounts()

        # List of matches found
        self.matches = []
        self.match_text = ''

    @staticmethod
    def _parse_domain_line(line: str):
        '''
        Split one "domain: user" line into a (domain, user) tuple.

        :param line: str - raw line from the domains file.
        :return: (domain, user) with surrounding whitespace removed (user
                 may be empty), or None when the line has no colon or no
                 domain part.
        '''
        domain, colon, user = line.partition(':')
        domain = domain.strip()
        if not colon or not domain:
            return None
        return domain, user.strip()

    @staticmethod
    def _whitelist_path(user: str, target_file: str) -> Path:
        '''
        Location of a user's whitelist file: /home/<user>/<target_file>.

        :param user: str - cPanel username.
        :param target_file: str - whitelist path; leading slashes are
                            dropped so it always lands under the home dir.
        '''
        return Path('/home', user, target_file.lstrip('/'))

    def _drop_whitelisted(self) -> None:
        '''
        Remove exactly-whitelisted domains from self.users and self.domains.
        '''
        allowed = set(self.whitelist)
        kept_users = []
        for user, domain in self.users:
            if domain in allowed:
                self.logger.debug(
                    'Skipping whitelisted domain: %s/%s', user, domain
                )
            else:
                kept_users.append([user, domain])
        self.users = kept_users
        self.domains = [dom for dom in self.domains if dom not in allowed]

    def _drop_internal_accounts(self) -> None:
        '''
        Remove accounts listed in rads.SYS_USERS and their domains.
        '''
        internal_users = tuple(rads.SYS_USERS)
        internal_domains = set()
        kept_users = []
        for user, domain in self.users:
            if user in internal_users:
                self.logger.debug(
                    'Removed internal account: %s/%s', user, domain
                )
                internal_domains.add(domain)
            else:
                kept_users.append([user, domain])
        self.users = kept_users
        self.domains = [
            dom for dom in self.domains if dom not in internal_domains
        ]

    def load_definitions(self, target_file: str) -> None:
        '''
        Append the possible phishing words found in a file to
        self.definitions.  Blank lines are skipped; an unreadable file is
        logged and leaves the list unchanged.

        :param target_file: File to load definitions from.
        '''
        try:
            with open(target_file, encoding='utf-8') as definitions_file:
                for line in definitions_file:
                    word = line.strip()
                    # An empty term would match every domain
                    if word:
                        self.definitions.append(word)
        except OSError:
            self.logger.error('Could not open %s', target_file)

    def load_whitelists(self, target_file: str) -> None:
        '''
        Append the entries of every user's whitelist file to
        self.whitelist.  The file (and its parent directory) is created
        when missing.  Blank lines are skipped.

        :param target_file: Whitelist path relative to each user's home.
        '''
        # A user owning several domains appears several times in
        # self.users; read each whitelist file only once.
        unique_users = OrderedDict.fromkeys(user for user, _ in self.users)
        for user in unique_users:
            user_whitelist = self._whitelist_path(user, target_file)
            try:
                basedir = user_whitelist.parent
                if not basedir.exists():
                    basedir.mkdir(mode=0o600)
                    os.chown(basedir, 0, 0)
                user_whitelist.touch(mode=0o600, exist_ok=True)
                with open(user_whitelist, encoding='utf-8') as whitelist_file:
                    for line in whitelist_file:
                        entry = line.strip()
                        # '' is a substring of every domain, so a blank
                        # line would whitelist everything
                        if entry:
                            self.whitelist.append(entry)
            except OSError:
                self.logger.error('Could not open %s', user_whitelist)

    def load_domains(self, target_file: str) -> None:
        '''
        Append the domains on the server to self.domains.
        Lines that are not in "domain: user" form are skipped.

        :param target_file: File listing the domains on the server.
        '''
        try:
            with open(target_file, encoding='utf-8') as domains_file:
                for line in domains_file:
                    # Strip away domain owner and just get domain
                    parsed = self._parse_domain_line(line)
                    if parsed is None:
                        continue
                    self.domains.append(parsed[0])
        except OSError:
            self.logger.error('Could not open %s', target_file)

    def load_users(self, target_file: str) -> None:
        '''
        Load cPanel users in the format: ['userna5', 'example.com']
        Only owners accepted by rads.is_cpuser() are kept; malformed
        lines are skipped.

        :param target_file: File listing the domains on the server.
        '''
        try:
            with open(target_file, encoding='utf-8') as domains_file:
                for line in domains_file:
                    parsed = self._parse_domain_line(line)
                    if parsed is None:
                        continue
                    the_domain, the_user = parsed
                    # check if user is cPanel user
                    if the_user and rads.is_cpuser(the_user):
                        # add information to self.users
                        self.users.append([the_user, the_domain])
        except OSError:
            self.logger.error('Could not open %s', target_file)

    def _is_whitelisted(self, domain: str) -> bool:
        '''
        True when any non-empty whitelist entry is contained in domain.
        '''
        return any(
            allowed and allowed in domain for allowed in self.whitelist
        )

    def _record_match(self, domain: str) -> None:
        '''
        Add domain to self.matches and a "domain: owner" line to
        self.match_text.
        '''
        self.matches.append(domain)
        # self.users only holds accounts that passed rads.is_cpuser()
        # when they were loaded, so the owner is not re-validated here.
        owners = ''.join(user for user, dom in self.users if dom == domain)
        self.match_text += domain + ': ' + owners + '\n'

    def regex_search(self, bad_word: str, domain: str) -> None:
        '''
        Uses the '*' character as "any" (globbing)
        the following link contains more information on globbing:
             http://www.tldp.org/LDP/abs/html/globbingref.html

        Searches for string bad_word in string domain
        if match is found, it's added to self.matches
        and self.match_text is updated

        :param bad_word: str - bad word to check
        :param domain: str - domain to check
        '''
        # Ensure no whitelisted domains were matched
        if self._is_whitelisted(domain):
            return

        # Every '*' becomes '.*'; the text between wildcards is escaped so
        # characters such as '.' or '-' only match themselves.
        search_string = '.*'.join(
            re.escape(segment) for segment in bad_word.split('*')
        )

        # if bad_word found in search for string
        if re.search(search_string, domain):
            self._record_match(domain)

    def plaintext_search(self, bad_word: str, domain: str) -> None:
        '''
        searches for string bad_word in string domain
        if match is found, it's added to self.matches
        and self.match_text is updated

        :param bad_word: str - bad word to check
        :param domain: str - domain to check
        '''
        # An empty bad word is contained in every domain; ignore it.
        if not bad_word or bad_word not in domain:
            return
        # Ensure no whitelisted domains were matched
        if self._is_whitelisted(domain):
            return
        self._record_match(domain)

    def run(self) -> None:
        '''
        Checks every loaded domain against every entry of the badwords list.
        Matches found that are in the whitelist are ignored.
        Sends the report email when at least one match was found.
        '''
        for domain in self.domains:
            for bad_word in self.definitions:
                if '*' in bad_word:  # If globbing, search with regex.
                    self.regex_search(bad_word, domain)
                else:  # If not globbing, search regularly.
                    self.plaintext_search(bad_word, domain)

        # Remove any duplicates, keeping the order the matches were found
        self.matches = list(OrderedDict.fromkeys(self.matches))

        # Reference: https://stackoverflow.com/questions/28518340
        self.match_text = '\n'.join(
            OrderedDict.fromkeys(self.match_text.split('\n'))
        )

        if self.matches:
            self.send_email()
        else:
            self.logger.info('No matches to possible phishing words found.')

    def add_to_whitelist(self, domains: str) -> None:
        '''
        Add domains to the whitelist file of the user owning each of them.

        :param domains: str - whitespace separated domains to whitelist.
        '''
        requested = set(domains.split())
        handled = set()
        for user, user_domain in self.users:
            if user_domain not in requested:
                continue
            handled.add(user_domain)
            user_whitelist = self._whitelist_path(user, self.whitelist_file)
            try:
                with open(user_whitelist, 'a+', encoding='utf-8') as file:
                    # One domain per line; writing the whole argument would
                    # create an entry that can never match a single domain.
                    file.write(user_domain + '\n')
            except OSError as error:
                self.logger.error(error)
            else:
                self.logger.info('%s added to whitelist.', user_domain)

        for missing in sorted(requested - handled):
            self.logger.warning(
                '%s not found in the list of checked domains; '
                'nothing added to whitelist.',
                missing,
            )

    def send_email(self) -> None:
        '''
        Email report of possible fraudulent or phishing domains.
        Failures to send are logged, not raised.
        '''
        to_addr = self.email_address
        subject = 'Possible fraudulent or phishing domains found!'
        body = (
            'The following domain(s) contain keywords that may be '
            + 'fraudulent activity:\n'
            # drop the trailing newline left by the last match line
            + self.match_text.rstrip('\n')
        )
        self.logger.info('An email will now be sent for review by T2S staff')
        self.logger.debug(body)
        try:
            rads.send_email(to_addr, subject, body, errs=True)
        except (smtplib.SMTPException, OSError) as exc:
            self.logger.error(exc)
            self.logger.error(
                'Could not send email regarding '
                'possible phishing domain(s) failed.'
            )
        else:
            self.logger.info('Email sent successfully.')


def accessible(filename, mode):
    '''
    Report whether a file can be opened in the requested mode.

    The file is opened and closed again right away.  When that fails the
    error is printed and False is returned.

    :param filename: str - name of file to check
    :param mode: str - mode to test
    :return: bool - True when the file could be opened, False otherwise
    '''
    try:
        # pylint: disable=unspecified-encoding,consider-using-with
        open(filename, mode).close()
    except OSError as problem:
        print(problem)
        return False
    return True


def parse_arguments():
    '''
    Parse the arguments using argparse and verify access to the files.

    On top of the declared options the returned namespace carries:
      * output   - sys.stderr when --verbose was given, otherwise None
      * loglevel - the logging constant matching the chosen level name

    Exits with status EACCES when the log file cannot be opened for
    appending, or the badwords / domains file cannot be opened for reading.

    :return: argparse.Namespace holding the parsed options
    '''
    parser = argparse.ArgumentParser(prog='domainchecker', description=__doc__)
    parser.add_argument(
        '-a',
        '--add-to-whitelist',
        dest='added_whitelist_domains',
        default='',
        help='domain to add to whitelist',
    )
    parser.add_argument(
        '-b',
        '--badwords',
        dest='badwords_file',
        default=DEFAULT['badwords_file'],
        help='file containing bad words definitions',
    )
    parser.add_argument(
        '-d',
        '--domains',
        dest='domains_file',
        default=DEFAULT['domains_file'],
        help='file containing list of domains to check',
    )
    parser.add_argument(
        '-e',
        '--email-address',
        dest='email_address',
        default=DEFAULT['email_address'],
        # was a copy of the --add-to-whitelist help text
        help='email address to send the report to',
    )
    parser.add_argument(
        '-l',
        '--loglevel',
        dest='loglevel',
        default='info',
        choices=['critical', 'error', 'warning', 'info', 'debug'],
        help='level of verbosity in logging',
    )
    parser.add_argument(
        '-o',
        '--output',
        dest='log_file',
        default=DEFAULT['log_file'],
        help='file to send logs to',
    )
    parser.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        default=False,
        action='store_true',
        help='output logs to command line',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        dest='whitelist_file',
        default=DEFAULT['whitelist_file'],
        help='file containing whitelisted domains',
    )

    args = parser.parse_args()  # Parse arguments

    # Set CLI output to stderr if verbose
    args.output = sys.stderr if args.verbose else None

    # Set logging value from optional argument.  argparse already limits
    # the names through `choices`; INFO stays the fallback.
    levels = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
    }
    args.loglevel = levels.get(args.loglevel, logging.INFO)

    # Verify access to files
    files = [
        (args.log_file, 'a+'),
        (args.badwords_file, 'r'),
        (args.domains_file, 'r'),
    ]

    for _file_, mode in files:
        if not accessible(_file_, mode):
            sys.exit(EACCES)

    return args


def main():
    '''
    Command-line entry point.

    Parses the command line, sets up logging through rads.setup_logging()
    and builds a DomainChecker from the parsed options.  When
    -a/--add-to-whitelist was given, the named domains are added to the
    whitelist; otherwise the domain check is run.

    Options (see `domainchecker --help` for the generated usage text):
      -a ADDED_WHITELIST_DOMAINS, --add-to-whitelist
                            domain to add to whitelist
      -b BADWORDS_FILE, --badwords
                            file containing bad words definitions
      -d DOMAINS_FILE, --domains
                            file containing list of domains to check
      -e EMAIL_ADDRESS, --email-address
                            address the report is mailed to
      -l {critical,error,warning,info,debug}, --loglevel
                            level of verbosity in logging
      -o LOG_FILE, --output
                            file to send logs to
      -v, --verbose         output logs to command line
      -w WHITELIST_FILE, --whitelist
                            file containing whitelisted domains
    '''
    options = parse_arguments()

    # logger shared by the checker
    log = rads.setup_logging(
        path=options.log_file,
        name='domainchecker',
        loglevel=options.loglevel,
        print_out=options.output,
    )

    checker = DomainChecker(
        badwords_file=options.badwords_file,
        whitelist_file=options.whitelist_file,
        domains_file=options.domains_file,
        email_address=options.email_address,
        logger=log,
    )

    # Without domains to whitelist, perform the regular check.
    to_whitelist = options.added_whitelist_domains
    if not to_whitelist:
        checker.run()
        return

    # Otherwise only update the whitelist; no check is run.
    checker.add_to_whitelist(to_whitelist)


# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Interrupted from the keyboard: leave via sys.exit() instead of
        # showing a traceback.
        sys.exit()
Zerion Mini Shell 1.0