#!/usr/bin/python3
import re
import os
import urllib.request
import urllib.parse
import pwd
import subprocess
import string
import functools
from os import listdir, lstat
from stat import S_IFREG, S_IFDIR
from xml.dom import minidom

if not hasattr(__builtins__, 'any'):
    def any(iterable):
        """Fallback used when the interpreter exposes no built-in ``any``.

        Returns True as soon as one element of *iterable* is truthy and
        False when none is (an empty iterable therefore yields False).
        """
        for element in iterable:
            if not element:
                continue
            return True
        return False


class ApacheConfParser(object):
    """Collect ``DocumentRoot`` values from an Apache configuration tree.

    ``apache_conf`` is the path of the main configuration file and
    ``apache_dir`` the server root against which relative ``Include``
    targets are resolved.
    """

    def __init__(self, apache_conf, apache_dir):
        self.apache_dir = apache_dir
        self.apache_conf = apache_conf

    def get_all_docroots(self):
        """Return the distinct DocumentRoot paths reachable from the main conf.

        The main file and everything it pulls in through ``Include`` /
        ``IncludeOptional`` is scanned.  Include targets may be quoted,
        relative to the server root, or contain shell-style wildcards.
        Files that cannot be read are skipped silently, and each file is
        parsed at most once so that circular includes terminate.

        Returns:
            list: unique docroot strings, in no particular order.
        """
        import glob  # local import: only needed to expand Include wildcards

        include_pattern = re.compile(r"Include(?:Optional)?\s+(\S+)")
        docroot_pattern = re.compile(r"DocumentRoot\s+(\S+)")

        docroots = set()
        visited = set()
        conf_file_stack = [self.apache_conf]
        while conf_file_stack:
            conf_file = conf_file_stack.pop()
            if conf_file in visited:
                continue
            visited.add(conf_file)
            try:
                # errors="replace": a stray non-decodable byte in a comment
                # must not abort the whole scan.
                with open(conf_file, "r", errors="replace") as fd:
                    data = fd.read()
            except IOError:
                continue

            for target in include_pattern.findall(data):
                # \S+ also swallows the surrounding quotes; drop them.
                target = target.strip("'\"")
                if self.apache_dir and not os.path.isabs(target):
                    target = os.path.join(self.apache_dir, target)
                # Fall back to the literal path when nothing matches so a
                # missing file is simply skipped by the open() above.
                conf_file_stack.extend(glob.glob(target) or [target])

            docroots.update(dr.strip("'\"") for dr in docroot_pattern.findall(data))
        return list(docroots)

    @classmethod
    def auto_discover(cls):
        """Build a parser from the paths reported by ``httpd -V``.

        Raises:
            Exception: if ``httpd`` cannot be executed or its output does
                not contain ``HTTPD_ROOT`` and ``SERVER_CONFIG_FILE``.
        """
        try:
            httpd_output = subprocess.Popen(
                ['httpd', '-V'], stdout=subprocess.PIPE).communicate()[0]
            httpd_output = httpd_output.decode('utf-8')
        except (OSError, ValueError, subprocess.SubprocessError) as err:
            raise Exception("I can forgive many things... But you have to have apache!") from err
        adir_match = re.search('-D HTTPD_ROOT="(.*?)"', httpd_output)
        aconf_match = re.search('-D SERVER_CONFIG_FILE="(.*?)"', httpd_output)
        if not adir_match or not aconf_match:
            raise Exception("Could Not Determine Location of Apache Conf File")
        apache_dir = adir_match.group(1)
        # os.path.join keeps an absolute SERVER_CONFIG_FILE untouched.
        apache_conf = os.path.join(apache_dir, aconf_match.group(1))
        return cls(apache_conf, apache_dir)

    def iter_docroots(self):
        """Return the docroots as an iterable (currently the full list)."""
        return self.get_all_docroots()

def alpha2num(full_string):
    """Return the characters of *full_string* for which ``isdigit()`` holds, in order."""
    digits = [ch for ch in full_string if ch.isdigit()]
    return ''.join(digits)

def _split_release_type(release_type):
    """Split a release suffix such as ``RC2`` into ``("RC", 2)``.

    A missing number counts as 0; a suffix that does not start with
    letters yields ``(None, 0)``.
    """
    match = re.match(r"([a-zA-Z]+)(\d+)?", release_type)
    if not match:
        return None, 0
    label, number = match.groups()
    return label, int(number) if number else 0


def cmp_versions(a, b):
    """Three-way compare two version strings.

    A version is a dotted numeric part optionally followed by a release
    suffix (``1.2.3``, ``1.2-beta2``, ``2.0RC1``).  Numeric parts are
    compared component-wise as integers, the shorter one being padded
    with zeros.  When they tie, a version with a release suffix sorts
    before one without, and suffixes are ordered
    alpha < beta < delta < RC, then by their trailing number.

    Args:
        a, b: version strings.

    Returns:
        int: -1 if ``a`` sorts before ``b``, 1 if after, 0 if equivalent.
        A string that does not start with a digit or dot sorts lowest.
        A recognised suffix sorts before an unrecognised one; two
        unrecognised suffixes compare equal.
    """
    if a == b:
        return 0

    vpattern = re.compile(r"([\d\.]+)-?(.*)")
    match_a = vpattern.match(a)
    match_b = vpattern.match(b)

    if not match_a:
        return -1
    if not match_b:
        return 1

    version_a, release_type_a = match_a.groups()
    version_b, release_type_b = match_b.groups()

    # Empty components (e.g. from a trailing dot) count as 0 instead of
    # blowing up in int('').
    parts_a = [int(p) if p else 0 for p in version_a.split('.')]
    parts_b = [int(p) if p else 0 for p in version_b.split('.')]
    width = max(len(parts_a), len(parts_b))
    parts_a.extend([0] * (width - len(parts_a)))
    parts_b.extend([0] * (width - len(parts_b)))
    if parts_a != parts_b:
        return 1 if parts_a > parts_b else -1

    # Same numeric version: a pre-release suffix makes a version older.
    if release_type_a and not release_type_b:
        return -1
    if release_type_b and not release_type_a:
        return 1
    if not release_type_a and not release_type_b:
        return 0

    rtypes = ["alpha", "beta", "delta", "RC"]
    string_a, num_a = _split_release_type(release_type_a)
    string_b, num_b = _split_release_type(release_type_b)
    known_a = string_a in rtypes
    known_b = string_b in rtypes
    if known_a and known_b:
        rank_a = (rtypes.index(string_a), num_a)
        rank_b = (rtypes.index(string_b), num_b)
        return (rank_a > rank_b) - (rank_a < rank_b)
    if known_a:
        return -1
    if known_b:
        return 1
    return 0


class ApplicationSignature(object):
    """Fingerprint used to recognise one web application in a directory.

    A signature holds the set of entry names expected in an install
    directory, the minimum fraction of those names that must be present
    (``min_overlap``), version metadata, and callables that extract the
    installed version from files below a matched directory.
    """

    __slots__ = (
        "application_name",
        "min_overlap",
        "_sig_files",
        "_sig_file_count",
        "_version_funcs",
        "warn_versions",
        "acceptable_versions",
        "best_version",
        "upgrade_url",
        "upgrade_instructions_url",
        "upgrade_instructions"
    )

    def __init__(self):
        self.application_name = None
        self.min_overlap = 1
        self._sig_files = set()
        # Kept as a float so the overlap ratio in match() is fractional.
        self._sig_file_count = 0.

        self._version_funcs = []
        self.warn_versions = []
        self.acceptable_versions = []
        self.best_version = None

        self.upgrade_url = None
        self.upgrade_instructions_url = None
        self.upgrade_instructions = None

    def __repr__(self):
        return "<AppSignature: '%s'>" % (self.application_name,)

    def match(self, path, fileset):
        """Test whether the directory *path* looks like this application.

        Args:
            path: directory being examined; passed to the version
                detection callables and echoed in the result.
            fileset: collection of entry names found directly in *path*.

        Returns:
            dict describing the match (name, path, overlap, version
            metadata and ``detected_versions``), or None when the share
            of signature files present is below ``min_overlap`` or the
            signature has no files at all.
        """
        if not self._sig_file_count:
            # An empty signature cannot match; avoid dividing by zero.
            return None
        overlap = len(self._sig_files.intersection(fileset)) / self._sig_file_count
        if overlap < self.min_overlap:
            return None
        return {
            "name": self.application_name,
            "path": path,
            "overlap": overlap,
            "warn_versions": self.warn_versions,
            "acceptable_versions": self.acceptable_versions,
            "best_version": self.best_version,
            "detected_versions": [vf(path) for vf in self._version_funcs]
        }

    @classmethod
    def get_xml_signatures_from_string(cls, xmlStr):
        """Parse signature definitions from an XML document.

        Every ``<signature>`` element must provide ``applicationName``,
        ``minOverlap``, ``fileSignatures`` (with ``filename`` children),
        ``versions`` (``warnVersion`` / ``acceptableVersion`` children)
        and ``versionSignatures`` (``versionSignature`` children holding
        ``filename`` and ``regexPattern``).  ``upgradeUrl``,
        ``upgradeInstructionsUrl`` and ``upgradeInstructions`` are
        optional.

        Args:
            xmlStr: the XML document as ``str`` or ``bytes``.

        Returns:
            tuple of ApplicationSignature instances, in document order.

        Raises:
            IndexError: if a mandatory element is missing, or if a
                signature lists no ``acceptableVersion`` at all.
        """
        doc = minidom.parseString(xmlStr)
        signatures = []
        for sigNode in doc.getElementsByTagName('signature'):
            _sig = cls()
            _sig.application_name = sigNode.getElementsByTagName('applicationName')[0].firstChild.data
            _sig.min_overlap = float(sigNode.getElementsByTagName('minOverlap')[0].firstChild.data)

            uUrlNode = sigNode.getElementsByTagName('upgradeUrl')
            if uUrlNode:
                _sig.upgrade_url = uUrlNode[0].firstChild.data

            uInstructUrlNode = sigNode.getElementsByTagName('upgradeInstructionsUrl')
            if uInstructUrlNode:
                _sig.upgrade_instructions_url = uInstructUrlNode[0].firstChild.data

            uInstructsNode = sigNode.getElementsByTagName('upgradeInstructions')
            if uInstructsNode:
                _sig.upgrade_instructions = uInstructsNode[0].firstChild.data

            fileSignaturesNode = sigNode.getElementsByTagName('fileSignatures')[0]
            for fnameNode in fileSignaturesNode.getElementsByTagName('filename'):
                _sig._sig_files.add(fnameNode.firstChild.data)
            _sig._sig_file_count = float(len(_sig._sig_files))

            versionNode = sigNode.getElementsByTagName('versions')[0]
            for warnVersion in versionNode.getElementsByTagName('warnVersion'):
                _sig.warn_versions.append(warnVersion.firstChild.data)
            for acceptableVersion in versionNode.getElementsByTagName('acceptableVersion'):
                # An explicit newest="true" marker wins over sorting.
                if acceptableVersion.getAttribute("newest") == "true":
                    _sig.best_version = acceptableVersion.firstChild.data
                _sig.acceptable_versions.append(acceptableVersion.firstChild.data)
            if not _sig.best_version:
                # No explicit marker: take the highest acceptable version.
                _sig.best_version = sorted(_sig.acceptable_versions, key=functools.cmp_to_key(cmp_versions))[-1]

            versionSigsNode = sigNode.getElementsByTagName('versionSignatures')[0]
            for versionSignature in versionSigsNode.getElementsByTagName('versionSignature'):
                filename = versionSignature.getElementsByTagName('filename')[0].firstChild.data
                pattern = versionSignature.getElementsByTagName('regexPattern')[0].firstChild.data
                _sig._version_funcs.append(gen_regex_version_func(filename, pattern))

            signatures.append(_sig)
        return tuple(signatures)

    @classmethod
    def get_xml_signatures_from_url(cls, url, useragent="vdetect2"):
        """Download the XML document at *url* and parse it into signatures.

        *useragent* is sent as the ``User-Agent`` request header.
        """
        req = urllib.request.Request(url, headers={"User-Agent": useragent})
        # Close the connection deterministically once the body is read.
        with urllib.request.urlopen(req) as response:
            data = response.read()
        return cls.get_xml_signatures_from_string(data)

    @classmethod
    def get_xml_signatures_from_file(cls, filename):
        """Read the XML document stored in *filename* and parse it."""
        # Binary mode lets the XML parser honour the document's own
        # encoding declaration instead of the locale default.
        with open(filename, "rb") as fd:
            data = fd.read()
        return cls.get_xml_signatures_from_string(data)


def walk_paths(paths, maxdepth, maxdirsize=None, file_filter=None, dir_walk_filter=None):
    """Walk several directory trees breadth-first without following symlinks.

    Args:
        paths: iterable of start directories.  They are made absolute and
            de-duplicated; a start directory nested inside another one is
            walked once, as its own root.
        maxdepth: how many levels below each start directory to descend
            (0 lists only the start directory); None means unlimited.
        maxdirsize: when truthy, sub-directories whose own ``st_size``
            exceeds this value are not descended into.
        file_filter: optional compiled regex; only file names it matches
            are reported.
        dir_walk_filter: optional compiled regex; only sub-directories
            whose name it matches are descended into.

    Yields:
        ``(root, dir_names, file_names)`` for every readable directory
        visited.  ``dir_names`` lists every sub-directory, including
        those that will not be descended into.  Fresh lists are yielded
        each time, so callers may keep them.
    """
    from collections import deque
    from stat import S_ISDIR, S_ISLNK, S_ISREG

    start_paths = list(dict.fromkeys(os.path.abspath(path) for path in paths))
    start_set = frozenset(start_paths)

    for start in start_paths:
        # Depth is measured by counting path separators of absolute paths.
        depth_limit = None if maxdepth is None else start.count('/') + maxdepth
        pending = deque([start])
        while pending:
            root = pending.popleft()
            try:
                names = listdir(root)
            except OSError:
                continue
            d_names = []
            f_names = []
            for name in names:
                abs_path = os.path.abspath(root + '/' + name)
                try:
                    f_stat = lstat(abs_path)
                except OSError:
                    # Entry vanished or is unreadable; ignore it.
                    continue
                f_mode = f_stat.st_mode
                # Symlinks keep being reported as files, as they always
                # were; they are never descended into.
                if S_ISREG(f_mode) or S_ISLNK(f_mode):
                    if not file_filter or file_filter.match(name):
                        f_names.append(name)
                elif S_ISDIR(f_mode):
                    if abs_path not in start_set \
                            and (depth_limit is None or abs_path.count('/') <= depth_limit) \
                            and (not maxdirsize or f_stat.st_size <= maxdirsize) \
                            and (not dir_walk_filter or dir_walk_filter.match(name)):
                        pending.append(abs_path)
                    d_names.append(name)
            yield root, d_names, f_names


def gen_regex_version_func(filename, pattern):
    """Build a callable that extracts a version string from a file.

    Args:
        filename: file name relative to the directory the returned
            callable is invoked with.
        pattern: regular expression (compiled with MULTILINE | DOTALL)
            whose capture groups are the version components.

    Returns:
        A function ``f(path)`` that searches ``path/filename`` and returns
        the matched groups joined with ``.``.  It returns None when the
        file is missing or unreadable, or when the pattern does not match.
    """
    c_patt = re.compile(pattern, re.MULTILINE | re.DOTALL)

    def _(path):
        abs_name = path + '/' + filename
        try:
            # errors="replace": scanned files come in arbitrary encodings
            # and a stray byte must not abort the detection.
            with open(abs_name, encoding="utf-8", errors="replace") as fd:
                data = fd.read()
        except OSError:
            return None
        match = c_patt.search(data)
        if not match:
            return None
        # Optional groups that did not participate are None; skip them.
        return '.'.join(g for g in match.groups() if g is not None)

    return _


def prepare_report(report_data, only_outdated=False):
    """Grade each match in place and return the de-duplicated report.

    Every dict in *report_data* gets a ``level`` key:
    -1 no version detected, 0 acceptable/best version, 1 version listed
    in ``warn_versions``, 2 any other version, 3 newer than
    ``best_version``.  When a version was detected the dict also gets a
    ``version`` key holding the version that produced the level.

    With *only_outdated* set, only entries of level 1 or higher are
    returned.  Entries equal to one already collected are dropped.
    """
    prepared = []
    for entry in report_data:
        found = entry["detected_versions"]
        if any(found):
            levels, versions = [], []
            for candidate in found:
                if not candidate:
                    continue
                best = entry["best_version"]
                # An exact or newer-than-best hit settles the grade at once.
                if candidate == best:
                    levels, versions = [0], [candidate]
                    break
                if cmp_versions(candidate, best) == 1:
                    levels, versions = [3], [candidate]
                    break
                if candidate in entry["acceptable_versions"]:
                    severity = 0
                elif candidate in entry["warn_versions"]:
                    severity = 1
                else:
                    severity = 2
                levels.append(severity)
                versions.append(candidate)
            worst = max(levels)
            entry["level"] = worst
            entry["version"] = versions[levels.index(worst)]
        else:
            entry["level"] = -1

        if entry in prepared:
            continue
        if only_outdated and entry["level"] < 1:
            continue
        prepared.append(entry)
    return prepared


def _print_report_section(title, rows, colors, use_color):
    """Print one titled block of report rows, optionally ANSI-coloured."""
    print("\n%s" % title)
    print("=" * 40)
    for r in rows:
        version = r.get('version', "UNKNOWN")
        if use_color:
            print('%s%-16s :: %8s :: %-60s\033[m' % (colors[r["level"]], r["name"], version, r["path"]))
        else:
            print('% -16s :: % 8s :: % -60s' % (r["name"], version, r["path"]))


def pretty_report(report_data, **kwargs):
    """Print the human-readable version report to stdout.

    Args:
        report_data: list of graded report dicts (keys ``level``,
            ``name``, ``path`` and optionally ``version``).
        **kwargs:
            color: truthy to colour rows with ANSI escapes.
            signatures: signature objects; for every application listed
                as outdated or vulnerable the matching signature's
                upgrade hints are printed after the tables.

    Raises:
        SystemExit: after printing "No Applications Found" when
            *report_data* is empty.
    """
    # Imported here because the module only binds ``sys`` when it is run
    # as a script.
    import sys

    colors = {
        -1: "",
        0: "\033[1;32m",
        1: "\033[1;33m",
        2: "\033[1;31m",
        3: "\033[1;34m"
    }
    use_color = kwargs.get('color')

    def rows_at(levels):
        return sorted((r for r in report_data if r["level"] in levels),
                      key=lambda x: (x['name'], x.get('version', "UNKNOWN")))

    print("\n====================================\n--=== VERSION DETECTION REPORT ===--\n====================================")
    unk = rows_at((-1,))
    info = rows_at((0, 3))
    warn = rows_at((1,))
    crit = rows_at((2,))

    if not any(report_data):
        print("No Applications Found")
        sys.exit()

    if unk:
        _print_report_section("Unversioned Applications:", unk, colors, use_color)
    if info:
        _print_report_section("Up-To-Date Applications:", info, colors, use_color)
    if warn:
        _print_report_section("Outdated Applications:", warn, colors, use_color)
    if crit:
        _print_report_section("Vulnerable Applications:", crit, colors, use_color)

    # Only applications that need attention get upgrade hints.
    names = set(r["name"] for r in warn + crit)

    relevant_signatures = []
    found_names = set()
    for sig in kwargs.get('signatures', []):
        if sig.application_name in names and sig.application_name not in found_names:
            relevant_signatures.append(sig)
            found_names.add(sig.application_name)

    for sig in relevant_signatures:
        if sig.upgrade_instructions:
            print()
            print("For instructions on updating your %s instance(s) to the latest version (%s), please see the information below:" %
                  (sig.application_name, ', '.join(v for v in sig.acceptable_versions)))
            print("----------------------------------------")
            print(sig.upgrade_instructions)
        elif sig.upgrade_instructions_url:
            print()
            print("For instructions on updating your %s instance(s) to the latest version (%s), please see the link below:" %
                  (sig.application_name, ', '.join(v for v in sig.acceptable_versions)))
            print("----------------------------------------")
            print(sig.upgrade_instructions_url)
            if sig.upgrade_url:
                print()
                print("Alternatively, you can download the full %s package here:" % sig.application_name)
                print("----------------------------------------")
                print(sig.upgrade_url)
        elif sig.upgrade_url:
            # Only reached when no instructions exist, so the download
            # link is never printed twice.
            print()
            print("You can download the full %s upgrade package here:" % sig.application_name)
            print("----------------------------------------")
            print(sig.upgrade_url)


def json_report(report_data, **kwargs):
    """Print the report to stdout as a JSON array of rows.

    Each row is ``[level, lower-cased name, path, version]``.  Without
    the ``dups`` keyword one row is emitted per entry, using its graded
    ``version`` (or ``"UNKNOWN"``); with ``dups`` one row is emitted for
    every distinct non-empty detected version of every entry.
    """
    import json

    if kwargs.get('dups'):
        rows = [
            [entry["level"], entry["name"].lower(), entry["path"], version]
            for entry in report_data
            for version in set(entry["detected_versions"])
            if version
        ]
    else:
        rows = [
            [entry["level"], entry["name"].lower(), entry["path"], entry.get("version", "UNKNOWN")]
            for entry in report_data
        ]
    print(json.dumps(rows))


def csv_report(report_data, **kwargs):
    """Print the report to stdout as delimiter-separated lines.

    Each line is ``level, lower-cased name, path, version`` joined with
    the ``delim`` keyword (tab by default).  Fields are written as-is,
    without quoting or escaping.  Without the ``dups`` keyword one line
    is printed per entry, using its graded ``version`` (or
    ``"UNKNOWN"``); with ``dups`` one line is printed for every distinct
    non-empty detected version of every entry.
    """
    delim = kwargs.get('delim', '\t')
    if not kwargs.get('dups'):
        for r in report_data:
            print(delim.join(
                [str(r["level"]), r["name"].lower(), r["path"], r.get("version", "UNKNOWN")]))
    else:
        result = []
        for r in report_data:
            for version in set(r["detected_versions"]):
                if not version:
                    continue
                result.append([str(r["level"]), r["name"].lower(), r["path"], version])
        for row in result:
            print(delim.join(row))


def get_user_home(user):
    """Return the home directory of *user* with a trailing slash.

    Prints a message and terminates the program with exit status 1 when
    the account is not present in the password database.
    """
    try:
        pwd_entry = pwd.getpwnam(user)
    except KeyError:
        print('Unable to locate homedir for the user {0}'.format(user))
        # Raise SystemExit directly: the bare ``exit`` helper is injected
        # by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)
    return pwd_entry.pw_dir + '/'


def path_to_user_home(path):
    """Return the home directory of the account owning *path*.

    The owner is taken from ``lstat`` (a symlink itself, not its target).
    Returns an empty string when *path* cannot be stat'ed or its owner
    has no entry in the password database.
    """
    try:
        uid = lstat(path).st_uid
    except OSError:
        return ""
    try:
        pwd_entry = pwd.getpwuid(uid)
    except KeyError:
        return ""
    # The password entry exposes the home directory as ``pw_dir``.
    return pwd_entry.pw_dir


def get_docroots(paths):
    """Return the Apache docroots that start with any prefix in *paths*.

    The docroots come from the configuration discovered through
    ``ApacheConfParser.auto_discover()``.
    """
    parser = ApacheConfParser.auto_discover()
    return [
        docroot
        for docroot in parser.iter_docroots()
        if any([docroot.startswith(prefix) for prefix in paths])
    ]

# TODO: Does plesk even (lift) have sub-accounts?
def get_associated_users(user):
    """Return the accounts whose owner in /etc/trueuserowners is *user*.

    The file is read as ``account: owner`` lines; lines that do not split
    into exactly two fields on ``': '`` are ignored.

    Raises:
        Exception: if /etc/trueuserowners cannot be opened.
    """
    try:
        with open('/etc/trueuserowners', 'r') as fd:
            user_entrys = fd.readlines()
    except IOError:
        raise Exception("Reseller Scanning only available on cPanel boxes")
    users = []
    for entry in user_entrys:
        try:
            u, owner = entry.split(': ')
        except ValueError:
            # Comment, blank or otherwise malformed line.
            continue
        if owner.strip('\r\t\n ') == user:
            users.append(u)
    return users

# TODO: Make plesk-sensitive changes.
def options():
    """Parse the command line (``sys.argv``) and return the option values.

    ``--signature-url`` is an alias of ``--sig-loc``: when given, its
    value replaces ``sigloc``.  ``--server-scan`` and ``--all-docroots``
    both set ``alldocroots``.  Positional arguments are ignored.
    """
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("--directory",
                      action="store", metavar="DIR", dest="directory", default=None,
                      help="Directories to search (accepts comma separated lists of directories).")
    parser.add_option("--user",
                      action="store", dest="user", default=None,
                      help="Search user's docroots (accepts comma separated lists of users).")
    parser.add_option("--reseller",
                      action="store", metavar="USER", dest="reseller", default=None,
                      help="Search reseller and all associated resold accounts.")
    parser.add_option("--maxdepth",
                      action="store", type="int", dest="maxdepth", default=4,
                      help="Maximum search depth. (-1 for unlimited, 0 for only current directory)")
    parser.add_option("--user-agent",
                      action="store", metavar="AGENT", dest="useragent", default='vdetect2',
                      help="User Agent to use when requesting signatures.")
    parser.add_option("--sig-loc",
                      action="store", metavar="URL", dest="sigloc",
                      default='http://sak.dev.gatorsec.net/bin/vdetect.xml',
                      help="comma separated list of URLs where signatures are located.")
    parser.add_option("--signature-url",
                      action="store", metavar="URL", dest="signatureurl",
                      help="comma separated list of URLs where signatures are located.")
    parser.add_option("--server-scan",
                      action="store_true", dest="alldocroots", default=False,
                      help="Scan the entire server.")
    parser.add_option("--all-docroots",
                      action="store_true", dest="alldocroots", default=False,
                      help="Scan the entire server.")
    parser.add_option("--show-vulns",
                      action="store_true", dest="showvulns", default=False,
                      help="Show known vulnerabilities in an application version.")
    parser.add_option("--no-progress",
                      action="store_true", dest="no_progress", default=False,
                      help="Don't display the progress bar.")
    parser.add_option("--plain",
                      action="store_true", dest="plain", default=False,
                      help="Display non-pretty output (no colors, no progress). Default when redirecting output")
    parser.add_option("--json",
                      action="store_true", dest="json", default=False,
                      help="JSON output")
    parser.add_option("--csv",
                      action="store_true", dest="csv", default=False,
                      help="CSV output")
    parser.add_option("--csv-delim",
                      dest="csvdelim", default='\t',
                      help="Delimiter used when using the --csv option (defaults to tab delimited)")
    parser.add_option("--dups",
                      action="store_true", dest="show_dups", default=False,
                      help="Show all detected application versions without de-duplication (only works with --csv or --json)")
    parser.add_option("--outdated", dest="only_outdated", default=False, action="store_true",
                      help="Only show outdated software")

    opts, args = parser.parse_args()

    # --signature-url takes precedence over --sig-loc when both are given.
    if opts.signatureurl:
        opts.sigloc = opts.signatureurl

    return opts


if __name__ == "__main__":
    import sys

    opts = options()

    # Collect the directories to scan from the command-line selection.
    paths = []
    if opts.directory or opts.user or opts.reseller or opts.alldocroots:
        if opts.directory:
            paths.extend(opts.directory.split(','))
        if opts.user:
            users = opts.user.split(',')
            homedirs = [get_user_home(user) for user in users]
            paths.extend(get_docroots(homedirs))
        if opts.reseller:
            users = []
            for reseller in opts.reseller.split(','):
                users.extend(get_associated_users(reseller))
            homedirs = [get_user_home(user) for user in users]
            paths.extend(get_docroots(homedirs))
        if opts.alldocroots:
            paths.extend(get_docroots(['/']))
    else:
        # No selection given: use the first two components of the current
        # directory (e.g. /home/username/) as the docroot prefix.
        cwd = os.getcwd() + '/'
        m = re.match(r"/\w+/\w+/", cwd)
        if m:
            paths.extend(get_docroots([m.group(0)]))
        else:
            print("\nERROR: This script must be run from a user's home directory (e.g., /home/username)\n")
            sys.exit(1)

    # walk_paths() expects None for "unlimited"; the CLI spells it -1.
    if opts.maxdepth is None:
        maxdepth = 4
    elif opts.maxdepth == -1:
        maxdepth = None
    else:
        maxdepth = int(opts.maxdepth)

    # Machine-readable or redirected output never gets colours/progress.
    if not sys.stdout.isatty() or opts.csv or opts.json:
        opts.plain = True

    # A value with a URL scheme is downloaded, anything else is a file.
    parsed_url = urllib.parse.urlparse(opts.sigloc)
    if parsed_url[0]:
        signatures = ApplicationSignature.get_xml_signatures_from_url(opts.sigloc, opts.useragent)
    else:
        signatures = ApplicationSignature.get_xml_signatures_from_file(opts.sigloc)

    ffilter = re.compile(
        r".*\.(?:conf|config|png|jpg|jpeg|config|css|gif|header|html|py|ico|inc|php|sh|shtml|swf|txt|xml|tmpl)$",
        re.IGNORECASE)
    dwfilter = re.compile(r"^(?!virtfs|mail|cache|images|image|img|tmp)(.*)$", re.IGNORECASE)
    report_data = []
    dircount = 0
    try:
        for root, dirs, files in walk_paths(paths, maxdepth, 32768, ffilter, dwfilter):
            if not opts.plain and not opts.no_progress:
                dircount += 1
                # end='\r' keeps the counter on a single, rewritten line.
                print("Directories scanned: %08d" % dircount, end='\r')
                sys.stdout.flush()
            if files or dirs:
                fset = set(dirs + files)
                for sig in signatures:
                    match = sig.match(root, fset)
                    if match:
                        report_data.append(match)
    finally:
        # Runs even when the scan is interrupted, so whatever was found
        # up to that point is still reported.
        report = prepare_report(report_data, opts.only_outdated)

        if opts.json:
            # Use the prepared report so --outdated and de-duplication
            # apply to JSON output as they do to the other formats.
            json_report(report, dups=opts.show_dups)
        elif opts.csv:
            csv_report(report, delim=(opts.csvdelim or '\t'), dups=opts.show_dups)
        else:
            color = not opts.plain
            pretty_report(report, color=color, signatures=signatures, dups=opts.show_dups)
