#!/usr/bin/python3
# Script to check for known gotchas that occur between
# major versions of HTCondor. This script only checks
# for gotchas when upgrading to the next major version.

# Imports Always needed
import os
import sys
import subprocess
import textwrap
import argparse
from enum import Enum
# Temporarily needed imports (for a specific check)
# End imports

# The HTCondor Python bindings are used throughout this script to read the
# configuration. If they cannot be imported, print install instructions to
# stderr and exit with status 1 instead of showing a traceback.
try:
    import htcondor
except ImportError:
    print(
        textwrap.dedent(
            """
            Failed to find HTCondor Python bindings.
            Please check your current Python environment or install the bindings if needed:
            https://htcondor.readthedocs.io/en/latest/apis/python-bindings/install.html
            """
        ),
        file=sys.stderr,
    )
    sys.exit(1)


class Debug(Enum):
    """Output verbosity levels, ordered from least (WARNING) to most (MAX) verbose.

    Members can be ordered against each other by their integer value.
    Comparing a member with anything that is not a Debug member yields
    NotImplemented so Python can fall back to its default handling.
    """

    WARNING = 0
    BASE = 1
    DETAILED = 2
    MAX = 3

    def __gt__(self, other):
        """Return True when this level is strictly more verbose than other."""
        if other.__class__ is not self.__class__:
            return NotImplemented
        return other.value < self.value

    def __ge__(self, other):
        """Return True when this level is at least as verbose as other."""
        if other.__class__ is not self.__class__:
            return NotImplemented
        return other.value <= self.value


class System(Enum):
    """Kind of installation a check applies to (used to filter checks)."""

    # Filtered out when the tool is run in CE mode (-ce/--ce)
    STANDARD = 0
    # Filtered out unless the tool is run in CE mode (-ce/--ce)
    CE = 1
    # Never filtered out by system type; the default for a Check
    BOTH = 2


# Maximum number of characters allowed in a Check tag. Enforced when a
# Check is constructed and used as the tag column width in listings.
MAX_TAG_LEN = 8


class Check:
    """Description of one incompatibility check and the filters that apply to it.

    Required keyword arguments (a missing one raises KeyError):
        version -- tuple naming the HTCondor version that introduced the issue
        test    -- callable that performs the check
        name    -- human readable name of the check
        tag     -- short identifier, 1 to MAX_TAG_LEN characters long

    Optional keyword arguments:
        synopsis -- one line summary
        desc     -- longer description; common leading whitespace is removed
        warning  -- stored as ``is_warning`` (default False)
        daemons  -- list of daemon names the check depends on (default [])
        root     -- stored as ``needs_root`` (default False)
        system   -- System member the check applies to (default System.BOTH)
    """

    def __init__(self, **kwargs):
        # Mandatory fields, looked up in a fixed order
        for field in ("version", "test", "name", "tag"):
            setattr(self, field, kwargs[field])

        # Optional fields with their fallbacks
        raw_desc = kwargs.get("desc", "No detailed check description provided.")
        self.synopsis = kwargs.get("synopsis", "No check synopsis provided.")
        self.is_warning = kwargs.get("warning", False)
        self.daemons = kwargs.get("daemons", [])
        self.needs_root = kwargs.get("root", False)
        self.system = kwargs.get("system", System.BOTH)

        # Sanity checks on the statically declared check table entries
        assert 0 < len(self.tag) <= MAX_TAG_LEN
        assert type(self.version) is tuple
        assert type(self.daemons) is list

        self.desc = textwrap.dedent(raw_desc)


# Major version this tool prepares an installation for
NEXT_MAJOR_VERSION = (24, 0, 0)
# Installed version: the CONDOR_VERSION configuration value split on "." into a tuple of ints
CURRENT_VERSION = tuple(int(x) for x in htcondor.param["CONDOR_VERSION"].split("."))
# True when running on Windows (os.name is "nt")
IS_WINDOWS = os.name == "nt"
# Active output verbosity; main() overwrites this from the -v/--verbose count
VERBOSITY = Debug.BASE


def format_print(msg, offset=0, newline=False, err=False, debug=Debug.BASE):
    """Print a (possibly multi-line) message with uniform indentation.

    The message is dedented, whitespace-only lines are dropped, and every
    remaining line is right-aligned so that it is preceded by ``offset``
    spaces. A line containing the marker ``$(<EMPTY_LINE>)`` is printed as
    an empty line, which allows blank lines inside a message.

    msg     -- text to print
    offset  -- number of spaces to put in front of each line
    newline -- print an empty line before the message
    err     -- write to stderr instead of stdout
    debug   -- Debug level of the message; nothing is printed when it is
               greater than the module level VERBOSITY
    """
    if debug > VERBOSITY:
        return

    stream = sys.stdout
    if err:
        stream = sys.stderr

    if newline:
        print(file=stream)

    for text in textwrap.dedent(msg).split("\n"):
        if not text.strip():
            continue
        if "$(<EMPTY_LINE>)" in text:
            print(file=stream)
        else:
            # Right-align within (offset + length) columns == prefix with offset spaces
            width = offset + len(text)
            print(format(text, f">{width}s"), file=stream)


def make_version_str(version):
    """Return the components of a version tuple joined by periods, e.g. (24, 0, 0) -> '24.0.0'."""
    parts = map(str, version)
    return ".".join(parts)


# ===============================Incompatibility Check Functions===============================
def warn_dagman_env(*args, **kwargs):
    """Print the notice that DAGMan no longer inherits the full submit environment.

    Arguments are accepted for interface compatibility with other checks
    and are ignored. Nothing is returned (implicitly None), which the
    caller in run_checks treats the same as a failed check.
    """
    notice = """
        Warning:
            DAGMan no longer copies the entire environment from
            which it was submitted. This will affect any DAG items
            that depend on environment variables, including jobs
            and scripts (Pre, Post, & Hold) submitted by DAGMan.
        """
    format_print(notice, offset=4, newline=True, debug=Debug.WARNING)


def warn_user_records(*args, **kwargs):
    """Warn that the AP user records change limits which versions can be downgraded to.

    Arguments are accepted for interface compatibility with other checks
    and are ignored. Nothing is returned (implicitly None), which the
    caller in run_checks treats the same as a failed check.
    """
    # The original text repeated the word "able" across a line break
    # ("is able / able to downgrade"); the duplicate has been removed.
    format_print(
        """
        Warning:
            The AP job queue log will have additional information
            written into it. HTCondor versions older than V10.6.0
            are not able to digest this new information. Once
            upgraded, the earliest versions that HTCondor is able
            to downgrade to without issues are V10.5.0 (Feature)
            and V10.0.4 (LTS).
        $(<EMPTY_LINE>)
            UPGRADE WILL BREAK ABILITY TO DOWNGRADE TO CERTAIN VERSIONS
        """,
        offset=4,
        newline=True,
        debug=Debug.WARNING,
    )


def warn_job_router_syntax_deprecation(*args, **kwargs):
    """Warn when deprecated JOB_ROUTER_* configuration macros are present.

    Looks up each old-syntax job router macro in the HTCondor configuration.
    Returns True when none is present; otherwise prints the macros that were
    found together with a deprecation notice and returns False. Arguments
    are ignored.
    """
    prefix = "JOB_ROUTER_"
    found_knobs = [
        prefix + suffix
        for suffix in ("DEFAULTS", "ENTRIES", "ENTRIES_CMD", "ENTRIES_FILE")
        if prefix + suffix in htcondor.param
    ]

    if found_knobs:
        header = """
            Warning:
                The following configuration macro(s):
            """
        format_print(header, offset=4, newline=True, debug=Debug.WARNING)

        for knob in found_knobs:
            format_print(f"- {knob}", offset=12, debug=Debug.WARNING)

        explanation = """
            Are deprecated and will be removed for V24 of HTCondor.
            The new configuration syntax for the job router is defined
            using JOB_ROUTER_ROUTE_NAMES and JOB_ROUTER_ROUTE_<name>.
            For more information visit:
                https://htcondor.readthedocs.io/en/latest/grid-computing/job-router.html#the-htcondor-job-router
            $(<EMPTY_LINE>)
            Note: The removal will occur during the lifetime of the
                  HTCondor V23 feature series.
            """
        format_print(explanation, offset=8, debug=Debug.WARNING)
        return False

    return True


def check_desktop_policy(*args, **kwargs):
    """Check for use of desktop policy configuration knobs that became hidden in V10.4.0

    The check works in three passes over ``condor_config_val`` output:
      1. Scan ``condor_config_val -dump`` for values that reference one of
         the desktop policy macros, and for lines that mention CpuBusyTime.
      2. Query the main policy knobs to see whether they come from a
         desktop policy; if enough of them do, referenced macros are ignored.
      3. For each referenced macro, keep it only if its definition is
         reported as ``<default>``.

    Arguments are ignored. Returns True when no problem was found; otherwise
    prints a report and returns False.
    """
    # Config items formerly available 100% of the time
    attrs = [
        "ActivationTimer",
        "ConsoleBusy",
        "CpuBusy",
        "CpuIdle",
        "JustCPU",
        "KeyboardBusy",
        "KeyboardNotBusy",
        "LastCkpt",
        "MachineBusy",
        "NonCondorLoadAvg",
    ]
    # A line exactly equal to this one is not reported for its use of CpuBusyTime
    default_suspend_line = "SUSPEND = ($(KeyboardBusy) || ( (CpuBusyTime > 120) && $(ActivationTimer) > 90))"
    found_attrs = set()
    has_cpu_busy_time = []

    # Pass 1: inspect every "name = value" line of the configuration dump
    p = subprocess.run(["condor_config_val", "-dump"], stdout=subprocess.PIPE)
    cmd_output = p.stdout.rstrip().decode()
    has_problem = False
    for line in cmd_output.split("\n"):
        line = line.strip()
        # Matching is done case-insensitively on a lowercased copy
        check = line.lower()
        parts = check.split("=", 1)
        # Lines without "=" are not macro definitions
        if len(parts) < 2:
            continue
        for attr in attrs:
            if attr.lower() in parts[1]:
                if attr == "CpuBusy":
                    # "cpubusy" is a prefix of "cpubusytime": only count this
                    # line when some occurrence of "cpubusy" is not the start
                    # of "cpubusytime".
                    # NOTE(review): the scan covers the whole line (macro name
                    # included), not just the value -- confirm this is intended.
                    pos = check.find("cpubusy")
                    false_positive = True
                    while pos != -1:
                        if check.find("cpubusytime", pos) != pos:
                            false_positive = False
                            break
                        pos = check.find("cpubusy", pos + 1)
                    if false_positive:
                        continue
                # Ignore references that occur inside the definition of one
                # of the desktop policy macros themselves
                not_in_other_def = True
                for other in attrs:
                    if other.lower() == parts[0].strip():
                        not_in_other_def = False
                        break
                if not_in_other_def:
                    found_attrs.add(attr)
        # Any mention of CpuBusyTime other than the stock SUSPEND line is a problem
        if "cpubusytime" in check and line != default_suspend_line:
            has_cpu_busy_time.append(line)
            has_problem = True

    # Pass 2: count the lines of verbose output for these knobs that mention
    # a desktop policy
    knobs = [
        "IS_OWNER",
        "KILL",
        "PREEMPT",
        "SLOTS_CONNECTED_TO_KEYBOARD",
        "SUSPEND",
    ]
    count_using_policy = 0
    for knob in knobs:
        p = subprocess.run(["condor_config_val", "-v", knob], stdout=subprocess.PIPE)
        cmd_output = p.stdout.rstrip().decode()
        for line in cmd_output.split("\n"):
            line = line.upper()
            if "POLICY:DESKTOP" in line or "POLICY:UWCS_DESKTOP" in line:
                count_using_policy += 1
    # NOTE(review): presumably 3 or more hits means a desktop policy is in
    # use, so the macros are still provided -- confirm the threshold.
    if count_using_policy >= 3:
        found_attrs.clear()

    # Pass 3: a referenced macro is only a problem when its definition is
    # reported as <default>; macros defined elsewhere are collected for removal
    custom_set = []
    for attr in found_attrs:
        p = subprocess.run(
            ["condor_config_val", "-v", attr],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        cmd_output = p.stdout.rstrip().decode()
        is_default = False
        for line in cmd_output.split("\n"):
            if "<default>" in line.lower():
                has_problem = True
                is_default = True
                break
        if not is_default:
            custom_set.append(attr)

    # Removal happens after the loop so the set is not modified while iterating
    for attr in custom_set:
        found_attrs.remove(attr)

    # Report: list affected macros first, then lines using CpuBusyTime
    if has_problem:
        format_print(
            "Error checking for desktop policy configuration macros:", newline=True
        )
        need_newline = False
        if len(found_attrs) > 0:
            # Sorted for stable output (this turns the set into a list)
            found_attrs = sorted(found_attrs)
            format_print("The following configuration macros:", offset=4, newline=True)
            for attr in found_attrs:
                format_print(f"- {attr}", offset=6)
            format_print(
                """
                Are no longer added to the configuration table by default.
                To re-enable these macros add one of the following to the
                configuration:
                    - use FEATURE:POLICY_EXPR_FRAGMENTS
                    - use POLICY:DESKTOP
                    - use POLICY:UWCS_DESKTOP
                """,
                offset=4,
            )
            # Separate the two report sections with an empty line
            need_newline = True
        if len(has_cpu_busy_time) > 0:
            format_print(
                """
                The attribute CpuBusyTime no longer represents a delta time
                in V10.4.0+ of HTCondor, but rather a time stamp of when
                the CPU became busy. This should be replaced with 'CpuBusyTimer'.
                The following lines are effected:
                """,
                newline=need_newline,
                offset=4,
            )
            for line in has_cpu_busy_time:
                format_print(line, offset=6)
        return False
    return True


def check_pslot_default(*args, **kwargs):
    """Check whether a StartD RANK expression conflicts with the V23.0.0 P-Slot default.

    Returns True (no issue) when any configuration key starts with
    ``slot_type_`` (case-insensitive), when RANK is not set, or when RANK
    parses as a plain number. Otherwise prints an explanation and returns
    False. Arguments are ignored.
    """
    # Any custom slot type definition short-circuits the check
    if any(key.lower().startswith("slot_type_") for key in htcondor.param.keys()):
        return True

    rank = htcondor.param.get("RANK")
    if rank is not None:
        try:
            # Only whether the value parses as a number matters, not the number itself
            float(rank)
        except ValueError:
            issue = f"""
                Issue found with StartD Rank:
                $(<EMPTY_LINE>)
                    Partitionable Slot is turned on by default in V23.0.0 of
                    HTCondor. This makes the configuration 'RANK = {rank}'
                    inconsequential. To continue having RANK be applied to jobs
                    ran on this EP configure the use of static slots by adding
                    either custom slot types or 'use FEATURE:StaticSlots'.
                """
            format_print(issue, newline=True)
            return False

    return True


# =============================================================================================
# Table of all known checks. Entries are listed and run in this order.
# Entries with warning=True can be disabled with --no-warnings or selected
# with the WARNINGS tag; "daemons" names the daemons that must appear in
# DAEMON_LIST for the entry to run.
INCOMPATIBILITY_CHECKS = [
    Check(
        test=warn_dagman_env,
        name="DAGMan Environment Cleanse",
        tag="DAG-ENV",
        synopsis="DAGMan no longer grabs the entire environment it is submitted from",
        version=(10, 4, 0),
        daemons=["SCHEDD"],
        warning=True,
    ),
    Check(
        test=warn_user_records,
        name="User Records in the Schedd",
        tag="USER-REC",
        synopsis="New user records feature will prevent downgrades to certain versions",
        version=(10, 5, 0),
        daemons=["SCHEDD"],
        warning=True,
    ),
    Check(
        test=warn_job_router_syntax_deprecation,
        name="Job Router Config Deprecation",
        tag="JRC-DEP",
        synopsis="Old Job Router Configuration Syntax is Deprecated",
        version=NEXT_MAJOR_VERSION,
        daemons=["JOB_ROUTER"],
        warning=True,
    ),
    # No daemons listed: this entry is never filtered by DAEMON_LIST
    Check(
        test=check_desktop_policy,
        name="Desktop Policy Configuration Macros",
        tag="DESKTOP",
        synopsis="Checks for use of desktop policy configuration macros",
        desc="""
             This check looks at two changes related to desktop policy
             configuration macros:
                 1. A set of Desktop Policy macros available in configuration
                    no longer appear by default. (i.e. CpuBusy, KeyboardBusy, etc)
                 2. The configuration macro CpuBusyTime has changed meaning.
             """,
        version=(10, 4, 0),
    ),
    Check(
        test=check_pslot_default,
        name="Enable P-Slots as default",
        tag="P-SLOT",
        synopsis="Use of StartD RANK doesn't function with new Partitionable Slot default",
        version=(23, 0, 0),
        daemons=["STARTD"],
    ),
]


def list_tags():
    """Print a two column table of every check TAG and the name of its check.

    The TAGs shown are the values accepted by the --only and --ignore options.
    """
    banner = """
        Incompatibility Check Tags:
              TAGS  |  Check
            ==================
        """
    format_print(banner, newline=True)

    # One row per check, tag left-aligned in a MAX_TAG_LEN wide column
    rows = [
        f"{check.tag.upper():<{MAX_TAG_LEN}}| '{check.name}'"
        for check in INCOMPATIBILITY_CHECKS
    ]
    for row in rows:
        format_print(row, offset=4)


def list_checks(is_ce, ignore_list, only_list):
    """Print a summary of every check that passes the tag and system filters.

    is_ce       -- truthy when listing for an HTCondor-CE installation
    ignore_list -- upper-cased tags to leave out (takes precedence)
    only_list   -- upper-cased tags to restrict the listing to; the special
                   entry WARNINGS additionally admits every warning check

    Each entry shows its position in the full check table, tag, name,
    version and synopsis. Detailed descriptions are only printed at
    DETAILED verbosity or higher.
    """
    intro = f"""
        Incompatibility Checks for upgrading to HTCondor V{NEXT_MAJOR_VERSION[0]}:
            (For detailed descriptions use -v/--verbose)
        """
    format_print(intro, newline=True)

    total = len(INCOMPATIBILITY_CHECKS)
    cnt_fmt = len(str(total))
    # Width of the "(i/total) " prefix so follow-up lines align under the tag
    pad_len = (2 * cnt_fmt) + 4
    padding = " " * pad_len

    hidden_system = System.STANDARD if is_ce else System.CE
    warnings_requested = "WARNINGS" in only_list

    # i numbers entries by position in the full table, filtered or not
    for i, check in enumerate(INCOMPATIBILITY_CHECKS, start=1):
        tag = check.tag.upper()
        if tag in ignore_list:
            continue

        if only_list and tag not in only_list:
            if not (warnings_requested and check.is_warning):
                continue

        if check.system == hidden_system:
            continue

        if IS_WINDOWS and check.needs_root:
            continue

        if check.needs_root:
            root = "Requires ROOT"
        else:
            root = ""
        ver = make_version_str(check.version)

        entry = f"""
            ({i:>{cnt_fmt}}/{total}) [{check.tag:<{MAX_TAG_LEN}s}] '{check.name}'
            {padding}{root}
            {padding}Version : V{ver}
            {padding}Synopsis: {check.synopsis}
            """
        format_print(entry, offset=4, newline=(i == 1))

        format_print(
            check.desc, offset=(4 + pad_len), newline=True, debug=Debug.DETAILED
        )
        format_print("", newline=True, debug=Debug.DETAILED)


def run_checks(args, ignore_list, only_list, daemons):
    """Run every check that makes it through filtering.

    args        -- parsed command line options (uses ce, all and warnings)
    ignore_list -- upper-cased tags to skip (takes precedence)
    only_list   -- upper-cased tags to restrict to; the special entry
                   WARNINGS additionally admits every warning check
    daemons     -- container tested with ``in`` for each daemon a check
                   needs; main() passes the upper-cased DAEMON_LIST string,
                   which makes this a substring test

    A check whose test returns a falsy value is counted as failed.
    Returns the tuple (number of failed checks, number of checks ran).
    """
    failed = 0
    ran = 0

    skipped_system = System.STANDARD if args.ce else System.CE
    warnings_requested = "WARNINGS" in only_list

    for check in INCOMPATIBILITY_CHECKS:
        tag = check.tag.upper()
        if tag in ignore_list:
            continue

        if only_list and tag not in only_list:
            if not (warnings_requested and check.is_warning):
                continue

        if check.system == skipped_system:
            continue

        if check.needs_root:
            # Root-only checks are silently skipped on Windows
            if IS_WINDOWS:
                continue
            if os.geteuid() != 0:
                format_print(
                    f"Warning: Skipping check '{check.name}' because it requires root.",
                )
                continue

        # Unless --all was given, skip issues introduced at or before the installed version
        if not (args.all or check.version > CURRENT_VERSION):
            continue

        if check.is_warning and not args.warnings:
            continue

        # Every daemon the check depends on must be configured
        if any(daemon.upper() not in daemons for daemon in check.daemons):
            continue

        ran += 1

        format_print(
            f"Checking {check.name} [{check.tag}]:", newline=True, debug=Debug.DETAILED
        )

        if check.test(args):
            format_print("No issue found with check", offset=8, debug=Debug.DETAILED)
        else:
            failed += 1

    return (failed, ran)


class NegateAction(argparse.Action):
    """argparse action for a paired ``--flag`` / ``--no-flag`` switch.

    Stores True in ``dest`` when the option is given in its positive form
    (e.g. ``-w`` or ``--warnings``) and False when the option name, with its
    leading dashes removed, starts with ``no`` (e.g. ``--no-warnings``).
    """

    def __call__(self, parser, args, values, option_string=None):
        # The dashes to discard are the leading ones, so lstrip is required.
        # The previous rstrip never removed them, which made the test always
        # False and turned the positive forms into "disable" as well.
        flag_name = (option_string or "").lstrip("-")
        setattr(args, self.dest, not flag_name.startswith("no"))


def parse_args():
    """Build the command line parser for the tool and return the parsed options.

    The returned namespace carries: ce, all, ignore, only, warnings, dump,
    verbose and tags.
    """
    parser = argparse.ArgumentParser(
        prog="condor_upgrade_check",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            """
            Tool to help check for known incompatabilites with a current
            HTCondor install when upgrading to a new major version.

            This should be ran on a host with HTCondor installed prior
            to upgrading to new major version (V23 -> V24) to check current
            installation for potential issues that may occur during an
            upgrade.

            Examples:
                condor_upgrade_check
                condor_upgrade_check --tags
                condor_upgrade_check --dump
                condor_upgrade_check -i BAR -vv
                condor_upgrade_check -ce --only FOO BAR BAZ --no-warnings
            """
        ),
    )

    # (option strings, add_argument settings), registered in this order so
    # the generated help output keeps its layout
    option_table = (
        (
            ("-ce", "--ce", "-CE", "--CE"),
            dict(
                dest="ce",
                action="store_true",
                help="Do checks for HTCondor-CE environment.",
            ),
        ),
        (
            ("-a", "--all"),
            dict(
                action="store_true",
                help="Run all available checks including ones for issues introduced in versions older than the installed system.",
            ),
        ),
        (
            ("-i", "--ignore"),
            dict(
                nargs="+",
                action="extend",
                metavar="TAG",
                default=[],
                help="Skip the provided TAGs. This option takes precedence over the --only option.",
            ),
        ),
        (
            ("-o", "--only"),
            dict(
                nargs="+",
                action="extend",
                metavar="TAG",
                default=[],
                help="Check only the provided TAGs. Use the tag WARNINGS to only check warnings",
            ),
        ),
        (
            ("-w", "--warnings", "--no-warnings"),
            dict(
                dest="warnings",
                action=NegateAction,
                nargs=0,
                default=True,
                help="Enable or disable persistent upgrade warnings (i.e. issues that can't be tested).",
            ),
        ),
        (
            ("-d", "--dump"),
            dict(
                action="store_true",
                help="Display available incompatibility checks.",
            ),
        ),
        (
            # Starts at 1 so that a single -v selects the next verbosity level
            ("-v", "--verbose"),
            dict(
                action="count",
                default=1,
                help="Increase tool output verbosity.",
            ),
        ),
        (
            ("-t", "--tags"),
            dict(
                action="store_true",
                help="Display the incompatibility check TAGs which can be used with --only & --ignore",
            ),
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()


def main():
    """Entry point: parse options, then list tags, list checks, or run the checks.

    Sets the module level VERBOSITY from the -v count (counts beyond the
    defined levels map to Debug.MAX). In CE mode the HTCondor-CE
    configuration is loaded and CURRENT_VERSION is re-read from it before
    the checks run. A contact notice is printed at the end in every mode.
    """
    global VERBOSITY, CURRENT_VERSION

    if not INCOMPATIBILITY_CHECKS:
        ver = make_version_str(NEXT_MAJOR_VERSION)
        format_print(
            f"Currently no known incompatibilities to check for in preparation for V{ver}",
            newline=True,
            offset=4,
        )
        sys.exit(0)

    args = parse_args()

    try:
        VERBOSITY = Debug(args.verbose)
    except ValueError:
        VERBOSITY = Debug.MAX

    # Tags are matched case-insensitively by upper-casing them once here
    ignore_list = list(map(str.upper, args.ignore))
    only_list = list(map(str.upper, args.only))

    if args.tags:
        list_tags()
    elif args.dump:
        list_checks(args.ce, ignore_list, only_list)
    else:
        system = "HTCondor"

        if args.ce:
            system += "-CE"
            config = "/etc/condor-ce/condor_config"
            if not os.path.exists(config):
                format_print(
                    f"""Error: {system} Specified but failed to locate {config}""",
                    err=True,
                    newline=True,
                )
                sys.exit(1)
            # Point the bindings at the CE configuration and re-read the version
            os.environ["CONDOR_CONFIG"] = config
            htcondor.reload_config()
            CURRENT_VERSION = tuple(
                map(int, htcondor.param["CONDOR_VERSION"].split("."))
            )

        format_print(
            f"Checking {system} for possible issues with known incompatibilities upgrading to V{NEXT_MAJOR_VERSION[0]}",
            newline=True,
        )

        num_failed, num_ran = run_checks(
            args, ignore_list, only_list, htcondor.param["DAEMON_LIST"].upper()
        )

        report = f"""
            Final Report:
                {num_ran} checks tested against currently installed {system}.
            """
        format_print(report, newline=True, debug=Debug.DETAILED)

        if num_failed:
            format_print(
                f"Current install failed {num_failed} checks.",
                offset=4,
                debug=Debug.DETAILED,
            )
        else:
            verdict = f"""
                No issues found with installed {system} for known incompatibilities.
                Upgrade to V{NEXT_MAJOR_VERSION[0]} should be safe.
                """
            # The report header above already adds spacing at DETAILED verbosity
            format_print(verdict, newline=(Debug.DETAILED > VERBOSITY), offset=4)

    contact = """
        To ask any questions regarding incompatibilities email:
            htcondor-users@cs.wisc.edu
        """
    format_print(contact, newline=True)


# Run the tool only when executed as a script, not when imported
if __name__ == "__main__":
    main()
