#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# First generation of agents output only the process command line:
# /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Second generation of agents output the user in brackets in the first columns:
# (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
# (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan

# Fourth generation (>=1.2.5), additional columns in brackets:
# (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
#(\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1)    NOTEPAD.EXE

# Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time

# The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
# is reported by the windows agent itself. However, we still support sections from psperf.bat
# if the agent version is lower than 1.2.5.
# Windows agent now ships a plugin "psperf.bat" that adds a section from wmic
# to the output:
# <<<ps:sep(44)>>>
# [wmic process]
# ^M
# Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
# WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
# WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
# WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
# WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
# WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
# WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
# WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
# WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M


# New since 1.2.1i2: WATO compatible syntax
#
# Holds a list of rules which are matching hosts by names or tags and
# where each rule holds a dictionary.
#
# Each of those entries defines the following options:
#
# 1. descr:    item name to be used for the service description
# 2. match:    matching-definition
# 3. user:     user definition
# 4. perfdata: monitor with perfdata
# 5. levels:   four numbers (thresholds)
# WATO compatible rule list (see the description above); starts out empty
# and is handed to inventory_ps_common() by inventory_ps().
inventory_processes_rules = []

# Second configuration list handed to inventory_ps_common() by inventory_ps().
# NOTE(review): presumably the pre-WATO (legacy) discovery specification - confirm.
inventory_processes = []
# Not referenced anywhere in this file.
# NOTE(review): presumably kept for old configurations of the deprecated
# ps.perf check - confirm before removing.
inventory_processes_perf = []
# NOTE(review): the two constants below look like marker values for the
# "user" part of a discovery entry (match any user / take over the user
# found on the host) - confirm against ps.include.
ANY_USER = None
GRAB_USER = False


# FIXME: Direct access to g_item_state is not allowed in checks normally,
# but there is no such API to access and modify it like it is needed here.
def ps_cleanup_counters(parsed):
    """Remove counters of vanished processes from the global item state.

    parsed is the process table; the process ids still present in it are
    collected with ps_get_current_pids() and every counter reported by
    ps_get_counters_to_delete() for those ids is deleted from g_item_state.
    """
    # Legacy key that was once used for some kind of caching: drop it if it
    # is still around, silently do nothing otherwise.
    g_item_state.pop("last.cleared.ps_", None)

    current_pids = ps_get_current_pids(parsed)
    for stale_ident in ps_get_counters_to_delete(current_pids):
        del g_item_state[stale_ident]


# Get the idents of the counters which can be deleted because the process id of
# the counter is not found anymore in the process table.
#
# Handle these formats of idents:
# Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
# New magic keys:        ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the g_item_state keys of counters whose process has vanished.

    pids holds the process ids which are currently present in the process
    table. They are compared as strings against the last dot separated
    component of the counter name.

    Two key formats are recognized:
      - plain strings starting with "ps_stat" or "ps_wmic"
      - tuples whose first element is "ps"; the counter name is then taken
        from the third element

    All other keys are left alone, as are counters whose last name component
    is not a number.
    """
    # Build the lookup set once instead of scanning a list for every counter.
    known_pids = set(pids)

    counters_to_delete = []
    for ident in g_item_state:
        if isinstance(ident, tuple):
            if ident[0] != "ps":
                continue
            check_ident = ident[2]
        elif ident.startswith(("ps_stat", "ps_wmic")):
            check_ident = ident
        else:
            continue

        pid = check_ident.split(".")[-1]
        if pid.isdigit() and pid not in known_pids:
            counters_to_delete.append(ident)
    return counters_to_delete


def ps_get_current_pids(parsed):
    """Collect the process ids of all entries of the process table.

    Only entries for which ps_has_extended_perfdata() accepts the process
    info (second element of the entry) contribute; the process id is the
    fifth field of that process info.
    """
    return [
        entry[1][4]
        for entry in parsed
        if ps_has_extended_perfdata(entry[1])
    ]


# FIXME: Refactor this function for better readability
def ps_merge_wmic_info(info):
    """Merge the legacy "[wmic process]" data into the process lines.

    info is the list of agent lines; the first element of each line is the
    node. Lines between the markers "[wmic process]" and "[wmic process end]"
    are rows of the psperf.bat plugin, the first of them being the header
    row. All other lines are regular process lines.

    Returns a pair (cpu_cores, lines):
      - cpu_cores is the thread count of the "system idle process" if that
        is known, otherwise 1.
      - lines is a new list of process lines enriched with a process info
        column built from the wmic rows, if wmic rows are present and the
        agent does not report that data itself. Otherwise it is the
        unmodified info argument.

    The process lines which get enriched are modified in place.
    """
    ps_result     = []
    wmic_info     = {}
    wmic_headers  = []
    use_wmic_info = True
    cpu_cores     = 1
    is_wmic       = False

    lines = iter(info)
    for line in lines:
        marker = line[-1]
        if marker == '[wmic process]':
            is_wmic = True
            # The line right after the marker is the header row. Its first
            # column is the node name added by the check API.
            try:
                wmic_headers = ["node"] + next(lines)[1:]
            except StopIteration:
                break # Section ends right after the marker
            continue
        if marker == '[wmic process end]':
            is_wmic = False
            continue

        if is_wmic:
            row = dict(zip(wmic_headers, line))
            # Row might be damaged. I've seen this agent output:
            # Node - TILE-BUILDER02
            # ERROR:
            # Description = Quota violation
            #
            # Node,
            if "Name" in row and "ProcessId" in row:
                wmic_info[(row["node"], row["Name"], row["ProcessId"])] = row
            continue

        # Ignore data from the psperf.bat if the agent has this feature builtin
        if use_wmic_info and len(line[1].split(",")) >= 10:
            use_wmic_info = False
        if not use_wmic_info:
            # We need to determine the number of cpu_cores without the wmic_info
            if len(line) < 3:
                # On some rare instances the name of the process is missing
                # This line will be ignored
                continue
            if line[2].lower() == "system idle process":
                cpu_cores = int(line[1][1:-1].split(",")[9])
        ps_result.append(line)

    if not (use_wmic_info and wmic_info):
        # NOTE(review): this hands back the raw input, i.e. including any
        # wmic rows and the lines skipped above, and not ps_result.
        return cpu_cores, info

    def take_wmic_row(node, name):
        # Each wmic row is only handed out once, so several processes with
        # the same name get different rows.
        for key in wmic_info:
            if key[0:2] == (node, name):
                return wmic_info.pop(key)
        return None

    has_thread_count = "ThreadCount" in wmic_headers
    merged    = []
    seen_pids = set() # Remove duplicate entries
    for line in ps_result:
        psinfo = take_wmic_row(line[0], line[1])
        if psinfo:
            # Get number of CPU cores from system idle process
            if has_thread_count and psinfo["Name"].lower() == "system idle process":
                cpu_cores = int(psinfo["ThreadCount"])
            pid = int(psinfo["ProcessId"])
            if pid not in seen_pids:
                seen_pids.add(pid)
                virt     = int(psinfo["VirtualSize"])    // 1024 # Bytes -> KB
                resi     = int(psinfo["WorkingSetSize"]) // 1024 # Bytes -> KB
                pagefile = int(psinfo["PageFileUsage"])  // 1024 # Bytes -> KB
                userc    = int(psinfo["UserModeTime"])   # do not resolve counter here!
                kernelc  = int(psinfo["KernelModeTime"]) # do not resolve counter here!
                handlec  = int(psinfo.get("HandleCount", 0)) # Only in newer psperf.bat versions
                threadc  = int(psinfo["ThreadCount"])    # do not resolve counter here!
                # Insert the process info column in front of the process name
                line[1:1] = [ "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" %
                                (virt, resi, pid, pagefile, userc, kernelc, handlec, threadc) ]
        merged.append(line)

    return cpu_cores, merged


# This mainly formats the line[1] element which contains the process info (user,...)
def ps_parse_process_entries(parsed):
    """Normalize the process info column of all process lines.

    Each line is expected to look like [node, process_info, command...] or,
    if the agent sent no process info, [node, command...].

    A process info of the form "(a,b,c)" is replaced by the tuple of its
    comma separated fields. Lines without such a column get the placeholder
    (None,) inserted, so the column layout is the same for discovery and
    check. The lines are modified in place.

    Returns a new list with only those lines that still have a command line,
    which drops entries like [None, u'(<defunct>,,,)'].
    """
    for entry in parsed:
        raw_info = entry[1]
        is_bracketed = raw_info[0] == "(" and raw_info[-1] == ")"
        if not is_bracketed:
            entry.insert(1, (None,))
            continue
        entry[1] = tuple(raw_info[1:-1].split(","))

    return [ entry for entry in parsed if len(entry) > 2 ]


# Produces a list of lists where each sub list is built as follows:
# [
#     [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
# ]
# First element:  The node the data comes from in a cluster or None
# Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
# Third element:  The process command line
def parse_ps(info):
    """Parse function of the ps check.

    Returns the pair (cpu_cores, process_table), where process_table is the
    result of ps_parse_process_entries() applied to the lines delivered by
    ps_merge_wmic_info().
    """
    cpu_cores, merged_lines = ps_merge_wmic_info(info)
    process_table = ps_parse_process_entries(merged_lines)

    # Counters of processes which are gone must not pile up in the item state
    ps_cleanup_counters(process_table)

    return cpu_cores, process_table


def inventory_ps(info):
    """Discovery function: delegate to inventory_ps_common().

    info consists of the parsed ps section (cpu_cores, parsed) followed by
    the three extra sections. Only the process table is needed here.
    """
    ps_section, _mem_info, _solaris_mem_info, _statgrab_mem_info = info
    _cpu_cores, parsed = ps_section
    return inventory_ps_common(inventory_processes, inventory_processes_rules, parsed)


def check_ps(item, params, info):
    """Check function: determine the total RAM and delegate to check_ps_common().

    info consists of the parsed ps section (cpu_cores, parsed) followed by
    the extra sections mem, solaris_mem and statgrab_mem. The first of the
    extra sections that is non-empty is asked for "MemTotal". If none is
    available, or the chosen one has no "MemTotal", total_ram is passed on
    as None.
    """
    (cpu_cores, parsed), mem_info, solaris_mem_info, statgrab_mem_info = info

    total_ram = None
    if mem_info:
        total_ram = parse_proc_meminfo_bytes(mem_info).get("MemTotal")
    else:
        # Same precedence as before: solaris_mem first, then statgrab_mem
        mem_section = solaris_mem_info or statgrab_mem_info
        if mem_section:
            mem_total = mem_section.get("MemTotal")
            # A section without "MemTotal" must not crash the check
            if mem_total is not None:
                # presumably reported in kB and converted to bytes - TODO confirm
                total_ram = mem_total * 1024

    return check_ps_common(item, params, parsed, cpu_cores = cpu_cores, total_ram = total_ram)


# Registration of the ps check
check_info["ps"] = {
    # Entry points
    "parse_function": parse_ps,
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    # Input: helper files and additional agent sections
    "includes": ["ps.include", "mem.include"],
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem"],
    "node_info": True,  # add first column with actual host name
    # Service and rule set
    "service_description": "Process %s",
    "has_perfdata": True,
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
}

# NOTE: This check is deprecated and will be removed any decade now. ps now
# does always performance data.
# Registration of the deprecated subcheck ps.perf. It has neither a parse nor
# an inventory function of its own and shares check_ps with the ps check.
check_info["ps.perf"] = {
    # Entry point
    "check_function": check_ps,
    # Input: helper files and additional agent sections
    "includes": ["ps.include", "mem.include"],
    "extra_sections": ["mem", "solaris_mem", "statgrab_mem"],
    "node_info": True,  # add first column with actual host name
    # Service and rule set
    "service_description": "Process %s",
    "has_perfdata": True,
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
}
