#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2013             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# First generation of agents output only the process command line:
# /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Second generation of agents output the user in brackets in the first column:
# (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Third generation (from 1.1.5) also outputs virtual memory, resident memory and %CPU:
# (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan

# New since 1.2.1i2: WATO compatible syntax
#
# Holds a list of rules which are matching hosts by names or tags and
# where each rule holds a dictionary.
#
# Each of those entries defines the following options:
#
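# 1. descr:    item name to be used for the service description
# 2. match:    matching definition (process name or ~regular expression)
# 3. user:     user definition (user name, ANY_USER or GRAB_USER)
# 4. perfdata: True for the check with perfdata (ps.perf), False for ps
# 5. levels:   four numbers (thresholds): warnmin, okmin, okmax, warnmax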
# WATO style rule set. Each rule holds the option dictionary described
# above as first element, followed by a tag list and a host list.
inventory_processes_rules = []

# Legacy tuple style inventory specifications, one list for the plain
# check (ps) and one for the check with performance data (ps.perf).
inventory_processes = []
inventory_processes_perf = []
# User specification "any user": a false value switches off the user
# comparison in process_matches().
ANY_USER = None
# User specification "grab user": inventory_ps_common() takes over the user
# of the found process into the check parameters and replaces "%u" in the
# service description with it.
GRAB_USER = False

def inventory_ps(info):
    """Inventory function of the plain ps check (without performance data).

    Evaluates the legacy specifications in inventory_processes; WATO rules
    are only used if their 'perfdata' option is False.
    """
    return inventory_ps_common(inventory_processes, info, handle_perfdata=False)

def inventory_ps_perf(info):
    """Inventory function of the ps.perf check (with performance data).

    Evaluates the legacy specifications in inventory_processes_perf; WATO
    rules are only used if their 'perfdata' option is True.
    """
    return inventory_ps_common(inventory_processes_perf, info, handle_perfdata=True)

def inventory_ps_common(invdata, info, handle_perfdata = False):
    """Build the inventory of process services for the current host.

    invdata         -- legacy inventory specifications (inventory_processes
                       or inventory_processes_perf)
    info            -- agent output; the first column of each line is the
                       node name or None for non-clusters
    handle_perfdata -- only WATO rules whose 'perfdata' option equals this
                       value are used

    Returns a list of unique pairs (service description, parameters) where
    the parameters are (pattern, user, warnmin, okmin, okmax, warnmax).
    Raises MKGeneralException if a service description contains more '%s'
    than the regular expression has subexpressions.
    """
    specs = []

    # Legacy variables inventory_processes, inventory_processes_perf.
    # Since 1.1.7i1 an entry may be prefixed with a list of host names
    # (8 elements) or with a tag list plus a host list (9 elements).
    for raw_spec in invdata:
        taglist = None
        n_fields = len(raw_spec)
        if n_fields == 8:
            taglist, hostlist, spec = [], raw_spec[0], raw_spec[1:]
        elif n_fields == 9:
            taglist, hostlist, spec = raw_spec[0], raw_spec[1], raw_spec[2:]
        else:
            spec = raw_spec

        # Filter out entries which do not match our current host
        if taglist is not None:
            applies = hosttags_match_taglist(tags_of_host(g_hostname), taglist) \
                      and in_extraconf_hostlist(hostlist, g_hostname)
            if not applies:
                continue

        specs.append(spec)

    # New WATO style inventory_processes_rules
    for rule in inventory_processes_rules:
        rule_tags, rule_hosts = rule[1:3]
        applies = hosttags_match_taglist(tags_of_host(g_hostname), rule_tags) \
                  and in_extraconf_hostlist(rule_hosts, g_hostname)
        if not applies:
            continue

        options = rule[0]

        # ps skips the rules meant for ps.perf and vice versa
        if handle_perfdata != options['perfdata']:
            continue

        specs.append((options['descr'], options['match'], options['user']) + options['levels'])

    inventory = []
    for servicedesc, pattern, userspec, warnmin, okmin, okmax, warnmax in specs:
        num_perc_s = servicedesc.count("%s")
        for line in info:
            # line[0] is the node name, the process data follows.
            # process_matches() hands back the actual user via found_user.
            found_user = [userspec]
            matches = process_matches(line[1:], pattern, found_user)

            # An empty tuple is a valid match, so compare with False explicitly
            if matches == False:
                continue

            if len(matches) < num_perc_s:
                raise MKGeneralException("Invalid entry in inventory_processes: service description '%s' contains "
                        "%d times '%%s', but regular expression '%s' contains only %d subexpression(s)." % \
                        (servicedesc, num_perc_s, pattern, len(matches)))

            if userspec == GRAB_USER:
                i_userspec = found_user[0]
                i_servicedesc = servicedesc.replace("%u", i_userspec)
            else:
                i_userspec = userspec
                i_servicedesc = servicedesc

            # New in 1.2.2b4: all %1, %2, etc. are replaced with the first,
            # second, ... group. This allows a reordering of the matched groups.
            for nr, group in enumerate(matches, 1):
                i_servicedesc = i_servicedesc.replace("%%%d" % nr, group)

            # It is allowed (1.1.4) that the pattern contains more
            # subexpressions than the service description. In that case only
            # the first subexpressions are used as item.
            i_servicedesc = i_servicedesc % (matches[:num_perc_s])

            # All subexpressions of the pattern need to be instantiated with
            # the actual values of the found process.
            i_pattern = instantiate_regex_pattern(pattern, matches)
            inv = ( i_servicedesc, ( i_pattern, i_userspec, warnmin, okmin, okmax, warnmax ) )
            if inv not in inventory:
                inventory.append(inv)

    return inventory

def instantiate_regex_pattern(pattern, matches):
    """Replace the groups of pattern one after the other by the texts in matches.

    Each text is handed to instantiate_regex_pattern_once() together with
    the result of the previous step. With no matches the pattern is
    returned unchanged.
    """
    instantiated = pattern
    for group_text in matches:
        instantiated = instantiate_regex_pattern_once(instantiated, group_text)
    return instantiated

def instantiate_regex_pattern_once(pattern, match):
    r"""Replace the first group of pattern by the literal text match.

    The text is escaped with escape_regex_chars() so that the resulting
    pattern matches exactly that text at the position of the group.
    Returns the new pattern; a pattern without group is returned unchanged.
    """
    escaped = escape_regex_chars(match)
    # The replacement is passed as a function on purpose: a replacement
    # *string* is processed as a template by sub(), which would turn the
    # escaped "\\" back into a single backslash and so undo the escaping.
    #
    # this correctly handles \( and \) but not [^)] - sorry
    return re.compile(r"(?<!\\)\(.*?(?<!\\)\)").sub(lambda _mo: escaped, pattern, count=1)

def escape_regex_chars(match):
    r"""Return match with all regex special characters backslash-escaped.

    Escaped are the characters [ ] \ ( ) . ? { } | * ^ $ and +.
    """
    return "".join(
        ("\\" + c) if c in r"[]\().?{}|*^$+" else c
        for c in match)


def process_matches(ps, procname, l_user):
    """Check whether one process line matches a process and user specification.

    ps       -- columns of one process line (without the node name)
    procname -- one of
                1. a string beginning with ~: the rest is a regular
                   expression that must match the *beginning* of the process
                   line (columns joined with single spaces; check the output
                   of check_mk -d HOSTNAME)
                2. a string *not* beginning with ~: it must be equal to the
                   first column of the process line (i.e. the process name),
                   a simple string compare is done
                3. a false value: every process matches
    l_user   -- list with one element, the user specification. The element
                is overwritten with the user found in the process line, or
                with None if the line has no user column.

    Returns False if the line does not match. Otherwise returns the tuple of
    regex groups, which is the empty tuple for non-regex matches.
    """
    wanted_user = l_user[0]

    # The agent might output the user (and more) in brackets in the first column
    first_col = ps[0]
    has_user_column = first_col.startswith("(") and first_col.endswith(")") and len(ps) > 1
    if has_user_column:
        actual_user = first_col[1:-1].split(",")[0]
        if wanted_user and wanted_user != actual_user:
            return False
        l_user[0] = actual_user # hand the actual user back to the caller
        ps = ps[1:]
    else:
        l_user[0] = None

    if not procname:
        return ()

    if procname[0].startswith("~"):
        regex_text = procname[1:]
        compiled = compiled_regexes.get(regex_text)
        if not compiled:
            compiled = re.compile(regex_text)
            compiled_regexes[regex_text] = compiled
        found = compiled.match(" ".join(ps))
        if found:
            return found.groups()
        return False

    if ps[0] == procname:
        return ()
    return False

def check_procs(item, params, info, with_perfdata):
    """Count the processes matching the check parameters and apply the levels.

    item          -- service item (not used by the check itself)
    params        -- (procname, warnmin, okmin, okmax, warnmax) or
                     (procname, user, warnmin, okmin, okmax, warnmax)
    info          -- agent output; the first column of each line is the node
                     name or None for non-clusters
    with_perfdata -- whether performance data is to be returned

    Returns (state, text, perfdata). With an invalid number of parameters
    the pair (3, text) is returned instead. The state is CRIT if the count
    is outside warnmin..warnmax, WARN if it is outside okmin..okmax.
    """
    if len(params) == 5:
        procname, warnmin, okmin, okmax, warnmax = params
        user = None
    elif len(params) == 6:
        procname, user, warnmin, okmin, okmax, warnmax = params
    else:
        return (3, "UNKNOWN - invalid check parameters: %s" % (params,))

    count = 0
    virtual_size = 0
    resident_size = 0
    percent_cpu = 0.0
    extended_perfdata = False

    running_on = set() # collect information about nodes the processes run on
    for line in info:
        node_name = line[0]
        ps = line[1:]
        # An empty tuple is a valid match, so compare with False explicitly
        if process_matches(ps, procname, [user]) == False:
            continue

        count += 1
        if node_name is not None:
            running_on.add(node_name)
        if ps[0].startswith("("):
            addinfo = ps[0][1:-1].split(",")
            if len(addinfo) >= 4: # extended performance data
                virtual_size += int(addinfo[1])  # kB
                resident_size += int(addinfo[2]) # kB
                percent_cpu += savefloat(addinfo[3])
                extended_perfdata = True

    if with_perfdata:
        perfdata = [ ("count", count, okmax+1, warnmax+1, 0) ]
        # No matching process found. Therefore we don't know yet whether
        # the agent sends extended data for processes. We need to know this
        # in order to create the RRD with the correct number of DSes, so we
        # look at the first process in the agent output. If the agent sent
        # no process at all we cannot tell and must not crash.
        if count == 0 and info and len(info[0]) > 1:
            firstword = info[0][1]
            if firstword.startswith("(") and firstword.count(",") >= 3:
                extended_perfdata = True
        if extended_perfdata:
            perfdata += [ ("vsz", virtual_size),
                          ("rss", resident_size),
                          ("pcpu", percent_cpu) ]
    else:
        perfdata = []

    infotext = " - %d processes" % count
    if running_on:
        # sorted: keep the output stable between check cycles
        infotext += " [running on %s]" % ", ".join(sorted(running_on))

    if count > warnmax or count < warnmin:
        return (2, "CRIT" + infotext + " (ok from %d to %d)" % (okmin, okmax), perfdata)
    elif count > okmax or count < okmin:
        return (1, "WARN" + infotext + " (ok from %d to %d)" % (okmin, okmax), perfdata)
    else:
        return (0, "OK" + infotext, perfdata)

# Registration of the plain process check: counts the matching processes
# and does not return performance data.
check_info['ps'] = {
    # check_procs() is called with with_perfdata=False
    "check_function"          : lambda i,p,n: check_procs(i,p,n,False),
    # inventory based on inventory_processes and the rules without perfdata
    "inventory_function"      : inventory_ps,
    "service_description"     : "proc_%s",
    "has_perfdata"            : False,
    "node_info"               : True, # add first column with actual host name
    "group"                   : "ps",
}

# Registration of the process check with performance data: same check
# logic as ps, but the process count (and, if the agent sends them, the
# memory and CPU figures) is returned as performance data.
check_info['ps.perf'] = {
    # check_procs() is called with with_perfdata=True
    "check_function"          : lambda i,p,n: check_procs(i,p,n,True),
    # inventory based on inventory_processes_perf and the rules with perfdata
    "inventory_function"      : inventory_ps_perf,
    "service_description"     : "proc_%s",
    "has_perfdata"            : True,
    "node_info"               : True, # add first column with actual host name
    "group"                   : "ps",
}
