#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.


# Example of output from ipmi:
# <<<ipmi>>>
# ambienttemp 25.800 degrees_C ok na na na 34.800 40.200 na
# bulk.v12-0-s0 11.940 Volts ok na 10.200 na na 13.800 na
# bulk.v3_3-s0 3.360 Volts ok na 3.000 na na 3.600 na
# bulk.v3_3-s5 3.240 Volts ok na 3.000 na na 3.600 na
# bulk.v5-s0 5.040 Volts ok na 4.500 na na 5.520 na
# bulk.v5-s5 5.040 Volts ok na 4.500 na na 5.520 na
# cpu0.dietemp 51.000 degrees_C ok na na na 70.200 73.200 na
# ...
# On another host
# mb.t_amb 24.000 degrees_C ok na na na 70.000 75.000 80.000
# mb.v_bat 2.839 Volts ok 2.340 2.527 2.621 3.307 3.510 3.697
# mb.v_+3v3stby 3.218 Volts ok 2.595 2.785 2.993 3.598 3.789 3.996
# mb.v_+3v3 3.339 Volts ok 2.595 2.785 2.993 3.598 3.789 3.996
# mb.v_+5v 5.044 Volts ok 3.484 3.978 4.498 5.486 5.980 6.500
# fp.t_amb 21.000 degrees_C ok na na na 30.000 35.000 45.000
# pdb.t_amb 21.000 degrees_C ok na na na 70.000 75.000 80.000
# io.t_amb 19.000 degrees_C ok na na na 70.000 75.000 80.000
# p0.t_core 18.000 degrees_C ok na na na 62.000 67.000 75.000
# p0.v_vdd 1.332 Volts ok 0.792 0.900 0.996 1.596 1.692 1.800

# Yet another host (HP DL 360G5)
# <<<ipmi>>>
# UID_Light 0.000 unspecified ok na na 0.000 na na na
# Int._Health_LED 0.000 unspecified ok na na 0.000 na na na
# Ext._Health_LED 0.000 unspecified ok na na 0.000 na na na
# Power_Supply_1 0.000 unspecified nc na na 0.000 na na na
# Power_Supply_2 0.000 unspecified nc na na 0.000 na na na
# Power_Supplies 0.000 unspecified nc na na 0.000 na na na
# VRM_1 0.000 unspecified cr na na 0.000 na na na
# VRM_2 0.000 unspecified cr na na 0.000 na na na
# Fan_Block_1 34.888 unspecified nc na na 75.264 na na na
# Fan_Block_2 29.792 unspecified nc na na 75.264 na na na
# Fan_Block_3 34.888 unspecified nc na na 75.264 na na na
# Fan_Blocks 0.000 unspecified nc na na 0.000 na na na
# Temp_1 39.000 degrees_C ok na na -64.000 na na na
# Temp_2 16.000 degrees_C ok na na -64.000 na na na
# Temp_3 30.000 degrees_C ok na na -64.000 na na na
# Temp_4 30.000 degrees_C ok na na -64.000 na na na
# Temp_5 25.000 degrees_C ok na na -64.000 na na na
# Temp_6 30.000 degrees_C ok na na -64.000 na na na
# Temp_7 30.000 degrees_C ok na na -64.000 na na na
# Power_Meter 180.000 Watts cr na na 384.000 na na na

# And this host has some false-criticals (PowerMeter, VirtualFan)
# <<<ipmi>>>
# Temp_1 17.000 degrees_C ok 0.000 0.000 0.000 40.000 42.000 46.000
# Temp_2 40.000 degrees_C ok 0.000 0.000 0.000 0.000 82.000 83.000
# Temp_3 44.000 degrees_C ok 0.000 0.000 0.000 0.000 82.000 83.000
# Temp_4 52.000 degrees_C ok 0.000 0.000 0.000 0.000 87.000 92.000
# Temp_5 46.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
# Temp_6 55.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
# Temp_7 51.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
# Temp_8 58.000 degrees_C ok 0.000 0.000 0.000 0.000 78.000 83.000
# Temp_9 74.000 degrees_C ok 0.000 0.000 0.000 0.000 110.000 115.000
# Temp_10 31.000 degrees_C ok 0.000 0.000 0.000 0.000 60.000 65.000
# Virtual_Fan 19.600 unspecified nc na na na na na na
# Power_Meter 236.000 Watts cr na na na na na na

# IPMI has two operation modes:
# 1. detailed
# 2. summarized
# This controls how the inventory is done. In summarized mode, the
# inventory returns one single check item 'Summary' - or nothing
# if the host does not send any IPMI information.
# In detailed mode, one item is returned for each sensor.

# Newer output formats (sensor list and compact/discrete sensors)
# <<<ipmi:sep(124)>>>
# BB +5V           | 5.070      | Volts      | ok    | na        | 4.446     | 4.576     | 5.408     | 5.564     | na
# BB +12V AUX      | 11.904     | Volts      | ok    | na        | 10.416    | 10.726    | 13.144    | 13.578    | na
# BB +0.9V         | 0.898      | Volts      | ok    | na        | 0.811     | 0.835     | 0.950     | 0.979     | na
# Serverboard Temp | 39.000     | degrees C  | ok    | na        | 5.000     | 10.000    | 61.000    | 66.000    | na
# Ctrl Panel Temp  | 31.000     | degrees C  | ok    | na        | 0.000     | 5.000     | 44.000    | 48.000    | na
# Fan 1            | 7740.000   | RPM        | ok    | na        | 1720.000  | 1978.000  | na        | na        | na
# Fan 2            | 8557.000   | RPM        | ok    | na        | 1720.000  | 1978.000  | na        | na        | na
# Fan 3            | 7611.000   | RPM        | ok    | na        | 1720.000  | 1978.000  | na        | na        | na
# <<<ipmi_discrete:sep(124)>>>
# PS3 Status       | C8h | ok | 10.1 | Presence detected
# PS4 Status       | C9h | ok | 10.2 | Presence detected
# Pwr Unit Stat    | 01h | ok | 21.1 |
# Power Redundancy | 02h | ok | 21.1 | Fully Redundant
# BMC Watchdog     | 03h | ok |  7.1 |
# PS1 Status       | C8h | ok | 10.1 | Presence detected, Failure detected     <= NOT OK !!
# PS2 Status       | C9h | ok | 10.2 | Presence detected

# Broken (truncated) agent output: the last line is cut off mid-sensor
# <<<ipmi:cached(1472175405,300)>>>
# 01-Inlet_Ambient 18.000 degrees_C ok na na na na 42.000 46.000
# 02-CPU_1 40.000 degrees_C ok na na na na 70.000 na
# 03-CPU_2 40.000 degrees_C ok na na na na 70.000 na
# 04-DIMM_P1_1-3 32.000 degrees_C ok na na na na 87.000 na
# 05-DIMM_P1_4-6 32.000 degrees_C ok na na na na 87.000 na
# 06-DIMM_P2_1-3 27.000 degrees_C ok na na na na 87.000 na
# 07-DIMM_P2_4-6 26.000 degrees_C ok na na na na 87.000 na
# 09-Chipset 47.000 degrees_C ok na na na na 105.000 na
# 10-VR_P1 32.000 degrees_C ok na na na na 115.000 120.000
# 11-VR_P2 27.000 degrees_C ok na na na na 115.000 120.000
# 12-VR_P1_Zone 27.000 degrees_C ok na na na na 80.000 85.000
# 13-VR_P2_Zone 25.000 degrees_C ok na na na na 80.000 85.000
# 14-VR_P1_Mem 33.000 degrees_C ok na na na na 115.000 120.000
# 15-VR_P2_Mem 23.000 degrees_C ok na na na na 115.000 120.000
# 16-VR_P1Mem_Zone 32.000 degrees_C ok na na na na 80.000 85.000
# 17-VR_P2Mem_Zone 22.000 degrees_C ok na na na na 80.000 85.000
# 18-HD_Controller 57.000 degrees_C ok na na na na 90.000 na
# 19-Supercap 32.000 degrees_C ok na na na na 65.000 na
# 21-PCI_Zone 30.000 degrees_C ok na na na na 80.000 85.000
# 23-I/O_1_Zone 28.000 degrees_C ok na na na na 80.000 85.000
# 26-I/O_LOM 40.000 degrees_C ok na na na na 100.000 na
# 27-Sys_Exhaust 31.000 degrees_C ok na na na na 80.000 85.000
# PS3_Inpu


# Global default for the inventory mode: inventory_ipmi() only creates the
# single item 'Summary' while this is True (and the host rule does not
# disable summarizing); otherwise one item per sensor is created.
ipmi_summarize = True
# Set to True in order to ignore entries whose state starts with 'nr' or
# 'ns' (see ipmi_ignore_entry()).
ipmi_ignore_nr = False
# Sensor name prefixes to ignore (matched with startswith()),
# example: [ "Power_Meter", "Virtual_Fan" ]
ipmi_ignored_sensors = []
# Rule list that is looked up per host via host_extra_conf(). The first
# returned entry may carry the keys "summarize" and "ignored_sensors".
inventory_ipmi_rules = []


def ipmi_get_ignored_sensors(hostname):
    """Return the list of sensor name prefixes to ignore for *hostname*.

    Starts from a copy of the global ``ipmi_ignored_sensors`` list and
    appends the ``"ignored_sensors"`` entry of the first rule that
    ``host_extra_conf`` returns for the host from ``inventory_ipmi_rules``
    (if there is one). The global list itself is never modified.
    """
    combined = ipmi_ignored_sensors[:]

    host_rules = host_extra_conf(hostname, inventory_ipmi_rules)
    if not host_rules:
        return combined

    # Only the first rule entry is taken into account
    combined.extend(host_rules[0].get("ignored_sensors", []))
    return combined


def ipmi_ignore_entry(name, state, ignored_sensors):
    """Tell whether the sensor *name* with status *state* must be skipped.

    A sensor is skipped if the global ``ipmi_ignore_nr`` is set and the
    state starts with 'nr' or 'ns', or if the sensor name starts with one
    of the prefixes listed in *ignored_sensors*.
    """
    if ipmi_ignore_nr and state.startswith(('nr', 'ns')):
        return True

    return any(name.startswith(prefix) for prefix in ignored_sensors)


def parse_ipmi(info):
    """Normalize the agent sections into a flat list of 10-field sensor rows.

    *info* is a pair ``(ipmi_section, ipmi_discrete_section)``; each part is
    a list of already split lines and may be empty or None.

    Every returned row has the layout
    ``[name, value, unit, status, unrec_low, crit_low, warn_low, warn_high,
    crit_high, unrec_high]``. Discrete sensors (5 fields per line) carry no
    readings: their row holds only the name and the status, all other
    fields are None. Lines that end up with any other field count are
    dropped.
    """
    ipmi_info, ipmi_discrete_info = info
    parsed = []
    for section in (ipmi_info, ipmi_discrete_info):
        for line in section or []:
            # A row needs at least a name and one more field; shorter
            # lines (e.g. truncated output) can never be valid.
            if len(line) < 2:
                continue

            # Compatible with older check versions: sensor names use
            # underscores instead of spaces. A list comprehension is used
            # for the remaining fields because list + map(...) only works
            # on Python 2.
            name = line[0].strip().replace(" ", "_")
            line = [name] + [field.strip() for field in line[1:]]

            # Discrete sensors have no values. The optional event text is
            # appended to the state in parentheses.
            if len(line) == 5:
                state = line[2]
                if line[4]:
                    state += " (%s)" % line[4]
                line = [line[0], None, None, state] + [None] * 6

            if len(line) == 10:
                parsed.append(line)

    return parsed


def inventory_ipmi(info):
    """Yield the items to inventorize as ``(item, None)`` pairs.

    In summarized mode a single item 'Summary' is created. In detailed
    mode one item per sensor is created, leaving out sensors that
    ipmi_ignore_entry() rejects. Without any parsed sensor nothing is
    yielded at all.
    """
    sensors = parse_ipmi(info)

    host_rules = host_extra_conf(g_hostname, inventory_ipmi_rules)
    if host_rules:
        rule_summarize = host_rules[0].get("summarize", True)
    else:
        rule_summarize = True

    # Both switches default to True. As soon as one of them is False the
    # user has changed it on purpose, so that setting overrides the other.
    if ipmi_summarize and rule_summarize and len(sensors) != 0:
        yield "Summary", None
        return

    ignored_sensors = ipmi_get_ignored_sensors(g_hostname)
    for sensor in sensors:
        sensor_name, sensor_status = sensor[0], sensor[3]
        if ipmi_ignore_entry(sensor_name, sensor_status, ignored_sensors):
            continue
        yield sensor_name, None


def check_ipmi(item, params, info):
    """Check function: dispatch to the summarized or the detailed check.

    The item 'Summary' is evaluated over all sensors by
    check_ipmi_summarized(); every other item is treated as the name of a
    single sensor and handled by check_ipmi_detailed().
    """
    sensors = parse_ipmi(info)
    if item != "Summary":
        return check_ipmi_detailed(item, sensors)
    return check_ipmi_summarized(sensors, params)


def ipmi_format_message(status, val, unit):
    """Build the text ``<status>[, <val>][ <unit>]`` for one sensor.

    The value is left out if it is None. The unit is left out if it is
    None or the placeholder 'unspecified'.
    """
    suffix = ""

    if val is not None:
        suffix += ", %s" % val

    if unit not in ('unspecified', None):
        suffix += " %s" % unit

    # Without value and unit the status is handed back untouched
    if not suffix:
        return status
    return status + suffix


def check_ipmi_detailed(item, parsed):
    """Check the single sensor named *item*.

    Returns ``(state, infotext, perfdata)`` for the first row of *parsed*
    whose name equals *item*, or None if there is no such row.

    State mapping: a status starting with 'nc' is WARN (1); a status
    starting with 'ok' that does not mention "failure detected" is OK (0);
    everything else is CRIT (2). Performance data is only produced for
    sensors that have a value.
    """
    for sensor in parsed:
        (name, val, unit, status, _unrec_low, _crit_low,
         _warn_low, warn_high, crit_high, _unrec_high) = sensor

        if name != item:
            continue

        perfdata = []
        if val is not None:
            perfdata.append((name, val + unit, warn_high, crit_high))

        if status.startswith('nc'):
            state = 1
        elif status.startswith('ok') and "failure detected" not in status.lower():
            state = 0
        else:
            state = 2

        infotext = "Status: %s" % ipmi_format_message(status, val, unit)
        return state, infotext, perfdata


def check_ipmi_summarized(parsed, params):
    """Evaluate all sensors at once for the item 'Summary'.

    *parsed* is the list of 10-field rows produced by parse_ipmi().
    *params* may be None or a dict; if it contains 'ignored_sensors'
    that list of name prefixes is used, otherwise the list is looked up
    via ipmi_get_ignored_sensors(). *params* is never modified.

    Returns ``(state, infotext, perfdata)``:
    - state is 2 if any sensor is critical, else 1 if any sensor is in
      state 'nc', else 0.
    - infotext lists the offending sensors, or the number of evaluated
      sensors if everything is OK.
    - perfdata holds the average of all readable ambient temperatures as
      "ambient_temp", or is empty if there is no such reading.
    """
    # Do not write the looked up list back into params: the dict belongs
    # to the caller. params being None is tolerated as well.
    if params and 'ignored_sensors' in params:
        ignored_sensors = params['ignored_sensors']
    else:
        ignored_sensors = ipmi_get_ignored_sensors(g_hostname)

    worst_status = 0
    warn_texts = []
    crit_texts = []
    count = 0
    ambient_values = []

    for name, val, unit, status, _unrec_low, _crit_low, \
        _warn_low, _warn_high, _crit_high, _unrec_high in parsed:

        # Skip datasets which have no valid data (zero value, no unit and state nc)
        if val == '0.000' and unit == 'unspecified' and status.startswith('nc'):
            continue

        if ipmi_ignore_entry(name, status, ignored_sensors):
            continue

        count += 1
        infotext = "%s (%s)" % (name, ipmi_format_message(status, val, unit))

        if status.startswith('nc'):
            worst_status = max(worst_status, 1)
            warn_texts.append(infotext)

        elif status.startswith('nr') and ipmi_ignore_nr:
            # ipmi_ignore_entry() already filters these sensors when
            # ipmi_ignore_nr is set; kept as a safeguard.
            pass

        elif not status.startswith('ok') or "failure detected" in status.lower():
            worst_status = 2
            crit_texts.append(infotext)

        if "amb" in name or "Ambient" in name:
            # Only sensors with a numeric reading may enter the average.
            # Counting a sensor whose value cannot be converted (e.g. 'na'
            # or None for discrete sensors) would pull the average down.
            try:
                ambient_values.append(float(val))
            except (TypeError, ValueError):
                pass

    if ambient_values:
        perfdata = [ ("ambient_temp", sum(ambient_values) / len(ambient_values)) ]
    else:
        perfdata = []

    if worst_status == 0:
        infotexts = [ "%d sensors OK" % count ]
    else:
        infotexts = []
        if crit_texts:
            infotexts.append("CRIT are: %s" % ", ".join(crit_texts))
        if warn_texts:
            infotexts.append("WARN are: %s" % ", ".join(warn_texts))

    return worst_status, ' - '.join(infotexts), perfdata


def ipmi_precompile(hostname, item, params):
    """Return the check parameters extended by the ignored sensor list.

    A copy of *params* (or a fresh dict if *params* is None) is returned
    with the key 'ignored_sensors' set to the result of
    ipmi_get_ignored_sensors() for *hostname*. *item* is not used.
    """
    precomped = {} if params is None else params.copy()
    precomped['ignored_sensors'] = ipmi_get_ignored_sensors(hostname)
    return precomped


# Make sure, configuration variables needed during check time are present
# in precompiled code
check_config_variables.append("ipmi_ignore_nr")
check_config_variables.append("ipmi_ignored_sensors")


# Registration of the check. Items are either 'Summary' or a sensor name
# (see inventory_ipmi()), so services are named 'IPMI Sensor <item>'.
# The section "ipmi_discrete" is requested in addition to "ipmi";
# parse_ipmi() accordingly unpacks info into these two parts
# (presumably in this order - verify against the check API).
check_info["ipmi"] = {
    'inventory_function'  : inventory_ipmi,
    'check_function'      : check_ipmi,
    'service_description' : 'IPMI Sensor %s',
    'has_perfdata'        : True,
    'extra_sections'      : [ "ipmi_discrete" ],
}


# ipmi_precompile() adds the per-host list of ignored sensors to the
# check parameters as params['ignored_sensors'], which
# check_ipmi_summarized() reads.
precompile_params['ipmi'] = ipmi_precompile
