#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# EXAMPLE DATA FROM: WDC SSC-D0128SC-2100
#<<<smart>>>
#/dev/sda ATA WDC_SSC-D0128SC-   1 Raw_Read_Error_Rate     0x000b   100   100   050    Pre-fail  Always       -       16777215
#/dev/sda ATA WDC_SSC-D0128SC-   3 Spin_Up_Time            0x0007   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   5 Reallocated_Sector_Ct   0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   7 Seek_Error_Rate         0x000b   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-   9 Power_On_Hours          0x0012   100   100   000    Old_age   Always       -       1408
#/dev/sda ATA WDC_SSC-D0128SC-  10 Spin_Retry_Count        0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC-  12 Power_Cycle_Count       0x0012   100   100   000    Old_age   Always       -       523
#/dev/sda ATA WDC_SSC-D0128SC- 168 Unknown_Attribute       0x0012   100   100   000    Old_age   Always       -       1
#/dev/sda ATA WDC_SSC-D0128SC- 175 Program_Fail_Count_Chip 0x0003   100   100   010    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 192 Power-Off_Retract_Count 0x0012   100   100   000    Old_age   Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 194 Temperature_Celsius     0x0022   040   100   000    Old_age   Always       -       40 (Lifetime Min/Max 30/60)
#/dev/sda ATA WDC_SSC-D0128SC- 197 Current_Pending_Sector  0x0012   100   100   000    Old_age   Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 240 Head_Flying_Hours       0x0013   100   100   050    Pre-fail  Always       -       0
#/dev/sda ATA WDC_SSC-D0128SC- 170 Unknown_Attribute       0x0003   100   100   010    Pre-fail  Always       -       1769478
#/dev/sda ATA WDC_SSC-D0128SC- 173 Unknown_Attribute       0x0012   100   100   000    Old_age   Always       -       4217788040605


# Default parameters of the "smart.temp" check, which names this entry as its
# 'default_levels_variable'. The parameters are handed to check_temperature().
# NOTE(review): presumably (warn, crit) in degrees Celsius -- confirm against
# check_temperature.
factory_settings["smart_temp_default_levels"] = {"levels": (35, 40)}

# Level pairs per error counter, keyed by a short counter name.
# NOTE(review): nothing in the visible part of this file reads this dict;
# presumably each pair is (warn, crit) -- confirm before relying on it.
smart_stats_default_levels = {
    "realloc_events": (1, 1),
    "realloc_sectors": (1, 1),
    "spin_retries": (1, 1),
    "pending_retries": (1, 1),
    "pending_sectors": (1, 1),
    "cmd_timeouts": (5, 10),
    "e2e_errs": (1, 1),
    "uncorr_errs": (1, 1),
    "udma_crcs": (1, 1),
}

def parse_smart_raw_values(info):
    """Collect the raw SMART attribute values of all disks.

    info is the list of split lines of the agent section. Lines with fewer
    than 13 columns are ignored. Column 0 is taken as the disk name, column 4
    as the attribute name and column 12 as the raw value, which is converted
    with saveint(). Attributes named "Unknown_Attribute" are skipped.

    Returns a dict {disk name: {attribute name: raw value}}. A disk whose
    lines are all skipped attributes still gets an (empty) entry.
    """
    disks = {}

    for line in info:
        if len(line) < 13:
            continue

        # Look the disk up by name rather than comparing with the previous
        # line only: if lines of several disks are interleaved, the values
        # collected earlier for a disk must not be thrown away.
        disk = disks.setdefault(line[0], {})

        field = line[4]
        if field != "Unknown_Attribute":
            disk[field] = saveint(line[12])
    return disks


def parse_smart_normalized_values(info):
    """Collect the normalized SMART values and thresholds of all disks.

    info is the list of split lines of the agent section. Lines with fewer
    than 13 columns are ignored. Column 0 is taken as the disk name, column 4
    as the attribute name, column 6 as the normalized value and column 8 as
    its threshold. Attributes named "Unknown_Attribute" are skipped.

    Returns a dict {disk name: {attribute name: (value, threshold)}} with
    both numbers converted by int(); a non-numeric column raises ValueError.
    """
    disks = {}

    for line in info:
        if len(line) < 13:
            continue

        # Look the disk up by name rather than comparing with the previous
        # line only: if lines of several disks are interleaved, the values
        # collected earlier for a disk must not be thrown away.
        disk = disks.setdefault(line[0], {})

        field = line[4]
        if field != "Unknown_Attribute":
            disk[field] = int(line[6]), int(line[8])
    return disks


# Names of the SMART attributes that inventory_smart_stats() looks for and
# snapshots into the discovered parameters.
smart_stats_fields = [
    "Reallocated_Sector_Ct",
    "Spin_Retry_Count",
    "Reallocated_Event_Count",
    "Current_Pending_Sector",
    "Command_Timeout",
    "End-to-End_Error",
    "Reported_Uncorrect",
    "UDMA_CRC_Error_Count",
]

def inventory_smart_stats(info):
    """Discover every disk that reports at least one attribute of interest.

    The attributes of interest are those listed in smart_stats_fields. For
    each such disk one entry (disk name, snapshot) is returned, where snapshot
    is a dict holding the current raw value of every listed attribute that
    the disk reports. Disks without any of these attributes are left out.
    """
    found = []
    for name, attributes in parse_smart_raw_values(info).items():
        snapshot = dict((key, attributes[key])
                        for key in smart_stats_fields
                        if key in attributes)
        # An empty snapshot means the disk has none of the interesting fields
        if snapshot:
            found.append((name, snapshot))
    return found


def check_smart_stats(item, params, info):
    """Compare the raw SMART counters of one disk with their discovery snapshot.

    Arguments:
      item   -- disk name (first column of the agent section)
      params -- dict {attribute name: raw value}, a snapshot of the counters
                taken at the time of inventory (see inventory_smart_stats)
      info   -- the split lines of the agent section

    Returns (3, "Disk not found") if the section holds no data for item,
    otherwise (state, infotext, perfdata). Every known attribute the disk
    reports is listed in the infotext and in the perfdata. The state is
    raised to 2 when a snapshotted counter has a raw value above its
    snapshot. "Reallocated_Event_Count" is the exception: it raises the state
    to 2 only when its normalized value is at or below its threshold.
    """
    disks = parse_smart_raw_values(info)
    normalized = parse_smart_normalized_values(info)

    if item not in disks:
        return 3, "Disk not found"
    disk = disks[item]

    state = 0
    infos = []
    perfdata = []

    # (unit suffix, attribute name, label used in the infotext)
    known_fields = [
        (' hours', 'Power_On_Hours',          'Powered on'),
        ('',       'Power_Cycle_Count',       'Power cycles'),
        ('',       'Reallocated_Sector_Ct',   'Reallocated sectors'),
        ('',       'Reallocated_Event_Count', 'Reallocated events'),
        ('',       'Spin_Retry_Count',        'Spin retries'),
        ('',       'Current_Pending_Sector',  'Pending sectors'),
        ('',       'Command_Timeout',         'Command timeouts'),
        ('',       'End-to-End_Error',        'End-to-End errors'),
        ('',       'Reported_Uncorrect',      'Uncorrectable errors'),
        ('',       'UDMA_CRC_Error_Count',    'UDMA CRC errors'),
    ]

    for unit, field, descr in known_fields:
        if field not in disk:
            continue

        value = disk[field]
        infos.append("%s: %d%s" % (descr, value, unit))
        perfdata.append((field, value))

        # Only counters that were snapshotted during discovery are rated
        if field not in params:
            continue
        ref_value = params[field]

        # For reallocated event counts we experienced too many reported errors
        # for disks which still seem to be OK. The raw value increased by a
        # small amount but the aggregated value remained at its initial/ok
        # state. So we use the aggregated value now. Only for this field.
        if field == "Reallocated_Event_Count":
            norm_value, norm_threshold = normalized[item][field]
            if norm_value <= norm_threshold:
                state = 2
                # Do not claim that the value "looks OK" when it does not
                infos[-1] += "(!!) (was %d during discovery; normalized value %d has reached threshold %d)" % \
                             (ref_value, norm_value, norm_threshold)
            else:
                infos[-1] += " (was %d during discovery; normalized value looks OK)" % ref_value

        elif value > ref_value:
            state = 2
            infos[-1] += "(!!) (was %d during discovery)" % ref_value

    return state, ", ".join(infos), perfdata

# Registration of the "smart.stats" subcheck: one service per disk, named
# after the disk, backed by the two functions above.
check_info["smart.stats"] = {
    "check_function": check_smart_stats,
    "inventory_function": inventory_smart_stats,
    "has_perfdata": True,
    "service_description": "SMART %s Stats",
}


def inventory_smart_temp(info):
    """Discover every disk that reports a "Temperature_Celsius" attribute.

    Yields one (disk name, {}) pair per such disk; the empty dict means that
    no parameters are fixed at discovery time.
    """
    # items() instead of iteritems(): the latter exists on Python 2 only
    for disk_name, disk in parse_smart_raw_values(info).items():
        if "Temperature_Celsius" in disk:
            yield (disk_name, {})


def check_smart_temp(item, params, info):
    """Check the temperature reported by SMART for one disk.

    Returns None if the disk is not present in info or reports no
    "Temperature_Celsius" attribute. Otherwise the result of
    check_temperature() for the raw attribute value, called with params and
    the identifier "smart_<item>", is returned.
    """
    attributes = parse_smart_raw_values(info).get(item)
    if attributes is None or "Temperature_Celsius" not in attributes:
        return

    reading = attributes["Temperature_Celsius"]
    return check_temperature(reading, params, "smart_%s" % item)


# Registration of the "smart.temp" subcheck. check_temperature() is not
# defined in this file; presumably it is provided by the listed include.
check_info["smart.temp"] = {
    "check_function": check_smart_temp,
    "inventory_function": inventory_smart_temp,
    "service_description": "Temperature SMART %s",
    "group": "temperature",
    "has_perfdata": True,
    "includes": ["temperature.include"],
    "default_levels_variable": "smart_temp_default_levels",
}
