Skip to content
Snippets Groups Projects
Commit faad0b45 authored by root's avatar root
Browse files

Added check_mellanox.

parent 476ba433
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/python
# Author: Devon Merner (dmerner)
# Date: November 17th, 2016
# Purpose: To monitor Mellanox switches in similar fashion to the check_openmanage script that was made by Trond H. Amundsen.
# Exit statuses handed back to the monitoring system via sys.exit().
# The 0-3 numbering follows the usual Nagios plugin convention.
STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN = range(4)

# Problem descriptions are appended to these as the checks run; whichever
# is non-empty at the end of the script decides the final status line.
WARNING_TEXT = CRITICAL_TEXT = ""
import sys
import argparse
import netsnmp
class _PluginArgumentParser(argparse.ArgumentParser):
    """Argument parser that reports usage errors as an UNKNOWN plugin result."""

    def error(self, message):
        """Print a one-line UNKNOWN status and exit with STATE_UNKNOWN.

        The stock argparse implementation exits with status 2, which is this
        plugin's STATE_CRITICAL and would be read as a failure of the switch
        rather than a mistake in the check's own command line.
        """
        self.print_usage(sys.stderr)
        print("UNKNOWN - " + message + " | ")
        sys.exit(STATE_UNKNOWN)


# Command-line interface. Numeric options are validated by argparse
# (type=int) so a malformed value is rejected up front with a clear message
# instead of surfacing later as an uncaught ValueError.
parser = _PluginArgumentParser()
parser.add_argument("-c", default="public", dest="community",
                    help="Community to query the agent")
parser.add_argument("-v", default=2, type=int, dest="version",
                    help="SNMP version to use")
parser.add_argument("host", default="localhost", nargs="?",
                    help="Agent to retrieve variables from")
parser.add_argument("-d", action="store_true", dest="debug",
                    help="Print debug information")

# Warning/critical thresholds
parser.add_argument("-tW", default=40, type=int, dest="tempwarn",
                    help="Board temperature monitor warning threshold in degrees celsius "
                         "(returns warning if equivelant or greater than this number) "
                         "Default: 40")
parser.add_argument("-tC", default=50, type=int, dest="tempcrit",
                    help="Board temperature monitor critical threshold in degrees celsius "
                         "(returns critical if equivelant or greater than this number) "
                         "Default: 50")
parser.add_argument("-fW", default=2000, type=int, dest="fanwarn",
                    help="Fan monitor warning threshold in RPM "
                         "(returns warning if equivelant or less than this number) "
                         "Default: 2000")
parser.add_argument("-fC", default=1000, type=int, dest="fancrit",
                    help="Fan monitor critical threshold in RPM "
                         "(returns critical if equivelant or less than this number) "
                         "Default: 1000")
options = parser.parse_args()

# Keyword arguments passed unchanged to every netsnmp call in this script.
args = {
    "Version": options.version,
    "DestHost": options.host,
    "Community": options.community,
}
# Parse the thresholds once, before any SNMP traffic, so a malformed value
# fails immediately rather than part-way through the walk.
temp_warn = int(options.tempwarn)
temp_crit = int(options.tempcrit)
fan_warn = int(options.fanwarn)
fan_crit = int(options.fancrit)
scale_temperature = int(options.version) == 1

dataReturned = 0
# Collect fan/temperature sensor data
for idx in netsnmp.snmpwalk(netsnmp.Varbind(".1.3.6.1.2.1.99.1.1"), **args):
    dataReturned = 1
    # Mellanox indexes start with 200 for some reason.
    if not idx.startswith("200"):
        continue
    # NOTE(review): these look like the ENTITY-MIB name/description and the
    # ENTITY-SENSOR-MIB units/value columns -- confirm against the switch MIBs.
    sensortype, desc, units, value = netsnmp.snmpget(
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.7", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.2", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.99.1.1.1.6", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.99.1.1.1.4", idx),
        **args)
    if sensortype is None:
        continue

    # The reading arrives as text. It must be converted before it is compared
    # with a threshold: comparing the raw string with an int is never numeric
    # (Python 2 ranks every str above every int, so each board temperature
    # looked critical; Python 3 raises TypeError).
    try:
        reading = float(value)
    except (TypeError, ValueError):
        reading = None  # missing or non-numeric value

    # Convert into actual units if using SNMP v1
    if reading is not None and scale_temperature and sensortype.startswith("Temperature"):
        reading = reading / 10
        value = reading

    if options.debug:
        print("{} {} {} {}".format(sensortype, desc, value, units))

    # Nothing to evaluate for entities without a numeric reading.
    if reading is None:
        continue

    # Guard the text fields so a missing description/unit cannot crash the
    # message concatenation below.
    label = desc or ""
    unit_text = units or ""

    # Check Thresholds
    if sensortype.startswith("Temperature") and label.lower().startswith("mgmt/board"):
        problem = label + " temperature is " + str(value) + " " + unit_text + ". "
        if reading >= temp_crit:
            CRITICAL_TEXT += problem
        elif reading >= temp_warn:
            WARNING_TEXT += problem
    elif sensortype.startswith("Fan"):
        problem = label + " speed is " + str(value) + " " + unit_text + ". "
        if reading <= fan_crit:
            CRITICAL_TEXT += problem
        elif reading <= fan_warn:
            WARNING_TEXT += problem

# An empty walk means nothing could be checked, so report UNKNOWN and stop.
if dataReturned == 0:
    print("UNKNOWN - SNMP returned no data (Host is unreachable or SNMP is not responding) | ")
    sys.exit(STATE_UNKNOWN)
# Power supply pass: for every value returned by the walk, fetch the
# entity's name, description and alarm state, and flag alarmed supplies.
PSU_COLUMNS = (
    ".1.3.6.1.2.1.47.1.1.1.1.7",
    ".1.3.6.1.2.1.47.1.1.1.1.2",
    ".1.3.6.1.2.1.131.1.1.1.5",
)
for entity in netsnmp.snmpwalk(netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.1"), **args):
    psu_name, psu_desc, psu_alarm = netsnmp.snmpget(
        *[netsnmp.Varbind(column, entity) for column in PSU_COLUMNS],
        **args)

    # Only entities whose name begins with "Power" are of interest here.
    if psu_name is None or not psu_name.startswith("Power"):
        continue

    # NOTE(review): the alarm value is matched by exact equality with the
    # single byte 0x10, so a reply carrying any other or additional bits is
    # treated as OK -- confirm this against what the switch actually reports.
    psu_failed = psu_alarm == "\x10"
    if psu_failed:
        CRITICAL_TEXT += "Power Supply " + psu_desc + " is CRITICAL "

    if options.debug:
        print("{} {} {}".format(psu_name, psu_desc, "CRITICAL" if psu_failed else "OK"))
# Final verdict: critical findings take precedence over warnings, and an
# empty pair of accumulators means every check passed. The status line is
# printed once and the matching exit status is returned to the caller.
if CRITICAL_TEXT:
    status_line = "CRITICAL - " + CRITICAL_TEXT + "|"
    exit_status = STATE_CRITICAL
elif WARNING_TEXT:
    status_line = "WARNING - " + WARNING_TEXT + "|"
    exit_status = STATE_WARNING
else:
    status_line = "OK - No problems detected |"
    exit_status = STATE_OK

print(status_line)
sys.exit(exit_status)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment