Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
icinga-plugins
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
cscf
icinga-plugins
Commits
faad0b45
Commit
faad0b45
authored
3 years ago
by
root
Browse files
Options
Downloads
Patches
Plain Diff
Added check_mellanox.
parent
476ba433
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
check_mellanox
+123
-0
123 additions, 0 deletions
check_mellanox
with
123 additions
and
0 deletions
check_mellanox
0 → 100755
+
123
−
0
View file @
faad0b45
#!/usr/bin/python
# Author: Devon Merner (dmerner)
# Date: November 17th, 2016
# Purpose: To monitor Mellanox switches in similar fashion to the check_openmanage script that was made by Trond H. Amundsen.
# Plugin exit states, numbered 0-3 in the order
# OK, WARNING, CRITICAL, UNKNOWN.
STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN = range(4)

# Human-readable problem descriptions, appended to as the checks run.
# Both start out empty; an empty string means "nothing to report".
WARNING_TEXT = CRITICAL_TEXT = ""
import
sys
import
argparse
import
netsnmp
# Command-line interface.
#
# Every numeric option is declared with type=int so that a malformed value
# (e.g. "-tW hot") is rejected by argparse with a usage message, instead of
# surfacing later as a ValueError traceback in the middle of the check.
# Code that still wraps these options in int() keeps working unchanged.
parser = argparse.ArgumentParser()
parser.add_argument("-c", default="public", dest="community",
                    help="Community to query the agent")
parser.add_argument("-v", default=2, dest="version", type=int,
                    help="SNMP version to use")
# Optional positional argument: falls back to localhost when omitted.
parser.add_argument("host", default="localhost", nargs="?",
                    help="Agent to retrieve variables from")
parser.add_argument("-d", action="store_true", dest="debug",
                    help="Print debug information")

# Warning/critical thresholds
parser.add_argument("-tW", default=40, dest="tempwarn", type=int,
                    help="Board temperature monitor warning threshold in degrees celsius (returns warning if equivelant or greater than this number) Default: 40")
parser.add_argument("-tC", default=50, dest="tempcrit", type=int,
                    help="Board temperature monitor critical threshold in degrees celsius (returns critical if equivelant or greater than this number) Default: 50")
parser.add_argument("-fW", default=2000, dest="fanwarn", type=int,
                    help="Fan monitor warning threshold in RPM (returns warning if equivelant or less than this number) Default: 2000")
parser.add_argument("-fC", default=1000, dest="fancrit", type=int,
                    help="Fan monitor critical threshold in RPM (returns critical if equivelant or less than this number) Default: 1000")
# Parse the command line, then bundle the SNMP session settings that are
# passed as keyword arguments to every netsnmp call in this script.
options = parser.parse_args()
args = dict(
    Version=int(options.version),
    DestHost=options.host,
    Community=options.community,
)
# Set to 1 as soon as the sensor walk yields anything at all; used after the
# loop to tell "no problems" apart from "the agent did not answer".
dataReturned = 0

# The thresholds and the protocol version are fixed for the whole run, so
# convert them once instead of on every sensor.
temp_crit = int(options.tempcrit)
temp_warn = int(options.tempwarn)
fan_crit = int(options.fancrit)
fan_warn = int(options.fanwarn)
snmp_v1 = int(options.version) == 1

# Collect fan/temperature sensor data
# NOTE(review): when given a bare Varbind, netsnmp.snmpwalk() returns the
# retrieved values -- confirm that the items yielded for this subtree really
# are the entity indexes the snmpget() below expects.
for idx in netsnmp.snmpwalk(netsnmp.Varbind(".1.3.6.1.2.1.99.1.1"), **args):
    dataReturned = 1

    # Mellanox indexes start with 200 for some reason.
    if idx is None or not idx.startswith("200"):
        continue

    sensortype, desc, units, value = netsnmp.snmpget(
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.7", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.2", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.99.1.1.1.6", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.99.1.1.1.4", idx),
        **args)

    if sensortype is None:
        continue

    # Individual fields may come back as None; use empty text so that
    # building the status message below cannot raise TypeError.
    desc = desc or ""
    units = units or ""

    is_temperature = sensortype.startswith("Temperature")
    is_fan = sensortype.startswith("Fan")

    # Numeric form of the reading, used for every threshold comparison.
    # The raw value is a string: comparing it directly against an int is
    # always true on Python 2 (false CRITICALs) and a TypeError on Python 3.
    reading = None
    if is_temperature or is_fan:
        try:
            reading = float(value)
        except (TypeError, ValueError):
            # Missing or non-numeric reading: leave it out of the
            # threshold checks rather than abort the whole plugin.
            reading = None

    # Convert into actual units if using SNMP v1
    if snmp_v1 and is_temperature and reading is not None:
        reading = reading / 10
        value = reading

    if options.debug:
        print("{} {} {} {}".format(sensortype, desc, value, units))

    if reading is None:
        continue

    # Check Thresholds
    if is_temperature and desc.lower().startswith("mgmt/board"):
        if reading >= temp_crit:
            CRITICAL_TEXT += desc + " temperature is " + str(value) + " " + units + ". "
        elif reading >= temp_warn:
            WARNING_TEXT += desc + " temperature is " + str(value) + " " + units + ". "
    elif is_fan:
        if reading <= fan_crit:
            CRITICAL_TEXT += desc + " speed is " + str(value) + " " + units + ". "
        elif reading <= fan_warn:
            WARNING_TEXT += desc + " speed is " + str(value) + " " + units + ". "

# Nothing came back from the walk at all: report UNKNOWN instead of OK.
if dataReturned == 0:
    print("UNKNOWN - SNMP returned no data (Host is unreachable or SNMP is not responding) |")
    sys.exit(STATE_UNKNOWN)
# Collect power supply information
for idx in netsnmp.snmpwalk(netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.1"), **args):
    sensortype, desc, alarm = netsnmp.snmpget(
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.7", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.47.1.1.1.1.2", idx),
        netsnmp.Varbind(".1.3.6.1.2.1.131.1.1.1.5", idx),
        **args)

    # Only entities whose name starts with "Power" are of interest here.
    if sensortype is None or not sensortype.startswith("Power"):
        continue

    # The description may come back as None; keep message building safe.
    desc = desc or ""

    # The alarm status is a bit field carried in the first octet. The
    # original exact match against "\x10" missed that bit whenever any other
    # bit was set alongside it, so test the bits instead.
    # Per ENTITY-STATE-MIB (EntityAlarmStatus) 0x20 is "critical" and 0x10
    # is "major"; both are reported as CRITICAL.
    # NOTE(review): confirm the bit layout against the switch firmware.
    alarm_bits = ord(alarm[:1]) if alarm else 0

    alarmnice = "OK"
    if alarm_bits & 0x30:
        alarmnice = "CRITICAL"
        CRITICAL_TEXT += "Power Supply " + desc + " is CRITICAL. "

    if options.debug:
        print("{} {} {}".format(sensortype, desc, alarmnice))
# Critical/Warning Think Logic
# Pick the most severe non-empty result (critical beats warning beats OK),
# then print the single status line and exit with the matching state code.
if CRITICAL_TEXT:
    status_line = "CRITICAL - " + CRITICAL_TEXT + "|"
    exit_state = STATE_CRITICAL
elif WARNING_TEXT:
    status_line = "WARNING - " + WARNING_TEXT + "|"
    exit_state = STATE_WARNING
else:
    status_line = "OK - No problems detected |"
    exit_state = STATE_OK

print(status_line)
sys.exit(exit_state)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment