With Nagios you can monitor almost anything, and the philosophy is simple.
Nagios runs plug-ins -- binaries, Perl scripts, or shell scripts -- checks their return values, and from those determines the host/service state. Nagios itself neither knows nor cares what a plug-in actually monitors. Here is a plug-in that checks the status of all zpools on the system.
#!/usr/bin/sh
#set -x
# script name zpoolhealth.sh
# -------------------------
# Nagios plugin : determines zpool health
# Runs `zpool list`, buckets each pool by its health state into
# per-state temp files, then prints a one-line summary and exits
# with the matching Nagios status code.
# NOTE(review): /usr/bin/sh is the Solaris shell path -- confirm the
# target platform before reusing this on Linux (/bin/sh).
# Nagios plugin return values
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
# variables
progname=`basename $0`
tmpdir=/tmp
# Per-state scratch files; ".$$" (this run's PID) is appended at use
# time so concurrent runs don't clobber each other.
# NOTE(review): names in /tmp are predictable -- consider mktemp if
# untrusted local users share this host.
okfile=${tmpdir}/${progname}.ok-zpool
warnfile=${tmpdir}/${progname}.warn-zpool
critfile=${tmpdir}/${progname}.crit-zpool
# --- ERROR SUBROUTINE
# Print an error message and abort the script with status 1.
# Arguments: $* - message text
# printf replaces `echo "\n..."`: echo's backslash handling differs
# between shells (Solaris sh expands \n, bash prints it literally).
# The diagnostic goes to stderr so plugin stdout stays clean.
err() {
printf '\n ERROR: %s \n' "$*" >&2
exit 1
}
# --- END SCRIPT WITH OUTPUT
# Emit the one-line summary Nagios expects and terminate with the
# collected status code.  Reads globals RESULT and EXIT_STATUS.
endscript () {
printf '%s\n' "${RESULT}"
exit ${EXIT_STATUS}
}
# --- CLEANING SUBROUTINE
# Remove this run's per-state temp files (installed as a trap handler
# below, so it runs on both signals and normal exit).
# `rm -f` on quoted names replaces the unquoted test-then-rm triplet:
# it is silent when a file was never created and safe if any variable
# were empty or contained unusual characters.
tmp_file_cleaning () {
rm -f "${okfile}.$$" "${warnfile}.$$" "${critfile}.$$"
}
# --- cleaning in case of script termination and regular exit
trap tmp_file_cleaning HUP INT QUIT ABRT EXIT
# ---- find zpools in the system
# -o name requests the name column explicitly instead of relying on
# awk field positions, which change between zpool versions.
myzpools=`zpool list -H -o name 2>/dev/null`
# No pools: newer zpool prints nothing; older Solaris zpool prints
# "no pools available" (first word "no") -- handle both.
if [ -z "${myzpools}" ] || [ "${myzpools}" = "no" ]; then
echo "There is no zpool(s)"; exit 3
fi
#echo My zpools: ${myzpools}
# --- get zpool health and create temp files
# Bucket each pool into the ok/warn/crit temp file by health state.
# -o health fixes the old `awk '{print $6}'`, which silently read the
# wrong column on zpool versions with a different column layout.
for zp in ${myzpools}
do
health=`zpool list -H -o health ${zp}`
if [ "${health}" = "ONLINE" ]
then
printf "${zp} " >> ${okfile}.$$
elif [ "${health}" = "DEGRADED" ]
then
printf "${zp} " >> ${warnfile}.$$
else
printf "${zp} " >> ${critfile}.$$
fi
done
# --- check temp files and create output
# Build the summary from whichever per-state temp files exist, worst
# state first (CRIT, then WARN, then OK), and choose the exit status
# by severity: any critical pool -> CRITICAL, else any degraded pool
# -> WARNING, else -> OK.  This replaces the previous 7-branch
# if/elif chain over every file-existence combination and normalizes
# the "WARN(DEGRADED):" label (two branches were missing the colon).
RESULT=""
EXIT_STATUS="${STATE_UNKNOWN}"
if [ -f ${critfile}.$$ ]
then
critpools=`cat ${critfile}.$$`
RESULT="CRIT(FAULT/OFFLINE/UNAVAIL): ${critpools}"
EXIT_STATUS="${STATE_CRITICAL}"
fi
if [ -f ${warnfile}.$$ ]
then
warnpools=`cat ${warnfile}.$$`
# ${RESULT:+ } inserts a separating space only when RESULT is non-empty
RESULT="${RESULT}${RESULT:+ }WARN(DEGRADED): ${warnpools}"
[ "${EXIT_STATUS}" = "${STATE_CRITICAL}" ] || EXIT_STATUS="${STATE_WARNING}"
fi
if [ -f ${okfile}.$$ ]
then
okpools=`cat ${okfile}.$$`
RESULT="${RESULT}${RESULT:+ }OK: ${okpools}"
[ "${EXIT_STATUS}" = "${STATE_UNKNOWN}" ] && EXIT_STATUS="${STATE_OK}"
fi
# Fallback: no temp file was created at all.  The old chain left
# RESULT/EXIT_STATUS unset here, producing an empty plugin line and
# an invalid exit status.
if [ -z "${RESULT}" ]
then
RESULT="UNKNOWN: no zpool state could be determined"
fi
# call subroutine to end script
endscript
|
Usually I create a README file with information on how to deploy the plugin, etc.
################################################
README about Nagios plugin zpoolhealth.sh
################################################
1. Copy plugin zpoolhealth.sh to remote host's directory /opt/csw/libexec/nagios-plugins/
2. Set the plugin's permissions to 755 and its owner to root:bin
-rwxr-xr-x 1 root bin 3516 Jan 23 10:32 zpoolhealth.sh
3. Add lines to remote host in file /opt/csw/etc/nrpe.cfg
# check zpool status
command[check_zpool_status]=/opt/csw/libexec/nagios-plugins/zpoolhealth.sh
4. Restart the NRPE service on the remote host
{host}/> svcadm restart cswnrpe
5. Test how NRPE uses plugin on remote host, using CLI from Nagios machine (nagiosbox)
{nagiosbox}/> /opt/csw/nagios/libexec/check_nrpe -H unixlab -c check_zpool_status
OK: space.1 space0
6. Define Nagios service group on nagiosbox, file /etc/nagios/UNIX/services.cfg
define servicegroup{
servicegroup_name zpool_status
alias Zpool status
}
7. Create service so Nagios can check the host, file /etc/nagios/UNIX/services.cfg
define service{
use gen-service
host_name unixlab ;first test on unixlab
#hostgroup_name SUN,CC,FILESERVER ;if test ok, include others, copy plugin there
service_description Zpool status
servicegroups zpool_status
check_command check-nrpe!check_zpool_status
}
8. Refresh nagios service
> svcadm refresh nagios
-- Note:
The script deployplugin.sh can be used to do the following on multiple remote hosts:
1. copy plugin to remote host
2. backup nrpe.cfg on remote host
3. append required lines to nrpe.cfg on remote host
4. restart cswnrpe service on remote host
|
So yes, if you want to deploy the plugin on many machines, use this script.
#!/bin/sh
#set -x
#
# script name deployplugin.sh
# -----------------------------
# 1. copy plugin to remote host
# 2. backup nrpe.cfg on remote host
# 3. append required lines to nrpe.cfg on remote host
# 4. restart cswnrpe service on remote host
# -- error subroutine
# Print an error message to stderr and abort the deployment run.
# Arguments: $* - message text
# printf replaces `echo "\n..."`, whose backslash handling is not
# portable between Solaris sh and bash.
err() {
printf '\n ERROR: %s \n' "$*" >&2
exit 1
}
# variables
backuptime=`date +%m-%d-%Y.%Hh%Mm%Ss` #time of nrpe.cfg backup
# NRPE configuration location on the remote hosts
nrpedir="/opt/csw/etc"
nrpefile=nrpe.cfg
# command to backup nrpe.cfg
# (executed on the remote host via ssh; keeps a hidden, timestamped copy
# next to the original so the change can be rolled back)
backupnrpe="cp -p ${nrpedir}/${nrpefile} ${nrpedir}/.${nrpefile}.${backuptime}"
plugin_dest_dir="/opt/csw/libexec/nagios-plugins" # location of plugin
plugin_src_dir="/etc/master/nagios.plugin/zpool_status"
plugin=zpoolhealth.sh
# list of hosts
# one hostname per line; edit before each deployment run
hostlist='
host-1
host-2
host-3
host-etc'
# For each host: verify reachability and remote paths, copy the plugin,
# back up and extend nrpe.cfg, then restart the NRPE service.
for host in ${hostlist}
do
# -q = quiet; only fping's exit status is used
if fping -q ${host}; then
echo " --- OK --- Host ${host} is reachable, proceed."
# check existence of directory /opt/csw/etc
# (`test -d` over ssh replaces parsing `file` output, whose wording
# varies by platform/locale and broke the unquoted [ ] comparison)
ssh ${host} test -d ${nrpedir} || \
err "\n Directory ${nrpedir} doesn't exist."
# check existence of file nrpe.cfg on the remote host
ssh ${host} test -f ${nrpedir}/${nrpefile} || \
err "\n File ${nrpefile} doesn't exist."
# on remote host, check existence of destination directory for plugins
# (bug fix: the old message referenced undefined ${plugindir} and
# printed an empty directory name)
ssh ${host} test -d ${plugin_dest_dir} || \
err "\n Directory ${plugin_dest_dir} doesn't exist."
# copy plugin to remote host
scp -p ${plugin_src_dir}/${plugin} ${host}:${plugin_dest_dir}/${plugin} || \
err "\n ${plugin_src_dir}/${plugin} can't be copied to ${host}"
# backup nrpe.cfg on remote host
ssh ${host} ${backupnrpe} || \
err "\n nrpe.cfg can't be backup-ed on ${host}"
# build the updated nrpe.cfg locally: remote content plus the two
# check_zpool_status lines appended (single pass replaces the old
# double sed '$a\' round-trip through a second temp file)
{ ssh ${host} cat ${nrpedir}/${nrpefile} && \
echo "# check zpool status" && \
echo "command[check_zpool_status]=${plugin_dest_dir}/${plugin}"; } > /tmp/${nrpefile}.${host} || \
err "\n Can't build updated ${nrpefile} for ${host}"
# copy tmp file to remote host
scp -p /tmp/${nrpefile}.${host} ${host}:${nrpedir}/${nrpefile} || \
err "\n Can't copy temp file /tmp/${nrpefile}.${host} to ${host}"
# remove temp file
rm /tmp/${nrpefile}.${host} || err "\n Can't remove temp file /tmp/${nrpefile}.${host}"
# restart cswnrpe service so the new command is picked up
ssh ${host} svcadm restart cswnrpe || \
err "\n The cswnrpe service can't be restarted"
sleep 3
# non-fatal: warn if the service did not come back online
[ "`ssh ${host} svcs -H cswnrpe | awk '{print $1}'`" = "online" ] || \
echo "The cswnrpe service on ${host} is not online - check this later"
else
echo " ??????? Host ${host} is not reachable - check needed !"
fi
done
exit 0
|