#!/bin/bash
# ---------------
# Example output when the checks are run manually:
# ./check_ping -H monitoring1 -w 300,20% -c 700,80%
#PING OK - Packet loss = 0%, RTA = 9.15 ms|rta=9.152000ms;300.000000;700.000000;0.000000 pl=0%;20;80;0
# ./check_http -I monitoring1
#HTTP OK: HTTP/1.1 200 OK - 515 bytes in 0.021 second response time |time=0.021402s;;;0.000000 size=515B;;;0
# This script files a Bugzilla bug if the ping, HTTP or Nagios service check
# of the monitored host fails.
# Once a bug is filed, the script creates a holding file so that duplicate bugs
# are not created. When the bug is resolved, the holding file has to be removed
# manually so that the script can file new bugs in the future.
# -----------------------------------------------------------------------------
# TODO: this could be improved.
# On the Bugzilla server there is the file
# /opt/createbug/monitoring1/http_fails.txt, which is used to file a bug.
# It reads:
# -----------------------------------------------------------
#
#From: zarko.dudic@domain.com
#Subject: monitoring1 HTTP check fails
#
#@product = Hosts
#@component = Problems
#@version = unspecified
#@op_sys = Linux
#@platform = Other
#@priority = P1
#@severity = critical
#
#monitoring1 HTTP check fails from monitoring22,
#this is our Nagios server.
#
#Once this bug is resolved please delete the file
#monitoring22:/var/createbug/monitoring1/filed_http_bug
#----
#Note: this file exists to prevent creation of duplicated bugs.
#
# --------------------------------------------------------------------
# Treat unset variables and parameters other than the special parameters "@" and "*"
# as an error when performing parameter expansion.
set -o nounset

# Name of this script without its directory part. Pure parameter expansion is
# used so that paths containing whitespace work and no external basename
# binary (at a fixed location) is required.
readonly PROGNAME="${0##*/}"

# Syslog command prefixes. They are expanded UNQUOTED by the rest of the script
# (word splitting turns them into "logger -t <name> <Level>:"), so they must
# stay plain strings.
readonly loggerinfo="logger -t ${PROGNAME} Info:"
readonly loggerwarning="logger -t ${PROGNAME} Warning:"
readonly loggerproblem="logger -t ${PROGNAME} Problem:"

# Host whose ping/HTTP/Nagios service is checked, and the host bugs are filed on.
readonly host_to_check=monitoring1.domain.com
readonly bugzilla_server=bug-sysadmin

${loggerinfo} "======== Start at $(date +'%Y-%m-%dT%H-%M-%S_%Z') ======="
#### function: error #########
# args: multiple (joined with spaces into a single message)
# what: print "Problem: <message>" (surrounded by blank lines) to STDERR,
#       log the message through ${loggerproblem}, then terminate the
#       script with exit status 1
#
err() {
  # Diagnostics belong on stderr so they never mix with regular output.
  {
    echo
    echo "Problem: $*"
    echo
  } >&2
  # ${loggerproblem} is intentionally unquoted: it holds a command plus arguments.
  ${loggerproblem} "$*"
  exit 1
}
#### function: file bug for failed ping ###########
# args: none
# what: if ping check fails, files a P1 bug with sysadmin Bugzilla,
# also creates holding file so duplicated bugs are not created.
# The holding file is created first (if it cannot be created, no bug is
# filed) and is removed again when filing the bug fails, so that a failed
# attempt does not suppress all later attempts.
# Expects the caller to have verified that the holding file does not exist yet.
# Exits the script (via err) on any failure.
#
filebug_ping(){
  local holder=/var/createbug/monitoring1/filed_ping_bug

  touch "${holder}" || err "cannot create ping bug holder file"

  # The redirection is part of the remote command: the template file lives
  # on the Bugzilla server, not on this host.
  if ! ssh "${bugzilla_server}" "/usr/share/bugzilla/email_in.pl < /opt/createbug/monitoring1/ping_fails.txt"; then
    # No bug was filed, so the holder must not stay behind.
    rm -f -- "${holder}"
    err "cannot file bug for failed ping"
  fi

  ${loggerinfo} A bug has been filed for PING check failure.
}
#### function: file bug for failed http check ###########
# args: none
# what: if http check fails, files a P1 bug with sysadmin Bugzilla,
# also creates holding file so duplicated bugs are not created.
# The holding file is created first (if it cannot be created, no bug is
# filed) and is removed again when filing the bug fails, so that a failed
# attempt does not suppress all later attempts.
# Expects the caller to have verified that the holding file does not exist yet.
# Exits the script (via err) on any failure.
#
filebug_http(){
  local holder=/var/createbug/monitoring1/filed_http_bug

  touch "${holder}" || err "cannot create http bug holder file"

  # The redirection is part of the remote command: the template file lives
  # on the Bugzilla server, not on this host.
  if ! ssh "${bugzilla_server}" "/usr/share/bugzilla/email_in.pl < /opt/createbug/monitoring1/http_fails.txt"; then
    # No bug was filed, so the holder must not stay behind.
    rm -f -- "${holder}"
    err "cannot file bug for failed http"
  fi

  ${loggerinfo} A bug has been filed for HTTP check failure.
}
#### function: file bug if nagios service isn't online ###########
# args: none
# what: if nagios service isn't online, files a P1 bug with sysadmin Bugzilla,
# also creates holding file so duplicated bugs are not created.
# The holding file is created first (if it cannot be created, no bug is
# filed) and is removed again when filing the bug fails, so that a failed
# attempt does not suppress all later attempts.
# Expects the caller to have verified that the holding file does not exist yet.
# Exits the script (via err) on any failure.
#
filebug_nagios_service(){
  local holder=/var/createbug/monitoring1/filed_nagios_service_bug

  touch "${holder}" || err "cannot create nagios service bug holder file"

  # The redirection is part of the remote command: the template file lives
  # on the Bugzilla server, not on this host.
  if ! ssh "${bugzilla_server}" "/usr/share/bugzilla/email_in.pl < /opt/createbug/monitoring1/nagios_service_fails.txt"; then
    # No bug was filed, so the holder must not stay behind.
    rm -f -- "${holder}"
    err "cannot file bug for failed nagios service"
  fi

  ${loggerinfo} A bug has been filed for Nagios service check failure.
}
# ---- main: run the checks and file at most one bug per run -----------------
# The checks are evaluated in the order ping -> http -> nagios service; only
# the first failing check is handled in a given run.

path_to_commands=/usr/lib64/nagios/plugins

# Capture the one-line status output of the Nagios plugins.
ping_command=$("${path_to_commands}"/check_ping -H "${host_to_check}" -w 300,20% -c 700,80%)
http_command=$("${path_to_commands}"/check_http -I "${host_to_check}")

# check nagios service
# The variable must always be assigned: with "set -o nounset" an unset
# variable would abort the script in the comparison below, i.e. exactly
# in the case where the service is down and a bug has to be filed.
nagios_servics=FAIL
if ssh "${host_to_check}" service nagios status > /dev/null; then
  nagios_servics=OK
fi

# Reduce the plugin output to its first two words, e.g. "PING OK" / "HTTP OK:".
ping_result=$(echo "${ping_command}" | awk '{print $1, $2}')
http_result=$(echo "${http_command}" | awk '{print $1, $2}')

if [ "${ping_result}" != "PING OK" ]; then
  # check if bug has been filed before
  if ! [ -e /var/createbug/monitoring1/filed_ping_bug ]; then
    # file bug for ping
    filebug_ping
  else
    ${loggerinfo} PING fails but the bug exist and is not resolved.
  fi
elif [ "${http_result}" != "HTTP OK:" ]; then
  # check if bug has been filed before
  if ! [ -e /var/createbug/monitoring1/filed_http_bug ]; then
    # file bug for http
    filebug_http
  else
    ${loggerinfo} HTTP fails but the bug exist and is not resolved.
  fi
elif [ "${nagios_servics}" != "OK" ]; then
  # check if bug has been filed before
  if ! [ -e /var/createbug/monitoring1/filed_nagios_service_bug ]; then
    # file bug for nagios service
    filebug_nagios_service
  else
    ${loggerinfo} Nagios service fails but the bug exist and is not resolved.
  fi
else
  ${loggerinfo} There is no ping or http or Nagios service problem.
fi

${loggerinfo} "======== Finish at $(date +'%Y-%m-%dT%H-%M-%S_%Z') ====="
exit 0
|