#!/bin/sh

# Copyright (c) 2012 ken.naruo
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Run external tools under the C locale (only LANG is set here, so an
# inherited LC_ALL would still take precedence).
LANG=C;export LANG

# Directory holding this script. The .root file beside it is sourced next;
# it presumably defines ROOT, which every path below is built from.
# "$0" and the sourced path are quoted so an install path containing
# whitespace still works. Backticks are kept on purpose: the script is
# started by /bin/sh, which on some platforms does not understand $( ).
SCRIPT_DIR=`dirname "$0"`
. "${SCRIPT_DIR}/.root"

# Node name of this host; used in file names and in log/mail texts.
UNAME=`uname -n`

# ERROR_QUE: per-host status file for the most recent daemon error.
# ERROR_LOG: per-host log file whose name carries the date (YYYYMMDD) on
#            which this assignment was evaluated.
ERROR_QUE="${ROOT}/que/error/DAEMON_ERROR_STATUS_${UNAME}"
ERROR_LOG="${ROOT}/que/error/DAEMON_ERROR_${UNAME}_`date +%Y%m%d`"

## global.conf load
# startup: source ${ROOT}/cluster/global.conf into the current shell.
#   Globals: reads ROOT; defines whatever the configuration file assigns
#            (the rest of the script reads CONTROLLER, EXECUTE_SERVER,
#            SSH_PORT and MAIL_ADDR, presumably set there).
#   Returns: status of the sourced file, or 1 when the file is unreadable.
# The readability guard matters for the HUP reload below: handing a missing
# file to "." makes a non-interactive POSIX shell exit, which would take the
# whole daemon down. With the guard the previous settings simply stay in
# effect.
startup () {
  if [ -r "${ROOT}/cluster/global.conf" ]; then
    . "${ROOT}/cluster/global.conf"
  else
    echo "cannot read ${ROOT}/cluster/global.conf" >&2
    return 1
  fi
}
# SIGHUP re-reads the configuration without restarting the daemon.
trap startup HUP
startup


## controller server number check
# CONTROLLER is treated as a whitespace-separated list of controller hosts.
# Count its entries by shell word splitting, the same way the monitoring
# loop iterates over it, so a list spread over several lines or padded with
# blanks is counted correctly. expr is used because this script runs under
# plain /bin/sh.
CONTROLLER_COUNT=0
for CONTROLLER_HOST in ${CONTROLLER}
do
  CONTROLLER_COUNT=`expr ${CONTROLLER_COUNT} + 1`
done

# Empty or blank-only CONTROLLER: nothing to monitor.
if [ "${CONTROLLER_COUNT}" -eq 0 ];then
  echo "controller server is not defined!!" >  "${ERROR_QUE}"
  exit 1
fi

## if two server
if [ "${CONTROLLER_COUNT}" -ge 2 ];then
 ## rm master flag when starting daemon
 # A master flag left over from an earlier run is dropped together with the
 # queued message / exclusive_resource / tmp entries.
 # ${ROOT:?} aborts instead of globbing from "/" should ROOT be empty.
 if [ -f "${ROOT}/cluster/flag/master" ]; then
    rm -f "${ROOT}/cluster/flag/master"
    rm -f "${ROOT:?}"/que/message/*
    rm -f "${ROOT:?}"/que/exclusive_resource/*
    rm -f "${ROOT:?}"/que/tmp/*
 fi
fi

## if over three servers
# At most two controllers are supported; a third entry is a fatal
# configuration error.
if [ "${CONTROLLER_COUNT}" -ge 3 ];then
 echo "controller server is under two server!!"  >  "${ERROR_QUE}"
 exit 1
fi

## execute server check
if [ "${EXECUTE_SERVER}" = "" ];then
 echo "execution server is not defined!!"  >  "${ERROR_QUE}"
 exit 1
fi

## ssh port check
if [ "${SSH_PORT}" = "" ];then
 echo "global.conf ssh port is not defined!!"  >  "${ERROR_QUE}"
 exit 1
fi

# Choose the ifconfig binary for this platform: /sbin/ifconfig on Linux,
# /usr/sbin/ifconfig on everything else.
# The kernel name from "uname -s" is compared exactly. Searching the whole
# "uname -a" line for "Linux" would also match a non-Linux host whose node
# name happens to contain that string.
# LINUX stays non-empty on Linux and empty elsewhere.
IFCONFIG=/usr/sbin/ifconfig
LINUX=""

case "`uname -s`" in
  Linux)
    LINUX="Linux"
    IFCONFIG="/sbin/ifconfig"
    ;;
esac

## monitoring cluster controller
# Main daemon loop, repeated every 30 seconds:
#   1. For every controller in CONTROLLER that is not this host, check over
#      ssh that ecron_clusterd is running there and whether it holds the
#      master flag, then repair a missing or duplicated master flag.
#   2. If this host holds the master flag, rsync que/error and que/status
#      to the other controller.
# ${CONTROLLER} and ${MAIL_ADDR} are expanded unquoted on purpose: both may
# hold several whitespace-separated entries.
while true
do
 # Rebuild the dated log name on every pass so entries written after
 # midnight land in the new day's file rather than the start-up day's.
 ERROR_LOG="${ROOT}/que/error/DAEMON_ERROR_${UNAME}_`date +%Y%m%d`"

 for RHOST in ${CONTROLLER}
 do
   # A controller entry that appears in the local interface listing is this
   # host itself and is skipped; only the peer is probed.
   # NOTE(review): grep treats RHOST as a regular expression and matches
   # substrings, so 10.0.0.1 also matches 10.0.0.10 - confirm this is
   # acceptable for the addresses in use.
   IP_ADDR_CHECK=`"${IFCONFIG}" -a | grep "${RHOST}"`

   if [ "${IP_ADDR_CHECK}" = "" ];then

       # ps runs on RHOST, the greps run locally on its output. The result
       # is non-empty only when an ecron_clusterd process is listed.
       CLUSTER_CHECK=`ssh -p "${SSH_PORT}" "${RHOST}" ps -ef | grep "${ROOT}/sbin/ecron_clusterd" | grep -v grep`

       if [ "${CLUSTER_CHECK}" = "" ];then

         echo "`date` Controller Server failed!! remote host is ${RHOST}" > "${ERROR_QUE}"

           # The <host>_error marker limits the mail to one per outage; it
           # is removed again once the peer daemon is seen running.
           if [ -n "${MAIL_ADDR}" ] && [ ! -f "${ROOT}/que/error/${RHOST}_error" ];then
             echo "`date` ${RHOST} has failed" > "${ROOT}/que/error/${RHOST}_error"
             echo "${RHOST} has failed"|mailx -s "ERROR ${RHOST} ecron_clusterd failed" ${MAIL_ADDR}
           fi

       else

           if [ -f "${ROOT}/que/error/${RHOST}_error" ];then
             rm "${ROOT}/que/error/${RHOST}_error"
           fi

       fi

        # FLAG_CHECK is 0 only when the peer has a master flag. It is
        # non-zero both when the flag is absent and when ssh itself fails,
        # so an unreachable peer counts as "peer is not master".
        ssh -p "${SSH_PORT}" "${RHOST}" ls "${ROOT}/cluster/flag/master" > /dev/null 2>&1
        FLAG_CHECK=$?

        # Neither side is master: claim the role locally.
        if [ "${FLAG_CHECK}" != 0 ] && [ ! -f "${ROOT}/cluster/flag/master" ];then

          echo "`date` master server is not defined!! there is no master flag between ${CONTROLLER}"  >> "${ERROR_LOG}"
          touch "${ROOT}/cluster/flag/master"
          sleep 1
          echo "`date` create master flag on ${UNAME}"  >> "${ERROR_LOG}"

          if [ "${MAIL_ADDR}" ];then
             echo "master server is not defined!! between ${CONTROLLER} and create master flag on ${UNAME}"|mailx -s "ERROR ${UNAME} ecron_clusterd " ${MAIL_ADDR}
          fi

        fi

        # Both sides are master: keep exactly one flag.
        if [ "${FLAG_CHECK}" = 0 ] && [ -f "${ROOT}/cluster/flag/master" ];then

          echo "`date` master server is double defined!! between ${CONTROLLER}"  >>  "${ERROR_LOG}"

          if [ "${CLUSTER_CHECK}" = "" ];then
            # Peer daemon is not running: remove the peer's flag and stay
            # master here.
            ssh -p "${SSH_PORT}" "${RHOST}" rm -f "${ROOT}/cluster/flag/master"
            sleep 1
            echo "`date` flag delete ${RHOST}" >>  "${ERROR_LOG}"
          else
            # Peer daemon is alive: give up the local flag.
            rm -f "${ROOT}/cluster/flag/master"
            sleep 1
            echo "`date` flag delete ${UNAME}" >>  "${ERROR_LOG}"
          fi


          if [ "${MAIL_ADDR}" ];then
             echo "master server is double defined!! between ${CONTROLLER}, and master server updated "|mailx -s "ERROR ${UNAME} ecron_clusterd " ${MAIL_ADDR}

          fi

        fi

    fi

 done

 ## mirroring error/status que all server
 # Only the current master pushes its que/error and que/status directories
 # to the other controller.
 if [ -f "${ROOT}/cluster/flag/master" ]; then
 for RHOST in ${CONTROLLER}
  do
   ERROR=""
   STATUS=""

    IP_ADDR_CHECK=`"${IFCONFIG}" -a | grep "${RHOST}"`
    if [ "${IP_ADDR_CHECK}" = "" ];then
       rsync -e "ssh -p ${SSH_PORT}" -alv "${ROOT}/que/error" "${RHOST}:${ROOT}/que"  > /dev/null 2>&1
       ERROR=$?
       rsync -e "ssh -p ${SSH_PORT}" -alv "${ROOT}/que/status" "${RHOST}:${ROOT}/que"  > /dev/null 2>&1
       STATUS=$?

       if [ "${ERROR}" != 0 ] || [ "${STATUS}" != 0 ];then

        echo "`date` rsyc failed!! ,can't mirror que!! remote host is ${RHOST}" >  "${ERROR_QUE}"

           # The <host>_mirror_error marker limits the mail to one per
           # failure streak; it is removed after the next successful mirror.
           if [ -n "${MAIL_ADDR}" ] && [ ! -f "${ROOT}/que/error/${RHOST}_mirror_error" ];then

             echo "${RHOST} que mirror failed"|mailx -s "ERROR ${RHOST} ecron_clusterd failed" ${MAIL_ADDR}
             echo "`date` ${RHOST} que mirror  failed" >  "${ROOT}/que/error/${RHOST}_mirror_error"

           fi

       else

          if [ -f "${ROOT}/que/error/${RHOST}_mirror_error" ];then

            rm  "${ROOT}/que/error/${RHOST}_mirror_error"

          fi

       fi

    fi

   done

   fi

 sleep 30

done
