#!/bin/sh
#
#
# Description: Diskcheck Resource Agent
# Version: 1.0
# License: GNU General Public License (GPL)
# 
# Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
#
#####################################
# Initialization:

# Locate the OCF shell function library. The candidates are tried in order and
# the first regular file found is used. Every expansion is quoted so that a
# path containing whitespace is tested and sourced as one word.
if [ -f "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" ]; then
	FUNCTION_FILE="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
elif [ -f /usr/lib64/heartbeat/ocf-shellfuncs ]; then
	FUNCTION_FILE="/usr/lib64/heartbeat/ocf-shellfuncs"
elif [ -f /usr/lib/heartbeat/ocf-shellfuncs ]; then
	FUNCTION_FILE="/usr/lib/heartbeat/ocf-shellfuncs"
else
	# ocf_log is presumably provided by the library, so it cannot be used
	# here: report on stderr and give up.
	echo "${OCF_RESOURCE_INSTANCE} ocf-shellfuncs file doesn't exist." >&2
	exit 1
fi

# Load the library into the current shell.
. "${FUNCTION_FILE}"


#####################################

# Write the agent's OCF metadata (parameters and supported actions) as XML to
# stdout. The document is purely literal text, hence the quoted delimiter.
#    ret : ${OCF_SUCCESS}
meta_data() {
	cat <<'END_OF_METADATA'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Diskcheck">
<version>1.0</version>

<longdesc lang="en">
This is a Diskcheck Resource Agent.
</longdesc>
<shortdesc lang="en">Diskcheck resource agent</shortdesc>

<parameters>

<parameter name="attr_name" unique="1">
<longdesc lang="en">
Set an attribute name of the disk connection information.
</longdesc>
<shortdesc lang="en">An attribute name</shortdesc>
<content type="string" default="diskcheck_status" />
</parameter>

<parameter name="driver_type" unique="1" required="1">
<longdesc lang="en">
Set the type of driver using with shared disk. {CCISS|RDAC|QLA|FDISK}
</longdesc>
<shortdesc lang="en">Type of driver</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ctrl_serialno" unique="1">
<longdesc lang="en">
Set the serial number of the controller. (only when you set driver_type to "CCISS")
</longdesc>
<shortdesc lang="en">Serial number of the controller</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="mppfile_path" unique="1">
<longdesc lang="en">
Set full-path of driver's information file. (only when you set driver_type to "RDAC")
</longdesc>
<shortdesc lang="en">Driver's information file path</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="device_name" unique="1">
<longdesc lang="en">
Set one of the device name of the shared disk. (only when you set driver_type to "FDISK")
</longdesc>
<shortdesc lang="en">Name of the shared disk</shortdesc>
<content type="string" default="" />
</parameter>

</parameters>

<actions>
<action name="start"	timeout="60" />
<action name="stop"	timeout="60" />
<action name="monitor"	timeout="60" depth="0" interval="10" start-delay="60" />
<action name="validate-all" timeout="5" />
<action name="status" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>

</resource-agent>
END_OF_METADATA

	return ${OCF_SUCCESS}
}

# Print the one-line synopsis of the supported actions to stdout.
usage() {
	printf '%s\n' "usage: $0 {start|stop|monitor|validate-all|status|meta-data}"
}

# Start action: reset the status file, then run one initial disk check.
#    ret : ${OCF_SUCCESS}     - status file created and initial check succeeded.
#          ${OCF_ERR_GENERIC} - the status file could not be created.
#          else               - return value of diskcheck_monitor_internal.
diskcheck_start() {

	ocf_log info "${OCF_RESOURCE_INSTANCE} Diskcheck start .."

	# Start from an empty status file. The path is quoted so it is handled
	# as a single word, and a failure to create it is reported right here
	# instead of surfacing later as a "doesn't exist" error.
	rm -f "${STATUS_FILE}" > /dev/null 2>&1
	if ! touch "${STATUS_FILE}" > /dev/null 2>&1; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't create ${STATUS_FILE}."
		return ${OCF_ERR_GENERIC}
	fi

	##
	# initial diskcheck.
	##
	diskcheck_monitor_internal
	FUNC_RET=$?

	if [ "${FUNC_RET}" -ne 0 ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Diskcheck failed to start."
		return ${FUNC_RET}
	fi

	ocf_log info "${OCF_RESOURCE_INSTANCE} Diskcheck started."
	return ${OCF_SUCCESS}
}

# Stop action: remove the status file and report success.
#    ret : ${OCF_SUCCESS} (always; a missing file is not an error for rm -f).
diskcheck_stop() {
	# quoted so that a path containing whitespace or glob characters is
	# removed as exactly one file.
	rm -f "${STATUS_FILE}" > /dev/null 2>&1

	ocf_log info "${OCF_RESOURCE_INSTANCE} Diskcheck stopped."
	return ${OCF_SUCCESS}
}

#======================================================================
# Local functions
#======================================================================
# update the value of attribute ${STATUS_ATTR} through attrd_updater.
#    arg1 : the status value.
#    ret  : 0 - success.
#           1 - failed to update (the first attempt and all retries failed).
#    note : exits with ${OCF_ERR_GENERIC} when attrd_updater is not executable.
diskcheck_status_update() {

	STATUS_VALUE=$1
	UPDATE_CMD=/usr/sbin/attrd_updater
	RETRY_CNT=$((1 + 3)) #count of attrd_updater runs (1 check and 3 retry)

	# confirm that attrd_updater command can be executed.
	if [ ! -x "${UPDATE_CMD}" ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't execute ${UPDATE_CMD}."
		exit ${OCF_ERR_GENERIC}
	fi

	##
	# If the command fails, re-try 3 times.
	# Name and value are quoted so each reaches attrd_updater as exactly
	# one argument, even when empty or containing whitespace.
	##
	i=0
	while [ "${i}" -lt "${RETRY_CNT}" ]
	do
		i=$((i + 1))
		"${UPDATE_CMD}" -n "${STATUS_ATTR}" -v "${STATUS_VALUE}" > /dev/null 2>&1
		CMD_RET=$?
		if [ "${CMD_RET}" -eq 0 ]; then
			return 0
		fi
		ocf_log warn \
			"${OCF_RESOURCE_INSTANCE} failed to update attribute (${i})."
	done

	# all attempts failed.
	return 1
}

##
# diskcheck function (for CCISS)
# Asks hpacucli to show all logical drives of the controller whose serial
# number is ${SERIAL_NO}; the command output is discarded.
#    ret : return value of hpacucli command.
#          0 - connection is normal.
#          else - error occurs.
#    note: exits with ${OCF_ERR_GENERIC} when hpacucli is not executable.
##
diskcheck_cciss() {

	CHECK_CMD=/usr/sbin/hpacucli

	# confirm that disk check command can be executed.
	if [ ! -x "${CHECK_CMD}" ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't execute ${CHECK_CMD}."
		exit ${OCF_ERR_GENERIC}
	fi

	# do check command. The serial number is quoted so it always forms a
	# single "serialnumber=..." argument.
	"${CHECK_CMD}" controller "serialnumber=${SERIAL_NO}" logicaldrive all show \
		> /dev/null 2>&1

	return $?
}

##
# diskcheck function (for RDAC)
# Reads the driver information file ${MPPFILE_PATH} five times in a row and
# reports a failure only when every read finds no normal connection.
#    ret : 0 - at least one read found a normal connection.
#          1 - all reads found no normal connection.
#    note: exits with ${OCF_ERR_GENERIC} if the information file is missing.
##
diskcheck_rdac() {

	# set the number of connections.
	LINE_NUM=2

	##
	# Check 5 times.
	# Only when all checks fail do we consider that a failure occurred.
	##
	i=0
	CHK_NUM=5
	SOME_CONN_ERR=0
	ALL_CONN_ERR=0
	while [ "${i}" -lt "${CHK_NUM}" ]
	do
		# inspect whether the file exists.
		if [ ! -f "${MPPFILE_PATH}" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} ${MPPFILE_PATH} file doesn't exist."
			exit ${OCF_ERR_GENERIC}
		fi
		##
		# Count the number of normal connections: UTMLunExist lines whose
		# third ':'-separated field contains "N".
		# The path is quoted exactly like in the existence test above;
		# otherwise a path with whitespace would pass that test, make awk
		# fail silently and be counted as "all connections failed".
		##
		CHECK_RET=$(awk -F: '/UTMLunExist/{print $3}' "${MPPFILE_PATH}" 2>/dev/null \
			| grep -c N)
		if [ "${CHECK_RET}" -eq 0 ]; then
			# All connections are failed.
			ALL_CONN_ERR=$((ALL_CONN_ERR + 1))
			ocf_log warn "${OCF_RESOURCE_INSTANCE} detected all connections are failed (${ALL_CONN_ERR})."
		elif [ "${CHECK_RET}" -lt "${LINE_NUM}" ]; then
			# Some connections are failed (only logged, never an error).
			SOME_CONN_ERR=$((SOME_CONN_ERR + 1))
			ocf_log warn "${OCF_RESOURCE_INSTANCE} detected some connections are failed (${SOME_CONN_ERR})."
		fi

		# Loop count increment.
		i=$((i + 1))
	done

	# All connections are failed in every single check.
	if [ "${ALL_CONN_ERR}" -eq "${CHK_NUM}" ]; then
		return 1
	fi

	return 0
}

##
# diskcheck function (for QLA)
# Runs adapter_info and counts the output lines that mention READY.
#    ret : 0 - at least one READY line was found.
#          1 - no READY line was found.
#    note: exits with ${OCF_ERR_GENERIC} when adapter_info is not executable.
##
diskcheck_qla() {

	CHECK_CMD=/usr/sbin/adapter_info

	# number of connections expected to answer READY.
	LINE_NUM=2

	# the check command must be executable, otherwise the agent gives up.
	test -x "${CHECK_CMD}" || {
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't execute ${CHECK_CMD}."
		exit ${OCF_ERR_GENERIC}
	}

	CHECK_RET=$("${CHECK_CMD}" 2> /dev/null | grep READY | wc -l)

	if [ "${CHECK_RET}" -eq 0 ]; then
		# nothing is READY: this is the only case treated as an error.
		ocf_log warn "${OCF_RESOURCE_INSTANCE} detected all connections are failed."
		return 1
	elif [ "${CHECK_RET}" -lt ${LINE_NUM} ]; then
		# fewer READY lines than expected: warn only.
		ocf_log warn "${OCF_RESOURCE_INSTANCE} detected some connections are failed."
	fi

	return 0
}

##
# diskcheck function (for FDISK)
# Searches the output of "fdisk -l" for ${DEVICE_NAME} (used as a grep pattern).
#    ret : return value of grep.
#          0 - the device name was found (connection is normal).
#          else - error occurs.
#    note: exits with ${OCF_ERR_GENERIC} when fdisk is not executable.
##
diskcheck_fdisk() {

	CHECK_CMD=/sbin/fdisk

	# confirm that disk check command can be executed.
	if [ ! -x "${CHECK_CMD}" ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't execute ${CHECK_CMD}."
		exit ${OCF_ERR_GENERIC}
	fi

	# The device name is quoted and preceded by "--" so grep always takes
	# it as the single pattern, never as extra file names or as an option.
	"${CHECK_CMD}" -l 2>/dev/null | grep -- "${DEVICE_NAME}" > /dev/null 2>&1
	return $?
}

##
# this function is substance of the monitor function.
# It runs the check that belongs to ${DRIVER_TYPE}, records the result
# ("normal" or "ERROR") in ${STATUS_FILE} and, when the result differs from the
# previously recorded one, publishes it through diskcheck_status_update.
#    ret : ${OCF_SUCCESS}     - check executed; status recorded (and published
#                               when it changed).
#          ${OCF_ERR_ARGS}    - unknown driver type, missing required parameter
#                               or missing RDAC information file.
#          ${OCF_ERR_GENERIC} - status file unusable or attribute update failed.
#    note: a failing disk check is reported through the attribute value, not
#          through the return code.
##
diskcheck_monitor_internal() {

	##
	# distinct driver type.
	##
	case "${DRIVER_TYPE}" in
	CCISS)
		if [ "${SERIAL_NO}" = "" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} ctrl_serialno is required."
			return ${OCF_ERR_ARGS}
		fi
		diskcheck_cciss
		FUNC_RET=$?
		;;
	RDAC)
		if [ "${MPPFILE_PATH}" = "" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} mppfile_path is required."
			return ${OCF_ERR_ARGS}
		fi
		# inspect whether the file exists.
		if [ ! -f "${MPPFILE_PATH}" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} ${MPPFILE_PATH} file doesn't exist."
			return ${OCF_ERR_ARGS}
		fi
		diskcheck_rdac
		FUNC_RET=$?
		;;
	QLA)
		diskcheck_qla
		FUNC_RET=$?
		;;
	FDISK)
		if [ "${DEVICE_NAME}" = "" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} device_name is required."
			return ${OCF_ERR_ARGS}
		fi
		diskcheck_fdisk
		FUNC_RET=$?
		;;
	*)
		#when specified driver is unknown, return error.
		ocf_log err "${OCF_RESOURCE_INSTANCE} driver type is unknown."
		return ${OCF_ERR_ARGS}
		;;
	esac

	##
	# set diskcheck status for attrd_updater.
	##
	if [ "${FUNC_RET}" -eq 0 ]; then
		# connection with disk is normal.
		ocf_log debug "${OCF_RESOURCE_INSTANCE} Disk connection is Normal."
		STATUS="normal"
	else
		# error occurs.
		ocf_log warn "${OCF_RESOURCE_INSTANCE} Disk connection is Error."
		STATUS="ERROR"
	fi

	# Get previous status value (first line of the status file).
	if [ ! -f "${STATUS_FILE}" ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} ${STATUS_FILE} doesn't exist. "
		return ${OCF_ERR_GENERIC}
	fi
	PREV_STATUS=$(head -n 1 "${STATUS_FILE}" 2> /dev/null)
	if [ $? != 0 ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't read ${STATUS_FILE}."
		return ${OCF_ERR_GENERIC}
	fi

	# Write the status value. The subshell keeps the shell's own
	# redirection error message off stderr.
	(printf '%s\n' "${STATUS}" > "${STATUS_FILE}") 2> /dev/null
	if [ $? != 0 ]; then
		ocf_log err "${OCF_RESOURCE_INSTANCE} Can't write ${STATUS_FILE}."
		return ${OCF_ERR_GENERIC}
	fi

	# If the status is the same, do not update attribute.
	if [ "${STATUS}" = "${PREV_STATUS}" ]; then
		ocf_log debug "${OCF_RESOURCE_INSTANCE} Diskcheck is succeeded."
		return ${OCF_SUCCESS}
	fi

	##
	# If the previous and present states are different, update attribute.
	##
	diskcheck_status_update "${STATUS}"
	if [ $? = 0 ]; then
		ocf_log debug "${OCF_RESOURCE_INSTANCE} Diskcheck is succeeded."
		return ${OCF_SUCCESS}
	fi

	# The attribute still carries the old value, so put the old value back
	# into the status file. Otherwise the next run would find the status
	# "unchanged" and never retry the update.
	(printf '%s\n' "${PREV_STATUS}" > "${STATUS_FILE}") 2> /dev/null
	ocf_log err "${OCF_RESOURCE_INSTANCE} Diskcheck is failed."
	return ${OCF_ERR_GENERIC}
}
#======================================================================
# monitor function
#======================================================================
# Monitor action entry point.
#    A call whose ${OCF_RESKEY_CRM_meta_interval} is 0 (or unset) is handled as
#    a probe: the agent exits with ${OCF_NOT_RUNNING} without checking anything.
#    Any other call returns the result of diskcheck_monitor_internal.
diskcheck_monitor() {

	if test "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0"
	then
		# probe: always answer "not running".
		ocf_log debug "${OCF_RESOURCE_INSTANCE} get probe. (Diskcheck is not running.)"
		exit ${OCF_NOT_RUNNING}
	fi

	# being the last command, its status is also this function's status.
	diskcheck_monitor_internal
}


# validate-all action: check the configured parameters without touching any
# disk. The rules and messages are the ones diskcheck_monitor_internal applies
# before it runs a check, so a configuration accepted here passes those
# parameter checks too (the existence of the RDAC file is not verified here).
#    ret : ${OCF_SUCCESS}  - driver type is known and its parameter is set.
#          ${OCF_ERR_ARGS} - unknown driver type or required parameter empty.
diskcheck_validate() {

	case "${DRIVER_TYPE}" in
	CCISS)
		if [ -z "${SERIAL_NO}" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} ctrl_serialno is required."
			return ${OCF_ERR_ARGS}
		fi
		;;
	RDAC)
		if [ -z "${MPPFILE_PATH}" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} mppfile_path is required."
			return ${OCF_ERR_ARGS}
		fi
		;;
	QLA)
		# no driver specific parameter.
		;;
	FDISK)
		if [ -z "${DEVICE_NAME}" ]; then
			ocf_log err "${OCF_RESOURCE_INSTANCE} device_name is required."
			return ${OCF_ERR_ARGS}
		fi
		;;
	*)
		ocf_log err "${OCF_RESOURCE_INSTANCE} driver type is unknown."
		return ${OCF_ERR_ARGS}
		;;
	esac

	return ${OCF_SUCCESS}

}

# status action: performs no inspection and reports ${OCF_SUCCESS}
# unconditionally.
diskcheck_status() {
	return ${OCF_SUCCESS}
}

#======================================================================
# settings taken from the resource parameters (cib.xml)
#======================================================================
# name of the attribute that receives the disk status.
STATUS_ATTR=${OCF_RESKEY_attr_name:-diskcheck_status}
# driver type, selects the check to run:
#  "CCISS"   : cciss driver (HP, SCSI conn, single path)
#  "RDAC"    : RDAC driver  (RDAC, Fibre channel conn, multi path)
#  "QLA"     : qla driver   (HP, Fibre channel conn, multi path)
#  "FDISK"   : common       (use fdisk command)
DRIVER_TYPE=${OCF_RESKEY_driver_type:-}
# serial number of the SCSI controller. ("CCISS" only)
SERIAL_NO=${OCF_RESKEY_ctrl_serialno:-}
# full path of the RDAC driver's information file. ("RDAC" only)
MPPFILE_PATH=${OCF_RESKEY_mppfile_path:-}
# device name looked up in the fdisk listing. ("FDISK" only)
DEVICE_NAME=${OCF_RESKEY_device_name:-}
#======================================================================
# settings not taken from cib.xml
#======================================================================
# per-instance file holding the most recently recorded status.
HA_VARRUNDIR=/var/run/heartbeat
STATUS_FILE=${HA_VARRUNDIR}/rsctmp/Diskcheck-${OCF_RESOURCE_INSTANCE}.state


#======================================================================
# main function
#======================================================================
# Exactly one command line argument is accepted; anything else prints the
# synopsis and ends with ${OCF_ERR_ARGS}.
[ $# -eq 1 ] || {
	usage
	exit ${OCF_ERR_ARGS}
}

# Dispatch on ${__OCF_ACTION} (not set anywhere in this file; presumably
# provided by the sourced ocf-shellfuncs).
case ${__OCF_ACTION} in
	meta-data)	meta_data ;;
	start)		diskcheck_start ;;
	stop)		diskcheck_stop ;;
	monitor)	diskcheck_monitor ;;
	validate-all)	diskcheck_validate ;;
	status)		diskcheck_status ;;
	usage|help)	usage; exit ${OCF_SUCCESS} ;;
	*)		usage; exit ${OCF_ERR_UNIMPLEMENTED} ;;
esac

# The status of the selected action becomes the exit status of the agent.
exit $?
