#!/bin/bash
#
#
# Delft FEWS
# Copyright (c) 2003-2008 Deltares. All Rights Reserved.
#
# Developed by:
# Tessella Support Services plc
# Tauro Kantorencentrum
# President Kennedylaan 19
# 2517 JK Den Haag
# The Netherlands
# email: info@tessella.com
# web: www.tessella.com
#
# Project Ref: Tessella/NPD/5431
#
# File history
# Version Date Author
# $Revision: 18767 $ $Date: 2008-05-07 14:09:48 +0200 (Wed, 07 May 2008) $ $Author: broek_f $
#
# Description:
# Script to run ensembles for the Topkapi model on a Condor Grid Engine
# It is assumed that the condor bin directory is in the PATH
#
# Return Values:
# 0 - Success
# 1 - Failure
#
# Command line options and arguments
# Mandatory:
#   -o path : The directory for the output log files (redirected stdout/stderr)
#   -n num  : Total number of ensemble members e.g. 16
#   -t num  : Number of seconds after which the condor cluster job should time out
#   -d path : Root directory for the Topkapi module (local path)
#             (where all the input/output files reside)
#   -r path : Root directory for the Topkapi module (remote network path)
#             This should be the same physical directory as the one
#             specified under -d
#             (where all the input/output files reside)
#             NOTE(review): -r is documented here but is not accepted by the
#             option parser below, and the usage text describes -d as the
#             remote network path - confirm which description is current.
#
# Optional:
#   -x path : Full path to the Topkapi binary (Windows-style path)
#   -s path : Full or relative path to the condor submit file
#   -w path : Full or relative path to the wrapper script for Topkapi
#             Pre, Run and Post execution
#   -h      : Print help message
#

#######################################
# Inform the user on usage.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
print_usage() {
  cat <<EOF

Script to run ensembles for the Topkapi model on a Condor Grid Engine.
It is assumed that the condor bin directory is in the PATH.

Usage: $0 options
 options are:
 Mandatory:
 -o path : The directory for the output log files (redirected stdout/stderr)
 -n num : Total number of ensemble members e.g. 16
 -t num : Number of seconds after which the condor cluster job should time out
 -d path : Root directory for the Topkapi module (remote network path)
 (where all the input/output files reside)

 Optional:
 -x path : Full path to the Topkapi binary (Windows-style path)
 -s path : Full or relative path to the condor submit file
 -w path : Full or relative path to the wrapper script for Topkapi
 Pre, Run and Post execution
 -h : Print help message

EOF
}

#######################################
# Create the contents of the condor submit file.
# Globals:   Topkapi_WRAPPER, CONDOR_Topkapi_LOG, Topkapi_BIN,
#            Topkapi_ROOT_DIR, ENSEMBLE_SIZE (all read)
# Arguments: none
# Outputs:   the submit description on stdout (the caller redirects it)
#######################################
create_submit() {
  # The Output/Error lines are single-quoted and the Arguments line escapes
  # its dollar sign so that $(Process) and $(Cluster) are written to the
  # submit file literally instead of being expanded by the shell.
  printf '%s\n' \
    "########################" \
    "#" \
    "# Submit description file for Topkapi run" \
    "# Created: by $0 on $(date +"%F %T %Z") on $(hostname)" \
    "#" \
    "########################" \
    "Executable = ${Topkapi_WRAPPER}" \
    "Universe = vanilla" \
    'Output = $(Process)\log\Topkapi.out.$(Cluster).$(Process)' \
    "Log = ${CONDOR_Topkapi_LOG}" \
    'Error = $(Process)\log\Topkapi.err.$(Cluster).$(Process)' \
    "Notification = Error" \
    "" \
    "Arguments = \"'${Topkapi_BIN}' '${Topkapi_ROOT_DIR}' \$(Process)\"" \
    "" \
    "run_as_owner = false" \
    "" \
    "Queue ${ENSEMBLE_SIZE}" \
    ""
}

# Path to Topkapi executable can be overridden by command line option -x
Topkapi_BIN='E:\FEWS_PO_TestBench_01042008\Po_SA\Modules\TopkapiParallel\bin\topkapi.exe'

# Path to wrapper for Topkapi Pre, Run, and Post execution can be overridden
# by command line option -w
Topkapi_WRAPPER=Topkapi_pre_run_post.bat

# Path to condor submit file can be overridden by command line option -s
CONDOR_Topkapi_SUBMIT=Topkapi.submit

# Name of the condor log file
CONDOR_Topkapi_LOG=Topkapi.log

# Process command line options
while getopts "ho:x:w:s:d:t:n:" options; do
  case "${options}" in
    o) LOGFILE_PATH=${OPTARG} ;;
    x) Topkapi_BIN=${OPTARG} ;;
    w) Topkapi_WRAPPER=${OPTARG} ;;
    # -s is listed in the option string and the usage text, so it needs its
    # own arm; without one it would fall through to the catch-all below.
    s) CONDOR_Topkapi_SUBMIT=${OPTARG} ;;
    d) Topkapi_ROOT_DIR=${OPTARG} ;;
    t) CONDOR_TIMEOUT=${OPTARG} ;;
    n) ENSEMBLE_SIZE=${OPTARG} ;;
    h)
      print_usage # help option
      exit 0
      ;;
    \?)
      print_usage # unknown option
      exit 1
      ;;
    *)
      print_usage # any other value/option
      exit 1
      ;;
  esac
done

# Check path to Topkapi wrapper
if [[ ! -e "${Topkapi_WRAPPER}" ]]; then
  echo "Error: Topkapi wrapper ${Topkapi_WRAPPER} does not exist."
  exit 1
fi
if [[ ! -x "${Topkapi_WRAPPER}" ]]; then
  echo "Error: ${Topkapi_WRAPPER} is not an executable file."
  exit 1
fi

# Ensure that all options have been set correctly

# Test logfile path
if [[ -z "${LOGFILE_PATH}" ]]; then
  echo "Error: output log file path not set (use option -o)."
  exit 1
fi
if [[ ! -d "${LOGFILE_PATH}" ]]; then
  echo "Error: output log file path ${LOGFILE_PATH} "
  echo " does not exist or is not a directory."
  exit 1
fi

# Test Topkapi root directory
if [[ -z "${Topkapi_ROOT_DIR}" ]]; then
  echo "Error: root directory for Topkapi module not set"
  echo " (use option -d)."
  exit 1
fi
if [[ ! -d "${Topkapi_ROOT_DIR}" ]]; then
  echo "Error: root directory for Topkapi module ${Topkapi_ROOT_DIR} "
  echo " does not exist or is not a directory."
  exit 1
fi

# Test number of ensemble members
if [[ -z "${ENSEMBLE_SIZE}" ]]; then
  echo "Error: number of ensemble members not set (use option -n)."
  exit 1
fi
if [[ ! "${ENSEMBLE_SIZE}" =~ ^[0-9]+$ ]]; then
  echo "Error: number of ensemble members '${ENSEMBLE_SIZE}' is not a whole number (option -n)."
  exit 1
fi
# Force base 10 so that a value with a leading zero (e.g. 08) is not
# interpreted as octal by the arithmetic loop below.
ENSEMBLE_SIZE=$((10#${ENSEMBLE_SIZE}))

# Test time out
if [[ -z "${CONDOR_TIMEOUT}" ]]; then
  echo "Error: time out for entire enseble run (in seconds) not set (use option -t)."
  exit 1
fi
if [[ ! "${CONDOR_TIMEOUT}" =~ ^[0-9]+$ ]]; then
  echo "Error: time out '${CONDOR_TIMEOUT}' is not a whole number of seconds (option -t)."
  exit 1
fi

# Create the subdirectory for the condor log files: one <root>/<member>/log
# directory per ensemble member, numbered from 0.
for ((i = 0; i < ENSEMBLE_SIZE; i++)); do
  member_log_dir="${Topkapi_ROOT_DIR}/${i}/log"
  if [[ ! -d "${member_log_dir}" ]]; then
    echo "Creating directory ${member_log_dir}"
    mkdir -p -- "${member_log_dir}"
    if [[ ! -d "${member_log_dir}" ]]; then
      echo "ERROR: Unable to create directory ${member_log_dir}"
      exit 1
    fi
  fi
done

# Update the condor submit file
if ! create_submit > "${CONDOR_Topkapi_SUBMIT}"; then
  echo "Error: unable to write condor submit file ${CONDOR_Topkapi_SUBMIT}."
  exit 1
fi

# Submit the job and retrieve the cluster id
# condor_submit returns the following lines:
# Submitting job(s).....
# Logging submit event(s).....
# 5 job(s) submitted to cluster 86.
echo "Submitting condor job..."
# Run condor_submit; stderr is folded into the captured text so that any
# diagnostics it prints are echoed together with its normal output.
CONDOR_SUBMIT_OUTPUT=$(condor_submit "${CONDOR_Topkapi_SUBMIT}" 2>&1)
submit_status=$?
printf '%s\n' "${CONDOR_SUBMIT_OUTPUT}"
if [[ ${submit_status} -ne 0 ]]; then
  echo "Error: condor_submit failed with exit status ${submit_status}."
  exit 1
fi

# Pull the cluster id out of the "... submitted to cluster <id>." line.
cluster_id_pattern='to cluster ([0-9]+)'
CLUSTER_ID=
if [[ "${CONDOR_SUBMIT_OUTPUT}" =~ ${cluster_id_pattern} ]]; then
  CLUSTER_ID=${BASH_REMATCH[1]}
fi
if [[ -z "${CLUSTER_ID}" ]]; then
  echo "Error: unable to determine the cluster ID from the condor_submit output."
  exit 1
fi

# Wait until the cluster job is done
echo
echo "Waiting for a maximum of ${CONDOR_TIMEOUT} second(s) for condor job with cluster ID ${CLUSTER_ID}"
echo " by watching condor log file $(pwd)/${CONDOR_Topkapi_LOG}"
condor_wait -wait "${CONDOR_TIMEOUT}" "${CONDOR_Topkapi_LOG}" "${CLUSTER_ID}"
# Capture the status immediately: any command run in between (including the
# shell's own `wait` builtin) would overwrite $? and hide a failure or
# time-out reported by condor_wait.
retval_wait=$?
echo "condor_wait returned value: ${retval_wait}"

# Parse and combine the diagnostics files

# Clean up

# Return the return value of condor_wait
exit "${retval_wait}"