#!/bin/bash

# Run commands on CTDB nodes.

# See http://ctdb.samba.org/ for more information about CTDB.

# Copyright (C) Martin Schwenke  2008

# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.

# Copyright (C) Andrew Tridgell  2007

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
   
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
   
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

# Name of this script, used as a prefix in diagnostics.  Parameter
# expansion is used instead of an unquoted "basename $0" so that a
# script path containing whitespace is handled correctly.
prog="${0##*/}"

# Print the command-line synopsis on stderr and terminate the shell
# with exit status 1.
usage ()
{
    # The delimiter is quoted so the help text is emitted verbatim.
    cat <<'EOF' 1>&2
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
  options:
    -c          Run in current working directory on specified nodes.
    -o <prefix> Save standard output from each node to file <prefix>.<ip>
    -p          Run command in parallel on specified nodes.
    -q          Do not print node addresses (overrides -v).
    -v          Print node address even for a single node.
  <NODES>       "all", "ok" (or "healthy"), "con" (or "connected"),
                "rm" (or "recmaster"), "lvs" (or "lvsmaster"),
                "natgw" (or "natgwlist");
                or a node number (0 base); or
                list (comma separated) of <NODES>; or
                range (hyphen separated) of node numbers.
EOF

    exit 1
}

# Report a malformed node specification on stderr (message followed by
# a blank line), then hand over to usage.
invalid_nodespec ()
{
    printf '%s\n\n' "Invalid <nodespec>" >&2
    usage
}

# Option defaults; parse_options overwrites these from the command line.
# The boolean flags hold the command names "true"/"false" because they
# are later executed directly (e.g. "if $quiet ; then").
prefix=""
quiet=false
verbose=false
parallel=false
current=false

# Parse onnode's own command-line options.
#
# Arguments: the script's positional parameters ("$@").
# Sets the globals current, prefix, parallel, quiet and verbose from the
# options, nodespec from the first non-option argument, and command from
# the remaining arguments joined into a single string.
# Calls usage (which exits) on a bad option, on -h/--help, or when fewer
# than two non-option arguments remain.
parse_options ()
{
    # $POSIXLY_CORRECT means that the command passed to onnode can
    # take options and getopt won't reorder things to make them
    # options to onnode.
    #
    # Declaration and assignment are kept separate: "local x=$(cmd)"
    # yields the status of "local", which would hide a getopt failure.
    local temp
    temp=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cho:pqv" -l help -- "$@") || usage

    eval set -- "$temp"

    while true ; do
	case "$1" in
	    -c) current=true ; shift ;;
	    -o) prefix="$2" ; shift 2 ;;
	    -p) parallel=true ; shift ;;
	    -q) quiet=true ; shift ;;
	    -v) verbose=true ; shift ;;
	    --) shift ; break ;;
	    -h|--help|*) usage ;; # Shouldn't happen, so this is reasonable.
	esac
    done

    # Both a node specification and a command are required.
    if [ $# -lt 2 ] ; then
	usage
    fi

    nodespec="$1" ; shift
    # The command travels as one string, so join the remaining words.
    command="$*"
}

# Print the n-th (0-based) word of a list.
#
# Arguments: $1 - index n; remaining arguments - the list.
# Outputs:   the selected word on stdout.
# Exits the (sub)shell with status 1 and a message on stderr when n is
# not a valid index into the list or the selected word is empty.
echo_nth ()
{
    local n="$1" ; shift

    local node=""
    # Validate before shifting: when the count is out of range or not a
    # number, "shift" fails and leaves the positional parameters alone,
    # which would make "$1" the FIRST list entry rather than nothing.
    if [ "$n" -ge 0 ] 2>/dev/null && [ "$n" -lt $# ] ; then
	shift "$n"
	node="$1"
    fi

    if [ -n "$node" ] ; then
	printf '%s\n' "$node"
    else
	echo "${prog}: \"node ${n}\" does not exist" >&2
	exit 1
    fi
}

# Expand a comma-separated node specification into one item per line.
#
# Arguments: $1 - the node specification.
# Outputs:   for each list element: the keyword itself, the node
#            number itself, or every number of a "lo-hi" range.
# Calls invalid_nodespec (which ends the subshell via usage) for any
# element that is not a known keyword, a non-negative integer, or an
# ascending range of non-negative integers.
parse_nodespec ()
{
    # Subshell avoids hacks to restore $IFS (and the noglob setting).
    (
	IFS=","
	# $1 is expanded unquoted on purpose so it is split on commas;
	# disable globbing so wildcard characters are not expanded.
	set -f
	for i in $1 ; do
	    case "$i" in
		*-*)
		    lo="${i%-*}"
		    hi="${i#*-}"
		    # Both ends must be plain decimal integers: seq would
		    # otherwise accept fractions, and a descending range
		    # would silently produce no nodes at all.
		    case "$lo" in ""|*[!0-9]*) invalid_nodespec ;; esac
		    case "$hi" in ""|*[!0-9]*) invalid_nodespec ;; esac
		    [ "$lo" -le "$hi" ] 2>/dev/null || invalid_nodespec
		    seq "$lo" "$hi" 2>/dev/null || invalid_nodespec
		    ;;
		# Separate lines for readability.
		all|ok|healthy|con|connected) echo "$i" ;;
		rm|recmaster|lvs|lvsmaster|natgw|natgwlist) echo "$i" ;;
		*)
		    [ "$i" -gt -1 ] 2>/dev/null || invalid_nodespec
		    echo "$i"
		    ;;
	    esac
	done
    )
}

ctdb_status_output="" # cache
# Print, space separated on one line, the address field of every line
# of "ctdb -Y status" whose trailing flag fields match the requested
# status.
#
# Arguments: $1 - list of all nodes (stored but not consulted here)
#            $2 - "healthy" or "connected"; anything else is handed
#                 to invalid_nodespec.
# Globals:   ctdb_status_output caches the ctdb output between calls
#            made in the same shell.
# Exits the (sub)shell with status 1 when ctdb fails.
get_nodes_with_status ()
{
    local all_nodes="$1"
    local status="$2"

    # Glob for the last four colon-separated fields of a status line
    # (presumably the disconnected/banned/disabled/unhealthy flags --
    # confirm against the ctdb output format).
    local bits
    if [ "$status" = "healthy" ] ; then
	bits="0:0:0:0"
    elif [ "$status" = "connected" ] ; then
	bits="0:[0-1]:[0-1]:[0-1]"
    else
	invalid_nodespec
    fi

    if [ -z "$ctdb_status_output" ] ; then
	# FIXME: need to do something if $CTDB_NODES_SOCKETS is set.
	if ! ctdb_status_output=$(ctdb -Y status 2>/dev/null) ; then
	    echo "${prog}: unable to get status of CTDB nodes" >&2
	    exit 1
	fi
	# Drop everything up to and including the first space, if any.
	ctdb_status_output="${ctdb_status_output#* }"
    fi

    local matched=""
    local line
    local head
    for line in $ctdb_status_output ; do
	case "$line" in
	    *:${bits}:)
		# The flags matched.  Strip them, then keep the last
		# remaining field, which is the address.  NOTE: this is
		# an optimisation.  It might be better to get the node
		# number and then get the nth node to get the address.
		# This would make things more consistent if
		# /etc/ctdb/nodes actually contained hostnames.
		head="${line%:${bits}:}"
		matched="${matched} ${head##*:}"
		;;
	esac
    done

    # Unquoted on purpose: collapses the leading separator.
    echo $matched
}

ctdb_props="" # cache
# Print the node that currently holds a cluster-wide property.
#
# Arguments: $1 - list of all nodes (word-split when passed on)
#            $2 - ctdb sub-command naming the property, e.g. "recmaster"
# Globals:   ctdb_props caches results as space-separated
#            ":<prop>:<value>" entries.
# Outputs:   via echo_nth, the list entry indexed by the first line of
#            "ctdb <prop> -Y".
# Exits the (sub)shell with status 1 when ctdb fails or reports nothing.
get_node_with_property ()
{
    local all_nodes="$1"
    local prop="$2"

    local prop_node=""
    # Entries are separated by spaces, so prefix one space and look for
    # " :<prop>:" anywhere in the cache, not just at its start.
    case " ${ctdb_props}" in
	*" :${prop}:"*)
	    # Cached: take what follows the tag, up to the next space.
	    prop_node="${ctdb_props##*:${prop}:}"
	    prop_node="${prop_node%% *}"
	    ;;
	*)
	    # Test the status of ctdb itself; checking $? after a later
	    # assignment would always see 0.
	    if prop_node=$(ctdb "$prop" -Y 2>/dev/null) ; then
		# We only want the first line.
		local nl="
"
		prop_node="${prop_node%%${nl}*}"
		ctdb_props="${ctdb_props}${ctdb_props:+ }:${prop}:${prop_node}"
	    else
		prop_node=""
	    fi
	    ;;
    esac
    if [ -n "$prop_node" ] ; then
	echo_nth "$prop_node" $all_nodes
    else
	echo "${prog}: No ${prop} available" >&2
	exit 1
    fi
}

# Translate a node specification into node addresses on stdout.
#
# Arguments: $1 - node specification, expanded by parse_nodespec.
# Globals:   CTDB_NODES_SOCKETS - when non-empty, used as the node list;
#            CTDB_NODES_FILE - otherwise the nodes file (falls back to
#            /etc/ctdb/nodes when it does not name an existing file).
# Exits the (sub)shell with status 1 when the specification is invalid
# or a requested node cannot be determined.
get_nodes ()
{
    local all_nodes

    if [ -n "$CTDB_NODES_SOCKETS" ] ; then
	all_nodes="$CTDB_NODES_SOCKETS"
    else
	[ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes
	# Keep only lines that start with an alphanumeric character.
	all_nodes=$(grep -E '^[[:alnum:]]' "$CTDB_NODES_FILE")
    fi

    # Expand the specification first and check its status here.  A
    # check inside the loop body is never reached when parse_nodespec
    # fails without printing anything, and the failure would be lost.
    local spec
    spec=$(parse_nodespec "$1") || exit 1

    local n
    for n in $spec ; do
	case "$n" in
	    all)
		echo $all_nodes
		;;
	    ok|healthy)
		get_nodes_with_status "$all_nodes" "healthy" || exit 1
		;;
	    con|connected)
		get_nodes_with_status "$all_nodes" "connected" || exit 1
		;;
	    rm|recmaster)
		get_node_with_property "$all_nodes" "recmaster" || exit 1
		;;
	    lvs|lvsmaster)
		get_node_with_property "$all_nodes" "lvsmaster" || exit 1
		;;
	    natgw|natgwlist)
		get_node_with_property "$all_nodes" "natgwlist" || exit 1
		;;
	    *)
		echo_nth "$n" $all_nodes
		;;
	esac
    done
}

# Local stand-in for ssh: run a command string through sh with
# CTDB_SOCKET set, for that command only, to the given value.
#
# Arguments: $1 - value for CTDB_SOCKET; $2 - command string for sh -c.
# Returns:   the exit status of the sh invocation.
fakessh ()
{
    local socket="$1"
    local cmdline="$2"

    CTDB_SOCKET="$socket" sh -c "$cmdline"
}

# Filter for a node's standard output, reading stdin.
#
# Globals: prefix, n, verbose, parallel (all read).
# With a non-empty prefix the stream is written to "<prefix>.<n>";
# otherwise, in verbose parallel mode each line is tagged with "[<n>] ";
# otherwise the stream is passed through unchanged.
stdout_filter ()
{
    # Saving to a per-node file takes precedence over tagging.
    if [ -n "$prefix" ] ; then
	cat >"${prefix}.${n}"
	return
    fi

    if $verbose && $parallel ; then
	sed -e "s@^@[$n] @"
	return
    fi

    cat
}

# Filter for a node's standard error, reading stdin.
#
# Globals: n, verbose, parallel (all read).
# In verbose parallel mode each line is tagged with "[<n>] "; in every
# other mode the stream is passed through unchanged.
stderr_filter ()
{
    if ! { $verbose && $parallel ; } ; then
	cat
    else
	sed -e "s@^@[$n] @"
    fi
}

######################################################################

parse_options "$@"

# With -c, make the other side change into our working directory before
# running the command.  The path is wrapped in single quotes (embedded
# single quotes become '\'') so that whitespace or shell metacharacters
# in $PWD are not reinterpreted by the shell that runs the command.
if $current ; then
    sq="'"
    sq_esc="'\\''"
    command="cd '${PWD//$sq/$sq_esc}' && $command"
    unset sq sq_esc
fi

# Choose the transport: the local fakessh wrapper when
# $CTDB_NODES_SOCKETS is set, otherwise $SSH (default "ssh").
ssh_opts=
if [ -n "$CTDB_NODES_SOCKETS" ] ; then
    SSH=fakessh
else
    # Could "2>/dev/null || true" but want to see errors from typos in file.
    [ -r /etc/ctdb/onnode.conf ] && . /etc/ctdb/onnode.conf
    [ -n "$SSH" ] || SSH=ssh
    if [ "$SSH" = "ssh" ] ; then
	ssh_opts="-n"
    else
	: # rsh? All bets are off!
    fi
fi

######################################################################

# get_nodes runs in a command-substitution subshell, so a failure there
# has to be turned into an exit of this shell explicitly.
nodes=$(get_nodes "$nodespec") || exit 1

if $quiet ; then
    verbose=false
else
    # If $nodes contains a space or a newline then assume multiple nodes.
    nl="
"
    case "$nodes" in
	*" "*|*"${nl}"*) verbose=true ;;
    esac
fi

pids=""
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.
retcode=0

# Make a pipeline report the failure of any of its stages, so that the
# status seen below reflects the command and not just the last filter.
# Errors are ignored in case the option is not supported.
set -o pipefail 2>/dev/null

# Plumbing used for each node: fd 3 is a copy of the original stdout.
# The command's stdout goes through stdout_filter and on to fd 3, while
# everything written to stderr inside the inner group is piped through
# stderr_filter.
for n in $nodes ; do
    if $parallel ; then
	{ exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; } &
	pids="${pids} $!"
    else
	if $verbose ; then
	    echo >&2 ; echo ">> NODE: $n <<" >&2
	fi

	{ exec 3>&1 ; { $SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" | stdout_filter >&3 ; } 2>&1 | stderr_filter ; }
	# Save the status first: after a failed "[ ... ]" test, $? holds
	# the status of the test itself, not of the command.
	rc=$?
	[ "$rc" = 0 ] || retcode=$rc
    fi
done

# In parallel mode, collect the status of every background job.
if $parallel ; then
    for p in $pids ; do
	wait "$p"
	rc=$?
	[ "$rc" = 0 ] || retcode=$rc
    done
fi

exit $retcode
