#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Shell script to copy snapshots of a Solr Lucene collection from the master

# Resolve the installation root: the parent of the directory that holds this
# script.  The lookup runs in a subshell, so the caller's working directory is
# never changed.  dirname (rather than ${0%/*}) also copes with a $0 that has
# no slash in it, and CDPATH is cleared so cd cannot print a path of its own
# into the captured output.
# orig_dir is still recorded in case code outside this section reads it
# (NOTE(review): not verifiable from here).
orig_dir=$(pwd)
if ! solr_root=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
then
    echo "$0: cannot determine solr root directory" >&2
    exit 1
fi

# start from a clean slate so stray environment variables cannot leak into
# the settings below, then load the shared helper file
unset master_host rsyncd_port master_data_dir master_status_dir snap_name
unset sizeonly stats data_dir user verbose debug compress startStatus
. "${solr_root}/bin/scripts-util"

# set up variables
# prog: name this script was invoked as, without any directory part
# log:  per-script log file under the solr root
prog=${0##*/}
log=${solr_root}/logs/${prog}.log

# define usage string
# (the synopsis lists every flag accepted by the getopts call, including -V,
# and each option row names its argument the same way the synopsis does)
USAGE="\
usage: $prog [-M master] [-P portnum] [-D mdir] [-S sdir] [-n snapshot] [-d dir] [-u username] [-svVz]
       -M master   specify hostname of master server from where to pull index
                   snapshot
       -P portnum  specify rsyncd port number of master server from where to
                   pull index snapshot
       -D mdir     specify directory holding index data on master server
       -S sdir     specify directory holding snapshot status on master server
       -n snapshot pull a specific snapshot by name
       -d dir      specify directory holding index data on local machine
       -u username specify user to sudo to before running script
       -s          use the --size-only option with rsync
       -v          increase verbosity (-vv show file transfer stats also)
       -V          output debugging info
       -z          enable compression of data
"

# parse args
# Options that take a value store it verbatim; the flag options store the
# literal fragment that is later spliced into the rsync command line.
while getopts M:P:D:S:n:d:u:svVz OPTION
do
    case $OPTION in
    M)  master_host="$OPTARG" ;;
    P)  rsyncd_port="$OPTARG" ;;
    D)  master_data_dir="$OPTARG" ;;
    S)  master_status_dir="$OPTARG" ;;
    n)  snap_name="$OPTARG" ;;
    d)  data_dir="$OPTARG" ;;
    u)  user="$OPTARG" ;;
    s)  sizeonly="--size-only" ;;
    v)
        # the first -v switches verbosity on; any further -v asks for stats
        if [[ -z $verbose ]]
        then
            verbose=v
        else
            stats="--stats"
        fi
        ;;
    V)  debug="V" ;;
    z)  compress="z" ;;
    *)
        # unknown option or missing option argument
        echo "$USAGE"
        exit 1
        ;;
    esac
done

# -V: trace every command from here on
if [[ -n $debug ]]; then
    set -x
fi

# the master host must be supplied
if [[ -z ${master_host} ]]; then
    echo "name of master server missing in $confFile or command line."
    echo "$USAGE"
    exit 1
fi

# try to determine rsyncd port number from $confFile if not specified on
# command line, default to solr_port+10000
if [[ -z ${rsyncd_port} ]]; then
    if [[ -z "${solr_port}" ]]; then
        echo "rsyncd port number of master server missing in $confFile or command line."
        echo "$USAGE"
        exit 1
    fi
    rsyncd_port=$(expr 10000 + ${solr_port})
fi

# the master's index data directory must be supplied
if [[ -z ${master_data_dir} ]]; then
    echo "directory holding index data on master server missing in $confFile or command line."
    echo "$USAGE"
    exit 1
fi

# the master's snapshot status directory must be supplied
if [[ -z ${master_status_dir} ]]; then
    echo "directory holding snapshot status on master server missing in $confFile or command line."
    echo "$USAGE"
    exit 1
fi

# fixUser is defined outside this section; it is handed the original command
# line unchanged (presumably so it can re-run the script as the -u user;
# NOTE(review): confirm against scripts-util).
fixUser "$@"

# Print the path given as $1: unchanged when it is absolute, otherwise
# prefixed with ${solr_root}/.
# Globals:   solr_root (read)
# Arguments: $1 - path to resolve
# Outputs:   the resolved path on stdout
# The test is done with a shell pattern instead of `echo ...|cut -c1`, which
# needed two extra processes per check and passed the value through an
# unquoted echo (word splitting, globbing, values such as -n).
function anchorToSolrRoot
{
    if [[ $1 == /* ]]
    then
        printf '%s\n' "$1"
    else
        printf '%s\n' "${solr_root}/$1"
    fi
}

# use default value for data_dir if not specified
# relative path starts at ${solr_root}
data_dir=$(anchorToSolrRoot "${data_dir:-data}")

# assume relative path to start at ${solr_root}
master_data_dir=$(anchorToSolrRoot "${master_data_dir}")
master_status_dir=$(anchorToSolrRoot "${master_status_dir}")

# Push this host's stats/state to the master: copy the local
# snappuller.status file into the master's status directory as
# snapshot.status.<node name of this host>.
# Globals:   solr_root, master_host, master_status_dir (read)
# Arguments: none
# Returns:   exit status of scp
# The arguments are quoted so a solr_root containing whitespace still
# reaches scp as a single path.
function pushStatus
{
    scp -q -o StrictHostKeyChecking=no \
        "${solr_root}/logs/snappuller.status" \
        "${master_host}:${master_status_dir}/snapshot.status.$(uname -n)"
}

# epoch seconds at start-up (NOTE(review): presumably read by the logging
# helpers defined outside this section - confirm)
start=$(date +"%s")

logMessage started by $oldwhoami
logMessage command: $0 $@

# refuse to run unless the snappuller-enabled marker file exists
if [[ ! -f ${solr_root}/logs/snappuller-enabled ]]; then
    logMessage snappuller disabled
    exit 1
fi

# make sure we can ssh to master
if ! ssh -o StrictHostKeyChecking=no ${master_host} id >/dev/null 2>&1; then
    logMessage failed to ssh to master ${master_host}
    exit 1
fi

# get directory name of latest snapshot if not specified on command line:
# list the master's data directory, keep the 'snapshot.' entries that are not
# work-in-progress, and take the one that sorts last
if [[ -z ${snap_name} ]]; then
    snap_name=$(ssh -o StrictHostKeyChecking=no ${master_host} "ls ${master_data_dir}|grep 'snapshot\.'|grep -v wip|sort -r|head -1")
fi

if [[ -n "${snap_name}" ]]; then
    # only the last path component is used from here on
    name=$(basename ${snap_name})
else
    logMessage no snapshot available on ${master_host} in ${master_data_dir}
    logExit ended 0
fi

# Nothing to do when this snapshot, or a work-in-progress copy of it, is
# already present locally.
# This check deliberately runs BEFORE the cleanup trap is armed: the trap
# removes ${data_dir}/${name} and ${data_dir}/${name}-wip, so arming it first
# would let an INT/TERM arriving in between delete a snapshot that was
# installed earlier (or a -wip directory this run did not create).
# (logExit is defined outside this section and is expected to end the script.)
if [[ -d ${data_dir}/${name} || -d ${data_dir}/${name}-wip ]]
then
    logMessage no new snapshot available on ${master_host} in ${master_data_dir}
    logExit ended 0
fi

# clean up after INT/TERM
# From here on neither directory existed before this run, so removing both is
# safe.  The body is single-quoted, so its variables are expanded when the
# signal arrives, not now.
trap 'echo cleaning up, please wait ...;/bin/rm -rf ${data_dir}/${name} ${data_dir}/${name}-wip;echo ${startStatus} aborted:$(timeStamp)>${solr_root}/logs/snappuller.status;pushStatus;logExit aborted 13' INT TERM

# Seed the 'work-in-progress' directory with a hard-linked copy of the
# current index so that only modified files will be rsync-ed.  Working in the
# -wip directory prevents the snapshot from being installed while the copying
# is still in progress.
wipDir=${data_dir}/${name}-wip
cp -lr ${data_dir}/index ${wipDir}

# with --size-only, drop the segments and .del files from the seed copy to
# force rsync of them
if [[ -n ${sizeonly} ]]; then
    rm -f ${wipDir}/segments ${wipDir}/*.del
fi

logMessage pulling snapshot ${name}

# make sure master has a directory to hold the slaves' stats/state
ssh -o StrictHostKeyChecking=no ${master_host} mkdir -p ${master_status_dir}

# start new distribution stats
# Read the clock once as epoch seconds and derive the formatted timestamp
# from that number.  Re-parsing the default `date` output with `date -d`
# breaks when the locale prints non-English day/month names or a time zone
# abbreviation that date cannot parse.
rsyncStartSec=$(date +'%s')
startTimestamp=$(date -d "@${rsyncStartSec}" +'%Y%m%d-%H%M%S')
startStatus="rsync of $(basename ${name}) started:$startTimestamp"
echo "${startStatus}" > "${solr_root}/logs/snappuller.status"
pushStatus

# rsync over files that have changed
# (${sizeonly} and ${stats} stay unquoted on purpose: when empty they must
# vanish instead of becoming empty arguments)
rsync -Wa${verbose}${compress} --delete ${sizeonly} \
${stats} "rsync://${master_host}:${rsyncd_port}/solr/${name}/" "${data_dir}/${name}-wip"

# capture rsync's status before any other command overwrites $?
rc=$?
rsyncEndSec=$(date +'%s')
endTimestamp=$(date -d "@${rsyncEndSec}" +'%Y%m%d-%H%M%S')
elapsed=$(( rsyncEndSec - rsyncStartSec ))
if [[ $rc != 0 ]]
then
  logMessage rsync failed
  /bin/rm -rf "${data_dir}/${name}-wip"
  echo "${startStatus} failed:$endTimestamp" > "${solr_root}/logs/snappuller.status"
  pushStatus
  logExit failed 1
fi

# move into place atomically
# If the rename fails, report the failure and remove the -wip directory; a
# leftover -wip directory would make later runs treat this snapshot as
# already being pulled.
if ! mv "${data_dir}/${name}-wip" "${data_dir}/${name}"
then
  logMessage failed to move snapshot ${name} into place
  /bin/rm -rf "${data_dir}/${name}-wip"
  echo "${startStatus} failed:$endTimestamp" > "${solr_root}/logs/snappuller.status"
  pushStatus
  logExit failed 1
fi

# finish new distribution stats
echo "${startStatus} ended:$endTimestamp rsync-elapsed:${elapsed}" > "${solr_root}/logs/snappuller.status"
pushStatus
logExit ended 0
