#!/usr/bin/env expect
############################################################################
# Purpose: Functions and variables to be used from the main globals files,
#          not directly from tests.
############################################################################
# Copyright (C) 2016 SchedMD LLC.
# Written by Albert Gil <albert.gil@schedmd.com>

# This file is part of SLURM, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the supplied file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################


################################################################
#
# NAME
#	_wait_for_single_job - waits for a job to reach the desired state
#
# SYNOPSIS
#	_wait_for_single_job ?options? job_id desired_state
#
# DESCRIPTION
#	Wait for a previously submitted Slurm job to reach the desired state.
#	The job state is polled by running "scontrol -o show job <job_id>"
#	and reading the JobState field from its first line of output.
#
# OPTIONS
#	-timeout <integer_number>
#		time in seconds to wait for the job to be in the desired state
#		before timing out (default is 360). The elapsed time is
#		accounted as the sum of the poll intervals slept so far.
#	-pollinterval <integer_number>
#		time in seconds between each job state check (default is 1)
#
# ARGUMENTS
#	job_id
#		The Slurm job id of a job we want to wait for.
#	desired_state
#		The state you want the job to attain before
#		returning.  Currently supports:
#			DONE any terminated state
#			PENDING job is pending
#			RUNNING job is running
#			SPECIAL_EXIT
#			SUSPENDED job is suspended
#
# RETURN VALUE
#	RETURN_SUCCESS when the job is in the desired state.
#	RETURN_ERROR when desired_state is not supported, when job_id is 0,
#	or when the job is in a terminated state (or no JobState could be
#	read for it) while a state other than DONE was desired.
#	RETURN_TIMEOUT when the desired state was not reached in time.
#
# NOTE: We sleep for two seconds before replying that a job is
# done to give time for I/O completion (stdout/stderr files)
#
################################################################

proc _wait_for_single_job args {
	global scontrol

	# Defaults, possibly overridden by the leading options
	set timeout       360
	set poll_interval 1
	while {[llength $args]} {
		switch -glob -- [lindex $args 0] {
			-time*  {set args [lassign $args - timeout]}
			-poll*  {set args [lassign $args - poll_interval]}
			-*      {fail "Unknown option: [lindex $args 0]"}
			default break
		}
	}
	set argument_count [llength $args]
	if {$argument_count != 2} {
		fail "Invalid number of arguments ($argument_count): $args"
	} else {
		lassign $args job_id desired_state
	}

	# First verify that desired_state is supported
	set supported_states {DONE PENDING RUNNING SPECIAL_EXIT SUSPENDED}
	if {$desired_state ni $supported_states} {
		log_warn "Invalid desired state: $desired_state"
		return $::RETURN_ERROR
	}

	if {$job_id == 0} {
		log_warn "Invalid job ID: $job_id"
		return $::RETURN_ERROR
	}

	set my_delay 0
	while 1 {
		# Only the first output line is needed, -o prints one line per job
		set fd [open "|$scontrol -o show job $job_id"]
		gets $fd line
		# close reports an error if the command wrote to stderr or
		# exited non-zero; that case is handled below as NOT_FOUND
		catch {close $fd}
		if {![regexp {JobState\s*=\s*(\w+)} $line - state]} {
			set state "NOT_FOUND"
		}

		switch -- $state {
			"NOT_FOUND" -
			"BOOT_FAIL" -
			"CANCELLED" -
			"COMPLETED" -
			"DEADLINE" -
			"FAILED" -
			"NODE_FAIL" -
			"OUT_OF_MEMORY" -
			"PREEMPTED" -
			"TIMEOUT" {
				# Terminated: the job will never reach any other state
				if {$desired_state eq "DONE"} {
					log_debug "Job $job_id is DONE ($state)"
					sleep 2
					return $::RETURN_SUCCESS
				}
				# Always report the mismatch, whatever state was desired
				log_debug "Job $job_id is $state, but we wanted $desired_state"
				return $::RETURN_ERROR
			}
			"PENDING" -
			"RUNNING" -
			"SPECIAL_EXIT" -
			"SUSPENDED" {
				if {$state eq $desired_state} {
					log_debug "Job $job_id is $state"
					return $::RETURN_SUCCESS
				}
				log_debug "Job $job_id is in state $state, desire $desired_state"
			}
			default {
				# Any other state: keep polling
				log_debug "Job $job_id is in state $state, desire $desired_state"
			}
		}

		if {$my_delay > $timeout} {
			log_warn "Timeout waiting for job state $desired_state"
			return $::RETURN_TIMEOUT
		}

		exec sleep $poll_interval
		set my_delay [expr {$my_delay + $poll_interval}]
	}
}
