|
-*- tcl-indent-level: 5; indent-tabs-mode: nil; -*-
vi: set autoindent expandtab tabstop=5 shiftwidth=5 :
manager.tcl Version 1.0
Wraps the genericAPI.tcl for use by the manager API.
set ::RCS_ID_managertcl {$Id: manager.tcl,v 1.518 2008/11/04 22:04:34 emaros Exp $}
set ::RCS_ID_managertcl [ string trim $::RCS_ID_managertcl "\$" ]
package provide manager 1.0
namespace eval mgr {
set activejobs 0
}
proc mgr::expandCmd { { cmd "" } } {
;## if its a xxx.meta command, expand and recurse.
if { [ catch {
if { [ llength [ split $cmd "\n" ] ] == 1 } {
set cmd [ mgr::parseMeta $cmd ]
set sequence [ mgr::expandCmd $cmd ]
} else {
set sequence [ mgr::parseBlock $cmd ]
}
} err ] } {
addLogEntry $err
return -code error "[ myName ]: $err"
}
return $sequence
}
proc mgr::commandSanityChecks { cmd } {
if { [ catch {
;## -np must be at least '2' since no search code is run
;## on the wrapper master and we include the wrapper master
;## as one of the nodes requested to avoid underrepresenting
;## the system requirements of large numbers of small jobs.
if { [ regexp -nocase -- {\s+-np\s+[01]\s+} $cmd ] } {
set err "minimum possible value of '-np' option is '2'"
return -code error $err
}
;## when ::REJECT_EXPLICIT_DYNLIB_PATHS is set to 1, we
;## reject al commands that attempt to specify a dynlib.so
;## by absolute path. this is done to prevent the running
;## of potentially hazardous experimental code on production
;## LDAS systems. PR #1825 10-07-2003
if { [ info exists ::REJECT_EXPLICIT_DYNLIB_PATHS ] && \
[ string equal 1 $::REJECT_EXPLICIT_DYNLIB_PATHS ] } {
set dyn_rx {\s+-dynlib\s+(\S+)\s+}
if { [ regexp -nocase -- $dyn_rx $cmd -> dynlib ] } {
regsub -all -- {[\}\{\"\s]} $dynlib {} dynlib
if { [ regexp {[\\\/]+} $dynlib ] } {
set msg "This system has ::REJECT_EXPLICIT_DYNLIB_PATHS"
append msg " enabled to prevent the use of experimental"
append msg " shared objects."
append msg " You may not specify path components as"
append msg " part of your -dynlib option (and you"
append msg " specified '$dynlib')."
return -code error $msg
}
}
}
} err ] } {
return -code error "[ myName ]: $err"
}
}
proc mgr::cmdParseUserTimeout { cmd } {
if { [ catch {
set timeout 0
;## optional option available to users in order to cause
;## jobs to timeout *more* quickly.
set uto_rx {\s+-usertimeout\s+(\S+)\s+}
if { [ regexp -nocase -- $uto_rx $cmd -> timeout ] } {
regsub -all -- {[\}\{\"\s]} $timeout {} timeout
set standard $::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API
set timeout [ expr { round($timeout) } ]
if { $timeout > 2 * $standard } {
set timeout [ expr { 2 * $standard } ]
}
} else {
set timeout 0
}
} err ] } {
set err "Error in -usertimeout option: '$err'"
return -code error "[ myName ]: $err"
}
return $timeout
}
proc mgr::parseMeta { cmd } {
set procname {}
set usropts [ list ]
set command {}
set macrolist [ mgr::globMacros ]
regexp $::cmd_rx $cmd -> procname usropts
;## first pass expansion from meta file
if { [ lsearch -exact $macrolist ${procname}.meta ] > -1 } {
set fname [ file join $::LDASMACROS meta ${procname}.meta ]
set fid [ open $fname r ]
;## meta macros are constrained to start with a line
;## declaring the "opts" and their defaults.
set command [ string trim [ gets $fid ] ]
;## test it.
if { ! [ regexp {^set opts \{[^\}]+\}} $command ] } {
set msg "the first line of the meta file must be\n"
append msg "the declaration of 'opts', but I found:\n"
append msg "$command\ninstead."
return -code error "[ myName ]: $msg"
}
;## apply the user supplied options
if { [ llength $usropts ] } {
append command "\n metaOpts \{ $usropts \}\n"
}
append command [ read $fid ]
::close $fid
} else {
return -code error "[ myName ]: Bad meta command: '$cmd'"
}
return $command
}
proc mgr::parseBlock { cmd } {
set api {}
set tmp [ list ]
set sequence [ list ]
set api_rx ^([ join $::API_LIST | ])\\s*\$
set macrolist [ mgr::globMacros ]
foreach line [ split $cmd ";\n" ] {
set line [ string trim $line ]
if { [ regexp {(^\#.*$|^$)} $line ] } {
continue
}
if { [ regexp {^set opts \{[^\}]+\}} $line ] } {
set sequence $line
continue
}
if { [ regexp -nocase $api_rx $line api ] } {
if { [ string equal $api $tmp ] } {
continue
} else {
set tmp $api
continue
}
}
if { ! [ regexp $::cmd_rx $line -> procname opts ] } {
set procname [ string trim $line ]
}
if { [ string equal metaOpts $procname ] } {
append sequence "\n metaOpts $opts\n"
continue
}
if { [ lsearch -exact $macrolist ${procname}.$api ] > -1 } {
set fname [ file join $::LDASMACROS $api ${procname}.$api ]
append sequence "\n$api\n"
set fid [ open $fname r ]
set tmp {}
while { [ gets $fid code ] > -1 } {
if { ! [ regexp {(^[ ]*;?\#)|(^[ ]*$)} $code ] } {
append tmp "$code\n"
}
}
::close $fid
append sequence \{[ string trim $tmp ]\}
set procname {}
continue
}
} ;# end of foreach
return $sequence
}
proc mgr::globMacros { } {
set macrolist {}
foreach file [ glob -nocomplain $::LDASMACROS/*/*.* ] {
lappend macrolist [ file tail $file ]
}
if { [ llength $macrolist ] < 3 } {
set msg "*********************************************\n"
append msg "ABORT!! No macro code found under '$::LDASMACROS'\n"
append msg "If this is a new installation of LDAS, you\n"
append msg "must read the documentation for installing\n"
append msg "the manager API.\n"
append msg "If this is not a new installation, then\n"
append msg "either you have deleted the macro files or\n"
append msg "redefined ::LDASMACROS to point to the wrong\n"
append msg "location. You may also being experiencing\n"
append msg "transient NFS problems.\n\n"
append msg " NO JOBS CAN BE PROCESSED\n"
append msg " UNTIL THIS PROBLEM IS SOLVED.\n"
append msg "*********************************************\n"
puts stderr $msg
return -code error "[ myName ]:\n$msg"
}
set macrolist
}
if { [ mgr::bootstrapAPI $apis ] } { ... }
Comments:
proc mgr::bootstrapAPI { { apis "" } { comment "" } } {
set redirect /dev/null
set libmalloc [ list ]
set libstdc [ list ]
set apihost unknown!
if { ! [ string length $apis ] } {
set msg "you must specify an api, list of api's, or 'all'"
return -code error "[ myName ]: $msg"
}
if { [ string equal all $apis ] } {
set apis $::API_LIST
} else {
foreach api $apis {
if { [ lsearch $::API_LIST $api ] < 0 } {
set msg "cannot bootstrap unregistered API: '$api'"
return -code error $msg
}
}
}
;## emergency port requests always succeed
set caller [ uplevel myName ]
if { [ string equal emergency_callback $caller ] } {
mgr::bootstrapLock $apis 1
} else {
set apis [ mgr::bootstrapLock $apis 0 ]
}
set localhost [ set ::${::LDAS_SYSTEM}(manager) ]
foreach api $apis {
set api [ string tolower $api ]
regsub {api$} $api {} api
if { [ string equal $api manager ] } { continue }
set apiPid [ getPid $api ]
set apiPid [ string trim $apiPid ]
addLogEntry "$api pid is '$apiPid'" purple
;## if API is up but appears stuck and user does not want reboot
;## hold off rebooting API
if { [ string length $apiPid ] && ! $::STARTUP_IN_PROGRESS && \
[ info exist ::WAIT_FOR_API ] && [ set ::WAIT_FOR_API ] } {
set subject "hold off bootstraping $api"
set body "set ::WAIT_FOR_API to 0 to resume booting"
addLogEntry "Subject: ${subject}; Body: ${body}" email
continue
}
if { [ catch {
mgr::shutdownAPI $api $comment
mgr::bootstrapPause $api
set postmortem [ mgr::postMortem $api ]
;## if the call to mgr::postMortem returned data
if { [ string length $postmortem ] } {
addLogEntry $postmortem blue mgr::postMortem
}
set apihost [ set ::${api}(host) ]
} err ] } {
addLogEntry "$err (api: $api)" 2
}
if { [ catch {
set libstdc [ mgr::libstdcPreload $api ]
} err ] } {
continue
}
if { [ catch {
set libmalloc [ mgr::libmallocPreload $api ]
} err ] } {
continue
}
;## Ed asked for different vars for different platforms 04/28/04
;## this will return SunOS or Linux
;## Ed asked for this 07/02/07
;## have different env for different APIs to run libumem
set extra_env [ mgr::getExtraEnv $api ]
set extra_env [ subst -nocommands $extra_env ]
set api_cmd "${api}API"
if { ! [ string equal $apihost $localhost ] } {
set api_cmd "$::LDAS/bin/${api_cmd}"
}
set launcher [ mgr::getLauncher $api $api_cmd ]
mgr::perApiRunningStats $api restart [ gpsTime ]
set dir [ mgr::makeApiAtHome $api ]
set dotfile "${dir}/.. "
set fid [ open $dotfile w ]
file attributes $dotfile -permissions 0600
puts $fid $::MGRKEY
::close $fid
set redirect ${dir}/${api}.log
addLogEntry "bootstrapping $api API on $apihost"
set apipath ${::LDASLIB}/${api}API
if { [ catch {
if { [ string equal $apihost $localhost ] } {
set LD_LIBRARY_PATH $::env(LD_LIBRARY_PATH)
set cwd [ pwd ]
cd $dir
set cmd "/bin/env $extra_env "
append cmd "RUNDIR=$::env(RUNDIR) "
append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
append cmd "DB2INSTANCE=$::DB2INSTANCE "
append cmd "$launcher >& $redirect &"
eval ::exec $cmd
cd $cwd
} else {
set cmd "alias cd=cd && cd $dir && /bin/env "
append cmd "RUNDIR=$::env(RUNDIR) HOST=$apihost "
append cmd "PATH=$::LDAS/bin:$::env(PATH) "
append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
append cmd "DB2INSTANCE=$::DB2INSTANCE $extra_env "
append cmd "$launcher >& $redirect &"
execssh $apihost $cmd
}
addLogEntry "Startup command for ${api}API: '$cmd'" blue
set up [ mgr::bootstrapStatus $api ]
set pid [ getPid $api ]
;## if we succeeded inform martian boxes of
;## FTP and HTTP info for gateway.
if { $up } {
mgr::pushFTPandHTTPinfo $api $::LDAS_VERSION
} else {
set msg "failed connect to operator socket on $api API"
return -code error "[ myName ]: $msg"
}
mgr::bootstrapSuccessEmail $api $apihost $pid
mgr::bootstrapUnlock $api
mgr::bootstrapFatalErrorEmail $api $apihost NULL 1
} err ] } {
addLogEntry $err red
mgr::bootstrapUnlock $api
mgr::bootstrapFatalErrorEmail $api $apihost $err 0
}
} ;## end of foreach on api
after 0 mgr::apiStatusPage
}
proc mgr::bootstrapSuccessEmail { api apihost pid } {
if { [ catch {
set report 1
if { [ info exists ::bootstrapemailpid($api) ] } {
set last_pid $::bootstrapemailpid($api)
if { [ string equal $pid $last_pid ] } {
set report 0
}
}
set ::bootstrapemailpid($api) $pid
if { $report } {
if { [ info exists ::jobid ] && \
[ string equal STARTUP $::jobid ] } {
set msg "done. $api API is now running as pid: $pid"
addLogEntry $msg green
} else {
set logurl $::HTTPURL
regexp {(.+)/jobs} $logurl -> logurl
set logurl ${logurl}/logs
set subject "$::LDAS_SYSTEM ${api}API restarted"
set msg "$::LDAS_SYSTEM ${api}API restarted.\n"
append msg "$api API is now running as pid '${pid}' on "
append msg "$::LDAS_SYSTEM host ${apihost}.\nSee the "
append msg "$::LDAS_SYSTEM LDAS logs for details: "
append msg "$logurl"
addLogEntry "Subject: ${subject}; Body: $msg" email
}
}
} err ] } {
return -code error "[ myName ]: $err"
}
}
proc mgr::bootstrapFatalErrorEmail { api apihost err { cancel 0 } } {
if { $cancel == 1 } {
if { [ info exists ::fatalerroremailtime($api) ] } {
::unset ::fatalerroremailtime($api)
}
return {}
}
if { [ catch {
set errlvl blue
if { [ info exists ::UNABLE_TO_START_API_EMAIL_INTERVAL_S ] } {
set interval $::UNABLE_TO_START_API_EMAIL_INTERVAL_S
} else {
set interval 3600
}
set now [ clock seconds ]
if { [ info exists ::fatalerroremailtime($api) ] } {
set last_notified $::fatalerroremailtime($api)
if { $now - $last_notified > $interval } {
set ::fatalerroremailtime($api) $now
set errlvl email
} else {
set errlvl red
}
} else {
set last_notified $now
set ::fatalerroremailtime($api) $now
set errlvl email
}
set subject "$::LDAS_SYSTEM bootstrap of $api failed!"
set msg "could not start $api API on $apihost!\n"
append msg "(error: $err)"
if { [ string equal email $errlvl ] } {
addLogEntry "Subject: ${subject}; Body: $msg" $errlvl
} else {
addLogEntry $msg $errlvl
}
} err ] } {
return -code error "[ myName ]: $err"
}
}
proc mgr::bootstrapPause { api } {
if { [ catch {
if { [ info exists ::BOOTSTRAP_PAUSE_TIMEOUT ] } {
if { ! [ regexp {\d+} $::BOOTSTRAP_PAUSE_TIMEOUT ] } {
set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
} elseif { $::BOOTSTRAP_PAUSE_TIMEOUT < 5000 } {
set ::BOOTSTRAP_PAUSE_TIMEOUT 5000
}
} else {
set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
}
set pause $::BOOTSTRAP_PAUSE_TIMEOUT
set first [ expr { $pause / 5 } ]
set second $first
set third [ expr { $first * 3 } ]
set sid [ sock::open $api emergency ]
::close $sid
set start [ clock clicks -milliseconds ]
addLogEntry "giving ${api}API $first ms to shut down..."
after $first
set sid [ sock::open $api emergency ]
::close $sid
addLogEntry "giving ${api}API $second more ms to shut down..."
after $second
set sid [ sock::open $api emergency ]
::close $sid
addLogEntry "giving ${api}API $third more ms to shut down..."
after $third
set sid [ sock::open $api emergency ]
::close $sid
set msg "${api}API failed to shutdown in $pause ms. "
append msg "will try 'genocide' action. "
append msg "if this fails, somebody will have to "
append msg "clean up the ${api}API BY HAND..."
addLogEntry $msg red
} err ] } {
if { [ info exists start ] } {
set done [ clock clicks -milliseconds ]
set dt [ expr { $done - $start } ]
addLogEntry "${api}API shut down in $dt ms"
}
}
}
proc mgr::libstdcPreload { api } {
if { [ catch {
;## force preloading of libstdc++ as required
;## to avoid instant abort when c++ exception
;## is thrown which is not handled by a c program,
;## i.e. Tcl.
;## if getApiOS failed due to ssh issue throw exeption
;## and caller will skip the API.
set libstdc [ list ]
set OS [ getApiOS $api ]
if { [ regexp {Linux} $OS ] } {
if { [ info exists ::STDCPLUSPLUSLIBPAT ] } {
set libstdc [ libstdcPlusPlus $api ]
}
} elseif { [ regexp {Sun} $OS ] } {
if { [ info exists ::SUN_LIBSTDCPP ] } {
set libstdc $::SUN_LIBSTDCPP
}
} elseif { [ regexp {ssh_failed} $OS ] } {
return -code error $OS
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $libstdc
}
proc mgr::libmallocPreload { api } {
if { [ catch {
;## force preloading of libXXmalloc as required
;## to avoid monotonic increasing memory usage due
;## to memory fragmentation.
set libmalloc [ list ]
set OS [ getApiOS $api ]
if { [ regexp {Linux} $OS ] } {
if { [ info exists ::LINUX_MALLOC ] } {
set libmalloc $::LINUX_MALLOC
}
} elseif { [ regexp {Sun} $OS ] } {
if { [ info exists ::SUN_MALLOC ] } {
set libmalloc $::SUN_MALLOC
}
} elseif { [ regexp {ssh_failed} $OS ] } {
return -code error $OS
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $libmalloc
}
proc mgr::makeApiAtHome { api } {
if { [ catch {
set dir $::env(RUNDIR)/${api}API
if { ! [ file exists $dir ] } {
file mkdir $dir
} else {
if { ! [ file isdirectory $dir ] } {
file rename $dir ${dir}.bogus
file mkdir $dir
set msg "file with name reserved for $api API "
append msg "working directory renamed to ${dir}.bogus"
addLogEntry $msg red
}
}
file attributes $dir -permissions 0755
;## move misplaced .rsc and .ini files
foreach ext [ list rsc ini ] {
if { [ file exists $::env(RUNDIR)/LDAS${api}.$ext ] } {
file rename -force \
$::env(RUNDIR)/LDAS${api}.$ext $dir/LDAS${api}.$ext
set msg "moving LDAS${api}.$ext from $::env(RUNDIR) to $dir"
addLogEntry $msg blue
}
}
;## make sure browsers can browse things in here
;## that have the correct permissions
set htaccess ${dir}/.htaccess
if { ! [ file exists $htaccess ] } {
set fid [ open $htaccess w 0644 ]
puts $fid "DefaultType text/html"
::close $fid
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $dir
}
proc mgr::bootstrapStatus { api } {
set flag 0
set i 0
if { ! [ info exist ::API_BOOTSTRAP_MAX_S ] } {
set ::API_BOOTSTRAP_MAX_S 60
addLogEntry "Resource ::API_BOOTSTRAP_MAX_S set to $::API_BOOTSTRAP_MAX_S by default" blue
}
if { ! [ info exist ::API_BOOTSTRAP_DELAY_S ] } {
set ::API_BOOTSTRAP_DELAY_S 5
addLogEntry "Resource ::API_BOOTSTRAP_DELAY_S set to $::API_BOOTSTRAP_DELAY_S by default" blue
}
set maxtime [ expr $::API_BOOTSTRAP_MAX_S * 1000 ]
set delay [ expr $::API_BOOTSTRAP_DELAY_S * 1000 ]
while { $i < $maxtime } {
if { [ catch {
if { [ string equal manager $api ] } {
set flag 1
} else {
set sid [ sock::open $api operator ]
::close $sid
set flag 1
}
} err ] } {
catch { ::close $sid }
if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
[ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
addLogEntry "${api}API $err" purple
}
set flag 0
}
if { ! $flag } {
incr i $delay
after $delay
} else {
break
}
}
return $flag
}
proc mgr::bootstrapLock { apis { reset 0 } } {
if { [ catch {
set temp [ list ]
set now [ clock seconds ]
foreach api $apis {
if { ! [ info exists ::__bootstraplock_$api ] } {
set ::__bootstraplock_$api $now
lappend temp $api
} elseif { $reset } {
set ::__bootstraplock_$api $now
} else {
set lastlock [ lindex [ set ::__bootstraplock_$api ] end ]
if { $now > $lastlock } {
lappend ::__bootstraplock_$api $now
}
}
}
if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
[ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
set report [ list ]
set locks [ info vars ::__boot* ]
foreach lock $locks {
lappend report "$lock [ set $lock ] "
}
addLogEntry $report purple
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $temp
}
proc mgr::bootstrapUnlock { api } {
if { [ catch {
if { [ info exists ::__bootstraplock_$api ] } {
set locks [ set ::__bootstraplock_$api ]
if { [ llength $locks ] > 1 } {
set locks [ lrange $locks 1 end ]
set ::__bootstraplock_$api $locks
set locked 1
} else {
::unset ::__bootstraplock_$api
set locked 0
}
} else {
set locked 0
}
if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
[ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
set report [ list ]
set locks [ info vars ::__boot* ]
foreach lock $locks {
lappend report "$lock [ set $lock ] "
}
addLogEntry $report purple
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $locked
}
proc bootstrapLockTimeout { } {
if { [ catch {
set locks [ list ]
set api oops!
set now [ clock seconds ]
set notice "The resource variable ::MGR_BOOTSTRAPLOCK_TIMEOUT "
append notice "has been\nset to \$::MGR_BOOTSTRAPLOCK_TIMEOUT."
append notice " If this value is less than "
append notice "15 * number_of_beowulf_nodes\n"
append notice "or the number_of_mpi_search_users, "
append notice "whichever is GREATER for this system, then\n"
append notice "the value should be adjusted, and made to be\n"
append notice "at least 15 * (number of nodes or users)."
;## rationalise resource variable
if { ! [ info exists ::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
set notice [ subst -nocommands $notice ]
addLogEntry $notice blue
} else {
;## constrain timeout to an integer number of
;## seconds between 0 and 9999
if { ! [ regexp {^\d{1,4}$} $::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
set notice [ subst -nocommands $notice ]
addLogEntry $notice blue
}
}
set to $::MGR_BOOTSTRAPLOCK_TIMEOUT
;## iterate over the locks, unset the expired ones
;## and send email to the responsible party
set locks [ info vars ::__bootstraplock_* ]
foreach lock $locks {
regexp {__bootstraplock_(\S+)} $lock -> api
if { [ string equal manager $api ] } { continue }
set then [ set $lock ]
if { $now >= [ lindex $then end ] + $to } {
::unset $lock
set msg "$lock expired after $to seconds. "
append msg "something is wrong with the $::LDAS_SYSTEM "
append msg "LDAS system $api API!"
set subject "$::LDAS_SYSTEM $lock expired!"
set notice [ subst -nocommands $notice ]
set msg "${msg}\n\n$notice"
addLogEntry "Subject: ${subject}; Body: $msg" mail
}
}
} err ] } {
return -code error "[ myName ]: $err"
}
}
proc mgr::pushFTPandHTTPinfo { api { version "" } } {
set errs [ list ]
if { [ catch {
if { [ string length $version ] } {
mgr::apiVersionCheck $api $version
} else {
mgr::apiVersionCheck $api UNKNOWN
}
set vars [ list $::FTPURL $::FTPDIR $::HTTPURL \
$::HTTPDIR $::GRIDFTPURL $::GRIDFTPDIR \
[ lindex $::LDAS_GATEWAY 0 ] \
[ lindex $::LDAS_GATEWAY 1 ] $::LDAS_SYSTEM ]
set sid [ sock::open $api emergency ]
fconfigure $sid -blocking off
puts $sid "$::MGRKEY setFTPandHTTPinfo $vars"
::close $sid
} err ] } {
catch { ::close $sid }
return -code error "[ myName ]: $err"
}
}
mgr::shutdownAPI $apinameComments:
proc mgr::shutdownAPI { { apis "" } { comment "" } } {
if { ! [ string length $apis ] } {
set apis $::API_LIST
}
foreach api $apis {
if { [ string equal $api manager ] } {
continue
}
;## abort sequence for an assistant manager
set assistants [ namespace children :: ${::RUNCODE}* ]
foreach assistant $assistants {
if { [ catch {
set name [ string trim $assistant : ]
if { [ info exists ::${name}::api ] && \
[ string equal $api [ set ::${name}::api ] ] } {
set msg "$api API shutting down NOW!\n$name aborting!"
set ::${name}::errorapi [ set ::${name}::api ]
if { [ llength [ namespace children :: ::$name ] ] } {
::${name}::predelete $msg $msg
if { [ llength [ namespace children :: ::$name ] ] } {
::${name}::seq
}
}
}
} err ] } {
addLogEntry $err red
}
}
if { [ catch {
;## memory usage and other API process info
set statefile [ file join $::LDASLOG ${api}.status ]
set arc $::LDASARC/${api}API/${api}_status.[ gpsTime ]
if { [ file exists $statefile ] } {
set fid1 [ open $statefile r ]
set fid2 [ open $arc w ]
puts $fid2 "<html>\n<pre>"
fcopy $fid1 $fid2
::close $fid2
::close $fid1
file delete -force -- $statefile
}
} err ] } {
}
;## take a gcore of the API before shutdown
if { ! [ string equal nocore $comment ] } {
set apihost [ set ::${api}(host) ]
set now [ clock seconds ]
set pid ""
if { [ info exists ::bootstrapemailpid($api) ] } {
set pid $::bootstrapemailpid($api)
}
if { ![ string length $pid ] } {
set lockfile [ glob -nocomplain $::TOPDIR/${api}API/.${api}.*.lock ]
regexp {(\d+)} $lockfile -> pid
addLogEntry "obtained $api pid $pid from lock file" purple
}
if { [ string length $pid ] && [ info exist ::FORCE_GCORE ] && $::FORCE_GCORE} {
set cmd "alias cd=cd && cd $::TOPDIR/savedCores && gcore -o ${api}_${now}_gcore $pid"
addLogEntry "executing $cmd on $apihost" purple
execssh $apihost $cmd
}
}
if { [ catch {
set sid [ sock::open $api emergency ]
puts $sid "$::MGRKEY ${api}::sHuTdOwN \"$comment\""
after 100
cmd::receive $sid ${api}_$sid mgr::logReply
addLogEntry "$api API Shut Down" red
} err ] } {
}
if { [ catch {
;## I think we get here and delete the lock file
;## under all circumstances...
set globpat [ file join [ apiDirectory $api ] .$api.*.lock ]
set locks [ glob -nocomplain $globpat ]
foreach lock $locks {
set pid [ lindex [ split [ file tail $lock ] . ] end-1 ]
file delete -force -- $lock
addLogEntry "deleted lock file: $lock" blue
}
} err ] } {
}
}
}
proc mgr::postMortem { api } {
set data [ list ]
set url [ list ]
if { [ catch {
set url [ file dirname $::HTTPURL ]
regsub / $url // url
;## we may need an ::LDAS_LOG_ARCHIVE_URL
set url ${url}/logs/archive/${api}API/
set time [ gpsTime ]
set home [ apiDirectory $api ]
set stderrlog ${home}/${api}.log
if { [ file exists $stderrlog ] } {
set url $url${api}.log.$time
set url "stdout/stderr for previous run of $api API: $url"
set arcname $::LDASARC/${api}API/${api}.log.$time
set dir $::LDASARC/${api}API
if { ! [ file isdirectory $dir ] } {
file mkdir $dir
file attributes $dir -permissions 0755
gifBalls $dir
}
set fid1 [ open $arcname w ]
puts $fid1 "<html>\n<pre>"
set fid2 [ open $stderrlog r ]
fcopy $fid2 $fid1
::close $fid2
::close $fid1
} else {
set url [ list ]
}
} err ] } {
if { [ info exists fid ] } {
catch { ::close $fid }
}
if { [ info exists fid1 ] } {
catch { ::close $fid1 }
}
if { [ info exists fid2 ] } {
catch { ::close $fid2 }
}
return -code error "[ myName ]: $err"
}
return $url
}
proc mgr::dontStepOnJob { api timeout } {
set i 0
set vnames [ infoVars ]
;## try not to disturb a running job
foreach vname $vnames {
if { [ regexp $::RUNCODE $vname ] } {
while { [ regexp $api [ set $vname ] ] && $i < $timeout } {
incr i
sleep 1000
}
}
}
}
proc mgr::preValidate { cmd } {
if { [ catch {
set seqpt {}
set user [ list ]
set soname [ list ]
regexp -nocase -- {-name\s+\{?\s*(\S+)} [ lindex $cmd 1 ] -> user
set user [ string trim $user "{} " ]
;## see if dso is temporarily blocked!
regexp -nocase -- {-dynlib\s+\{?\s*(\S+)} $cmd -> soname
set soname [ string trim $soname "{} " ]
set cmdname [ lindex [ lindex $cmd 2 ] 0 ]
;## see if user is temporarily blocked!
if { [ lsearch $::mgr::blocked_users $user ] != -1 } {
set seqpt "ADMIN NOTICE: "
set err "user '$user' has been temporarily blocked!"
after 0 \
[ list mgr::logRejectedJob $user $cmdname blocked_user ]
return -code error $err
}
;## see if dso is temporarily blocked!
if { [ lsearch $::mgr::blocked_dsos $soname ] != -1 } {
set seqpt "ADMIN NOTICE: "
set err "DSO '$soname' has been temporarily blocked!"
after 0 \
[ list mgr::logRejectedJob $user $cmdname blocked_dso ]
return -code error $err
}
;## see if the user is exceeding his quota!
set seqpt "QUOTA EXCEEDED: "
mgr::userQuota $cmd
} err ] } {
return -code error "$seqpt$err"
}
}
proc mgr::userInfo { args } {
set return_info [ list ]
set scratch [ list ]
set now [ clock seconds ]
if { [ catch {
if { [ catch {
array set input $args
} ] } {
eval array set input $args
}
foreach user $::QUEUE(USERS) {
if { ! [ llength $user ] } { continue }
if { [ llength $user ] != 13 } {
mgr::reportCorruptedQueue $user
}
;## each user entry contains the three user info item
;## pairs and a list of the last N jobs processed for
;## that user.
set info [ lrange $user 0 end-1 ]
set jobs [ lindex $user end ]
array set q $info
if { [ string equal $q(-name) $input(-name) ] } {
;## Peter Shawhan's challenge/response code added
;## 05/20/02
if { [ string equal md5protocol $input(-password) ] } {
set salt [ uplevel set salt ]
set digest [ uplevel set digest ]
set test [ key::md5 $q(-password)$salt ]
if { [ string equal $digest $test ] } {
set hash $q(-password)
} else {
set hash invalid
}
} else {
set hash [ key::md5 $input(-password) ]
}
if { ! [ string equal $q(-password) $hash ] } {
set msg "Incorrect password given for user $q(-name)"
return -code error $msg
}
if { ! [ regexp {^\d+$} $q(-expires) ] } {
set q(expires) [ clock scan $q(-expires) ]
}
if { $now > $q(-expires) } {
set edate [ clock format $q(-expires) -format %D ]
set msg "MOU for user $q(-name) expires on ${edate}. "
append msg "Please contact LDAS admin to apply for "
append msg "an extension."
return -code error $msg
}
;## here we mangle the args so that the user password
;## is not updated when the other user data is updated!
regsub -- $input(-password) $args $q(-password) args
set info [ expandOpts info ]
set return_info $info
if { [ llength $return_info ] < 12 } {
set msg "QUEUE(USERS) has been corrupted!\n"
append msg "Please check the user info file for bad\n"
append msg "entries, missing \"'s, etc."
return -code error $msg
}
}
lappend scratch [ concat $info [ list $jobs ] ]
} ;## end of foreach
if { [ llength $return_info ] } {
set ::QUEUE(USERS) $scratch
if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
set msg "The QUEUE(USERS) queue has been\n"
append msg "initialised, but is empty."
return -code error $msg
}
} else {
return -code error "unknown user: '$input(-name)'"
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $return_info
}
proc mgr::reportCorruptedQueue { args } {
set baddata [ list $args ]
set subject "$::LDAS_SYSTEM users queue has become corrupted!"
set msg "${subject}\n"
append msg "The manager API choked on this user entry,\n"
append msg "which does not appear to have the correct\n"
append msg "number of fields:\n\n'$baddata'"
addLogEntry "Subject: ${subject}; Body: $msg" mail
return -code error "[ myName ]: $subject"
}
proc mgr::assignJobid { args } {
if { [ catch {
set return_info [ list ]
set scratch [ list ]
set now [ clock seconds ]
if { [ catch {
array set input $args
} ] } {
eval array set input $args
}
foreach user $::QUEUE(USERS) {
if { ! [ llength $user ] } { continue }
if { [ llength $user ] != 13 } {
set msg "QUEUE(USERS) has been corrupted!"
addLogEntry $msg 2
return -code error "[ myName ]: $msg"
}
;## each user entry contains the three user info item
;## pairs and a list of the last N jobs processed for
;## that user.
set info [ lrange $user 0 end-1 ]
set jobs [ lindex $user end ]
array set q $info
if { [ string equal $q(-name) $input(-name) ] } {
set jobid [ key::increment $::RUNCODE ]
set jobs \
[ linsert $jobs 0 [ list $jobid $input(-email) $now ] ]
set info [ expandOpts info ]
set return_info $info
if { [ llength $return_info ] < 12 } {
set msg "QUEUE(USERS) has been corrupted!\n"
append msg "Please check the user info file for bad\n"
append msg "entries, missing \"'s, etc."
return -code error "[ myName ]: $msg"
}
}
lappend scratch [ concat $info [ list $jobs ] ]
} ;## end of foreach
if { [ llength $return_info ] } {
set ::QUEUE(USERS) $scratch
if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
set msg "The QUEUE(USERS) queue has been\n"
append msg "initialised, but is empty."
return -code error "[ myName ]: $msg"
}
}
} err ] } {
return -code error "[ myName ]: $err"
}
return $jobid
}
proc mgr::deleteJobidFromUsersQueue { jobid } {
if { [ catch {
set name [ list ]