LDAS logo
TclDOC logo

The manager.tcl Script

Modification Date: 11/26/2008

Table of Procedures

red ball kIlLjOb
red ball manager::reply
red ball mgr::abortBlockedJob
red ball mgr::abortJob
red ball mgr::activeJobStatusLockReport
red ball mgr::activeJobStatusSummary
red ball mgr::activelyMonitorDiskUsage
red ball mgr::addAPI
red ball mgr::addUser
red ball mgr::apiSocketStatus
red ball mgr::apiStatus
red ball mgr::apiStatusPage
red ball mgr::apiVersionCheck
red ball mgr::assignJobid
red ball mgr::blockDso
red ball mgr::blockUser
red ball mgr::bootstrapAPI
red ball mgr::bootstrapFatalErrorEmail
red ball mgr::bootstrapLock
red ball mgr::bootstrapLockTimeout
red ball mgr::bootstrapPause
red ball mgr::bootstrapStatus
red ball mgr::bootstrapSuccessEmail
red ball mgr::bootstrapUnlock
red ball mgr::chopQueueTo
red ball mgr::cmdParseUserTimeout
red ball mgr::commandSanityChecks
red ball mgr::cryptCheck
red ball mgr::deleteJobidFromUsersQueue
red ball mgr::delocaliseOutputURLs
red ball mgr::detectBlockage
red ball mgr::dontStepOnJob
red ball mgr::dropPrettyCommand
red ball mgr::duplicateOpts
red ball mgr::expandCmd
red ball mgr::expiresNextWeek
red ball mgr::expiresTomorrow
red ball mgr::forcePerms
red ball mgr::garbageCollect
red ball mgr::getCommandInputText
red ball mgr::getDiskUsageInfo
red ball mgr::getExtraEnv
red ball mgr::getLauncher
red ball mgr::getURLsForJob
red ball mgr::globMacros
red ball mgr::hideDirectories
red ball mgr::job2email
red ball mgr::job2name
red ball mgr::jobStatus
red ball mgr::jobTimingReport
red ball mgr::jobidToHref
red ball mgr::killJob
red ball mgr::killPing
red ball mgr::lastUsedPort
red ball mgr::libmallocPreload
red ball mgr::libstdcPreload
red ball mgr::liveJobListPush
red ball mgr::localiseInputURLs
red ball mgr::logRejectedJob
red ball mgr::logReply
red ball mgr::makeApiAtHome
red ball mgr::makeFtpDirectory
red ball mgr::manageLogs
red ball mgr::memUsage
red ball mgr::oneLineJobTimingReport
red ball mgr::parseBlock
red ball mgr::parseMeta
red ball mgr::perApiRunningStats
red ball mgr::ping
red ball mgr::postMortem
red ball mgr::preValidate
red ball mgr::prettifyCmdHtml
red ball mgr::promoteJobs
red ball mgr::pushFTPandHTTPinfo
red ball mgr::recryptWithNewKey
red ball mgr::removeAPI
red ball mgr::removeExpiredUsers
red ball mgr::removeUser
red ball mgr::reply
red ball mgr::reportCorruptedQueue
red ball mgr::reportOversizeQueue
red ball mgr::reportWrongHost
red ball mgr::reportWrongUser
red ball mgr::rmJobFiles
red ball mgr::runStatusSummary
red ball mgr::runningJobStatsFile
red ball mgr::sHuTdOwN
red ball mgr::scanNFSErrors
red ball mgr::shutdownAPI
red ball mgr::throttleOnDiskUsage
red ball mgr::throttlePipelines
red ball mgr::throttlePipelinesAtApi
red ball mgr::updateOrAddUser
red ball mgr::updateUserInfo
red ball mgr::userInfo
red ball mgr::userQuota
red ball mgr::validateUserOptions
red ball mgr::validateUserPassword
red ball mgr::validateopts
red ball mgr::writeActiveJobStatus
red ball mgr::zombies

-*- tcl-indent-level: 5; indent-tabs-mode: nil; -*- vi: set autoindent expandtab tabstop=5 shiftwidth=5 :
manager.tcl Version 1.0 Wraps the
genericAPI.tcl for use by the manager API.

set ::RCS_ID_managertcl {$Id: manager.tcl,v 1.518 2008/11/04 22:04:34 emaros Exp $}
set ::RCS_ID_managertcl [ string trim $::RCS_ID_managertcl "\$" ]
package provide manager 1.0
namespace eval mgr {
     set activejobs 0
}

§   §   §
§   §   §

Name: mgr::expandCmd

Description:
This is the entry point for the managers macro parser.
It calls itself recursively, and calls the mgr::parseMeta and mgr::parseBlock. The result of this expansion is a block of code which the assistant managers can use to drive the low level API's.
Usage:

Comments:

This is a "two pass" parser.
proc mgr::expandCmd { { cmd "" } } {
     ;## if its a xxx.meta command, expand and recurse.
     if { [ catch {
          if { [ llength [ split $cmd "\n" ] ] == 1 } {
               set cmd      [ mgr::parseMeta $cmd ]
               set sequence [ mgr::expandCmd $cmd ]
          } else {
               set sequence [ mgr::parseBlock $cmd ]
          }
     } err ] } {
          addLogEntry $err
          return -code error "[ myName ]: $err"
     }
     return $sequence
}

§   §   §

Name: mgr::commandSanityChecks

Description:
Catch-all for command sanity that short-circuits as early as possible.
Called from mgr::validateopts
Parameters: Usage:

Comments:

proc mgr::commandSanityChecks { cmd } {
     if { [ catch {
          ;## -np must be at least '2' since no search code is run
          ;## on the wrapper master and we include the wrapper master
          ;## as one of the nodes requested to avoid underrepresenting
          ;## the system requirements of large numbers of small jobs.
          if { [ regexp -nocase -- {\s+-np\s+[01]\s+} $cmd ] } {
               set err "minimum possible value of '-np' option is '2'"
               return -code error $err
          }
          ;## when ::REJECT_EXPLICIT_DYNLIB_PATHS is set to 1, we
          ;## reject al commands that attempt to specify a dynlib.so
          ;## by absolute path. this is done to prevent the running
          ;## of potentially hazardous experimental code on production
          ;## LDAS systems. PR #1825 10-07-2003
          if { [ info exists ::REJECT_EXPLICIT_DYNLIB_PATHS ] && \
                   [ string equal 1 $::REJECT_EXPLICIT_DYNLIB_PATHS ] } {
               set dyn_rx {\s+-dynlib\s+(\S+)\s+}
               if { [ regexp -nocase -- $dyn_rx $cmd -> dynlib ] } {
                    regsub -all -- {[\}\{\"\s]} $dynlib {} dynlib
                    if { [ regexp {[\\\/]+} $dynlib ] } {
                         set msg "This system has ::REJECT_EXPLICIT_DYNLIB_PATHS"
                         append msg " enabled to prevent the use of experimental"
                         append msg " shared objects."
                         append msg " You may not specify path components as"
                         append msg " part of your -dynlib option (and you"
                         append msg " specified '$dynlib')."
                         return -code error $msg
                    }
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::cmdParseUserTimeout

Description:

Parameters: Usage:

Comments:

proc mgr::cmdParseUserTimeout { cmd } {
     if { [ catch {
          set timeout 0
          ;## optional option available to users in order to cause
          ;## jobs to timeout *more* quickly.
          set uto_rx {\s+-usertimeout\s+(\S+)\s+}
          if { [ regexp -nocase -- $uto_rx $cmd -> timeout ] } {
               regsub -all -- {[\}\{\"\s]} $timeout {} timeout
               set standard $::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API
               set timeout [ expr { round($timeout) } ]
               if { $timeout > 2 * $standard } {
                    set timeout [ expr { 2 * $standard } ]
               }
          } else {
               set timeout 0
          }
     } err ] } {
          set err "Error in -usertimeout option: '$err'"
          return -code error "[ myName ]: $err"
     }
     return $timeout
}

§   §   §

Name: mgr::parseMeta

Description:
Called by mgr::expandCmd on the command received from the user, which is assumed to be a "meta" command.
Usage:

Comments:

Should be "caught".
proc mgr::parseMeta { cmd } {
     set procname {}
     set usropts  [ list ]
     set command  {}
     set macrolist [ mgr::globMacros ]
     regexp $::cmd_rx $cmd -> procname usropts
     ;## first pass expansion from meta file
     if { [ lsearch -exact $macrolist ${procname}.meta ] > -1 } {
          set fname [ file join $::LDASMACROS meta ${procname}.meta ]
          set fid [ open $fname r ]
          ;## meta macros are constrained to start with a line
          ;## declaring the "opts" and their defaults.
          set command [ string trim [ gets $fid ] ]
          ;## test it.
          if { ! [ regexp {^set opts \{[^\}]+\}} $command ] } {
               set msg    "the first line of the meta file must be\n"
               append msg "the declaration of 'opts', but I found:\n"
               append msg "$command\ninstead."
               return -code error "[ myName ]: $msg"
          }
          ;## apply the user supplied options
          if { [ llength $usropts ] } {
               append command "\n metaOpts \{ $usropts \}\n"
          }
          append command [ read $fid ]
          ::close $fid
     } else {
          return -code error "[ myName ]: Bad meta command: '$cmd'"
     }
     return $command
}

§   §   §

Name: mgr::parseBlock

Description:
Usage:

Comments:

proc mgr::parseBlock { cmd } {
     set api {}
     set tmp [ list ]
     set sequence [ list ]
     set api_rx ^([ join $::API_LIST | ])\\s*\$
     set macrolist [ mgr::globMacros ]
     foreach line [ split $cmd ";\n" ] {
          set line [ string trim $line ]
          if { [ regexp {(^\#.*$|^$)} $line ] } {
               continue
          }
          if { [ regexp {^set opts \{[^\}]+\}} $line ] } {
               set sequence $line
               continue
          }
          if { [ regexp -nocase $api_rx $line api ] } {
               if { [ string equal $api $tmp ] } {
                    continue
               } else {
                    set tmp $api
                    continue
               }
          }
          if { ! [ regexp $::cmd_rx $line -> procname opts ] } {
               set procname [ string trim $line ]
          }
          if { [ string equal metaOpts $procname ] } {
               append sequence "\n metaOpts $opts\n"
               continue
          }
          if { [ lsearch -exact $macrolist ${procname}.$api ] > -1 } {
               set fname [ file join $::LDASMACROS $api ${procname}.$api ]
               append sequence "\n$api\n"
               set fid [ open $fname r ]
               set tmp {}
               while { [ gets $fid code ] > -1 } {
                    if { ! [ regexp {(^[ ]*;?\#)|(^[ ]*$)} $code ] } {
                         append tmp "$code\n"
                    }
               }
               ::close $fid
               append sequence \{[ string trim $tmp ]\}
               set procname {}
               continue
          }
     } ;# end of foreach
     return $sequence
}

§   §   §

Name: mgr::globMacros

Description:
Returns all the macro filenames found under ::LDASMACROS in a list, or sends a helpful error message if the macro files are not found.

Parameters: Usage:

Comments:

proc mgr::globMacros { } {
     set macrolist {}
     foreach file [ glob -nocomplain $::LDASMACROS/*/*.* ] {
          lappend macrolist [ file tail $file ]
     }
     if { [ llength $macrolist ] < 3 } {
          set msg "*********************************************\n"
          append msg "ABORT!! No macro code found under '$::LDASMACROS'\n"
          append msg "If this is a new installation of LDAS, you\n"
          append msg "must read the documentation for installing\n"
          append msg "the manager API.\n"
          append msg "If this is not a new installation, then\n"
          append msg "either you have deleted the macro files or\n"
          append msg "redefined ::LDASMACROS to point to the wrong\n"
          append msg "location.  You may also being experiencing\n"
          append msg "transient NFS problems.\n\n"
          append msg "         NO JOBS CAN BE PROCESSED\n"
          append msg "       UNTIL THIS PROBLEM IS SOLVED.\n"
          append msg "*********************************************\n"
          puts stderr $msg
          return -code error "[ myName ]:\n$msg"
     }
     set macrolist
}

§   §   §

Name: mgr::bootstrapAPI

Description:
Execute command sequence on a remote machine to bring an API online. The remote command is executed through ssh.
Usage:
      if { [ mgr::bootstrapAPI $apis ] } { ... }
Comments:
The ssh call may be overly elaborate.
proc mgr::bootstrapAPI { { apis "" } { comment "" } } {
     set redirect /dev/null
     set libmalloc [ list ]
     set libstdc   [ list ]
     set apihost unknown!
     if { ! [ string length $apis ] } {
          set msg "you must specify an api, list of api's, or 'all'"
          return -code error "[ myName ]: $msg"
     }
     if { [ string equal all $apis ] } {
          set apis $::API_LIST
     } else {
          foreach api $apis {
               if { [ lsearch $::API_LIST $api ] < 0 } {
                    set msg "cannot bootstrap unregistered API: '$api'"
                    return -code error $msg
               }
          }
     }
     ;## emergency port requests always succeed
     set caller [ uplevel myName ]
     if { [ string equal emergency_callback $caller ] } {
          mgr::bootstrapLock $apis 1
     } else {
          set apis [ mgr::bootstrapLock $apis 0 ]
     }
     set localhost [ set ::${::LDAS_SYSTEM}(manager) ]
     foreach api $apis {
          set api [ string tolower $api ]
          regsub {api$} $api {} api
          if { [ string equal $api manager ] } { continue }
          set apiPid [ getPid $api ]
          set apiPid [ string trim $apiPid ]
          addLogEntry "$api pid is '$apiPid'" purple
          ;## if API is up but appears stuck and user does not want reboot
          ;## hold off rebooting API
          if { [ string length $apiPid ] && ! $::STARTUP_IN_PROGRESS && \
                   [ info exist ::WAIT_FOR_API ] && [ set ::WAIT_FOR_API ] } {
               set subject "hold off bootstraping $api"
               set body "set ::WAIT_FOR_API to 0 to resume booting"
               addLogEntry "Subject: ${subject}; Body: ${body}" email
               continue
          }
          if { [ catch {
               mgr::shutdownAPI $api $comment
               mgr::bootstrapPause $api
               set postmortem [ mgr::postMortem $api ]
               ;## if the call to mgr::postMortem returned data
               if { [ string length $postmortem ] } {
                    addLogEntry $postmortem blue mgr::postMortem
               }
               set apihost [ set ::${api}(host) ]
          } err ] } {
               addLogEntry "$err (api: $api)" 2
          }
          if { [ catch {
               set libstdc [ mgr::libstdcPreload $api ]
          } err ] } {
               continue
          }
          if { [ catch {
               set libmalloc [ mgr::libmallocPreload $api ]
          } err ] } {
               continue
          }
          ;## Ed asked for different vars for different platforms 04/28/04
          ;## this will return SunOS or Linux
          ;## Ed asked for this 07/02/07
          ;## have different env for different APIs to run libumem
          set extra_env [ mgr::getExtraEnv $api ]
          set extra_env [ subst -nocommands $extra_env ]
          set api_cmd "${api}API"
          if { ! [ string equal $apihost $localhost ] } {
               set api_cmd "$::LDAS/bin/${api_cmd}"
          }
          set launcher [ mgr::getLauncher $api $api_cmd ]
          mgr::perApiRunningStats $api restart [ gpsTime ]
          set dir [ mgr::makeApiAtHome $api ]
          set dotfile "${dir}/.. "
          set fid [ open $dotfile w ]
          file attributes $dotfile -permissions 0600
          puts $fid $::MGRKEY
          ::close $fid
          set redirect ${dir}/${api}.log
          addLogEntry "bootstrapping $api API on $apihost"
          set apipath ${::LDASLIB}/${api}API
          if { [ catch {
               if { [ string equal $apihost $localhost ] } {
                    set LD_LIBRARY_PATH $::env(LD_LIBRARY_PATH)
                    set cwd [ pwd ]
                    cd $dir
                    set cmd    "/bin/env $extra_env "
                    append cmd "RUNDIR=$::env(RUNDIR) "
                    append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
                    append cmd "DB2INSTANCE=$::DB2INSTANCE "
                    append cmd "$launcher >& $redirect &"
                    eval ::exec $cmd
                    cd $cwd
               } else {
                    set cmd    "alias cd=cd && cd $dir && /bin/env "
                    append cmd "RUNDIR=$::env(RUNDIR) HOST=$apihost "
                    append cmd "PATH=$::LDAS/bin:$::env(PATH) "
                    append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
                    append cmd "DB2INSTANCE=$::DB2INSTANCE $extra_env "
                    append cmd "$launcher >& $redirect &"
                    execssh $apihost $cmd
               }
               addLogEntry "Startup command for ${api}API: '$cmd'" blue
               set up [ mgr::bootstrapStatus $api ]
               set pid [ getPid $api ]
               ;## if we succeeded inform martian boxes of
               ;## FTP and HTTP info for gateway.
               if { $up } {
                    mgr::pushFTPandHTTPinfo $api $::LDAS_VERSION
               } else {
                    set msg "failed connect to operator socket on $api API"
                    return -code error "[ myName ]: $msg"
               }
               mgr::bootstrapSuccessEmail $api $apihost $pid
               mgr::bootstrapUnlock $api
               mgr::bootstrapFatalErrorEmail $api $apihost NULL 1
          } err ] } {
               addLogEntry $err red
               mgr::bootstrapUnlock $api
               mgr::bootstrapFatalErrorEmail $api $apihost $err 0
          }
     } ;## end of foreach on api
     after 0 mgr::apiStatusPage
}

§   §   §

Name: mgr::bootstrapSuccessEmail

Description:

Parameters: Usage:

Comments:

proc mgr::bootstrapSuccessEmail { api apihost pid } {
     if { [ catch {
          set report 1
          if { [ info exists ::bootstrapemailpid($api) ] } {
               set last_pid   $::bootstrapemailpid($api)
               if { [ string equal $pid $last_pid ] } {
                    set report 0
               }
          }
          set ::bootstrapemailpid($api) $pid
          if { $report } {
               if { [ info exists ::jobid ] && \
                        [ string equal STARTUP $::jobid ] } {
                    set msg "done. $api API is now running as pid: $pid"
                    addLogEntry $msg green
               } else {
                    set logurl $::HTTPURL
                    regexp {(.+)/jobs} $logurl -> logurl
                    set logurl ${logurl}/logs
                    set subject "$::LDAS_SYSTEM ${api}API restarted"
                    set msg    "$::LDAS_SYSTEM ${api}API restarted.\n"
                    append msg "$api API is now running as pid '${pid}' on "
                    append msg "$::LDAS_SYSTEM host ${apihost}.\nSee the "
                    append msg "$::LDAS_SYSTEM LDAS logs for details: "
                    append msg "$logurl"
                    addLogEntry "Subject: ${subject}; Body: $msg" email
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::bootstrapFatalErrorEmail

Description:
Do not send email every single time an API fails to be restarted -- requested by Kent Blackburn 05/2003.
Set the resource variable ::EMAIL_UNABLE_TO_START_API_INTERVAL_S to the number of seconds to supress repeated emails.
Default supression period is one hour.

Parameters: Usage:

Comments:

proc mgr::bootstrapFatalErrorEmail { api apihost err { cancel 0 } } {
     if { $cancel == 1 } {
          if { [ info exists ::fatalerroremailtime($api) ] } {
               ::unset ::fatalerroremailtime($api)
          }
          return {}
     }
     if { [ catch {
          set errlvl blue
          if { [ info exists ::UNABLE_TO_START_API_EMAIL_INTERVAL_S ] } {
               set interval $::UNABLE_TO_START_API_EMAIL_INTERVAL_S
          } else {
               set interval 3600
          }
          set now [ clock seconds ]
          if { [ info exists ::fatalerroremailtime($api) ] } {
               set last_notified $::fatalerroremailtime($api)
               if { $now - $last_notified > $interval } {
                    set ::fatalerroremailtime($api) $now
                    set errlvl email
               } else {
                    set errlvl red
               }
          } else {
               set last_notified $now
               set ::fatalerroremailtime($api) $now
               set errlvl email
          }
          set subject "$::LDAS_SYSTEM bootstrap of $api failed!"
          set msg     "could not start $api API on $apihost!\n"
          append msg  "(error: $err)"
          if { [ string equal email $errlvl ] } {
               addLogEntry "Subject: ${subject}; Body: $msg" $errlvl
          } else {
               addLogEntry $msg $errlvl
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::bootstrapPause

Description:
Wait up to ::BOOTSTRAP_PAUSE_TIMEOUT milliseconds for an API to shutdown on command before initiating genocide.

Parameters: Usage:

Comments:

If ::BOOTSTRAP_PAUSE_TIMEOUT is less than 5000 ms, it will be forced to be 5000 ms.
proc mgr::bootstrapPause { api } {
     if { [ catch {
          if { [ info exists ::BOOTSTRAP_PAUSE_TIMEOUT ] } {
               if { ! [ regexp {\d+} $::BOOTSTRAP_PAUSE_TIMEOUT ] } {
                    set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
               } elseif { $::BOOTSTRAP_PAUSE_TIMEOUT < 5000 } {
                    set ::BOOTSTRAP_PAUSE_TIMEOUT 5000
               }
          } else {
               set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
          }
          set pause $::BOOTSTRAP_PAUSE_TIMEOUT
          set first  [ expr { $pause / 5 } ]
          set second $first
          set third  [ expr { $first * 3 } ]
          set sid [ sock::open $api emergency ]
          ::close $sid
          set start [ clock clicks -milliseconds ]
          addLogEntry "giving ${api}API $first ms to shut down..."
          after $first
          set sid [ sock::open $api emergency ]
          ::close $sid
          addLogEntry "giving ${api}API $second more ms to shut down..."
          after $second
          set sid [ sock::open $api emergency ]
          ::close $sid
          addLogEntry "giving ${api}API $third more ms to shut down..."
          after $third
          set sid [ sock::open $api emergency ]
          ::close $sid
          set msg "${api}API failed to shutdown in $pause ms. "
          append msg "will try 'genocide' action. "
          append msg "if this fails, somebody will have to "
          append msg "clean up the ${api}API BY HAND..."
          addLogEntry $msg red
     } err ] } {
          if { [ info exists start ] } {
               set done [ clock clicks -milliseconds ]
               set dt [ expr { $done - $start } ]
               addLogEntry "${api}API shut down in $dt ms"
          }
     }
}

§   §   §

Name: mgr::libstdcPreload

Description:

Parameters: Usage:

Comments:

proc mgr::libstdcPreload { api } {
     if { [ catch {
          ;## force preloading of libstdc++ as required
          ;## to avoid instant abort when c++ exception
          ;## is thrown which is not handled by a c program,
          ;## i.e. Tcl.
          ;## if getApiOS failed due to ssh issue throw exeption
          ;## and caller will skip the API.
          set libstdc [ list ]
          set OS [ getApiOS $api ]
          if { [ regexp {Linux} $OS ] } {
               if { [ info exists ::STDCPLUSPLUSLIBPAT ] } {
                    set libstdc [ libstdcPlusPlus $api ]
               }
          } elseif { [ regexp {Sun} $OS ] } {
               if { [ info exists ::SUN_LIBSTDCPP ] } {
                    set libstdc $::SUN_LIBSTDCPP
               }
          } elseif { [ regexp {ssh_failed} $OS ] } {
               return -code error $OS
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $libstdc
}

§   §   §

Name: mgr::libmallocPreload

Description:

Parameters: Usage:

Comments:

proc mgr::libmallocPreload { api } {
     if { [ catch {
          ;## force preloading of libXXmalloc as required
          ;## to avoid monotonic increasing memory usage due
          ;## to memory fragmentation.
          set libmalloc [ list ]
          set OS [ getApiOS $api ]
          if { [ regexp {Linux} $OS ] } {
               if { [ info exists ::LINUX_MALLOC ] } {
                    set libmalloc  $::LINUX_MALLOC
               }
          } elseif { [ regexp {Sun} $OS ] } {
               if { [ info exists ::SUN_MALLOC ] } {
                    set libmalloc  $::SUN_MALLOC
               }
          } elseif { [ regexp {ssh_failed} $OS ] } {
               return -code error $OS
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $libmalloc
}

§   §   §

Name: mgr::makeApiAtHome

Description:

Parameters: Usage:

Comments:

proc mgr::makeApiAtHome { api } {
     if { [ catch {
          set dir $::env(RUNDIR)/${api}API
          if { ! [ file exists $dir ] } {
               file mkdir $dir
          } else {
               if { ! [ file isdirectory $dir ] } {
                    file rename $dir ${dir}.bogus
                    file mkdir $dir
                    set msg    "file with name reserved for $api API "
                    append msg "working directory renamed to ${dir}.bogus"
                    addLogEntry $msg red
               }
          }
          file attributes $dir -permissions 0755
          ;## move misplaced .rsc and .ini files
          foreach ext [ list rsc ini ] {
               if { [ file exists $::env(RUNDIR)/LDAS${api}.$ext ] } {
                    file rename -force \
                        $::env(RUNDIR)/LDAS${api}.$ext $dir/LDAS${api}.$ext
                    set msg "moving LDAS${api}.$ext from $::env(RUNDIR) to $dir"
                    addLogEntry $msg blue
               }
          }
          ;## make sure browsers can browse things in here
          ;## that have the correct permissions
          set htaccess ${dir}/.htaccess
          if { ! [ file exists $htaccess ] } {
               set fid [ open $htaccess w 0644 ]
               puts $fid "DefaultType text/html"
               ::close $fid
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $dir
}

§   §   §

Name: mgr::bootstrapStatus

Description:
Wait up to 1 minute for an API to come up. This can fail!

Parameters: Usage:

Comments:

proc mgr::bootstrapStatus { api } {
     set flag 0
     set i 0
     if { ! [ info exist ::API_BOOTSTRAP_MAX_S ] } {
        set ::API_BOOTSTRAP_MAX_S 60
        addLogEntry "Resource ::API_BOOTSTRAP_MAX_S set to $::API_BOOTSTRAP_MAX_S by default" blue
     }
     if { ! [ info exist ::API_BOOTSTRAP_DELAY_S ] } {
        set ::API_BOOTSTRAP_DELAY_S 5
        addLogEntry "Resource ::API_BOOTSTRAP_DELAY_S set to $::API_BOOTSTRAP_DELAY_S by default" blue
     }
     set maxtime [ expr $::API_BOOTSTRAP_MAX_S * 1000 ]
     set delay [ expr $::API_BOOTSTRAP_DELAY_S * 1000 ]
     while { $i < $maxtime } {
          if { [ catch {
               if { [ string equal manager $api ] } {
                    set flag 1
               } else {
	       	    set sid [ sock::open $api operator ]
                    ::close $sid
                    set flag 1
               }
          } err ] } {
               catch { ::close $sid }
               if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                        [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
                    addLogEntry "${api}API $err" purple
               }
               set flag 0
          }
          if { ! $flag } {
               incr i $delay
               after $delay
          } else {
               break
          }
     }
     return $flag
}

§   §   §

Name: mgr::bootstrapLock

Description:
Prevents bootstrap from being called multiple times in quick succession.

Parameters: Usage:

Comments:

proc mgr::bootstrapLock { apis { reset 0 } } {
     if { [ catch {
          set temp [ list ]
          set now  [ clock seconds ]
          foreach api $apis {
               if { ! [ info exists ::__bootstraplock_$api ] } {
                    set ::__bootstraplock_$api $now
                    lappend temp $api
               } elseif { $reset } {
                    set ::__bootstraplock_$api $now
               } else {
                    set lastlock [ lindex [ set ::__bootstraplock_$api ] end ]
                    if { $now > $lastlock } {
                         lappend ::__bootstraplock_$api $now
                    }
               }
          }
          if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                   [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
               set report [ list ]
               set locks [ info vars ::__boot* ]
               foreach lock $locks {
                    lappend report "$lock [ set $lock ] "
               }
               addLogEntry $report purple
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $temp
}

§   §   §

Name: mgr::bootstrapUnlock

Description:

Parameters: Usage:

Comments:

proc mgr::bootstrapUnlock { api } {
     if { [ catch {
          if { [ info exists ::__bootstraplock_$api ] } {
               set locks [ set ::__bootstraplock_$api ]
               if { [ llength $locks ] > 1 } {
                    set locks [ lrange $locks 1 end ]
                    set ::__bootstraplock_$api $locks
                    set locked 1
               } else {
                    ::unset ::__bootstraplock_$api
                    set locked 0
               }
          } else {
               set locked 0
          }
          if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                   [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
               set report [ list ]
               set locks [ info vars ::__boot* ]
               foreach lock $locks {
                    lappend report "$lock [ set $lock ] "
               }
               addLogEntry $report purple
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $locked
}

§   §   §

Name: mgr::bootstrapLockTimeout

Description:
If an API does not restart, remove the lock and send e-mail to the responsible administrator for the manager.

Parameters: Usage:

Comments:

This is called in a bgLoop at startup of the managerAPI
proc bootstrapLockTimeout { } {
     if { [ catch {
          set locks [ list ]
          set api oops!
          set now [ clock seconds ]
          set notice "The resource variable ::MGR_BOOTSTRAPLOCK_TIMEOUT "
          append notice "has been\nset to \$::MGR_BOOTSTRAPLOCK_TIMEOUT."
          append notice " If this value is less than "
          append notice "15 * number_of_beowulf_nodes\n"
          append notice "or the number_of_mpi_search_users, "
          append notice "whichever is GREATER for this system, then\n"
          append notice "the value should be adjusted, and made to be\n"
          append notice "at least 15 * (number of nodes or users)."
          ;## rationalise resource variable
          if { ! [ info exists ::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
               set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
               set notice [ subst -nocommands $notice ]
               addLogEntry $notice blue
          } else {
               ;## constrain timeout to an integer number of
               ;## seconds between 0 and 9999
               if { ! [ regexp {^\d{1,4}$} $::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
                    set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
                    set notice [ subst -nocommands $notice ]
                    addLogEntry $notice blue
               }
          }
          set to $::MGR_BOOTSTRAPLOCK_TIMEOUT
          ;## iterate over the locks, unset the expired ones
          ;## and send email to the responsible party
          set locks [ info vars ::__bootstraplock_* ]
          foreach lock $locks {
               regexp {__bootstraplock_(\S+)} $lock -> api
               if { [ string equal manager $api ] } { continue }
               set then [ set $lock ]
               if { $now >= [ lindex $then end ] + $to } {
                    ::unset $lock
                    set msg    "$lock expired after $to seconds. "
                    append msg "something is wrong with the $::LDAS_SYSTEM "
                    append msg "LDAS system $api API!"
                    set subject "$::LDAS_SYSTEM $lock expired!"
                    set notice [ subst -nocommands $notice ]
                    set msg "${msg}\n\n$notice"
                    addLogEntry "Subject: ${subject}; Body: $msg" mail
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::pushFTPandHTTPinfo

Description:
Push gateway information for ftp and http transactions.
When API's are on remote machines they may be inside of a martian network and will need to know the names of the data directories as seen from the outside world.

Parameters: Usage:

Comments:

Will return repetetive error message on failure. :TODO:
proc mgr::pushFTPandHTTPinfo { api { version "" } } {
     set errs [ list ]
     if { [ catch {
          if { [ string length $version ] } {
               mgr::apiVersionCheck $api $version
          } else {
               mgr::apiVersionCheck $api UNKNOWN
          }
          set vars [ list $::FTPURL $::FTPDIR $::HTTPURL \
                         $::HTTPDIR $::GRIDFTPURL $::GRIDFTPDIR \
                         [ lindex $::LDAS_GATEWAY 0 ] \
                         [ lindex $::LDAS_GATEWAY 1 ] $::LDAS_SYSTEM ]
          set sid [ sock::open $api emergency ]
          fconfigure $sid -blocking off
          puts $sid "$::MGRKEY setFTPandHTTPinfo $vars"
          ::close $sid
     } err ] } {
          catch { ::close $sid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::shutdownAPI

Description:
Shut down an API started by bootstrapAPI Usage:
      mgr::shutdownAPI $apiname
Comments:
Since the API exists it can contain it's own internal shutdown procedure, ${::API}::sHuTdOwN.
::RESTART_ON_MEMFLAG is a variable set in the LDASapi.rsc which enables the restarting of an API when it's memory usage exceeds the flag level in the memFlag procedure of genericAPI.tcl. If ::RESTART_ON_MEMFLAG = 0, no attempt will be made to save the currently running job.
proc mgr::shutdownAPI { { apis "" } { comment "" } } {
     if { ! [ string length $apis ] } {
          set apis $::API_LIST
     }
     foreach api $apis {
          if { [ string equal $api manager ] } {
               continue
          }
          ;## abort sequence for an assistant manager
          set assistants [ namespace children :: ${::RUNCODE}* ]
          foreach assistant $assistants {
               if { [ catch {
                    set name [ string trim $assistant : ]
                    if { [ info exists ::${name}::api ] && \
                             [ string equal $api [ set ::${name}::api ] ] } {
                         set msg "$api API shutting down NOW!\n$name aborting!"
                         set ::${name}::errorapi [ set ::${name}::api ]
                         if { [ llength [ namespace children :: ::$name ] ] } {
                              ::${name}::predelete $msg $msg
                              if { [ llength [ namespace children :: ::$name ] ] } {
                                   ::${name}::seq
                              }
                         }
                    }
               } err ] } {
                    addLogEntry $err red
               }
          }
          if { [ catch {
               ;## memory usage and other API process info
               set statefile [ file join $::LDASLOG ${api}.status ]
               set arc $::LDASARC/${api}API/${api}_status.[ gpsTime ]
               if { [ file exists $statefile ] } {
                    set fid1 [ open $statefile r ]
                    set fid2 [ open $arc w ]
                    puts $fid2 "<html>\n<pre>"
                    fcopy $fid1 $fid2
                    ::close $fid2
                    ::close $fid1
                    file delete -force -- $statefile
               }
          } err ] } {
          }
          ;## take a gcore of the API before shutdown
          if { ! [ string equal nocore $comment ] } {
               set apihost [ set ::${api}(host) ]
               set now [ clock seconds ]
               set pid ""
               if { [ info exists ::bootstrapemailpid($api) ] } {
                    set pid $::bootstrapemailpid($api)
               }
               if { ![ string length $pid ] } {
                    set lockfile [ glob -nocomplain $::TOPDIR/${api}API/.${api}.*.lock ]
                    regexp {(\d+)} $lockfile -> pid
                    addLogEntry "obtained $api pid $pid from lock file" purple
               }
               if { [ string length $pid ] && [ info exist ::FORCE_GCORE ] && $::FORCE_GCORE} {
                    set cmd "alias cd=cd && cd $::TOPDIR/savedCores && gcore -o ${api}_${now}_gcore $pid"
                    addLogEntry "executing $cmd on $apihost" purple
                    execssh $apihost $cmd
               }
          }
          if { [ catch {
               set sid [ sock::open $api emergency ]
               puts  $sid "$::MGRKEY ${api}::sHuTdOwN \"$comment\""
               after 100
               cmd::receive $sid ${api}_$sid mgr::logReply
               addLogEntry "$api API Shut Down" red
          } err ] } {
          }
          if { [ catch {
               ;## I think we get here and delete the lock file
               ;## under all circumstances...
               set globpat [ file join [ apiDirectory $api ] .$api.*.lock ]
               set locks [ glob -nocomplain $globpat ]
               foreach lock $locks {
                    set pid [ lindex [ split [ file tail $lock ] . ] end-1 ]
                    file delete -force -- $lock
                    addLogEntry "deleted lock file: $lock" blue
               }
          } err ] } {
          }
     }
}

§   §   §

Name: mgr::postMortem

Description:
When the manager attempts to restart an API that has become unavailable, the new log file for the API should contain a reference to the condition which caused the error.
As of 01/04/02 this procedure returns a URL pointing to the archived log. The URL is absolute and should therefore continue to work even after long-term archiving.
This procedure returns a dump of the standard error redirect from the nohup that started the API.

Parameters: Usage:

Comments:

No filtering of the file contents is done at present :TODO:
proc mgr::postMortem { api } {
     set data [ list ]
     set url  [ list ]
     if { [ catch {
          set url [ file dirname $::HTTPURL ]
          regsub / $url // url
          ;## we may need an ::LDAS_LOG_ARCHIVE_URL
          set url ${url}/logs/archive/${api}API/
          set time [ gpsTime ]
          set home [ apiDirectory $api ]
          set stderrlog ${home}/${api}.log
          if { [ file exists $stderrlog ] } {
               set url $url${api}.log.$time
               set url "stdout/stderr for previous run of $api API: $url"
               set arcname $::LDASARC/${api}API/${api}.log.$time
               set dir $::LDASARC/${api}API
               if { ! [ file isdirectory $dir ] } {
                    file mkdir $dir
                    file attributes $dir -permissions 0755
                    gifBalls $dir
               }
               set fid1 [ open $arcname w ]
               puts $fid1 "<html>\n<pre>"
               set fid2 [ open $stderrlog r ]
               fcopy $fid2 $fid1
               ::close $fid2
               ::close $fid1
          } else {
               set url [ list ]
          }
     } err ] } {
          if { [ info exists fid ] } {
               catch { ::close $fid }
          }
          if { [ info exists fid1 ] } {
               catch { ::close $fid1 }
          }
          if { [ info exists fid2 ] } {
               catch { ::close $fid2 }
          }
          return -code error "[ myName ]: $err"
     }
     return $url
}

§   §   §

Name: mgr::dontStepOnJob

Description:
When a shutdown request is received, try not to shutdown while a job is pending.

Parameters: Usage:

Comments:

This is only possible if the shutdown is due to normal maintenance. If the shutdown is being driven by a failure of the API, the job will be lost under any circumstances.
proc mgr::dontStepOnJob { api timeout } {
     set i 0
     set vnames [ infoVars ]
     ;## try not to disturb a running job
     foreach vname $vnames {
          if { [ regexp $::RUNCODE $vname ] } {
               while { [ regexp $api [ set $vname ] ] && $i < $timeout } {
                    incr i
                    sleep 1000
               }
          }
     }
}

§   §   §

Name: mgr::preValidate

Description:

Parameters: Usage:

Comments:

proc mgr::preValidate { cmd } {
     if { [ catch {
          set seqpt {}
          set user   [ list ]
          set soname [ list ]
          regexp -nocase -- {-name\s+\{?\s*(\S+)} [ lindex $cmd 1 ] -> user
          set user [ string trim $user "{} " ]
          ;## see if dso is temporarily blocked!
          regexp -nocase -- {-dynlib\s+\{?\s*(\S+)} $cmd -> soname
          set soname [ string trim $soname "{} " ]
          set cmdname  [ lindex [ lindex $cmd 2 ] 0 ]
          ;## see if user is temporarily blocked!
          if { [ lsearch $::mgr::blocked_users $user ] != -1 } {
               set seqpt "ADMIN NOTICE: "
               set err "user '$user' has been temporarily blocked!"
               after 0 \
                   [ list mgr::logRejectedJob $user $cmdname blocked_user ]
               return -code error $err
          }
          ;## see if dso is temporarily blocked!
          if { [ lsearch $::mgr::blocked_dsos $soname ] != -1 } {
               set seqpt "ADMIN NOTICE: "
               set err "DSO '$soname' has been temporarily blocked!"
               after 0 \
                   [ list mgr::logRejectedJob $user $cmdname blocked_dso ]
               return -code error $err
          }
          ;## see if the user is exceeding his quota!
          set seqpt "QUOTA EXCEEDED: "
          mgr::userQuota $cmd
     } err ] } {
          return -code error "$seqpt$err"
     }
}

§   §   §

Name: mgr::userInfo

Description:
Update user info stored in QUEUE(USERS).
all ldasJob requests must have a user info argument as their first argument, and it must inlude the username, password, and the user's e-mail address.
Usage:

Comments:

There will always be at least the userkey and a new job id. New user info is optional
proc mgr::userInfo { args } {
     set return_info [ list ]
     set scratch     [ list ]
     set now [ clock seconds ]
     if { [ catch {
          if { [ catch {
               array set input $args
          } ] } {
               eval array set input $args
          }
          foreach user $::QUEUE(USERS) {
               if { ! [ llength $user ] } { continue }
               if { [ llength $user ] != 13 } {
                    mgr::reportCorruptedQueue $user
               }
               ;## each user entry contains the three user info item
               ;## pairs and a list of the last N jobs processed for
               ;## that user.
               set info [ lrange $user 0 end-1 ]
               set jobs [ lindex $user end ]
               array set q $info
               if { [ string equal $q(-name) $input(-name) ] } {
                    ;## Peter Shawhan's challenge/response code added
                    ;## 05/20/02
                    if { [ string equal md5protocol $input(-password) ] } {
                         set salt   [ uplevel set salt ]
                         set digest [ uplevel set digest ]
                         set test [ key::md5 $q(-password)$salt ]
                         if { [ string equal $digest $test ] } {
                              set hash $q(-password)
                         } else {
                              set hash invalid
                         }
                    } else {
                         set hash [ key::md5 $input(-password) ]
                    }
                    if { ! [ string equal $q(-password) $hash ] } {
                         set msg "Incorrect password given for user $q(-name)"
                         return -code error $msg
                    }
                    if { ! [ regexp {^\d+$} $q(-expires) ] } {
                         set q(expires) [ clock scan $q(-expires) ]
                    }
                    if { $now > $q(-expires) } {
                         set edate [ clock format $q(-expires) -format %D ]
                         set msg "MOU for user $q(-name) expires on ${edate}. "
                         append msg "Please contact LDAS admin to apply for "
                         append msg "an extension."
                         return -code error $msg
                    }
                    ;## here we mangle the args so that the user password
                    ;## is not updated when the other user data is updated!
                    regsub -- $input(-password) $args $q(-password) args
                    set info [ expandOpts info ]
                    set return_info $info
                    if { [ llength $return_info ] < 12 } {
                         set msg    "QUEUE(USERS) has been corrupted!\n"
                         append msg "Please check the user info file for bad\n"
                         append msg "entries, missing \"'s, etc."
                         return -code error $msg
                    }
               }
               lappend scratch [ concat $info [ list $jobs ] ]
          } ;## end of foreach
          if { [ llength $return_info ] } {
               set ::QUEUE(USERS) $scratch
               if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
                    set msg    "The QUEUE(USERS) queue has been\n"
                    append msg "initialised, but is empty."
                    return -code error $msg
               }
          }  else {
               return -code error "unknown user: '$input(-name)'"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $return_info
}

§   §   §

Name: mgr::reportCorruptedQueue

Description:

Parameters: Usage:

Comments:

proc mgr::reportCorruptedQueue { args } {
     set baddata [ list $args ]
     set subject "$::LDAS_SYSTEM users queue has become corrupted!"
     set msg "${subject}\n"
     append msg "The manager API choked on this user entry,\n"
     append msg "which does not appear to have the correct\n"
     append msg "number of fields:\n\n'$baddata'"
     addLogEntry "Subject: ${subject}; Body: $msg" mail
     return -code error "[ myName ]: $subject"
}

§   §   §

Name: mgr::assignJobid

Description:

Parameters: Usage:

Comments:

proc mgr::assignJobid { args } {
     if { [ catch {
          set return_info [ list ]
          set scratch     [ list ]
          set now [ clock seconds ]
          if { [ catch {
               array set input $args
          } ] } {
               eval array set input $args
          }
          foreach user $::QUEUE(USERS) {
               if { ! [ llength $user ] } { continue }
               if { [ llength $user ] != 13 } {
                    set msg "QUEUE(USERS) has been corrupted!"
                    addLogEntry $msg 2
                    return -code error "[ myName ]: $msg"
               }
               ;## each user entry contains the three user info item
               ;## pairs and a list of the last N jobs processed for
               ;## that user.
               set info [ lrange $user 0 end-1 ]
               set jobs [ lindex $user end ]
               array set q $info
               if { [ string equal $q(-name) $input(-name) ] } {
                    set jobid [ key::increment $::RUNCODE ]
                    set jobs \
                        [ linsert $jobs 0 [ list $jobid $input(-email) $now ] ]
                    set info [ expandOpts info ]
                    set return_info $info
                    if { [ llength $return_info ] < 12 } {
                         set msg    "QUEUE(USERS) has been corrupted!\n"
                         append msg "Please check the user info file for bad\n"
                         append msg "entries, missing \"'s, etc."
                         return -code error "[ myName ]: $msg"
                    }
               }
               lappend scratch [ concat $info [ list $jobs ] ]
          } ;## end of foreach
          if { [ llength $return_info ] } {
               set ::QUEUE(USERS) $scratch
               if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
                    set msg    "The QUEUE(USERS) queue has been\n"
                    append msg "initialised, but is empty."
                    return -code error "[ myName ]: $msg"
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $jobid
}

§   §   §

Name: mgr::deleteJobidFromUsersQueue

Description:

Parameters: Usage:

Comments:

proc mgr::deleteJobidFromUsersQueue { jobid } {
     if { [ catch {
          set name [ list ]