#! /usr/bin/perl -w use diagnostics; use warnings; use strict; use Cwd; #use Digest::MD5; use File::Basename; use File::Compare; use File::Copy qw(cp), qw(mv); use File::Path qw(mkpath); use Getopt::Long; #use Time::HiRes qw(gettimeofday); use IPC::Open3; use Symbol qw(gensym); use IO::File; my $exedir = $0; if ($exedir =~ m{/}) { $exedir =~ s{/[^/]*$}{}; # Remove file name and trailing slash unshift @INC, $exedir; } if (! -e "$exedir/udb.pm") { die "File \"udb.pm\" not found.\n" . "This installation of SimFactory has not been configured.\n"; } require cdb; require mdb; require udb; # require Config::IniFiles; # my $simpath = get_simpath; # my @machines = map { chomp; $_; } `ls $simpath/mdb`; # my $cfg = Config::IniFiles->new; # die unless $cfg; # # -default # foreach my $machine (@machines) { # $cfg = Config::IniFiles->new # (-import => $cfg, -file => "$simpath/mdb/$machine/$machine.ini"); # die "Could not read machine description for \"$machine\"" unless $cfg; # } # my %ini; # tie %ini, 'Config::IniFiles', (-import => $cfg); # foreach my $section (sort keys %ini) { # foreach my $parameter (sort keys %{$ini{$section}}) { # my @values = $ini{$section}->{$parameter}; # foreach my $value (@values) { # $value = unquoteini $value; # } # print "[$section][$parameter][@values]\n"; # } # } # exit 0; # The following identifiers are replaced in parameter files, script # files, and submit commands: # # string @SOURCEDIR@ # string @SIMULATION_NAME@ # string @SHORT_SIMULATION_NAME@ # string @SIMULATION_ID@ # string @RESTART_ID@ # string @HOSTNAME@ # string @RUNDIR@ # string @SCRIPTFILE@ # string @EXECUTABLE@ # string @PARFILE@ # string @USER@ # string @EMAIL@ # int @NODES@ # int @PROCS@ # int @PROCS_REQUESTED@ # int @PPN@ # int @PPN_USED@ # int @NUM_PROCS@ # int @NUM_THREADS@ # int @MEMORY@ # real @CPUFREQ@ # string @ALLOCATION@ # string @QUEUE@ # string @WALLTIME@ # int @WALLTIME_HH@ # int @WALLTIME_MM@ # int @WALLTIME_SS@ # int @WALLTIME_SECONDS@ # int @WALLTIME_MINUTES@ # real @WALLTIME_HOURS@ # string @SCRATCHDIR@ # string @CHAINED_JOB_ID@ # # string @EXECHOST@ # # These strings are replaced everywhere. If necessary, a quoting # mechanism could be introduced. # # For example, the replacement may look as in the following: # # @SOURCEDIR@ /home/eschnett/Cactus # @SIMULATION_NAME@ rnsid # @SHORT_SIMULATION_NAME@ rnsid-0002 (suitable as job name in PBS scripts) # @SIMULATION_ID@ simulation-rnsid-redshift-eschnett-2007.03.12-23.16.56-6359 # @RESTART_ID@ 0002 # @HOSTNAME@ numrel02.cct.lsu.edu # @RUNDIR@ /home/eschnett/runs/rnsid/output-0002 # @SCRIPTFILE@ rnsid.qsub # @EXECUTABLE@ ./cactus_rnsid # @PARFILE@ rnsid.par # @USER@ eschnett # @EMAIL@ schnetter@cct.lsu.edu # @NODES@ 4 # @PROCS@ 8 # @PROCS_REQUESTED@ 8 # @PPN@ 2 # @PPN_USED@ 2 # @NUM_PROCS@ 8 # @NUM_THREADS@ 1 # @MEMORY@ 4096 # @CPUFREQ@ 2.6 # @ALLOCATION@ cct_numrel # @QUEUE@ workq # @WALLTIME@ 4:00:00 # @WALLTIME_HH@ 4 # @WALLTIME_MM@ 0 # @WALLTIME_SS@ 0 # @WALLTIME_SECONDS@ 14400 # @WALLTIME_MINUTES@ 240 # @WALLTIME_HOURS@ 4.0 # @SCRATCHDIR@ /var/scratch/eschnett/43045 # @CHAINED_JOB_ID@ 247 # # @EXECHOST@ ic0042 # # Checkpoint files are expected to have a name matching the shell # pattern "*.chkpt.it_*.*", located in any subdirectory of the old # restart directory. Upon restarting, these checkpoint files are # linked into corresponding subdirectories of the new restart # directory, where the new job should read them. # The following files exist in a configuration directory's SIMFACTORY # subdirectory: # OptionList # [RunCmd] # ScriptFile # ThornList # The following files exist in a simulation directory: # LOG # SIMULATION_ID # output-NNNN{,-active,-presubmitted}]/ # The following files exist in a simulation directory's SIMFACTORY # subdirectory: # MACHINE # SOURCEDIR # CONFIGURATION # [CONFIG_ID] # [BUILD_ID] # cfg/ # exe/ # par/ # run/ # The following files exist in a run directory: # SIMULATION_ID # A copy of ../SIMULATION_ID # [TERMINATE] # (?) scratch/ # Symbolic link to node-local scratch directory; expect about 10 # GByte per process; data are deleted after run finishes # Note: Currently, only the root process creates this # directory automatically. # Note: This does not yet work on all systems, e.g. on systems # which use poe # Q: What to do if there is no local disk space? # [scratchdir/] # [] # [] # [] # The following files exist in a run directory's SIMFACTORY # subdirectory: # ALLOCATION # JOB_ID # PBS job id # [NODES] # Host names of nodes, one line per process # NUM_PROCS # Number of (MPI) processes # NUM_THREADS # Number of requested threads per process # PPN # Number of requested (PBS) processors per node # PPN_USED # Number of used (active) processors per node (for undersubscribing) # [PRESUBMIT_FROM_ID] # PROCS # Total used (active) number of processors # PROCS_REQUESTED # Total requested (PBS) number of processors # QUEUE # [RESTART_FROM_ID] # [RESTART_FROM_SCRATCH] # [RESTARTED_AS_ID] # [RUNNING] # Process ID, if running # [RunCmd] # SHORT_SIMULATION_NAME # [STOPPED] # ScriptFile # WALLTIME # Requested wall time HH:MM:SS # # # Subroutines # Commands sub command_empty (@); sub command_version (@); sub command_help (@); sub command_sequence (@); sub command_loop (@); sub command_list_machines (@); sub command_remote (@); sub command_login (@); sub command_print_machine (@); sub command_print_mdb (@); sub command_execute (@); sub command_checkout (@); sub command_sync (@); sub command_build (@); sub command_checkout_build (@); sub command_sync_remote_build (@); sub command_remote_build (@); sub command_list_configurations (@); sub command_create (@); sub command_list_simulations (@); sub command_find_simulation (@); sub command_submit (@); sub command_create_submit (@); sub command_cleanup_submit (@); sub command_status (@); sub command_print_exechost (@); sub command_show_output (@); sub command_stop (@); sub command_cleanup (@); sub command_stop_cleanup (@); sub command_purge (@); sub command_run (@); sub command_comment (@); sub command_unknown (@); # Tasks sub create_simulation_id ($); sub find_largest_restart_id ($); sub get_job_status (@); # Helpers sub open_log ($); sub write_log (*$); sub close_log (*); sub read_file ($); sub maybe_read_file ($); sub write_file ($$); sub append_file ($$); sub read_string ($); sub maybe_read_string ($); sub write_string ($$); sub maybe_write_string ($$); sub quote_output ($); sub copy_file ($$$); #sub md5_digest ($); sub list_files ($;$); sub list_file ($); sub execute ($); sub subcommand (@); sub replace_patterns ($$); sub ensure_no_patterns ($$); sub check_filename ($$); sub shorten_string ($); sub derive_sim_and_parfile_name ($); sub derive_sim_name ($); sub quotesafe ($); sub quotexml ($); sub cleanxmltag ($); sub quoteini ($); sub unquoteini ($); # Access to options and machine database sub get_hostnamealias (); sub get_username (); sub get_machine ($); sub get_iomachine ($); sub get_trampoline ($); sub get_sshcmd ($$;$); sub get_dirsuffix ($); sub get_sourcebasedir (); sub get_basedir (); sub get_executable (); sub get_simpath (); sub get_optionlist (;$); sub get_scriptfile (;$); sub get_thornlist (;$); sub get_thornlistlines ($); sub get_parfilelines ($); sub get_commentfile (); sub get_arguments (); sub get_substitutions (); sub add_define ($$$); sub add_substitution ($$$); sub add_replacement ($$$); sub add_attachment ($$$); sub get_submitcommand (); sub get_runcommand (); sub get_statuscommand (); sub get_exechostcommand (); sub get_showoutputcommand (); sub get_stopcommand (); sub get_restart_id (); sub get_from_restart_id (); sub get_memory (); sub get_cpufreq (); sub get_allocation ($); sub get_queue ($); sub get_procs ($$$$$); sub get_walltime ($$); sub get_version ($); sub get_formaline_info ($); sub get_last_match ($$); sub get_option (\%$;$); # Parse command line arguments my %options; GetOptions (\%options, # Generic options 'localdir=s', 'hostname=s', # Synchronisation 'sync-sourcetree!', 'sync-parfiles!', # Configurations 'optionlist=s', 'scriptfile=s', 'thornlist=s', 'debug!', 'optimise!', 'unsafe!', 'profile!', 'sourcebasedir=s', 'basedir=s', #'delete!', 'reconfig!', 'clean!', # Simulations 'simulation=s', 'configuration|c=s', # -c 'parfile|p=s', # -p 'datadir=s', # Restarts 'presubmit!', 'postsubmit=s', 'recover!', 'allocation=s', 'queue=s', 'procs|n=i', # -n 'ppn=i', 'ppn-used=i', 'num-threads|t=i', # -t 'memory=i', # MByte 'walltime|w=s', # -w HH:MM:SS 'restart-id=i', 'from-restart-id=i', 'force|f', # -f 'hide', 'hide-boring', 'hide-dangerous', # Auxiliary options 'commentfile=s', 'argument=s@', 'define=s%', 'substitute=s%', 'replace=s%', 'attach=s%', 'quiet|q', # -q 'verbose|v', # -v 'xml|x', # -x # 'tee', ) or die "Could not parse command line options"; # Some global options my $quiet = get_option %options, 'quiet', 0; my $verbose = get_option %options, 'verbose', 0; my $xml = get_option %options, 'xml', 0; if ($quiet && $verbose) { die 'Options --quiet and --verbose cannot be used together'; } # Other global options # Name of simfactory's directories containing internal information my $internal_dir = 'SIMFACTORY'; # Time (in seconds) between different parallel "iterations" of a loop my $parallel_delay = 0; # Time (in seconds) between creating and submitting a job in the # create-submit command my $submit_delay = 0; # Greeting if (! $xml) { if (! $quiet) { print "Simulation Factory:\n"; } } else { print "\n"; } # Handle subdirectory option my $localdir = $options{'localdir'}; if (defined $localdir) { chdir $localdir or die "Directory \"$localdir\" does not exist"; } command_dispatch: # Find out what command was given, and take the appropriate action my $command = shift @ARGV; if (! defined $command) { command_empty (@ARGV); } elsif ($command eq 'version') { command_version (@ARGV); } elsif ($command eq 'help') { command_help (@ARGV); } elsif ($command eq 'sequence') { command_sequence (@ARGV); } elsif ($command eq 'loop') { command_loop (@ARGV); } elsif ($command =~ /^list[-_]mach(ines)?$/) { command_list_machines (@ARGV); } elsif ($command eq 'remote') { command_remote (@ARGV); } elsif ($command eq 'login') { command_login (@ARGV); } elsif ($command eq 'print-machine') { command_print_machine (@ARGV); } elsif ($command eq 'print-mdb') { command_print_mdb (@ARGV); } elsif ($command eq 'execute') { command_execute (@ARGV); } elsif ($command eq 'checkout') { command_checkout (@ARGV); } elsif ($command eq 'sync' || $command eq 'rsync') { if ($command eq 'rsync') { print "Warning: command \"rsync\" is outdated; use \"sync\" instead\n"; } command_sync (@ARGV); } elsif ($command eq 'build') { command_build (@ARGV); } elsif ($command eq 'checkout-build') { command_checkout_build (@ARGV); } elsif ($command eq 'sync-remote-build' || $command eq 'rsync-remote-build') { if ($command eq 'rsync-remote-build') { print "Warning: command \"rsync-remote-build\" is outdated;\n"; print " use \"sync-remote-build\" instead\n"; } command_sync_remote_build (@ARGV); } elsif ($command eq 'remote-build') { command_remote_build (@ARGV); } elsif ($command =~ /^list[-_]conf(igurations)?$/) { command_list_configurations (@ARGV); } elsif ($command eq 'create') { command_create (@ARGV); } elsif ($command =~ /^list[-_]sim(ulations)?$/) { command_list_simulations (@ARGV); } elsif ($command =~ /^find[-_]sim(ulation)?$/) { command_find_simulation (@ARGV); } elsif ($command eq 'submit' || $command eq 'restart') { if ($command eq 'restart') { print "Warning: command \"restart\" is outdated; use \"submit\" instead\n"; } command_submit (@ARGV); } elsif ($command eq 'create-submit') { command_create_submit (@ARGV); } elsif ($command eq 'cleanup-submit') { command_cleanup_submit (@ARGV); } elsif ($command eq 'status') { command_status (@ARGV); } elsif ($command eq 'print-exechost') { command_print_exechost (@ARGV); } elsif ($command eq 'show-output') { command_show_output (@ARGV); } elsif ($command eq 'stop' || $command eq 'delete') { if ($command eq 'delete') { print "Warning: command \"delete\" is outdated; use \"stop\" instead\n"; } command_stop (@ARGV); } elsif ($command eq 'cleanup') { command_cleanup (@ARGV); } elsif ($command eq 'stop-cleanup' || $command eq 'delete-cleanup') { if ($command eq 'delete-cleanup') { print "Warning: command \"delete-cleanup\" is outdated; use \"stop-cleanup\" instead\n"; } command_stop_cleanup (@ARGV); } elsif ($command eq 'purge') { command_purge (@ARGV); } elsif ($command eq 'run') { command_run (@ARGV); } elsif ($command eq 'comment') { command_comment (@ARGV); } else { command_unknown (@ARGV); } # Done. if (! $xml) { if (! $quiet) { print "Done.\n"; } } else { print "\n"; } exit 0; # Provide some basic output sub command_empty (@) { print "Manage jobs on batch systems.\n"; print "Use 'sim help' to get help.\n"; } # Output version information sub command_version (@) { print " No version information.\n"; } # Provide help sub command_help (@) { print "Manage jobs on batch systems.\n"; print "\n"; print "Syntax:\n"; print " sim \n"; print "\n"; print "Get help:\n"; print " version: Output version number\n"; print " help: Print this list of commands\n"; print "\n"; print "Available meta-commands are:\n"; print " sequence (serial|parallel|parwait) (COMMAND ;)* (COMMAND)?\n"; print " loop (serial|parallel|parwait) VAR (VALUE)* : COMMAND\n"; print "\n"; print "Available commands are:\n"; print " list-mach[ines]: List all available machines\n"; print " remote : Execute remotely\n"; print " login : Open a remote shell\n"; print " print-machine: Output machine name\n"; print " print-mdb []: Output mdb\n"; print " execute : Execute an arbitrary shell command\n"; print " checkout : Check out thorns\n"; print " sync : Copy source tree to another machine\n"; print " build : Build a configuration\n"; print " sync-remote-build \n"; print " remote-build \n"; print " list-conf[igurations]: List all configurations\n"; print " create : Create a job skeleton\n"; print " list-sim[ulations]: List all simulations\n"; print " find-sim[ulation] : Find simulation with a given job id\n"; print " submit : Submit a job\n"; print " create-submit \n"; print " cleanup-submit \n"; print " status : Determine job status\n"; print " print-exechost : Print the job's execution host\n"; print " show-output : Show the job's stdout and stderr\n"; print " stop : Stop (qdel, cancel) job\n"; print " cleanup : Clean up after a job has finished\n"; print " stop-cleanup \n"; print " purge : Completely remove a simulation\n"; print " run : Run (start) a job for a simulation\n"; print " comment : Add a comment to a simulation\n"; print "\n"; print "Available options are:\n"; print " --localdir : Switch to directory\n"; print " --sourcebasedir : Base directory for source tree\n"; print "\n"; print " --[no]sync-sourcetree: Synchronise the source tree\n"; print " --[no]sync-parfiles: Synchronise the parameter files\n"; print "\n"; print " --optionlist : Configuration option list\n"; print " --scriptfile : Script file\n"; print " --thornlist : Thorn list\n"; print " --[no]debug: Create debug configuration (default no)\n"; print " --[no]optimise: Create optimised configuration (default yes)\n"; print " --[no]unsafe: Use additional, unsafe optimisation options (default yes)\n"; print " --[no]profile: Create profiling configuration (default no)\n"; print " --basedir : Base directory for simulation\n"; print " --[no]reconfig: Force reconfiguring (default no)\n"; print " --[no]clean: Force cleaning (default no)\n"; print "\n"; print " --simulation : Simulation name\n"; print " --configuration : Configuration name\n"; print " --parfile : Parameter file name\n"; print " --datadir : Input data directory name\n"; print "\n"; print " --[no]presubmit: Presubmit for running after another restart\n"; print " --postsubmit : Use existing PBS job\n"; print " --[no]recover: Recover from a checkpoint\n"; print " --allocation : Allocation id\n"; print " --queue : Queue file\n"; print " --hide[-boring|-dangerous]: Randomize job name in queue\n"; print " --procs : Total number of processors\n"; print " --ppn : Number of requested processors per node\n"; print " --ppn-used : Number of used processors per node\n"; print " --num-threads : Number of threads per process\n"; print " --memory : Maximum memory per node\n"; print " --walltime [:[:]]: Wall time limit\n"; print " --restart-id : Restart id\n"; print " --from-restart-id : Restart from id\n"; print " --force: Force stop, cleanup, or purge\n"; print " Follow output (\"tail -f\")\n"; print "\n"; print " --commentfile : File with comments (e.g. /dev/tty)\n"; print " --argument : Additional arguments e.g. for submitting jobs\n"; print " --define =: Additional definitions for substitutions\n"; print " --substitute =: Additional pattern substitutions\n"; print " --replace =: Additional replacements for substitutions\n"; print " --attach =: Additional attachments for substitutions\n"; print " --verbose: Output more information\n"; print " --xml: Output result in a structured way\n"; } # List all machines sub command_list_machines (@) { my @args = @_; if (@args) { if (! $xml) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } else { print "\n"; } } if (! $xml) { print "Machines:\n"; } else { print "\n"; } foreach my $machine (sort keys %mdb::machine_database) { my $entry = $mdb::machine_database{$machine}; my $hostname = $entry->{'hostname'}; my $nodes = $entry->{'nodes'}; my $ppn = $entry->{'ppn'}; if (! $xml) { printf " %-12s %4d nodes, %3d ppn %s\n", $machine, $nodes, $ppn, $hostname; } else { printf " \n"; } } if (! $xml) { } else { print "\n"; } } # Execute a sequence of commands # Syntax: sequence (serial|parallel|parwait) (COMMAND ;)* (COMMAND)? # Note: The semicolon is a special character for the shell, it needs # to be quoted; use \; or ';' or ";" sub command_sequence (@) { my @args = @_; my $mode = shift @args; die unless defined $mode; if ($mode ne 'serial' && $mode ne 'parallel' && $mode ne 'parwait') { die "Sequence mode is \"$mode\", but it must be one of \"serial\", \"parallel\", or \"parwait\"\n"; } # Execute all commands my $delay = 0; my @pids; while (@args) { my @cmds; while (1) { my $cmd = shift @args; last if ! defined $cmd || $cmd eq ';'; push @cmds, $cmd; } if (! @cmds) { print "WARNING: No command specified, doing nothing"; } if ($mode eq 'parallel' || $mode eq 'parwait') { # Wait, but don't wait before the first iteration sleep $delay; $delay = $parallel_delay; } # Create a new process for this command my $pid = fork; die unless defined $pid; if (! $pid) { # This is the child; execute the command @ARGV = @cmds; # Execute the command goto command_dispatch; } # This is the parent; handle the child if ($mode eq 'serial') { waitpid $pid, 0; } elsif ($mode eq 'parwait') { push @pids, $pid; } } # Wait for children, if any for my $pid (@pids) { waitpid $pid, 0; } } # Execute multiple commands # Syntax: loop (serial|parallel|parwait) VAR (VALUE)* : COMMAND # serial: execute commands one after the other # parallel: execute commands in parallel # parwait: execute commands in parallel, but wait at the end # COMMAND will have @VAR@ replaced by VALUE sub command_loop (@) { my @args = @_; my $mode = shift @args; die unless defined $mode; if ($mode ne 'serial' && $mode ne 'parallel' && $mode ne 'parwait') { die "Loop mode is \"$mode\", but it must be one of \"serial\", \"parallel\", or \"parwait\"\n"; } my $var = shift @args; die unless defined $var; if ($var !~ /^[A-Za-z][A-Za-z0-9_]*$/) { die "Loop variable is \"$var\", but it must be an identifier\n"; } my @values; while (1) { my $value = shift @args; die unless defined $value; last if $value eq ':'; push @values, $value; } if (! @values) { print "WARNING: No values specified, doing nothing"; } if (! @args) { print "WARNING: No command specified"; } my $delay = 0; my @pids; for my $value (@values) { print "\@$var\@=$value:\n"; if ($mode eq 'parallel' || $mode eq 'parwait') { # Wait, but don't wait before the first iteration sleep $delay; $delay = $parallel_delay; } # Create a new process for this iteration my $pid = fork; die unless defined $pid; if (! $pid) { # This is the child; execute the iteration # Replace loop variable in command line arguments foreach my $arg (@args) { $arg =~ s/\@$var\@/$value/g; } # Replace loop variable in options foreach my $opt (keys %options) { $options{$opt} =~ s/\@$var\@/$value/g; } @ARGV = @args; # Execute the command goto command_dispatch; } # This is the parent; handle the child if ($mode eq 'serial') { waitpid $pid, 0; } elsif ($mode eq 'parwait') { push @pids, $pid; } } # Wait for children, if any for my $pid (@pids) { waitpid $pid, 0; } } # Execute a command remotely sub command_remote (@) { my @args = @_; my $machine = shift @args; # Check arguments if (! defined $machine) { die "Machine name not specified"; } $machine = get_machine $machine; # Find source base names my $local_machine = get_machine ''; my $local_entry = $mdb::machine_database{$local_machine}; my $local_user = $local_entry->{'user'}; die if ! defined $local_user; my $local_substitutions = get_substitutions; add_define $local_substitutions, 'USER', $local_user; my $local_sourcebasedir = $local_entry->{'sourcebasedir'}; die if ! defined $local_sourcebasedir; $local_sourcebasedir = replace_patterns $local_sourcebasedir, $local_substitutions; my $source_name = get_dirsuffix $local_sourcebasedir; my $local_sourcedir = "$local_sourcebasedir/$source_name"; add_define $local_substitutions, 'SOURCEDIR', $local_sourcedir; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; my $sourcebasedir = $entry->{'sourcebasedir'}; die if ! defined $sourcebasedir; $sourcebasedir = replace_patterns $sourcebasedir, $substitutions; # Find Cactus version my $path = get_dirsuffix $local_sourcebasedir; # Construct remote arguments my $exe = $0; if ($exe !~ m+^/+) { $exe = "$sourcebasedir/$path/$exe"; } else { $exe =~ s/^$local_sourcebasedir/$sourcebasedir/; } #my $args_list = join ' ', @args; #my $args_list = quotesafe (join ' ', @args); #my $args_list = $args[0] . ' ' . quotesafe (join ' ', @args[1..$#args]); my $args_list = join ' ', map (quotesafe $_, @args); $options{'localdir'} = "$sourcebasedir/$path"; #$options{'hostname'} = $machine; my $hostname = $entry->{'hostname'}; $options{'hostname'} = $hostname; my @options_table; foreach my $key (sort keys %options) { if ($key eq 'debug' || $key eq 'clean' || $key eq 'optimise' || $key eq 'unsafe' || $key eq 'profile' || $key eq 'reconfig' || $key eq 'recover' || $key eq 'force' || $key eq 'verbose' || $key eq 'xml') { # boolean my $value = $options{$key}; push @options_table, $value ? "--$key" : "--no$key"; } elsif ($key eq 'argument') { # array my @values = @{$options{$key}}; foreach my $value (@values) { push @options_table, "--$key $value"; } } elsif ($key eq 'define' || $key eq 'replace' || $key eq 'attach') { # hash my %values = %{$options{$key}}; foreach my $key2 (sort keys %values) { my $value2 = $values{$key2}; push @options_table, "--$key $key2=$value2"; } } else { # regular option my $value = $options{$key}; push @options_table, "--$key $value" } } my $options_list = join ' ', @options_table; my $cmd = "$exe $args_list $options_list"; my $sshsetup = $entry->{'sshsetup'}; if (defined $sshsetup) { $cmd = "{ $sshsetup; } && $cmd"; } # ssh to target $cmd = get_sshcmd $machine, $cmd; my $localsshsetup = $entry->{'localsshsetup'}; if (defined $localsshsetup) { $localsshsetup = replace_patterns $localsshsetup, $local_substitutions; $cmd = "{ $localsshsetup; } && $cmd"; } # Do no read from stdin. This allows sim to be run in a pipe, # where another command (e.g. less) can read stdin. $cmd = "$cmd < /dev/null"; if (! $xml) { print "Executing: $cmd\n"; } exec $cmd; # Unreached die "Could not execute command"; } # Open a shell remotely sub command_login (@) { my @args = @_; my $machine = shift @args; # Check arguments if (! defined $machine) { die "Machine name not specified"; } $machine = get_machine $machine; my $simulation_name = $options{'simulation'}; if (defined $simulation_name) { check_filename "Simulation name", $simulation_name; } # Find source base names my $local_machine = get_machine ''; my $local_entry = $mdb::machine_database{$local_machine}; my $local_user = $local_entry->{'user'}; die if ! defined $local_user; my $local_substitutions = get_substitutions; add_define $local_substitutions, 'USER', $local_user; my $local_sourcebasedir = $local_entry->{'sourcebasedir'}; die if ! defined $local_sourcebasedir; $local_sourcebasedir = replace_patterns $local_sourcebasedir, $local_substitutions; my $source_name = get_dirsuffix $local_sourcebasedir; my $local_sourcedir = "$local_sourcebasedir/$source_name"; add_define $local_substitutions, 'SOURCEDIR', $local_sourcedir; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; my $path; if (! defined $simulation_name) { # Find Cactus version my $sourcebasedir = $entry->{'sourcebasedir'}; die if ! defined $sourcebasedir; $sourcebasedir = replace_patterns $sourcebasedir, $substitutions; $path = "$sourcebasedir/$source_name"; } else { # Find job directory my $basedir = $options{'basedir'}; if (! defined $basedir) { $basedir = $entry->{'basedir'}; } die if ! defined $basedir; $basedir = replace_patterns $basedir, $substitutions; $path = "$basedir/$simulation_name"; } # The option -l makes the shell act as login shell (this does not # seem to be required, but is probably useful). # This expects the shell variable $SHELL to be set on the remote # machine. my $cmd = "cd $path || echo \"could not change to directory $path\"; \$SHELL -l"; # ssh to target $cmd = get_sshcmd $machine, $cmd, '-t'; my $localsshsetup = $entry->{'localsshsetup'}; if (defined $localsshsetup) { $localsshsetup = replace_patterns $localsshsetup, $local_substitutions; $cmd = "{ $localsshsetup; } && $cmd"; } print "Executing: $cmd\n"; exec $cmd; # Unreached die "Could not execute command"; } # Output machine name sub command_print_machine (@) { my @args = @_; # Check arguments if (@args) { die "Too many arguments"; } my $machine = get_machine ''; if ($quiet) { print "$machine\n"; } elsif ($xml) { print "\n"; } else { print "Current machine: $machine\n"; } } # Output machine database sub command_print_mdb (@) { my @args = @_; my @machines = @args; if (! @machines) { @machines = sort keys %mdb::machine_database; } # Avoid warning about once-used variables @mdb::mdb_description_entries if 0; my @keys = @mdb::mdb_description_entries; my %section_descriptions = ( 'machine' => 'Machine description', 'access' => 'Access to this machine', 'source' => 'Source tree management', 'simulation' => 'Simulation management', ); foreach my $machine (@machines) { my $entry = $mdb::machine_database{$machine}; if (! $xml) { print "[$machine]\n"; } else { print " \n"; } my $oldsection = ''; foreach my $key (@keys) { my $fkey = sprintf "%-15s", $key; # Avoid warning about once-used variables $mdb::mdb_description if 0; my $description = $mdb::mdb_description->{$key}; my $section = $description->{'section'}; die unless defined $section; if ($section ne $oldsection) { my $section_description = $section_descriptions{$section}; if (! defined $section_description) { die "Unknown mdb description section \"$section\" for key \"$key\""; } if (! $xml) { if (! $quiet) { print "\n"; print "# $section_description\n"; } } } my $value = $entry->{$key}; if (! $xml) { if ($verbose) { my $necessity = $description->{'necessity'}; my $type = $description->{'type'}; my $pattern = $description->{'pattern'}; my $default = $description->{'default'}; my $example = $description->{'example'}; my $descr = $description->{'description'}; print "\n"; print "# $key type: $type"; if (defined $pattern) { print " pattern: {$pattern}"; } print " ($necessity)\n"; if (defined $descr) { $descr =~ s/^/# /mg; $descr =~ s/$/\n/mg; print $descr; } print "# Section: $section\n"; if (defined $default) { $default = quoteini $default; print "# default: $default\n"; } if (defined $example) { $example = quoteini $example; print "# example: $example\n"; } } } if (defined $value) { # Value exists in mdb (may have been set from default # value) if (! $xml) { $value = quoteini $value; print "$fkey = $value\n"; } else { my $tkey = cleanxmltag $key; my $value = quotexml $value; print " <$tkey>$value\n"; } } else { # Value does not exist (and there is no default value) if (! $xml) { if (! $quiet) { print "# $fkey\n"; } } } $oldsection = $section; } if (! $xml) { print "\n"; } else { print " \n"; } } } # Execute an arbitrary command sub command_execute (@) { my @args = @_; # Check arguments if (! @args) { die "No command specified"; } my $cmd = join ' ', @args; execute $cmd; } # Check out some thorns sub command_checkout (@) { my @args = @_; my @thornlists = @args; # Check arguments if (! @thornlists) { die "Thorn list(s) not specified"; } foreach my $thornlist (@thornlists) { my $thornlist2 = 'ThornList'; # TODO: Use --root option instead of a replacement once this # is supported by GetComponents my $substitutions = get_substitutions; add_replacement $substitutions, '!DEFINE ROOT', '.'; if ($thornlist =~ m{://}) { my $thornlist_local = $thornlist; $thornlist_local =~ s{.*/}{}; if (-e $thornlist_local) { die "Local thorn list file \"$thornlist_local\" exists already; refusing to overwrite"; } `curl -O $thornlist` or `wget -O $thornlist_local $thornlist` or die "Could not download thorn list <$thornlist>"; my $thornlist = $thornlist_local } my $thornlistlines = get_thornlistlines $thornlist; $thornlistlines = replace_patterns $thornlistlines, $substitutions; write_file $thornlist2, $thornlistlines or die "Could not write processed thorn list file \"$thornlist2\""; my $cmd = "$exedir/GetComponents $thornlist2"; execute $cmd; unlink $thornlist2; } } # Copy the source tree sub command_sync (@) { my @args = @_; my @machines = @args; # Check arguments if (! @machines) { die "Machine(s) not specified"; } # Sync sources and/or parfiles? my $sync_sourcetree = get_option %options, 'sync-sourcetree', 0; my $sync_parfiles = get_option %options, 'sync-parfiles', 0; if (! $sync_sourcetree && ! $sync_parfiles) { # Special case for convenience $sync_sourcetree = 1; $sync_parfiles = 1; } die unless $sync_sourcetree || $sync_parfiles; # Find source base names my $local_sourcebasedir = get_sourcebasedir; # Find Cactus version my $path = get_dirsuffix $local_sourcebasedir; # Find local rsync command my $rsynccmd; my $rsyncopts; my $local_substitutions; { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; $local_substitutions = get_substitutions; add_define $local_substitutions, 'USER', $user; my $source_name = get_dirsuffix $local_sourcebasedir; my $local_sourcedir = "$local_sourcebasedir/$source_name"; add_define $local_substitutions, 'SOURCEDIR', $local_sourcedir; $rsynccmd = $entry->{'rsynccmd'}; die if ! defined $rsynccmd; $rsynccmd = replace_patterns $rsynccmd, $local_substitutions; $rsyncopts = $entry->{'rsyncopts'}; die if ! defined $rsyncopts; } foreach my $machine (@machines) { $machine = get_machine $machine; # Don't sync to yourself my $local_machine = get_machine ''; if ($machine eq $local_machine) { die "Cannot sync to local machine"; } my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = {}; add_define $substitutions, 'USER', $user; my $sourcebasedir = $entry->{'sourcebasedir'}; die if ! defined $sourcebasedir; $sourcebasedir = replace_patterns $sourcebasedir, $substitutions; # Destination rsync command my $iomachine = get_iomachine $machine; my $ioentry = $mdb::machine_database{$iomachine}; my $iohostname = $ioentry->{'hostname'}; my $iorsynccmd = $ioentry->{'rsynccmd'}; die if ! defined $iorsynccmd; $iorsynccmd = replace_patterns $iorsynccmd, $substitutions; my $iorsyncopts = $ioentry->{'rsyncopts'}; die if ! defined $iorsyncopts; my $sshcmd = $ioentry->{'sshcmd'}; my $sshopts = $ioentry->{'sshopts'}; die if ! defined $sshcmd; die if ! defined $sshopts; $sshcmd = "$sshcmd $sshopts"; $sshcmd = replace_patterns $sshcmd, $substitutions; my $trampoline = get_trampoline $iomachine; if (defined $trampoline) { $sshcmd = get_sshcmd $trampoline, $sshcmd; } # List of rsync files and directories which should be copied. my @rsyncfiles; if ($sync_sourcetree) { my @rsync_sources = @{$cdb::configuration_database{'rsync-sources'}}; for my $rsyncfile (@rsync_sources) { if (-e $rsyncfile) { push @rsyncfiles, $rsyncfile; } } } if ($sync_parfiles) { my @rsync_sources = @{$cdb::configuration_database{'rsync-parfiles'}}; print "rsync_sources=[@rsync_sources]\n"; for my $rsyncfile (@rsync_sources) { if (-e $rsyncfile) { push @rsyncfiles, $rsyncfile; } } } # List of excluded patterns. my @rsyncexcludes = @{$cdb::configuration_database{'rsync-excludes'}}; # List of rsync options. Additional rsync options can be # specified in the machine database together with the rsync # command. my @rsyncoptions = ( '--archive', '--hard-links', '--sparse', '--verbose', '--progress', '--partial', '--stats', '--compress', '--delete', '--delete-excluded', (map "--exclude '$_'", @rsyncexcludes), ); my $fullpath = "$user\@$iohostname:$sourcebasedir/$path"; $sshcmd = quotesafe $sshcmd; my $arguments = join ' ', get_arguments; my $cmd = "$rsynccmd --rsh=$sshcmd --rsync-path=$iorsynccmd $rsyncopts $iorsyncopts $arguments " . (join ' ', @rsyncoptions) . " " . (join ' ', @rsyncfiles) . " $fullpath"; my $localsshsetup = $entry->{'localsshsetup'}; if (defined $localsshsetup) { $localsshsetup = replace_patterns $localsshsetup, $local_substitutions; $cmd = "{ $localsshsetup; } && $cmd"; } execute $cmd; } } # Build a configuration # Note: This requires that the user sets up the environment even for # non-interactive shells sub command_build (@) { my @args = @_; my @configuration_names = @args; # Get build options my $debug = $options{'debug'}; my $optimise = $options{'optimise'}; my $unsafe = $options{'unsafe'}; my $profile = $options{'profile'}; if (! defined $debug) { # disable debugging by default $debug = 0; } if (! defined $optimise) { # enable optimisation by default, unless debugging is enabled, # then disable optimisation by default $optimise = ! $debug; } if (! defined $unsafe) { # disable unsafe options by default $unsafe = 0; } if (! defined $profile) { # disable profiling by default $profile = 0; } # Check arguments if (! @configuration_names) { # die "Configuration name(s) not specified"; my $configuration = $cdb::configuration_database{'default-configuration-name'} . ($debug ? '-debug' : '') . ($optimise == $debug ? ($optimise ? '-optimise' : '-nooptimise') : '') . ($unsafe ? '-unsafe' : '') . ($profile ? '-profile' : ''); @configuration_names = ($configuration); print "Configuration name(s) not specified -- using default configuration \"$configuration\"\n"; } # Get machine options my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; my $email = $entry->{'email'}; die "No user specified for $machine" if ! defined $user; die "No email specified for $user on $machine" if ! defined $email; my $substitutions = get_substitutions; add_define $substitutions, 'USER' , $user; add_define $substitutions, 'EMAIL', $email; my $make = $entry->{'make'}; $make = replace_patterns $make, $substitutions; # Get sim directory my $simpath = $0; if ($simpath =~ m+/+) { # Remove slash and file name $simpath =~ s+/[^/]*$++; } else { # Use current directory $simpath = "."; } my $reconfig = get_option %options, 'reconfig', 0; my $clean = get_option %options, 'clean', 0; # Check for errors in command line options { my $optionlist = get_optionlist; my $scriptfile = get_scriptfile; my $thornlist = get_thornlist; } # Check whether configuration options changed; if so, configure foreach my $configuration_name (@configuration_names) { # Ensure that the configuration directory exists if (! -d "configs/$configuration_name") { mkdir 'configs'; mkdir "configs/$configuration_name" or die "Could not create configuration directory"; mkdir "configs/$configuration_name/bindings"; mkdir "configs/$configuration_name/build"; mkdir "configs/$configuration_name/config-data"; mkdir "configs/$configuration_name/lib"; mkdir "configs/$configuration_name/scratch"; } my $remove_config = 0; my $stored_optionlist = "configs/$configuration_name/OptionList"; my $have_stored_optionlist = -e $stored_optionlist; my $need_optionlist = ! $have_stored_optionlist || $reconfig; my $optionlist = get_optionlist $need_optionlist; if (defined $optionlist) { # # -M returns the age of the file # my $config_outdated = # ! -e $stored_optionlist || # -M $stored_optionlist > -M $optionlist; my $stored_option_settings; if ($have_stored_optionlist) { $stored_option_settings = read_file $stored_optionlist; } my $option_settings = read_file $optionlist; # Replace patterns in option settings my $substitutions = get_substitutions; add_define $substitutions, 'DEBUG' , $debug ? 'yes' : 'no'; add_define $substitutions, 'OPTIMISE', $optimise ? 'yes' : 'no'; add_define $substitutions, 'UNSAFE' , $unsafe ? 'yes' : 'no'; add_define $substitutions, 'PROFILE' , $profile ? 'yes' : 'no'; add_replacement $substitutions, 'DEBUG' , $debug ? 'yes' : 'no'; add_replacement $substitutions, 'OPTIMISE', $optimise ? 'yes' : 'no'; add_replacement $substitutions, 'UNSAFE' , $unsafe ? 'yes' : 'no'; add_replacement $substitutions, 'PROFILE' , $profile ? 'yes' : 'no'; $option_settings = replace_patterns $option_settings, $substitutions; my $have_complete_config = -e "configs/$configuration_name/config-data/cctk_Config.h"; my $config_outdated = ! $have_complete_config || ! defined $stored_option_settings || $stored_option_settings ne $option_settings; if ($config_outdated || $reconfig) { # Find out whether the old configuration needs to be # removed my $oldversion; if (defined $stored_option_settings) { $oldversion = get_version $stored_option_settings; } my $newversion = get_version $option_settings; if (defined $newversion) { if (! defined $oldversion || $newversion ne $oldversion) { $remove_config = 1; } } print "Reconfiguring $configuration_name...\n"; unlink "$stored_optionlist.old"; rename $stored_optionlist, "$stored_optionlist.old"; # cp $optionlist, $stored_optionlist; write_file $stored_optionlist, $option_settings; # We cannot use PROMPT, because it automatically # starts the build after configuring. #system "$make $configuration_name-config options=$optionlist"; #system "$make $configuration_name-config options=$optionlist PROMPT=no"; execute "echo yes | { $make $configuration_name-config options=configs/$configuration_name/OptionList; }"; # Force a rebuild unlink "configs/$configuration_name/config-data/make.thornlist"; # Remove the old configuration if necessary if ($remove_config) { print "Complete rebuild required\n"; } } } else { # Ensure that there is an option list if (! $have_stored_optionlist) { die "Configuration $configuration_name has no option list, and no option list was specified.\n" } } # Clean if necessary # NOTE: We need to clean after configuring, so that all # makefiles exist if ($clean || $remove_config) { print "Cleaning $configuration_name...\n"; execute "$make $configuration_name-realclean"; } } # Check whether the script file has changed; if so, copy it foreach my $configuration_name (@configuration_names) { my $stored_scriptfile = "configs/$configuration_name/ScriptFile"; my $have_stored_scriptfile = -e $stored_scriptfile; my $need_scriptfile = ! $have_stored_scriptfile || $reconfig; my $scriptfile = get_scriptfile $need_scriptfile; if (defined $scriptfile) { # # -M returns the age of the file # my $scriptfile_outdated = # ! -e $stored_scriptfile || # -M $stored_scriptfile > -M $scriptfile; my $scriptfile_outdated = ! $have_stored_scriptfile || (read_file $stored_scriptfile) ne (read_file $scriptfile); if ($scriptfile_outdated || $reconfig) { print "Updating script file for configuration $configuration_name...\n"; unlink "$stored_scriptfile.old"; mv $stored_scriptfile, "$stored_scriptfile.old"; cp $scriptfile, $stored_scriptfile; } } else { # Ensure that there is a script file if (! $have_stored_scriptfile) { die "Configuration $configuration_name has no script file, and no script file was specified.\n" } } } # Check whether the thorn list has changed; if so, copy it foreach my $configuration_name (@configuration_names) { my $stored_thornlist = "configs/$configuration_name/ThornList"; my $have_stored_thornlist = -e $stored_thornlist; # Do not update the thorn list when reconfiguring my $need_thornlist = ! $have_stored_thornlist; my $thornlist = get_thornlist $need_thornlist; if (defined $thornlist) { # Replace patterns in thorn list my $substitutions = get_substitutions; my $thornlistlines = get_thornlistlines $thornlist; $thornlistlines = replace_patterns $thornlistlines, $substitutions; # # -M returns the age of the file # my $thornlist_outdated = # ! -e $stored_thornlist || # -M $stored_thornlist > -M $thornlist; my $thornlist_outdated = ! $have_stored_thornlist || (read_file $stored_thornlist) ne $thornlistlines; if ($thornlist_outdated || $reconfig) { print "Updating thorn list for configuration $configuration_name...\n"; unlink "$stored_thornlist.old"; mv $stored_thornlist, "$stored_thornlist.old"; #cp $thornlist, $stored_thornlist; write_file $stored_thornlist, $thornlistlines or die "Could not write processed thorn list file \"$stored_thornlist\""; # Force a rebuild unlink "configs/$configuration_name/config-data/make.thornlist"; } } else { # Ensure that there is a thorn list if (! $have_stored_thornlist) { die "Configuration $configuration_name has no thorn list, and no thorn list was specified.\n" } } } # Build my $configuration_name_list = join ' ', @configuration_names; print "Building $configuration_name_list...\n"; foreach my $configuration_name (@configuration_names) { unlink "exe/cactus_$configuration_name"; } execute "$make $configuration_name_list"; # Make executables world-readable foreach my $configuration_name (@configuration_names) { chmod 0755, "exe/cactus_$configuration_name"; } # Build utilities after the executable (this may depend on # libraries or directories created during the regular build) print "Building utilities for $configuration_name_list...\n"; my $configuration_name_utils_list = join ' ', (map "$_-utils", @configuration_names); execute "$make $configuration_name_utils_list"; foreach my $configuration_name (@configuration_names) { chmod 0755, "exe/$configuration_name"; chmod 0755, glob "exe/$configuration_name/*"; } } # checkout M && build Cs sub command_checkout_build (@) { my @args = @_; my $thornlist = shift @args; my @configuration_names = @args; # Check arguments if (! defined $thornlist) { die "Thorn list not specified"; } if (! @configuration_names) { # die "Configuration name(s) not specified"; } command_checkout $thornlist; command_build @configuration_names; } # sync M && remote M build Cs sub command_sync_remote_build (@) { my @args = @_; my $machine = shift @args; my @configuration_names = @args; # Check arguments if (! defined $machine) { die "Machine name not specified"; } if (! @configuration_names) { # die "Configuration name(s) not specified"; } command_sync $machine; command_remote $machine, 'build', @configuration_names; } # remote M build Cs sub command_remote_build (@) { my @args = @_; my $machine = shift @args; my @configuration_names = @args; # Check arguments if (! defined $machine) { die "Machine name not specified"; } if (! @configuration_names) { # die "Configuration name(s) not specified"; } command_remote $machine, 'build', @configuration_names; } # List all configurations sub command_list_configurations (@) { my @args = @_; if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } # Get machine options my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $make = $entry->{'make'}; # Get list of configurations mkdir 'configs'; my @configurations = list_files 'configs'; if (! @configurations) { print "There are no configurations\n"; return; } print "Configurations:\n"; foreach my $configuration (@configurations) { printf " %-40s", $configuration; my $have_exe = -e "exe/cactus_$configuration"; if (! $have_exe) { print " [incomplete]"; } else { my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat "exe/cactus_$configuration"; # TODO: use the caller's local time instead of the # system's local time my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = localtime $mtime; $mon += 1; $year += 1900; my $date = (sprintf "%4d-%02d-%02d %2d:%02d:%02d", $year,$mon,$mday, $hour,$min,$sec); print " [built $date]"; } print "\n"; } } # Create a skeleton sub command_create (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } # If desired, synchronise parameter files before creating the # simulation my $sync_sourcetree = get_option %options, 'sync-sourcetree', 0; my $sync_parfiles = get_option %options, 'sync-parfiles', 0; die if $sync_sourcetree; # This would not be useful if ($sync_parfiles) { command_sync @args; } my $maybe_sim_name = shift @args; if (! defined $maybe_sim_name) { die "Neither a simulation name nor a parameter file are specified"; } my ($simulation_name, $parfile_from_sim_name) = derive_sim_and_parfile_name $maybe_sim_name; check_filename 'Simulation name', $simulation_name; if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } my $parfile_from_opts = $options{'parfile'}; if (defined $parfile_from_sim_name && defined $parfile_from_opts) { die "Cannot specify parameter file twice"; } my $parfile = $parfile_from_sim_name; if (! defined $parfile) { $parfile = $parfile_from_opts; } check_filename 'Parameter file name', $parfile; # Read the parameter file early to ensure it exists and is # readable my $parfilelines = get_parfilelines $parfile; my ($executable, $scriptfile) = get_executable; my ($name,$dir,$type) = fileparse $executable; my ($configuration) = $name =~ /^cactus_(.*)$/; my $optionlist = "configs/$configuration/OptionList"; my $config_id_file = "configs/$configuration/CONFIG-ID"; my $config_id = maybe_read_string $config_id_file; $config_id = 'no-config-id' if ! defined $config_id; my $build_id_file = "configs/$configuration/BUILD-ID"; my $build_id = maybe_read_string $build_id_file; $build_id = 'no-build-id' if ! defined $build_id; # Create job skeleton directory my $basedir = get_basedir; if (! -d $basedir) { die "Cannot access simulation directory \"$basedir\""; } my $dirname = "$basedir/$simulation_name"; if (-e $dirname) { print "Cannot create job skeleton directory: Directory \"$dirname\" exists already\n"; print "Aborting.\n"; exit 1; } mkdir $dirname or die "Could not create job skeleton directory \"$dirname\""; if (! -d $dirname) { die "Could not create job skeleton directory \"$dirname\""; } mkdir "$dirname/$internal_dir" or die "Could not create job skeleton directory \"$dirname/$internal_dir\""; if (! -d "$dirname/$internal_dir") { die "Could not create job skeleton directory \"$dirname/$internal_dir\""; } # Create log file open LOG, "> $dirname/LOG" or die "Could not create log file"; print LOG "LOG FILE for simulation \"$simulation_name\"\n"; print LOG "================================================================================\n"; close LOG; my $log = open_log $dirname; write_log $log, "Skeleton created\n"; write_log $log, "Job directory: \"$basedir/$simulation_name\"\n"; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; my $email = $entry->{'email'}; die "No user specified for $machine" if ! defined $user; die "No email specified for $user on $machine" if ! defined $email; my $substitutions = get_substitutions; add_define $substitutions, 'USER' , $user; add_define $substitutions, 'EMAIL', $email; my $sourcebasedir = get_sourcebasedir; my $path = get_dirsuffix $sourcebasedir; my $sourcedir = "$sourcebasedir/$path"; # Populate the skeleton my $simulation_id = create_simulation_id $simulation_name; write_string "$dirname/$internal_dir/MACHINE", $machine; write_log $log, "Machine: \"$machine\"\n"; write_string "$dirname/SIMULATION_ID", $simulation_id; write_log $log, "Simulation id: \"$simulation_id\"\n"; write_string "$dirname/$internal_dir/SOURCEDIR", $sourcedir; write_log $log, "Source dir: \"$sourcedir\"\n"; write_string "$dirname/$internal_dir/CONFIGURATION", $configuration; write_log $log, "Configuration: \"$configuration\"\n"; write_string "$dirname/$internal_dir/CONFIG_ID", $config_id; write_log $log, "Config id: \"$config_id\"\n"; write_string "$dirname/$internal_dir/BUILD_ID", $build_id; write_log $log, "Build id: \"$build_id\"\n"; my $exedir = "$dirname/$internal_dir/exe"; my $exefile = fileparse $executable; die if ! defined $exefile; mkdir $exedir or die "Could not create directory for executable"; copy_file $executable, $exedir, "$basedir/CACHE/exe"; chmod 0755, "$exedir/$exefile"; write_log $log, "Executable: \"$executable\"\n"; my $cfgdir = "$dirname/$internal_dir/cfg"; my $cfgfile = fileparse $optionlist; die if ! defined $cfgfile; mkdir $cfgdir or die "Could not create directory for option list"; my $cfgfilelines = read_file $optionlist; $cfgfilelines = replace_patterns $cfgfilelines, $substitutions; write_file "$cfgdir/$cfgfile", $cfgfilelines; write_log $log, "Option list: \"$optionlist\"\n"; my $rundir = "$dirname/$internal_dir/run"; my $runfile = fileparse $scriptfile; die if ! defined $runfile; mkdir $rundir or die "Could not create directory for script file"; my $runfilelines = read_file $scriptfile; $runfilelines = replace_patterns $runfilelines, $substitutions; write_file "$rundir/$runfile", $runfilelines; write_log $log, "Script file: \"$scriptfile\"\n"; my $pardir = "$dirname/$internal_dir/par"; my $parfilename = fileparse $parfile; die if ! defined $parfilename; mkdir $pardir or die "Could not create directory for parameter file"; $parfilelines = replace_patterns $parfilelines, $substitutions; write_file "$pardir/$parfilename", $parfilelines; write_log $log, "Parameter file: \"$parfile\"\n"; if (defined $options{'datadir'}) { my $datadirsrc = $options{'datadir'}; check_filename 'Data directory name', $datadirsrc; my $datadir = "$dirname/$internal_dir/data"; mkdir $datadir or die "Could not create directory for data"; opendir DATADIR, $datadirsrc; while (my $datafile = readdir DATADIR) { # TODO: Copy whole hierarchy die unless -f "$datadirsrc/$datafile"; copy_file $datafile, $datadir, "$basedir/CACHE/data"; } closedir DATADIR; write_log $log, "Data directory: \"$datadirsrc\"\n"; } close_log $log; } # List all simulations sub command_list_simulations (@) { my @args = @_; if (@args) { if (! $xml) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } else { print "\n"; } } # Create job skeleton directory my $basedir = get_basedir; my @simulation_names = list_files $basedir; if (! $xml) { if (! @simulation_names) { print "There are no simulations\n"; return; } } my $num_simulations = 0; my $num_active = 0; my $num_active_queued = 0; my $num_active_running = 0; my $num_active_pending = 0; my $num_inactive = 0; my $total_disk_usage = 0.0; if (! $xml) { print "Simulations:\n"; } else { print "\n"; } foreach my $simulation_name (@simulation_names) { my $dirname = "$basedir/$simulation_name"; my $is_simulation = -e "$dirname/SIMULATION_ID"; next if ! $is_simulation; ++ $num_simulations; if (! $xml) { if ($verbose) { print "\n"; } } else { print " \n"; } } if (! $xml) { } else { print "\n"; } if (! $xml) { if ($verbose) { print "\n"; print "Simulations: $num_simulations\n"; print " active: $num_active ($num_active_queued queued, $num_active_running running, $num_active_pending pending)\n"; print " inactive: $num_inactive\n"; $total_disk_usage = sprintf "%.1f", $total_disk_usage; # format print "Total disk usage: $total_disk_usage GByte\n"; } } } # Find simulation with a given job id sub command_find_simulation (@) { my @args = @_; my @job_ids1 = @args; # Check arguments if (! @job_ids1) { die "Job id(s) not specified"; } my %job_ids; foreach my $job_id (@job_ids1) { $job_ids{$job_id} = undef; } # Find job directory my $basedir = get_basedir; if (! -d $basedir) { die "Cannot access simulation directory \"$basedir\""; } my @simulation_names = grep { !/^(CACHE|TRASH)$/ } map { chomp; $_; } `ls $basedir`; foreach my $simulation_name (@simulation_names) { my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { # Ignore non-directories next; } # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; if (! defined $active_id) { # Ignore inactive simulations next; } # Format restart id my $restart_id = $active_id; $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; if (exists $job_ids{$job_id}) { print "Found id $job_id in $simulation_name restart $active_id\n"; delete $job_ids{$job_id}; } } foreach my $job_id (sort (keys %job_ids)) { print "Id $job_id not found\n"; } } # Submit a job sub command_submit (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } # We could have a consistency check here that the simulation has # actually been created from this parfile my $simulation_name = derive_sim_name(shift @args); check_filename 'Simulation name', $simulation_name; my $procs_arg; my $walltime_arg; # We have positional parameters for the things which are used all # the time. Alternatively you can use the named parameters. if (@args == 2) { $procs_arg = shift @args; $walltime_arg = shift @args; } if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } check_filename 'Simulation name', $simulation_name; # Find simulation directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { #die "Cannot access simulation skeleton directory \"$dirname\""; die "Simulation \"$simulation_name\" does not exist"; } # Create the internal directory if it is missing if (! -e "$dirname/$internal_dir") { symlink ".", "$dirname/$internal_dir" } # Are we pre- or postsubmitting? # $presubmit: 0 or 1 my $presubmit = get_option %options, 'presubmit', 0; # $postsubmit: either undefined or a restart id my $postsubmit = $options{'postsubmit'}; die if $presubmit && defined $postsubmit; my $log = open_log $dirname; if ($presubmit) { write_log $log, "Presubmitting:\n"; } elsif (defined $postsubmit) { write_log $log, "Postsubmitting restart id \"$postsubmit\":\n"; } else { write_log $log, "Submitting:\n"; } # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check for active restarts if (! $presubmit) { # Not presubmitting: don't want active restart if (defined $active_id) { write_log $log, "Restart id \"$active_id\" is active\n"; close_log $log; print "Aborting.\n"; exit 2; } } # Determine restart id my $restart_id; if (defined $postsubmit) { # Use specified restart id $restart_id = $postsubmit; # Format restart id $restart_id = sprintf '%04d', $restart_id; # Check that the given restart id exists if (! exists $states{$restart_id}) { write_log $log, "Restart id \"$restart_id\" does not exist\n"; close_log $log; print "Aborting.\n"; exit 2; } } else { # Find unused restart id $restart_id = get_restart_id; if (defined $restart_id) { # Format restart id $restart_id = sprintf '%04d', $restart_id; # Check that the given restart id is unused if (exists $states{$restart_id}) { write_log $log, "Restart id \"$restart_id\" exists already\n"; close_log $log; print "Aborting.\n"; exit 2; } } else { if (! defined $max_existing_id) { $restart_id = 0; } else { # Increment to obtain an unused id $restart_id = $max_existing_id + 1; } } } # Check restart id if ($restart_id < 0 || $restart_id >= 10000) { write_log $log, "Illegal restart id \"$restart_id\"\n"; close_log $log; print "Aborting.\n"; exit 2; } # Format restart id $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; write_log $log, "Using restart id \"$restart_id\"\n"; # Whether to recover from a checkpoint (may be undefined) # -1: don't know # 0: start from initial data # 1: recover # 2: recover if there are checkpoint files # 3: presubmitting my $recover; my $from_restart_id; if ($presubmit) { # Find old restart id $from_restart_id = get_from_restart_id; if (defined $from_restart_id) { # Restart id specified; recover from this id $recover = 3; write_log $log, "Requested presubmission from restart id \"$from_restart_id\"\n"; } elsif (defined $max_existing_id) { # Restarts are available; presubmit from the latest $recover = 3; $from_restart_id = $max_existing_id; write_log $log, "Presubmitting from restart id \"$from_restart_id\"\n"; } else { # No restarts are available write_log $log, "There are no restart ids\n"; close_log $log; print "Aborting.\n"; exit 2; } } else { $recover = get_option %options, 'recover', -1; # Find old restart id $from_restart_id = get_from_restart_id; if ($recover != 0) { if (defined $from_restart_id) { # Restart id specified; recover from this id $recover = 1; write_log $log, "Requested recovering from restart id \"$from_restart_id\"\n"; } elsif (defined $max_restart_id) { # Restarts are available; recover from the latest if # there are checkpoint files $recover = 2; $from_restart_id = $max_restart_id; write_log $log, "Recovering from restart id \"$from_restart_id\" if it has checkpoint files\n"; } else { # No restarts are available; don't recover if ($recover == 1) { write_log $log, "There are no restart ids\n"; close_log $log; print "Aborting.\n"; exit 2; } $recover = 0; write_log $log, "Not recovering since there are no restart ids\n"; } } } $recover == 0 || $recover == 1 || $recover == 2 || $recover == 3 or die; my $maxiter; my %files; my $from_workdir; my $dontlink = 0; if ($recover != 0) { # Format from id $from_restart_id = sprintf '%04d', $from_restart_id; # Check that the given from id exists if (! exists $states{$from_restart_id}) { write_log $log, "Restart id \"$from_restart_id\" does not exist\n"; close_log $log; print "Aborting.\n"; exit 2; } $from_restart_id ne $restart_id or die "Cannot recover a job from itself"; my $from_restart_dir = "output-$from_restart_id"; my $state = $states{$from_restart_id}; die if ! defined $state; if ($state ne '') { $from_restart_dir = "$from_restart_dir-$state"; } $from_workdir = "$dirname/$from_restart_dir"; if (! $presubmit) { # Find checkpoint files in the old directory my $files1 = `cd $from_workdir && find . -name "*.chkpt.it_*.*"`; if ($files1 eq "") { write_log $log, "No checkpoints found under restart dir\n"; my $files2 = `cd $from_workdir/.. && find checkpoints -name "*.chkpt.it_*.*"`; if (!($files2 eq "")) { $files1 = $files2; $dontlink = 1; write_log $log, "Found checkpoints from a simulation \"checkpoints\" directory. Will not hardlink these.\n"; } } foreach my $file (split "\n", $files1) { # if ($file !~ /[.]tmp$/ && # $file =~ /[.]chkpt[.]it_([[:digit:]]+)[.]/) if ($file =~ /[.]chkpt[.]it_([[:digit:]]+)[.]/) { my $iter = $1; if (! defined $files{$iter}) { $files{$iter} = (); } push @{$files{$iter}}, $file; } } # Find latest checkpoint iteration foreach my $iter (keys %files) { if (! defined $maxiter || $iter > $maxiter) { $maxiter = $iter; } } if (! defined $maxiter) { if ($recover == 2) { # Don't recover write_log $log, "Not recovering since there are no checkpoint files\n"; $recover = 0; } else { write_log $log, "Could not find checkpoint files in restart id \"$from_restart_id\"\n"; close_log $log; print "Aborting.\n"; exit 2; } } if ($recover == 2) { $recover = 1; write_log $log, "Checkpoint files found\n"; } } } # We must now know whether to recover or not, unless we are # presubmitting $recover == 0 || $recover == 1 || $recover == 3 or die; if ($recover == 1) { write_log $log, "Recovering from restart id \"$from_restart_id\"\n"; write_log $log, "Recovering from iteration \"$maxiter\"\n"; } my ($old_procs, $old_ppn, $old_ppn_used, $old_num_threads); my ($old_queue, $old_allocation, $old_walltime); if ($recover) { # Create the internal directory if it is missing if (! -e "$from_workdir/$internal_dir") { symlink ".", "$from_workdir/$internal_dir" } $old_procs = read_string "$from_workdir/$internal_dir/PROCS"; $old_ppn = read_string "$from_workdir/$internal_dir/PPN"; $old_ppn_used = read_string "$from_workdir/$internal_dir/PPN_USED"; $old_num_threads = read_string "$from_workdir/$internal_dir/NUM_THREADS"; $old_allocation = maybe_read_string "$from_workdir/$internal_dir/ALLOCATION"; $old_queue = maybe_read_string "$from_workdir/$internal_dir/QUEUE"; $old_walltime = read_string "$from_workdir/$internal_dir/WALLTIME"; } # Gather some information about the job my $short_simulation_name = shorten_string "$simulation_name-$restart_id"; my $simulation_id = read_string "$dirname/SIMULATION_ID"; my $executable = list_file "$dirname/$internal_dir/exe"; my $scriptfile = list_file "$dirname/$internal_dir/run"; my $parfile = list_file "$dirname/$internal_dir/par"; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $hostname = $entry->{'hostname'}; my $user = get_username; my $memory = get_memory; my $cpufreq = get_cpufreq; my $allocation = get_allocation $old_allocation; my $queue = get_queue $old_queue; my ($nodes, $ppn_used, $procs, $ppn, $procs_requested, $num_procs, $num_threads) = get_procs $old_procs, $old_ppn, $old_ppn_used, $old_num_threads, $procs_arg; my ($walltime, $walltime_hh, $walltime_mm, $walltime_ss, $walltime_seconds, $walltime_minutes, $walltime_hours) = get_walltime $old_walltime, $walltime_arg; my $scratchdir = $entry->{'scratchdir'}; check_filename "Restart directory", $workdir; check_filename "Executable file name", $executable; check_filename "Job script file name", $scriptfile; check_filename "Parameter file name", $parfile; if (defined $postsubmit) { # Postsubmit restart directory if (! -e "$workdir-presubmitted") { die "Restart \"$restart_id\" has not been presubmitted"; } die if ! -l "$workdir-presubmitted"; unlink "$workdir-presubmitted" or die "Could not postsubmit restart directory"; } else { # Create restart directory mkdir $workdir; if (! -d $workdir) { write_log $log, "Could not create restart directory\n"; close_log $log; print "Aborting.\n"; exit 2; } mkdir "$workdir/$internal_dir"; if (! -d "$workdir/$internal_dir") { write_log $log, "Could not create restart directory\n"; close_log $log; print "Aborting.\n"; exit 2; } write_log $log, "Created restart directory\n"; } if ($presubmit) { # Mark restart as presubmitted symlink $workdir, "$workdir-presubmitted" or die "Could not presubmit restart directory"; write_log $log, "Marked restart directory as presubmitted\n"; } else { # Activate restart directory if (-l "$workdir-active") { unlink("$workdir-active") # User might have deleted a # restart but not the symlink - # make it "just work" in this # case } symlink $workdir, "$workdir-active" or die "Could not activate restart directory"; write_log $log, "Activated restart directory\n"; } # For job chaining my $sourcebasedir = get_sourcebasedir; my $path = get_dirsuffix $sourcebasedir; my $sourcedir = "$sourcebasedir/$path"; my $chained_job_id = ''; if ($presubmit) { die if ! defined $from_workdir; $chained_job_id = read_string "$from_workdir/$internal_dir/JOB_ID"; } # Populate restart directory my $substitutions = get_substitutions; add_define $substitutions, 'SOURCEDIR' , $sourcedir; add_define $substitutions, 'SIMULATION_NAME' , $simulation_name; add_define $substitutions, 'SHORT_SIMULATION_NAME', $short_simulation_name; add_define $substitutions, 'SIMULATION_ID' , $simulation_id; add_define $substitutions, 'RESTART_ID' , $restart_id; add_define $substitutions, 'RUNDIR' , $workdir; add_define $substitutions, 'SCRIPTFILE' , "$internal_dir/$scriptfile"; add_define $substitutions, 'EXECUTABLE' , "$internal_dir/$executable"; add_define $substitutions, 'PARFILE' , $parfile; add_define $substitutions, 'HOSTNAME' , $hostname; add_define $substitutions, 'USER' , $user; add_define $substitutions, 'NODES' , $nodes; add_define $substitutions, 'PROCS' , $procs; add_define $substitutions, 'PROCS_REQUESTED' , $procs_requested; add_define $substitutions, 'PPN' , $ppn; add_define $substitutions, 'PPN_USED' , $ppn_used; add_define $substitutions, 'NUM_PROCS' , $num_procs; add_define $substitutions, 'NUM_THREADS' , $num_threads; add_define $substitutions, 'MEMORY' , $memory; add_define $substitutions, 'CPUFREQ' , $cpufreq; add_define $substitutions, 'ALLOCATION' , $allocation; add_define $substitutions, 'QUEUE' , $queue; add_define $substitutions, 'WALLTIME' , $walltime; add_define $substitutions, 'WALLTIME_HH' , $walltime_hh; add_define $substitutions, 'WALLTIME_MM' , $walltime_mm; add_define $substitutions, 'WALLTIME_SS' , $walltime_ss; add_define $substitutions, 'WALLTIME_SECONDS' , $walltime_seconds; add_define $substitutions, 'WALLTIME_MINUTES' , $walltime_minutes; add_define $substitutions, 'WALLTIME_HOURS' , $walltime_hours; add_define $substitutions, 'SCRATCHDIR' , $scratchdir; add_define $substitutions, 'CHAINED_JOB_ID' , $chained_job_id; add_replacement $substitutions, 'TerminationTrigger::max_walltime', $walltime_hours; if (! defined $postsubmit) { my $scriptlines = read_file "$dirname/$internal_dir/run/$scriptfile"; $scriptlines = replace_patterns $scriptlines, $substitutions; ensure_no_patterns 'script file', $scriptlines; write_file "$workdir/$internal_dir/$scriptfile", $scriptlines; write_log $log, "Created job script\n"; # Creating a link may fail if the maximum number of hard links # is exceeded if (! link "$dirname/$internal_dir/exe/$executable", "$workdir/$internal_dir/$executable") { print "Could not create link to executable -- copying instead\n"; cp "$dirname/$internal_dir/exe/$executable", "$workdir/$internal_dir/$executable" or die "Could not copy executable"; my $mode = (stat "$dirname/$internal_dir/exe/$executable")[2]; $mode = $mode & 07777; # mask out file type, keep only permissions chmod $mode, "$workdir/$internal_dir/$executable" or die "Could not chmod executable"; link "$workdir/$internal_dir/$executable", "$dirname/$internal_dir/exe/$executable.new" and rename "$dirname/$internal_dir/exe/$executable.new", "$dirname/$internal_dir/exe/$executable"; } my $parlines = read_file "$dirname/$internal_dir/par/$parfile"; $parlines = replace_patterns $parlines, $substitutions; write_file "$workdir/$parfile", $parlines; write_log $log, "Created parameter file\n"; if (-d "$dirname/$internal_dir/data") { symlink "$dirname/$internal_dir/data", "$workdir/data" or die "Could not create symbolic link to data"; write_log $log, "Created symbolic link to data directory\n"; } } if ($presubmit) { write_string "$workdir/$internal_dir/PRESUBMIT_FROM_ID", $from_restart_id; } else { if (! $recover) { write_file "$workdir/$internal_dir/RESTART_FROM_SCRATCH", ''; } else { write_string "$workdir/$internal_dir/RESTART_FROM_ID", $from_restart_id; write_string "$workdir/$internal_dir/RESTART_FROM_ITERATION", $maxiter; } } if (! defined $postsubmit) { write_string "$workdir/$internal_dir/PROCS", $procs; write_string "$workdir/$internal_dir/PROCS_REQUESTED", $procs_requested; write_string "$workdir/$internal_dir/PPN", $ppn; write_string "$workdir/$internal_dir/PPN_USED", $ppn_used; write_string "$workdir/$internal_dir/NUM_THREADS", $num_threads; maybe_write_string "$workdir/$internal_dir/ALLOCATION", $allocation; maybe_write_string "$workdir/$internal_dir/QUEUE", $queue; write_string "$workdir/$internal_dir/WALLTIME", $walltime; write_string "$workdir/$internal_dir/SHORT_SIMULATION_NAME", $short_simulation_name; } if (! $presubmit) { if ($recover && $dontlink eq 0) { # Link checkpoint files from the old to the new directory foreach my $file (@{$files{$maxiter}}) { $file =~ m|((.*)/)?([^/]+)|; my $dirname = defined $2 ? $2 : '.'; my $basename = $3; my $targetdir = "$workdir/$dirname"; -d $targetdir or mkdir $targetdir or die "Could not create checkpoint directory \"$targetdir\""; my $sourcefile = "$from_workdir/$dirname/$basename"; my $targetfile = "$workdir/$dirname/$basename"; link $sourcefile, $targetfile or die "Could not create link to checkpoint file \"$targetfile\""; } write_log $log, "Linked checkpoint files to new output directory\n"; } } my $job_id; if (! defined $postsubmit) { # Submit the job my ($submitcommand, $submitpattern) = get_submitcommand; $submitcommand = replace_patterns $submitcommand, $substitutions; my $arguments = join ' ', get_arguments; write_log $log, "About to submit job\n"; my $cmd = "cd $workdir && { $submitcommand $arguments; }"; write_log $log, "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; quote_output $output; quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } write_log $log, "Submitted job\n"; # Extract job id $output =~ /$submitpattern/m; $job_id = $1; if (! defined $job_id) { write_log $log, "Could not determine job id\n"; write_log $log, "Using a fake job id instead\n"; $job_id = "unknown-job-id"; } write_string "$workdir/$internal_dir/JOB_ID", $job_id; write_log $log, "Job id is \"$job_id\"\n"; } else { # Use existing the job write_log $log, "Using existing job\n"; # TODO: communicate PBS job id, and ensure that the job # actually exists in the queue } if ($recover == 1) { append_file "$from_workdir/$internal_dir/RESTARTED_AS_ID", "$restart_id\n"; } close_log $log; } # create Ss && submit Ss sub command_create_submit (@) { my @args = @_; # create has one argument, the simulation name my $simulation_name = shift @args; command_create $simulation_name; my @submit_args; if (@args == 2) { my $procs_arg = shift @args; my $walltime_arg = shift @args; @submit_args = ($simulation_name, $procs_arg, $walltime_arg); } else { @submit_args = ($simulation_name); } # Wait a few seconds, to give the nodes' view of the file system # to see what we just created on the head node sleep $submit_delay; command_submit @submit_args; } # cleanup Ss && submit Ss sub command_cleanup_submit (@) { my @args = @_; my @configuration_names = @args; # Check arguments if (! @configuration_names) { die "Configuration name(s) not specified"; } command_cleanup @configuration_names; command_submit @configuration_names; } # Determine job status # TODO: Unify this with "list-simulations", which gives more information sub command_status (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } my $simulation_name = derive_sim_name(shift @args); if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } # Check arguments check_filename 'Simulation name', $simulation_name; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { die "Cannot access simulation skeleton directory \"$dirname\""; } # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart exists if (! defined $max_restart_id) { print "Simulation has no restarts\n"; } else { print "Largest restart id is \"$max_restart_id\"\n"; } # Check whether a restart is currently active if (! defined $active_id) { print "Simulation is inactive\n"; print "Done.\n"; exit 0; } # Set up restart id my $restart_id = get_restart_id; if (defined $restart_id) { # Check that the given restart id is active if ($restart_id ne $active_id) { print "Restart id \"$restart_id\" is not active\n"; print "Aborting.\n"; exit 2; } } else { $restart_id = $active_id; } # Format restart id $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; print "Restart id \"$restart_id\" is active\n"; # Find checkpoint files in the directory my $files1 = `cd $workdir && find . -name "*.chkpt.it_*.*"`; my %files; foreach my $file (split "\n", $files1) { if ($file !~ /[.]tmp$/ && $file =~ /[.]chkpt[.]it_([[:digit:]]+)[.]/) { my $iter = $1; if (! defined $files{$iter}) { $files{$iter} = (); } push @{$files{$iter}}, $file; } } # Find latest checkpoint iteration my $maxiter; foreach my $iter (keys %files) { if (! defined $maxiter || $iter > $maxiter) { $maxiter = $iter; } } if (defined $maxiter) { print "Checkpoint for iteration \"$maxiter\" available\n"; } # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; my $user = get_username; print "Job id is \"$job_id\"\n"; # Request job status print "Requesting job status\n"; my ($statuscommand, $statuspattern) = get_statuscommand; my $substitutions = get_substitutions; add_define $substitutions, 'JOB_ID', $job_id; add_define $substitutions, 'USER' , $user; $statuscommand = replace_patterns $statuscommand, $substitutions; $statuspattern = replace_patterns $statuspattern, $substitutions; my $cmd = "cd $workdir && { $statuscommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } # Extract job status if ($output =~ /$statuspattern/m) { print "Job is active (queued or running)\n"; } else { print "Job is inactive (finished or stopped)\n"; } } # Print the execution host name sub command_print_exechost (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } my $simulation_name = derive_sim_name(shift @args); if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } # Check arguments check_filename 'Simulation name', $simulation_name; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { die "Cannot access simulation skeleton directory \"$dirname\""; } # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart exists if (! defined $max_restart_id) { print "Simulation has no restarts\n"; print "Aborting.\n"; exit 2; } else { print "Largest restart id is \"$max_restart_id\"\n"; } if (defined $active_id) { print "Restart id \"$active_id\" is active\n"; } # Set up restart id my $restart_id = get_restart_id; if (! defined $restart_id) { # Check whether a restart is currently active if (defined $active_id) { $restart_id = $active_id; } else { # Use largest restart id $restart_id = $max_restart_id; } } # Format restart id my $restart_is_active = $states{$restart_id} eq 'active'; $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; print "Using restart id \"$restart_id\"\n"; if ($restart_is_active) { print "This restart id is active\n"; } my $job_is_active = 0; my $job_is_running = 0; if (! $restart_is_active) { print "Restart is not active\n"; print "Aborting.\n"; exit 2; } # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; my $user = get_username; print "Job id is \"$job_id\"\n"; # Request job status my ($statuscommand, $statuspattern, $runningpattern, $queuedpattern) = get_statuscommand; my $substitutions = get_substitutions; add_define $substitutions, 'JOB_ID', $job_id; add_define $substitutions, 'USER' , $user; $statuscommand = replace_patterns $statuscommand, $substitutions; $statuspattern = replace_patterns $statuspattern, $substitutions; $runningpattern = replace_patterns $runningpattern, $substitutions; $queuedpattern = replace_patterns $queuedpattern, $substitutions; { my $cmd = "cd $workdir && { $statuscommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; #quote_output $output; #quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } # Extract job status $job_is_active = $output =~ /$statuspattern/m; $job_is_running = $output =~ /$runningpattern/m; } if (! $job_is_active) { print "Job is not active\n"; print "Aborting.\n"; exit 2; } if (! $job_is_running) { print "Job is not running\n"; print "Aborting.\n"; exit 2; } # Request exec host print "Requesting execution host\n"; my ($exechostcommand, $exechostpattern) = get_exechostcommand; $exechostcommand = replace_patterns $exechostcommand, $substitutions; $exechostpattern = replace_patterns $exechostpattern, $substitutions; my $exechost; { my $cmd = "cd $workdir && { $exechostcommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } $output =~ /$exechostpattern/m; if (! defined $1) { print "Cannot determine execution host\n"; print "Aborting.\n"; exit 2; } $exechost = $1; } print "Execution host is \"$exechost\"\n"; } # Show job output sub command_show_output (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } my $simulation_name = derive_sim_name(shift @args); if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } # Check arguments check_filename 'Simulation name', $simulation_name; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { die "Cannot access simulation skeleton directory \"$dirname\""; } # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart exists if (! defined $max_restart_id) { print "Simulation has no restarts\n"; print "Aborting.\n"; exit 2; } else { print "Largest restart id is \"$max_restart_id\"\n"; } if (defined $active_id) { print "Restart id \"$active_id\" is active\n"; } # Set up restart id my $restart_id = get_restart_id; if (! defined $restart_id) { # Check whether a restart is currently active if (defined $active_id) { $restart_id = $active_id; } else { # Use largest restart id $restart_id = $max_restart_id; } } # Format restart id my $restart_is_active = $states{$restart_id} eq 'active'; $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; print "Using restart id \"$restart_id\"\n"; if ($restart_is_active) { print "This restart id is active\n"; } # Create the internal directory if it is missing if (! -e "$dirname/$internal_dir") { symlink ".", "$dirname/$internal_dir" } # Get Formaline output my $parfile = list_file "$dirname/$internal_dir/par"; my $outdir = $parfile; $outdir =~ s/[.]par$//; my $formaline_file = "$workdir/$outdir/formaline-jar.txt"; my $restart_formaline = maybe_read_file $formaline_file; $restart_formaline = '(file does not exist)' if ! defined $restart_formaline; my $restart_stdout = '(file does not exist)'; my $restart_stderr = '(file does not exist)'; my $job_is_active = 0; my $job_is_queued = 0; my $job_is_running = 0; my $did_show_output = 0; if ($restart_is_active) { # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; my $user = get_username; print "Job id is \"$job_id\"\n"; # Request job status my ($statuscommand, $statuspattern, $runningpattern, $queuedpattern) = get_statuscommand; my $substitutions = get_substitutions; add_define $substitutions, 'SIMULATION_NAME', $simulation_name; add_define $substitutions, 'JOB_ID' , $job_id; add_define $substitutions, 'USER' , $user; $statuscommand = replace_patterns $statuscommand, $substitutions; $statuspattern = replace_patterns $statuspattern, $substitutions; $runningpattern = replace_patterns $runningpattern, $substitutions; $queuedpattern = replace_patterns $queuedpattern, $substitutions; { my $cmd = "cd $workdir && { $statuscommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; #quote_output $output; #quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } # Extract job status $job_is_active = $output =~ /$statuspattern/m; $job_is_queued = $output =~ /$queuedpattern/m; $job_is_running = $output =~ /$runningpattern/m; } if ($job_is_active) { # Job is active print "Job is active\n"; if ($job_is_queued) { print "Job is queued\n"; print "(There is no output yet)\n"; $did_show_output = 1; } if ($job_is_running) { print "Job is running\n"; # Request exec host print "Requesting exec host\n"; my ($exechostcommand, $exechostpattern) = get_exechostcommand; $exechostcommand = replace_patterns $exechostcommand, $substitutions; $exechostpattern = replace_patterns $exechostpattern, $substitutions; my $exechost; { my $cmd = "cd $workdir && { $exechostcommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } $output =~ /$exechostpattern/m; if (! defined $1) { print "Cannot determine exec host\n"; print "Aborting.\n"; exit 2; } $exechost = $1; } print "Exec host is \"$exechost\"\n"; add_define $substitutions, 'EXECHOST', $exechost; # Request stdout and stderr my ($stdoutcommand, $stderrcommand, $stdoutfollowcommand) = get_showoutputcommand; $stdoutcommand = replace_patterns $stdoutcommand, $substitutions; $stderrcommand = replace_patterns $stderrcommand, $substitutions; $stdoutfollowcommand = replace_patterns $stdoutfollowcommand, $substitutions; # Abuse the "force" option, since it can be # abbreviated as "-f" my $force = defined $options{'force'}; if (! $force) { { my $cmd = "cd $workdir && { $stdoutcommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } $restart_stdout = $output; } { my $cmd = "cd $workdir && { $stderrcommand; }"; # print "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } $restart_stderr = $output; } } else { { my $cmd = "cd $workdir && { $stdoutfollowcommand; }"; execute $cmd; $did_show_output = 1; } } } # if job is running } # if job is active } # if restart is active if (! $job_is_active) { # Job is not running my $stdout_file = "$workdir/$simulation_name.out"; my $stderr_file = "$workdir/$simulation_name.err"; $restart_stdout = maybe_read_file $stdout_file; $restart_stderr = maybe_read_file $stderr_file; } if (! $did_show_output) { # Show stdout, stderr, and Formaline output my $sep = '=' x 80; print "$sep\n"; print "The job's Formaline output is:\n"; print "$sep\n"; print "$restart_formaline\n"; print "$sep\n"; print "The job's stdout is:\n"; print "$sep\n"; print "$restart_stdout\n"; print "$sep\n"; print "The job's stderr is:\n"; print "$sep\n"; print "$restart_stderr\n"; print "$sep\n"; } } # Stop job sub command_stop (@) { my @args = @_; my @simulation_names = map (derive_sim_name $_, @args); # Check arguments if (! @simulation_names) { die "Simulation name(s) not specified"; } my $force = defined $options{'force'}; foreach my $simulation_name (@simulation_names) { check_filename 'Simulation name', $simulation_name; print "Stopping simulation \"$simulation_name\":\n"; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { print "Cannot access simulation skeleton directory \"$dirname\".\n"; next; } my $log = open_log $dirname; write_log $log, "Stopping:\n"; # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart is currently active if (! defined $active_id) { write_log $log, "No active restart id\n"; close_log $log; print "Doing nothing.\n"; next; } # Set up restart id my $restart_id = get_restart_id; if (defined $restart_id) { # Check that the given restart id is active if ($restart_id ne $active_id) { write_log $log, "Restart id \"$restart_id\" is not active\n"; close_log $log; print "Doing nothing.\n"; next; } } else { $restart_id = $active_id; } # Format restart id $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; write_log $log, "Using restart id \"$restart_id\"\n"; # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } my $stopped_file = "$workdir/$internal_dir/STOPPED"; my $termination_file = "$workdir/TERMINATE"; my $have_termination_file = -e $termination_file; my $use_termination_file = $have_termination_file; if ($have_termination_file) { write_log $log, "Termination file \"$termination_file\" found\n"; if ($force) { write_log $log, "Forcing stop\n"; $use_termination_file = 0; } } if ($use_termination_file) { # The job supports self-termination; use this write_log $log, "Requesting job termination\n"; write_log $log, "Writing \"1\" to termination file\n"; open TF, "> $termination_file"; print TF "1\n"; close TF; open DF, "> $stopped_file"; print DF "terminated\n"; close DF; write_log $log, "Requested job termination\n"; } else { # The job does not support self-termination; kill it # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; write_log $log, "Job id is \"$job_id\"\n"; # Stop job write_log $log, "Requesting job stop\n"; my $user = get_username; my $stopcommand = get_stopcommand; my $substitutions = get_substitutions; add_define $substitutions, 'JOB_ID', $job_id; add_define $substitutions, 'USER' , $user; $stopcommand = replace_patterns $stopcommand, $substitutions; my $cmd = "cd $workdir && { $stopcommand; }"; write_log $log, "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; quote_output $output; quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } open DF, "> $stopped_file"; print DF "stopped\n"; close DF; write_log $log, "Requested job stop\n"; } close_log $log; print "\n"; } } # Clean up after a job has finished sub command_cleanup (@) { my @args = @_; my @simulation_names = map (derive_sim_name $_, @args); # Check arguments if (! @simulation_names) { die "Simulation name(s) not specified"; } my $force = defined $options{'force'}; foreach my $simulation_name (@simulation_names) { check_filename 'Simulation name', $simulation_name; print "Cleaning up simulation \"$simulation_name\":\n"; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { print "Cannot access simulation skeleton directory \"$dirname\".\n"; next; } my $log = open_log $dirname; write_log $log, "Cleaning up simulation:\n"; # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart is currently active if (! defined $active_id) { write_log $log, "No active restart id\n"; close_log $log; print "Doing nothing.\n"; next; } # Set up restart id my $restart_id = get_restart_id; if (defined $restart_id) { # Check that the given restart id is active if ($restart_id ne $active_id) { write_log $log, "Restart id \"$restart_id\" is not active\n"; close_log $log; print "Doing nothing.\n"; next; } } else { $restart_id = $active_id; } # Format restart id $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; write_log $log, "Cleaning up restart id \"$restart_id\"\n"; # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; write_log $log, "Job id is \"$job_id\"\n"; my $user = get_username; # Request job status # TODO: Use get_job_status instead of this code write_log $log, "Requesting job status\n"; my ($statuscommand, $statuspattern) = get_statuscommand; my $substitutions = get_substitutions; add_define $substitutions, 'JOB_ID', $job_id; add_define $substitutions, 'USER' , $user; $statuscommand = replace_patterns $statuscommand, $substitutions; $statuspattern = replace_patterns $statuspattern, $substitutions; my $cmd = "cd $workdir && { $statuscommand; }"; # write_log $log, "Executing: $cmd\n"; my ($output, $stderr, $status) = subcommand $cmd; # quote_output $output; # quote_output $stderr; if ($status) { warn "Command \"$cmd\" returned $status\n"; } # Extract job status if ($output =~ /$statuspattern/m) { write_log $log, "Job is active (queued or running)\n"; if ($force) { write_log $log, "Forcing cleanup\n"; } else { print " (You have to stop this job before you can clean it up)\n"; close_log $log; print "Doing nothing.\n"; next; } } else { write_log $log, "Job is inactive (finished or stopped)\n"; } # Deactivate restart directory $restart_dir = "output-$restart_id"; $workdir = "$dirname/$restart_dir"; if (! -e "$workdir-active") { die "Restart \"$restart_id\" is not active"; } die if ! -l "$workdir-active"; unlink "$workdir-active" or die "Could not deactivate restart directory"; write_log $log, "Deactivated restart directory\n"; # Make termination file not-world-writable if (-e "$workdir/TERMINATE") { chmod 0644, "$workdir/TERMINATE"; } # Make output files world-readable chmod 0644, "$workdir/$simulation_name.out"; chmod 0644, "$workdir/$simulation_name.err"; # Remove scratch directory if it is not stored on the compute # nodes my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $scratchdir = $entry->{'scratchdir'}; if ($scratchdir =~ m+^[^/]+) { execute "rm -rf '$workdir/$scratchdir'"; } # Remove half-written checkpoint files { my $files = `cd $workdir && find . -name "*.chkpt.tmp.it_*.*"`; unlink split "\n", $files; } # Hard-link (Formaline) tarballs between different restarts { my @files = split "\n", `cd $workdir && find . -name "*.tar.gz"`; file: foreach my $file (@files) { # Only tread real files next if ! -f "$workdir/$file"; # Only treat files with exactly 1 hard link, so that # an existing hard link structure between files is not # destroyed my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat "$workdir/$file"; next if $nlink != 1; for my $other_restart_id (reverse 0 .. $restart_id - 1) { $other_restart_id = sprintf '%04d', $other_restart_id; my $other_restart_dir = "output-$other_restart_id"; my $other_workdir = $workdir; $other_workdir =~ s+/$restart_dir$+/$other_restart_dir+; if (compare "$workdir/$file", "$other_workdir/$file" == 0) { link "$other_workdir/$file", "$workdir/$file"; next file; } } } } close_log $log; print "\n"; } } # stop Ss && cleanup Ss sub command_stop_cleanup (@) { my @args = @_; my @configuration_names = @args; # Check arguments if (! @configuration_names) { die "Configuration name(s) not specified"; } command_stop @configuration_names; command_cleanup @configuration_names; } # Completely remove a simulation sub command_purge (@) { my @args = @_; my @simulation_names = map (derive_sim_name $_, @args); # Check arguments if (! @simulation_names) { die "Simulation name(s) not specified"; } my $force = defined $options{'force'}; foreach my $simulation_name (@simulation_names) { check_filename 'Simulation name', $simulation_name; print "Purging simulation \"$simulation_name\":\n"; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { print "Cannot access simulation skeleton directory \"$dirname\".\n"; next; } my $log = open_log $dirname; write_log $log, "Purging simulation:\n"; # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # We only want to purge the simulation if it does not # currently have a queued or running restart. my $running_or_queued_restart = 0; foreach my $restart_id (keys %states) { my $state = $states{$restart_id}; # state is either "presubmitted" or "active" my $restart_id_str = sprintf '%04d', $restart_id; # For the moment only worry about the active restart, not # presubmitted ones if ($state eq "active") { my $restart_dir = "output-${restart_id_str}"; my $workdir = "$dirname/$restart_dir"; # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; write_log $log, "Job id is \"$job_id\"\n"; my $job_status = get_job_status($job_id); if ($job_status eq "Q" || $job_status eq "R") { $running_or_queued_restart = 1; } } } if ($running_or_queued_restart) { if (! $force) { print " (You have to stop this job before you can purge it)\n"; close_log $log; print "Doing nothing.\n"; next; } } # Gather some information about the job my $simulation_id = read_string "$dirname/SIMULATION_ID"; # Remove simulation completely my $trash = "$basedir/TRASH"; mkdir $trash; if (! -d $trash) { die "Could not create trash directory \"$trash\""; } my $newdirname = "$simulation_name-$simulation_id"; print "Moving simulation to trash directory\n"; write_log $log, "Moving simulation to directory \"$trash/$newdirname\"\n"; rename $dirname, "$trash/$newdirname" or die "Could not move simulation"; # TODO: keep metadata in an archive close_log $log; print "\n"; } } # execute "rm -rf $dirname"; # print "Entropy has been reduced.\n"; # Run a job for a simulation sub command_run (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } my $simulation_name = derive_sim_name(shift @args); check_filename 'Simulation name', $simulation_name; print "Running simulation \"$simulation_name\":\n"; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { print "Cannot access simulation skeleton directory \"$dirname\".\n"; next; } my $log = open_log $dirname; write_log $log, "Running simulation:\n"; # Examine state of skeleton my ($max_restart_id, $max_existing_id, $active_id, %states) = find_largest_restart_id "$dirname"; # Check whether a restart is currently active if (! defined $active_id) { write_log $log, "No active restart id\n"; close_log $log; print "Aborting.\n"; exit 2; } my $restart_id = $active_id; # Format restart id $restart_id = sprintf '%04d', $restart_id; my $restart_dir = "output-$restart_id"; my $workdir = "$dirname/$restart_dir"; write_log $log, "Running restart id \"$restart_id\"\n"; # Create the internal directory if it is missing if (! -e "$workdir/$internal_dir") { symlink ".", "$workdir/$internal_dir" } # Determine job id my $job_id = read_string "$workdir/$internal_dir/JOB_ID"; write_log $log, "Job id is \"$job_id\"\n"; # We only want to run the simulation if the restart is running my $job_status = get_job_status($job_id); if ($job_status ne 'R') { write_log $log, "Job is not running.\n"; close_log $log; print "Aborting.\n"; exit 2; } # Find out whether the job is already running my $runningfile = "$workdir/$internal_dir/RUNNING"; if (-e $runningfile) { write_log $log, "Job is already running.\n"; close_log $log; print "Aborting.\n"; exit 2; } my $pid = $$; write_string $runningfile, $pid; write_log $log, "Running job from PID $pid\n"; my $substitutions = get_substitutions; # TODO: don't expand these while submitting, expand these only # while running # TODO: move most of the postsubmit logic here # TODO: don't make any decisions here (about processors, threads, # codes, directories, etc.); just run the stuff # TODO: how should the decisions be passed? in small files? # TODO: should this assume that it should run in the current # directory, and ignore most of the other simfactory logic? # if so, should it still fill in the log file, or should it # just emit to stdout? # TODO: should there be a special mdb section about running? # e.g. looking at stdout/stderr while running may go there # -- or it may not, since anything with PBS should not go # here. my $executable = read_string "$workdir/$internal_dir/EXECUTABLE"; my $parfile = read_string "$workdir/$internal_dir/PARFILE"; my $num_threads = read_string "$workdir/$internal_dir/NUM_THREADS"; my $num_procs = read_string "$workdir/$internal_dir/NUM_PROCS"; my $scratchdir = read_string "$workdir/$internal_dir/SCRATCHDIR"; add_define $substitutions, 'EXECUTABLE' , $executable; add_define $substitutions, 'PARFILE' , $parfile; add_define $substitutions, 'NUM_THREADS', $num_threads; add_define $substitutions, 'NUM_PROCS' , $num_procs; add_define $substitutions, 'RUNDIR' , $workdir; add_define $substitutions, 'SCRATCHDIR' , $scratchdir; # Set up directory structure execute "mkdir -p '$scratchdir'"; execute "ln -s '$scratchdir' '$workdir/scratch'"; # Output environment execute "env | sort > $workdir/$internal_dir/ENVIRONMENT"; # Output random useful information write_log $log, "Host name: " . `hostname`; write_log $log, "Starting: " . `date`; my ($runcmd, $run2cmd) = get_runcommand; $runcmd = replace_patterns $runcmd, $substitutions; # Create a trampoline shell script if necessary if (defined $run2cmd) { $run2cmd = replace_patterns $run2cmd, $substitutions; write_file "$workdir/$internal_dir/RunCmd", $run2cmd; chmod 0755, "$workdir/$internal_dir/RunCmd"; } my $cmd = "$runcmd $executable -L 3 $parfile"; # Remember start time to measure startup overhead $ENV{'CACTUS_STARTTIME'} = `date +%s`; write_log $log, "Running job\n"; $cmd = "cd $workdir && { $cmd; }"; write_log $log, "Executing: $cmd\n"; close_log $log; my $child = fork; die unless defined $child; if (! $child) { # This is the child; execute the job exec $cmd; die "Could not execute command"; } # This is the parent; wait for the child waitpid $child, 0; $log = open_log $dirname; write_log $log, "Stopping: " . `date`; # Tear down directory structure execute "rm '$workdir/$scratchdir'"; execute "rm -rf '$scratchdir'"; close_log $log; } # Add a comment sub command_comment (@) { my @args = @_; if (@args < 1) { die "Simulation name or parameter file not specified"; } my $simulation_name = derive_sim_name(shift @args); if (@args) { print "\n"; print "WARNING: Unused command line arguments:\n"; print "@args\n"; print "\n"; } # Check arguments if (! defined $simulation_name) { die "Simulation name missing"; } check_filename 'Simulation name', $simulation_name; # Find job directory my $basedir = get_basedir; my $dirname = "$basedir/$simulation_name"; if (! -d $dirname) { die "Cannot access simulation skeleton directory \"$dirname\""; } my $log = open_log $dirname; write_log $log, "User comment:\n"; # Add comment to log file my $commentfile = get_commentfile; my $commentlines; if (defined $commentfile) { $commentlines = read_file $commentfile; } else { print "Type your comment; finish with EOF (ctrl-D):\n"; my $oldflush = $|; $| = 1; # flush stdout my @lines = ; $| = $oldflush; $commentlines = join '', @lines; } write_log $log, $commentlines; close_log $log; } # Handle an unknown command sub command_unknown (@) { print "sim: Unknown command \"$command\"\n"; print "Use \"sim help\" for a list of commands.\n"; exit 1; } # Create unique simulation id sub create_simulation_id ($) { my ($simulation_name) = @_; my $machine = get_machine ''; #my $hostname = get_hostnamealias; my $entry = $mdb::machine_database{$machine}; my $hostname = $entry->{'hostname'}; my $user = get_username; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime (time); $mon += 1; $year += 1900; my $timestamp = (sprintf "%4d.%02d.%02d-%02d.%02d.%02d", $year,$mon,$mday, $hour,$min,$sec); my $pid = $$; my $simulation_id = "simulation-$simulation_name-$machine-$hostname-$user-$timestamp-$pid"; check_filename 'Simulation id', $simulation_id; return $simulation_id; } # Find largest used restart id # States are either 'active', 'presubmitted', or '' (finished) sub find_largest_restart_id ($) { my ($dir) = @_; # Read all file names and look for restart directories my %states; # id -> filename opendir DIR, $dir or die "Could not read directory \"$dir\""; while (my $file = readdir DIR) { if ($file =~ /^output-([[:digit:]]+)$/) { my $id = $1; my $state = ''; if (-l "$dir/$file-active") { die if $state ne ''; $state = 'active'; } else { die if -e "$dir/$file-active"; } if (-l "$dir/$file-presubmitted") { die if $state ne ''; $state = 'presubmitted'; } else { die if -e "$dir/$file-presubmitted"; } if (defined $states{$id}) { die "Duplicate restart ids in directory \"$dir\""; } $states{$id} = $state; } } closedir DIR or die; opendir DIR, $dir or die "Could not read directory \"$dir\""; while (my $file = readdir DIR) { if ($file =~ /^output-([[:digit:]]+)([^[:digit:]].*)$/) { my $restart = $1; my $suffix = $2; $suffix =~ s/^-// or die "Malformed suffix \"$suffix\" for restart \"$restart\""; $suffix eq 'active' || $suffix eq 'presubmitted' or die "Unknown suffix \"$suffix\" for restart \"$restart\""; } } closedir DIR or die; # Find the maximum regular (non-presubmitted) restart id my $max_restart_id; foreach my $id (keys %states) { my $state = $states{$id}; if ($state ne 'presubmitted') { if (! defined $max_restart_id || $id > $max_restart_id) { $max_restart_id = $id; } } } # Find the maximum restart id (presubmitted or not) my $max_existing_id; foreach my $id (keys %states) { if (! defined $max_existing_id || $id > $max_existing_id) { $max_existing_id = $id; } } # Is any restart active? my $active_id; foreach my $id (keys %states) { my $state = $states{$id}; if ($state eq 'active') { ! defined $active_id or die "More than one active restart ids in directory \"$dir\""; $active_id = $id; } } return ($max_restart_id, $max_existing_id, $active_id, %states); } # Handle log file sub open_log ($) { my $dirname = shift; defined $dirname or die; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = localtime (time); $mon += 1; $year += 1900; my $date = (sprintf "%4d-%02d-%02d %2d:%02d:%02d", $year,$mon,$mday, $hour,$min,$sec); my $machine = get_machine ''; #my $hostname = get_hostnamealias; my $entry = $mdb::machine_database{$machine}; my $hostname = $entry->{'hostname'}; my $user = get_username; local *LOG; open (LOG, ">> $dirname/LOG") or die "Could not open log file"; print LOG "\n--------------------------------------------------------------------------------\n"; print LOG "$date $user\@$hostname:\n"; return *LOG; } sub write_log (*$) { my ($log, $message) = @_; print $log $message; print "[log] $message"; } sub close_log (*) { my ($log) = @_; print $log "--------------------------------------------------------------------------------\n"; close $log; } # Handle files sub read_file ($) { my ($filename) = @_; open (FILE, "< $filename") or die "Cannot read file \"$filename\""; my @lines = ; close FILE or die; return (join '', @lines); } sub maybe_read_file ($) { my ($filename) = @_; if (-e $filename) { return read_file $filename; } } sub write_file ($$) { my ($filename, $lines) = @_; ! -e $filename or die "File \"$filename\" exists already"; open (FILE, "> $filename") or die "Cannot write file \"$filename\""; print FILE $lines or die; close FILE or die; } sub append_file ($$) { my ($filename, $lines) = @_; open (FILE, ">> $filename") or die "Cannot append to file \"$filename\""; print FILE $lines or die; close FILE or die; } sub read_string ($) { my ($filename) = @_; my $string = read_file $filename; chomp $string; return $string; } sub maybe_read_string ($) { my ($filename) = @_; if (-e $filename) { return read_string $filename; } } sub write_string ($$) { my ($filename, $string) = @_; if (! defined $string) { die $filename; } die if $string =~ m/\n$/; write_file $filename, "$string\n"; } sub maybe_write_string ($$) { my ($filename, $string) = @_; return if ! defined $string; write_string $filename, $string; } # Caching utilities for executables sub copy_file ($$$) { my ($srcfile, $dstdir, $cachedir) = @_; # Create cache mkpath $cachedir; # Find cache entry my $file = $srcfile; $file =~ s+^.*/++; my $cachefile = "$cachedir/$file"; my $dstfile = "$dstdir/$file"; # Check cache if (! -e $cachefile) { # Copy file unlink $dstfile; cp $srcfile, $dstfile or die; my $mode = (stat $srcfile)[2] or die; chmod $mode, $dstfile or die; # Populate cache unlink $cachefile; link $dstfile, $cachefile; } else { # Use cached file link $cachefile, $dstfile; # Check file my $isequal = 1; # Don't use md5; it is as expensive as comparing the files directly # my $md5_src = md5_digest $srcfile; # my $md5_dst = md5_digest $dstfile; # $isequal = $isequal && $md5_src eq $md5_dst; # Don't compare the files; this is too expensive # $isequal = $isequal && (compare $srcfile, $dstfile) == 0; # Compare file dates and sizes # -M returns the age of the file (in days) # -s returns the size of the file (in bytes) $isequal = $isequal && -M $srcfile >= -M $dstfile; $isequal = $isequal && -s $srcfile == -s $dstfile; if (! $isequal) { # Copy file unlink $dstfile; cp $srcfile, $dstfile or die; my $mode = (stat $srcfile)[2] or die; chmod $mode, $dstfile or die; # Update cache unlink $cachefile; link $dstfile, $cachefile; } } } ## Calculate md5 digest #sub md5_digest ($) #{ # my ($file) = @_; # # open FILE, $file or die "Could not open file \"$file\""; # binmode FILE; # my $md5 = Digest::MD5->new->addfile(*FILE)->hexdigest; # close FILE; # # return $md5; #} # Return files in directory sub list_files ($;$) { my ($dir, $pattern) = @_; $pattern = '*' if ! defined $pattern; my $olddir = getcwd; if (! defined $olddir) { die "Error in getcwd -- possible file system error"; } chdir $dir or die "Directory $dir does not exist"; my @files = glob $pattern; chdir $olddir; return @files; } # Return the single file in a directory sub list_file ($) { my ($dir) = @_; my @files = list_files $dir; if ($#files < 0) { if (!$xml) { print "No files in directory \"$dir\"\n"; } return; } if ($#files > 0) { if (!$xml) { print "Too many files in directory \"$dir\"\n"; } } return $files[0]; } # Execute a shell command, outputting stdout and stderr sub execute ($) { my ($cmd) = @_; die if ! defined $cmd; print "Executing: $cmd\n"; system $cmd; die if $? == -1; if ($?) { my $error = $? >> 8; die "Error $error occurred while executing command \"$cmd\""; } } # This code is based on the recipe in the Perl FAQ for capturing the # output and error from an external command. See # sub subcommand (@) { my ($command) = @_; # These return file descriptors. The files are immediately # unlinked, so will be lost when the file handles are closed. local *CATCHOUT = IO::File->new_tmpfile; local *CATCHERR = IO::File->new_tmpfile; my $pid = open3 gensym, ">&CATCHOUT", ">&CATCHERR", $command; waitpid $pid, 0; my $retcode = $?; seek $_, 0, 0 for \*CATCHOUT, \*CATCHERR; my $stdout = join '', ; my $stderr = join '', ; $stdout = '' if ! defined $stdout; $stderr = '' if ! defined $stderr; return ($stdout, $stderr, $retcode); } # Write quoted output sub quote_output ($) { my ($output) = @_; return if $output eq ''; $output =~ s/^/> /mg; print $output; } # # Determine the replacement in a pattern substitution depending on the # # matched pattern # sub replace_matches ($$); # sub replace_matches ($$) # { # my $replacement = shift; # my @matches = @_; # $replacement =~ s/\$(\d+)/$matches[$1-1]/eg; # return $replacement; # } # Replace patterns # TODO: Handle single @ characters cleanly sub replace_patterns ($$) { my ($lines, $substitutions) = @_; my $old_lines; do { $old_lines = $lines; # NOTE: Idea: Replace @@ by @, and complain about single @ # characters # (This doesn't work since @unknown@ is ignored) # Replace macros of the form @MACRO@ foreach my $pattern (keys %{$substitutions}) { my $replacement = $substitutions->{$pattern}; if (! defined $replacement && $lines =~ /$pattern/) { die "Replacement for pattern \"$pattern\" is not defined, but the pattern used"; } $lines =~ s/$pattern/$replacement/mg; my $match1 = $1; $lines =~ s/\@1\@/$match1/mg if defined $match1; #$lines =~ s/$pattern/replace_matches("$replacement","$1")/emg; } # This does not work, because the parameter and script files # undergo pattern replacement twice -- both when creating and # when submitting a simulation. # NOTE: Idea: Replace only expressions which do not contain @ # characters # # Replace expressions of the form @(EXPRESSION)@ (experimental) # while ($lines =~ /\@\((.*?)\)\@/) { # my ($before, $expr, $after) = ($`, $1, $'); # '); # my $newexpr = eval $expr; # $lines = $before . $newexpr . $after; # } # Replace expressions of the form @(EXPRESSION)@ (experimental) while ($lines =~ /\@\(([^@]*?)\)\@/) { my ($before, $expr, $after) = ($`, $1, $'); # '); my $newexpr = eval $expr; if ($@) { die "Error while evaluating the expression \"$expr\"in a template:\n$@"; } $lines = $before . $newexpr . $after; } # Loop until there is no change any more } while ($lines ne $old_lines); # Remove the protection that may have been added by replacements # and attachments $lines =~ s/^=SIMFACTORY-PROTECTED=//mg; return $lines; } # Ensure there are no undefined patterns left sub ensure_no_patterns ($$) { my ($name, $lines) = @_; # Warn about all patterns: # m/(@[^@]+@)/ # Warn about patterns that don't contain line breaks: # m/(@[^@\n]+@)/ # Warn about patterns containing only identifiers: if ($lines =~ m/(@([[:alnum:]]+)|(@\([^\n]+\)@)@)/) { my $pattern = $1; warn "In $name, pattern \"$pattern\" is used but has not been defined"; } } # Check a file name: Make sure that it is not empty, and that is # contains only legal characters sub check_filename ($$) { my ($purpose, $filename) = @_; defined $purpose or die; $purpose ne '' or die; if (! defined $filename) { die "$purpose: File name not specified"; } if ($filename eq '') { die "$purpose: File name is empty"; } # We are very conservative as to what characters we allow. Other # characters can be added, but be careful. if ($filename !~ m|^[-[:alnum:]._/]+$|) { die "$purpose: Name \"$filename\" contains illegal characters"; } } # Create a string which is suitable as PBS job name sub shorten_string ($) { my ($string) = @_; defined $string or die; if (defined $options{'hide'}) { # Hide the simulation #my ($seconds, $useconds) = gettimeofday; #my $time = ($seconds + $useconds) % 10000; #my $seconds = time; #my $random = int (rand 100); #my $time = ($seconds * 100 + $random) % 10000; #my $time = int (rand 10000); my $time = $$; $string = sprintf "sim-%06d", $time; } elsif (defined $options{'hide-boring'}) { # Make the job look boring my @randoms = ('headon', 'D3.0', 'a0.6', 'mu0.25', 'PN1.5', 'FMR', '1+log', 'nowaves', 'findAH', 'coarse', 'singleBH', 'PUGH', 'movie'); my $random = $randoms[int (rand @randoms)]; my $time = time; $string = "sim-$random-$time"; } elsif (defined $options{'hide-dangerous'}) { # Make the job look dangerous my @randoms = ('paramesh', 'D25.0', 'a0.999', 'mu0.01', 'PN4.0', 'CCM', 'spec35', 'maximal', 'string', 'FE', 'tail', 'DSS', 'PRL', 'naked'); my $random = $randoms[int (rand @randoms)]; my $time = time; $string = "sim-$random-$time"; } # PBS says: "The name specified may be up to and including 15 # characters in length. It must consist of printable, non white # space characters with the first character alphabetic." $string =~ s/[^[:print:]]//g; $string =~ s/[[:space:]]/_/g; $string =~ s/^(?![[:alpha:]])/J/; $string = substr $string, 0, 15; return $string; } # Given a name intended to be used as a simulation name, return a pair # (sim name, par file name) where par file name might be undef. sub derive_sim_and_parfile_name ($) { my ($in_name) = @_; die unless defined $in_name; my ($name,$path,$suffix) = fileparse $in_name, ('.par'); if ($suffix eq '') { return ($in_name, undef); } else { return ($name, $in_name); } } # If passed a parameter file name, generate a simulation name from it. # Otherwise, return as is. sub derive_sim_name ($) { my ($in_name) = @_; my ($sim_name, $parfile) = derive_sim_and_parfile_name($in_name); return $sim_name; } # Add quotes to make a string safe for a shell sub quotesafe ($) { my ($string) = @_; defined $string or die; $string =~ s+'+'\\''+g; return "'$string'"; } # Add quotes to make a string safe for XML sub quotexml ($) { my ($string) = @_; defined $string or die; # Replace special characters $string =~ s/&/&/g; $string =~ s//>/g; return $string; } # Clean a string to make a string usable as XML tag sub cleanxmltag ($) { my ($string) = @_; defined $string or die; # Replace special characters $string =~ s/[^[:alnum:]_]/_/g; return $string; } # Add quotes to make a string safe for the inifile format sub quoteini ($) { my ($string) = @_; defined $string or die; # Add quotes if the string is surrounded by white space, or if it # looks like a quoted string if ($string =~ m/^\s|\s$|^".*"$/) { $string =~ s/"/\\"/g; $string = "\"$string\""; } return $string; } # Remove quotes (if any) from an inifile value sub unquoteini ($) { my ($string) = @_; defined $string or die; # Allow quotes around strings if ($string =~ m/^".*"$/) { $string =~ s/^"//; $string =~ s/"$//; $string =~ s/\"/"/g; } return $string; } # Find the host name of this machine sub get_hostnamealias () { # Search through the host name and all aliases. # Use the first name that contains dots, indicating that this name # is a long host name that includes a domain name. # If there is no name that includes a domain name, use whatever # the system calls "host name". if (defined $options{'hostname'}) { my $hostname = $options{'hostname'}; return $hostname; } # Allow the user to override everything my $home = $ENV{'HOME'}; if (defined $home && -e "$home/.hostname") { my $hostname = read_file "$home/.hostname"; chomp $hostname; return $hostname; } # Get the system's idea of its host name my $hostname = `hostname`; chomp $hostname; # Find its host name and all aliases my ($name, $aliases) = gethostbyname ($hostname); # Use the host name as fallback my $goodname = $name ? $name : $hostname; # Split the aliases my @names = (); push (@names, $name) if $name; push (@names, split (/ /, $aliases)) if $aliases; # Search for a name that contains a dot foreach my $maybename (@names) { if ($maybename =~ /[.]/) { $goodname = $maybename; last; } } return $goodname; } # Find the I/O machine of this machine sub get_iomachine ($) { my ($machine) = @_; die if ! defined $machine; my $entry = $mdb::machine_database{$machine}; die if ! defined $entry; my $iomachine = $entry->{'iomachine'}; if (defined $iomachine) { $machine = $iomachine; die if ! exists $mdb::machine_database{$machine}; } return $machine; } # Find the trampoline, if any, of this machine sub get_trampoline ($) { my ($machine) = @_; die if ! defined $machine; die if ! exists $mdb::machine_database{$machine}; my $entry = $mdb::machine_database{$machine}; my $trampoline = $entry->{'trampoline'}; if (defined $trampoline) { die if ! exists $mdb::machine_database{$trampoline}; } return $trampoline; } # Find the ssh command to access a machine and execute a command there sub get_sshcmd ($$;$) { my ($machine, $cmd, $options) = @_; die if ! defined $machine; die if ! defined $cmd; defined $options or $options = ''; while (defined $machine) { my $entry = $mdb::machine_database{$machine}; my $sshcmd = $entry->{'sshcmd'}; my $sshopts = $entry->{'sshopts'}; my $user = $entry->{'user'}; my $hostname = $entry->{'hostname'}; if (! defined $sshcmd) { die "Entry \"sshcmd\" not defined for machine \"$machine\""; } if (! defined $sshopts) { die "Entry \"sshopts\" not defined for machine \"$machine\""; } if (! defined $user) { die "Entry \"user\" not defined for machine \"$machine\""; } if (! defined $hostname) { die "Entry \"hostname\" not defined for machine \"$machine\""; } my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; my $ssh = "$sshcmd $sshopts $options $user\@$hostname"; $ssh = replace_patterns $ssh, $substitutions; my $sshsetup = $entry->{'sshsetup'}; if (defined $sshsetup) { $cmd = "{ $sshsetup; } && $cmd"; } $cmd = quotesafe $cmd; $cmd = "$ssh $cmd"; my $trampoline = get_trampoline $machine; if (defined $trampoline) { my $trampoline_entry = $mdb::machine_database{$trampoline}; my $localsshsetup = $trampoline_entry->{'localsshsetup'}; if (defined $localsshsetup) { $localsshsetup = replace_patterns $localsshsetup, $substitutions; $cmd = "{ $localsshsetup; } && $cmd"; } } $machine = $trampoline; } return $cmd; } # Find user name sub get_username () { my $user = $ENV{'USER'}; if (! $user) { my $user = `whoami`; chomp $user; } return $user; } # Get machine name sub get_machine ($) { my ($hostname) = @_; if (! defined $hostname || $hostname eq '') { $hostname = get_hostnamealias; } my @machines; foreach my $machine (keys %mdb::machine_database) { if ("\U$hostname" eq "\U$machine") { push @machines, $machine; } else { my $entry = $mdb::machine_database{$machine}; die if ! defined $entry; my $mdb_hostname = $entry->{'hostname'}; if ("\U$hostname" eq "\U$mdb_hostname") { push @machines, $machine; } elsif ($hostname =~ m/$entry->{'aliaspattern'}/i) { push @machines, $machine; } } } if ($#machines == 0) { # Found one match return $machines[0]; } if ($#machines == -1) { # Found no match die "Unknown machine name \"$hostname\""; } else { # Found multiple matches @machines = sort @machines; die "Could not identify machine \"$hostname\" -- possible matches are @machines"; } } # Get directory suffix (without leading slash) sub get_dirsuffix ($) { my ($prefix) = @_; my $dir = getcwd; if (! defined $dir) { die "Error in getcwd -- possible file system error"; } chdir $prefix; my $real_prefix = getcwd; if (! defined $real_prefix) { die "Error in getcwd -- possible file system error"; } chdir $dir; my ($path) = $dir =~ m{^$real_prefix/(.*)$}; if (! defined $path) { die "Called from the wrong location.\n" . " Current directory is \"$dir\",\n" . " but expected a subdirectory of \"$prefix\".\n" . "It is also be possible that you need to correct your 'sourcebasedir' entry\n" . " in the mdb entry for this machine.\n"; } return $path; } # Get source base directory sub get_sourcebasedir () { my $sourcebasedir = $options{'sourcebasedir'}; if (! defined $sourcebasedir) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; $sourcebasedir = $entry->{'sourcebasedir'}; $sourcebasedir = replace_patterns $sourcebasedir, $substitutions; } # Make base directory absolute if ($sourcebasedir !~ m+^/+) { my $cwd = getcwd; if (! defined $cwd) { die "Error in getcwd -- possible file system error"; } $sourcebasedir = "$cwd/$sourcebasedir"; } check_filename 'Source base directory', $sourcebasedir; if (! -d $sourcebasedir) { die "Cannot access source base directory \"$sourcebasedir\""; } return $sourcebasedir; } # Get base directory sub get_basedir () { my $basedir = $options{'basedir'}; if (! defined $basedir) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; $basedir = $entry->{'basedir'}; $basedir = replace_patterns $basedir, $substitutions; } # Make base directory absolute if ($basedir !~ m+^/+) { my $cwd = getcwd; if (! defined $cwd) { die "Error in getcwd -- possible file system error"; } $basedir = "$cwd/$basedir"; } check_filename 'Base directory', $basedir; if (! -d $basedir) { mkdir $basedir; } if (! -d $basedir) { die "Cannot access base directory \"$basedir\""; } return $basedir; } # Get executable name and script file name sub get_executable () { #my $executable = $options{'executable'}; #if (! defined $executable) { # die "Executable name not specified"; #} my $configuration = $options{'configuration'}; if (! defined $configuration) { # die "Configuration name not specified"; # Get build options # TODO: unify this with the corresponding code in command_build my $debug = $options{'debug'}; my $optimise = $options{'optimise'}; my $unsafe = $options{'unsafe'}; my $profile = $options{'profile'}; if (! defined $debug) { # disable debugging by default $debug = 0; } if (! defined $optimise) { # enable optimisation by default, unless debugging is enabled, # then disable optimisation by default $optimise = ! $debug; } if (! defined $unsafe) { # disable unsafe options by default $unsafe = 0; } if (! defined $profile) { # disable profiling by default $profile = 0; } $configuration = 'sim' . ($debug ? '-debug' : '') . ($optimise == $debug ? ($optimise ? '-optimise' : '-nooptimise') : '') . ($unsafe ? '-unsafe' : '') . ($profile ? '-profile' : ''); print "Configuration name(s) not specified -- using default configuration \"$configuration\"\n"; } check_filename "Configuration name", $configuration; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $user = $entry->{'user'}; die if ! defined $user; my $substitutions = get_substitutions; add_define $substitutions, 'USER', $user; my $sourcebasedir = $entry->{'sourcebasedir'}; die if ! defined $sourcebasedir; $sourcebasedir = replace_patterns $sourcebasedir, $substitutions; if (! -e "$sourcebasedir") { die "Source base directory \"$sourcebasedir\" does not exist"; } my $path = get_dirsuffix $sourcebasedir; if (! -e "$sourcebasedir/$path") { die "Cactus directory \"$sourcebasedir/$path\" does not exist"; } if (! -e "$sourcebasedir/$path/configs/$configuration") { die "Configuration \"$configuration\" does not exist"; } my $executable = "$sourcebasedir/$path/exe/cactus_$configuration"; check_filename "Executable", $executable; my $scriptfile = "$sourcebasedir/$path/configs/$configuration/ScriptFile"; check_filename "Script file", $scriptfile; if (! -e $executable) { die "Configuration \"$configuration\" contains no executable"; } if (! -e $scriptfile) { die "Configuration \"$configuration\" contains no script file"; } return ($executable, $scriptfile); } # Get sim directory sub get_simpath () { my $simpath = $0; if ($simpath =~ m+/+) { # Remove slash and file name $simpath =~ s+/[^/]*$++; } else { # Use current directory $simpath = "."; } return $simpath; } # Get option list file name sub get_optionlist (;$) { my ($need_optionlist) = @_; $need_optionlist = 0 if ! defined $need_optionlist; my $optionlist = $options{'optionlist'}; if (! defined $optionlist) { if (! $need_optionlist) { # Return if no option list was provided, don't report an # error return undef; } else { # Use the default from the mdb my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $optionlist = $entry->{'optionlist'}; } } die if ! defined $optionlist; check_filename 'Option list file name', $optionlist; # Prepend a path if the file name is not absolute if ($optionlist !~ m+^/+) { my $simpath = get_simpath; my $sim_optionlist = "$simpath/optionlists/$optionlist"; if (-e $sim_optionlist) { # Prepend the sim path if that file exists $optionlist = $sim_optionlist; } } if (! -e $optionlist) { die "Option list \"$optionlist\" does not exist"; } return $optionlist; } # Get script file name sub get_scriptfile (;$) { my ($need_scriptfile) = @_; $need_scriptfile = 0 if ! defined $need_scriptfile; my $scriptfile = $options{'scriptfile'}; if (! defined $scriptfile) { if (! $need_scriptfile) { # Return if no script file was provided, don't report an # error return undef; } else { # Use the default from the mdb my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $scriptfile = $entry->{'scriptfile'}; } } die if ! defined $scriptfile; check_filename 'Script file name', $scriptfile; # Prepend a path if the file name is not absolute if ($scriptfile !~ m+^/+) { my $simpath = get_simpath; my $sim_scriptfile = "$simpath/scriptfiles/$scriptfile"; if (-e $sim_scriptfile) { # Prepend the sim path if that file exists $scriptfile = $sim_scriptfile; } } if (! -e $scriptfile) { die "Script file \"$scriptfile\" does not exist"; } return $scriptfile; } # Get thorn list file name sub get_thornlist (;$) { my ($need_thornlist) = @_; $need_thornlist = 0 if ! defined $need_thornlist; my $thornlist = $options{'thornlist'}; if (! defined $thornlist) { if (! $need_thornlist) { # Return if no thorn list was provided, don't report an # error return undef; } else { # Use the default from the mdb my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $thornlist = $entry->{'thornlist'}; if (! defined $thornlist) { $thornlist = $cdb::configuration_database{'thornlist'} } } } die "No thornlist specified" if ! defined $thornlist; check_filename 'Thorn list file name', $thornlist; # Prepend a path if the file name is not absolute if ($thornlist !~ m+^/+) { my $simpath = get_simpath; my $sim_thornlist = "$simpath/thornlists/$thornlist"; if (-e $sim_thornlist) { # Prepend the sim path if that file exists $thornlist = $sim_thornlist; } } if (! -e $thornlist) { die "Thorn list \"$thornlist\" does not exist"; } return $thornlist; } # Read a thorn list sub get_thornlistlines ($) { my ($thornlist) = @_; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $substitutions = get_substitutions; # Comment out all thorns that are not supported on this machine my $disabled_thorns = $entry->{'disabled-thorns'}; if (defined $disabled_thorns) { for my $thorn (split ' ', $disabled_thorns) { add_substitution $substitutions, '^\s*('.$thorn.')\b', '# @1@', } } my $thornlistlines = read_file $thornlist; $thornlistlines = replace_patterns $thornlistlines, $substitutions; return $thornlistlines; } # Read a parameter file sub get_parfilelines ($) { my ($parfile) = @_; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $substitutions = get_substitutions; # Implement the #@THORN@ syntax in parameter files. This pattern # will be removed unless THORN is deactivated. my $disabled_thorns = $entry->{'disabled-thorns'}; if (defined $disabled_thorns) { my @thorns = split ' ', $disabled_thorns; map { s{.*/}{}; } @thorns; my $pattern = '#@(?!(' . (join '|', @thorns) . ')@)\w+@'; add_substitution $substitutions, $pattern, ''; } my $parfilelines = read_file $parfile; $parfilelines = replace_patterns $parfilelines, $substitutions; return $parfilelines; } # Get comment file name sub get_commentfile () { my $commentfile = $options{'commentfile'}; return if ! defined $commentfile; check_filename 'Comment file name', $commentfile; if (! -e $commentfile) { die "Comment file \"$commentfile\" does not exist"; } return $commentfile; } # Get list of additional arguments for calling other commands sub get_arguments () { my $argument = $options{'argument'}; my @arguments = defined $argument ? @{$argument} : (); return @arguments; } # Get list of additional definitions and replacements for variable # substitutions sub get_substitutions () { my $patterns = {}; # Definitions (of variables) { my $defines = $cdb::configuration_database{'defines'}; foreach my $id (keys %$defines) { my $definition = $defines->{$id}; die unless defined $definition; add_define $patterns, $id, $definition; } } { my $defines = get_option %options, 'define', {}; foreach my $id (keys %$defines) { my $definition = $defines->{$id}; die unless defined $definition; add_define $patterns, $id, $definition; } } # Substitutions (of patterns) { my $substitutions = $cdb::configuration_database{'substitutions'}; foreach my $id (keys %$substitutions) { my $definition = $substitutions->{$id}; die unless defined $definition; add_substitution $patterns, $id, $definition; } } { my $substitutions = get_option %options, 'substitute', {}; foreach my $id (keys %$substitutions) { my $definition = $substitutions->{$id}; die unless defined $definition; add_substitution $patterns, $id, $definition; } } # Replacements (of key-value definitions) { my $replacements = $cdb::configuration_database{'replacements'}; foreach my $id (keys %$replacements) { my $definition = $replacements->{$id}; die unless defined $definition; add_replacement $patterns, $id, $definition } } { my $replacements = get_option %options, 'replace', {}; foreach my $id (keys %$replacements) { my $definition = $replacements->{$id}; die unless defined $definition; add_replacement $patterns, $id, $definition } } # Attachments (to key-value definitions) { my $attachments = $cdb::configuration_database{'attachments'}; foreach my $id (keys %$attachments) { my $definition = $attachments->{$id}; die unless defined $definition; add_attachment $patterns, $id, $definition } } { my $attachments = get_option %options, 'attach', {}; foreach my $id (keys %$attachments) { my $definition = $attachments->{$id}; die unless defined $definition; add_attachment $patterns, $id, $definition } } return $patterns; } # Add additional define # @VAR@ = VALUE sub add_define ($$$) { my ($patterns, $key, $value) = @_; #return if ! defined $value; my $pattern = '@'.$key.'@'; die if exists $patterns->{$pattern}; #if (! defined $value) { # die "Value for key \"$key\" is not defined"; #} $patterns->{$pattern} = $value; } # Add additional substitution # PATTERN = VALUE sub add_substitution ($$$) { my ($patterns, $key, $value) = @_; #return if ! defined $value; my $pattern = $key; if (exists $patterns->{$pattern}) { print "Current keys:\n"; foreach my $key (sort keys %$patterns) { my $value = $patterns->{$key}; print " \"$key\" -> \"$value\"\n"; } die "Key \"$key\" exists already"; } die if exists $patterns->{$pattern}; #if (! defined $value) { # die "Value for key \"$key\" is not defined"; #} $patterns->{$pattern} = $value; } # Add additional replacement # KEY = NEWVALUE sub add_replacement ($$$) { my ($patterns, $key, $value) = @_; # Don't keep comments; comments in strings are difficult to handle my $pattern = '^\s*'.$key.'\s*=.*$'; die if exists $patterns->{$pattern}; # Ensure that each replacement matches only once $patterns->{$pattern} = "=SIMFACTORY-PROTECTED=${key} = ${value}"; } # Add additional attachment # KEY = OLDVALUE NEWVALUE sub add_attachment ($$$) { my ($patterns, $key, $value) = @_; # This does not work with comments my $pattern = '^\s*'.$key.'\s*=\s*(.*?)\s*$'; die if exists $patterns->{$pattern}; # Ensure that each attachment matches only once $patterns->{$pattern} = "=SIMFACTORY-PROTECTED=${key} = \@1\@ ${value}"; } # Get job submission command # Get job submission pattern which extracts the job id from stdout sub get_submitcommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $submitcommand = $entry->{'submit'}; my $submitpattern = $entry->{'submitpattern'}; return ($submitcommand, $submitpattern); } # Get job run commands sub get_runcommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $runcommand = $entry->{'run'}; # TODO: modify mdb description to make this mandatory die if ! defined $runcommand; my $run2command = $entry->{'run2'}; # run2 may not exist return ($runcommand, $run2command); } # Get job status command # Get job status pattern which extracts the job status from stdout sub get_statuscommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $statuscommand = $entry->{'getstatus'}; my $statuspattern = $entry->{'statuspattern'}; my $runningpattern = $entry->{'runningpattern'}; my $queuedpattern = $entry->{'queuedpattern'}; return ($statuscommand, $statuspattern, $runningpattern, $queuedpattern); } # Get job exechost commands # Get job exechost pattern which extracts the exechost from stdout sub get_exechostcommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $exechostcommand = $entry->{'exechost'}; my $exechostpattern = $entry->{'exechostpattern'}; return ($exechostcommand, $exechostpattern); } # Get job show-output commands sub get_showoutputcommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $stdoutcommand = $entry->{'stdout'}; my $stderrcommand = $entry->{'stderr'}; my $stdoutfollowcommand = $entry->{'stdout-follow'}; return ($stdoutcommand, $stderrcommand, $stdoutfollowcommand); } # Get job stop command sub get_stopcommand () { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $stopcommand = $entry->{'stop'}; return $stopcommand; } # Get restart id sub get_restart_id () { my $restart_id = $options{'restart-id'}; if (defined $restart_id) { if ($restart_id < 0 || $restart_id >= 10000) { die "Illegal restart id \"$restart_id\""; } } return $restart_id; } # Get from id sub get_from_restart_id () { my $restart_id = $options{'from-restart-id'}; if (defined $restart_id) { if ($restart_id < 0 || $restart_id >= 10000) { die "Illegal from id \"$restart_id\""; } } return $restart_id; } # Get maximum amount of memory on node (in MByte) sub get_memory () { my $memory = $options{'memory'}; if (! defined $memory) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $memory = $entry->{'memory'}; } return $memory; } # Get CPU frequency (in GHz) sub get_cpufreq () { my $cpufreq = $options{'cpufreq'}; if (! defined $cpufreq) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $cpufreq = $entry->{'cpufreq'}; } return $cpufreq; } # Get allocation sub get_allocation ($) { my ($old_allocation) = @_; my $allocation = $options{'allocation'}; if (! defined $allocation) { $allocation = $old_allocation; } if (! defined $allocation) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $allocation = $entry->{'allocation'}; } #if (! defined $allocation) { # die "Allocation not defined"; #} return $allocation; } # Get queue sub get_queue ($) { my ($old_queue) = @_; my $queue = $options{'queue'}; if (! defined $queue) { $queue = $old_queue; } if (! defined $queue) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $queue = $entry->{'queue'}; } #if (! defined $queue) { # die "Queue not defined"; #} return $queue; } # Get number of processes sub get_procs ($$$$$) { my ($old_procs, $old_ppn, $old_ppn_used, $old_num_threads, $procs_arg) = @_; my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; my $maxnodes = $entry->{'nodes'}; my $minppn = $entry->{'min-ppn'}; my $maxppn = $entry->{'ppn'}; # Total number of processors my $procs = $procs_arg; if (! defined $procs) { $procs = get_option %options, 'procs', $old_procs; } if (! defined $procs) { die "Number of processors not specified"; } if ($procs < 1) { die "Illegal number of processors specified"; } # Number of (requested) processors per node my $ppn = get_option %options, 'ppn', $old_ppn; if (! defined $ppn) { # default $ppn = $maxppn; $ppn = $procs if $ppn > $procs; } if (defined $minppn && ($ppn < $minppn || $ppn > $maxppn)) { die "Illegal number of requested processors per node specified: specified ppn=$ppn (min-ppn is $minppn, max-ppn is $maxppn)"; } # Number of used processors per node my $ppn_used = get_option %options, 'ppn-used', $old_ppn_used; if (! defined $ppn_used) { $ppn_used = $ppn; } if ($ppn_used < 1) { die "Illegal number of used processors per node specified: specified ppn-used=$ppn_used"; } if ($ppn_used > $ppn) { # Oversubscription warn "WARNING: Too many used processors per node specified: specified ppn-used=$ppn_used (ppn is $ppn)"; } # Number of (requested) threads per process my $num_threads = get_option %options, 'num-threads', $old_num_threads; if (! defined $num_threads) { # default # $num_threads = $entry->{'num-threads'}; $num_threads = 1; } if ($num_threads < 1) { die "Illegal number of threads per process specified: specified num-threads=$num_threads"; } if ($num_threads > $procs) { # Oversubscription warn "WARNING: Too many threads per process specified: specified num-threads=$num_threads (ppn-used is $procs)"; } # Check consistency # [Allow more threads than used processors] #if ($ppn_used % $num_threads != 0) { # die "Number of used processors per node and number of threads are inconsistent: ppn-used=$ppn_used, num-threads=$num_threads (ppn-used must be an integer multiple of num-threads)"; #} # [Allow more threads than requested processors] #if ($procs % $num_threads != 0) { # die "Number of processes and number of threads per process are inconsistent: procs=$procs, num-threads=$num_threads (procs must be an integer multiple of num-threads)"; #} # [Allow "uneven" numbers of processors; some processors will be idle] #if ($procs % $ppn_used != 0) { # die "Number of processes and number of processors per node are inconsistent: procs=$procs-used, ppn=$ppn_used (procs must be an integer multiple of ppn-used)"; #} my $num_procs = $procs / $num_threads; if ($num_procs < 1) { $num_procs=1; } my $nodes = int (($procs + $ppn_used - 1) / $ppn_used); # my $nodes = $procs / $ppn_used; if ($nodes > $maxnodes) { die "Too many nodes specified: nodes=$nodes (maxnodes is $maxnodes)"; } my $procs_requested = $nodes * $ppn; return ($nodes, $ppn_used, $procs, $ppn, $procs_requested, $num_procs, $num_threads); } # Get wall time # Format is HH:MM:SS sub get_walltime ($$) { my ($old_walltime, $walltime_arg) = @_; my $walltime = (defined $walltime_arg ? $walltime_arg : $options{'walltime'}); if (! defined $walltime) { $walltime = $old_walltime; } if (! defined $walltime) { my $machine = get_machine ''; my $entry = $mdb::machine_database{$machine}; $walltime = $entry->{'maxwalltime'}; } if (! defined $walltime) { $walltime = '8760:00:00'; # one year warn "Wall time not specified; using $walltime instead"; } my ($walltime_hh, $walltime_mm, $walltime_ss) = $walltime =~ /([0-9]+)(?::([0-9]+)(?::([0-9]+))?)?/; if (! defined $walltime_hh) { die "Wall time has incorrect format: expected HH[:MM[:SS]]"; } if (! defined $walltime_mm) { $walltime_mm = 0; } if (! defined $walltime_ss) { $walltime_ss = 0; } $walltime_mm = sprintf "%02d", $walltime_mm; $walltime_ss = sprintf "%02d", $walltime_ss; $walltime = sprintf "%d:%02d:%02d", $walltime_hh, $walltime_mm, $walltime_ss; my $walltime_seconds = $walltime_hh * 3600 + $walltime_mm * 60 + $walltime_ss; my $walltime_minutes = $walltime_seconds / 60; my $walltime_hours = $walltime_seconds / 3600.0; return ($walltime, $walltime_hh, $walltime_mm, $walltime_ss, $walltime_seconds, $walltime_minutes, $walltime_hours); } # Get option list version # Input: One string containing all lines of an option file sub get_version ($) { my ($options) = @_; # Match line containing the version information $options =~ /^(\s*VERSION(\s|=).*)$/m; my $version = $1; if (! defined $version) { return $version; } $version =~ s/#.*$//; # remove comment, if any $version =~ s/\s*$//; # remove all trailing white space $version =~ s/^\s*VERSION(\s*=\s*|\s+)//; # remove key return $version } # Get information from Formaline sub get_formaline_info ($) { my ($workdir) = @_; my $jarfile = `find $workdir -name formaline-jar.txt`; chomp $jarfile; return if $jarfile eq ''; my $output = read_file $jarfile; my $run_date = get_last_match $output, '^run_date="(.*)"$'; my $run_time = get_last_match $output, '^run_time="(.*)"$'; my $cctk_iteration = get_last_match $output, '^cctk_iteration=(.*)$'; my $cctk_time = get_last_match $output, '^cctk_time=(.*)$'; return ($run_date, $run_time, $cctk_iteration, $cctk_time); } # Return the last match in a multi-line string sub get_last_match ($$) { my ($string, $pattern) = @_; my @matches = $string =~ /$pattern/mg; return if $#matches == -1; return $matches[$#matches]; } # Given a job_id string, return the string Q, R or U for the job being # queued, running or unknown. Unknown can mean it has never been # queued or that it has finished. sub get_job_status (@) { my ($job_id) = @_; my $user = get_username; my ($statuscommand, $statuspattern, $runningpattern, $queuedpattern) = get_statuscommand; my $substitutions = get_substitutions; add_define $substitutions, 'JOB_ID', $job_id; add_define $substitutions, 'USER' , $user; $statuscommand = replace_patterns $statuscommand, $substitutions; $statuspattern = replace_patterns $statuspattern, $substitutions; $runningpattern = replace_patterns $runningpattern, $substitutions; $queuedpattern = replace_patterns $queuedpattern, $substitutions; # We only actually look at the output. In future, we could be # more clever, parsing the error message to make sure that it says # "job not found", or throwing the error back to the user if it # says something else my ($output, $stderr, $retcode) = subcommand($statuscommand); my ($job_state) = ""; # If running or queued if ($output =~ /$statuspattern/m) { my $job_is_queued = $output =~ /$queuedpattern/m; my $job_is_running = $output =~ /$runningpattern/m; $job_state = $job_is_queued ? 'Q' : $job_is_running ? 'R' : 'U'; } else { $job_state ='U'; } return ($job_state); } # Access a hash element sub get_option (\%$;$) { my ($hash, $key, $default) = @_; my $value = $hash->{$key}; if (! defined $value) { #if (! defined $default) { # die "Key \"$key\" not found"; #} $value = $default; } return $value; }