#!/usr/bin/perl
#
# script to analyze E5 root files on the Richmond cluster. it
# copies data to a slave node and can leave the data there if you
# desire. it does NOT use beomap to allocate node numbers; it
# just picks them in numerical order and skips any down nodes.
# - gpg
#
# run this by executing 'submit_eod3c.pl' at the
# command line.
#
# inputs:  E5_run_numbers.inp in the current directory, one run number
#          per line (blank lines are ignored).
# outputs: a one-line batch command file 'run_job' in the current
#          directory, rewritten and submitted once per run number.
#
use strict;
use warnings;

#
# do housekeeping on the master. these go through the shell on purpose
# so that the wildcards get expanded.
#
system("rm /home/gilfoyle/eod/run/results/*");
system("rm /scratch/gilfoyle/e5/eod*");
system("rm -r /scratch/gilfoyle/e5/24*");

#
# read the file with the run numbers and read the numbers into an array.
#
my $run_numbers_file = "E5_run_numbers.inp";
open(my $runs_fh, '<', $run_numbers_file)
    or die "Failed to open run numbers file.\n";
my @runno = <$runs_fh>;
close($runs_fh);

#
# stuff for picking the static slave node.
#
my $number_of_nodes = 49;
my $first_node      = 0;

#
# list of down nodes, turned into a lookup table so that a node can be
# tested with a single hash access.
#
my @dead_nodes = (30, 36);
my %is_dead    = map { $_ => 1 } @dead_nodes;

#
# refuse to start if every node is marked dead; otherwise the skip loop
# below would never terminate.
#
my $live_nodes = grep { !$is_dead{$_} } 0 .. $number_of_nodes - 1;
die "No live nodes available.\n" if $live_nodes == 0;

#
# initialize parameters. if $use_existing_data is NOT zero, then you have
# to be sure the run number and the slave node are correctly matched.
#
my $delete_data       = 0;
my $use_existing_data = 1;
my $job               = 0;
my $skipped_nodes     = 0;

#
# get the run number and select the slave node.
#
foreach my $line (@runno) {
    chomp($line);

    # a blank line would otherwise submit a job with an empty run number.
    next if $line =~ /^\s*$/;

    #
    # get the node number and be careful about skipping the bad ones.
    # keep stepping (with wrap-around) until a live node is found, so that
    # adjacent dead nodes and a dead last node are handled too.
    #
    my $node = ($first_node + $job + $skipped_nodes) % $number_of_nodes;
    while ($is_dead{$node}) {
        print("Skipping node $node.\n");
        $skipped_nodes++;
        $node = ($first_node + $job + $skipped_nodes) % $number_of_nodes;
    }
    print "\n Selected slave node $node.\n";

    #
    # get the batch command file ready to submit the job and let're rip.
    # clean up is done in run_root_on_node.pl since we have to wait for
    # root to get done on the slave.
    #
    my $command = "./run_root_on_node.pl $node $line $delete_data $use_existing_data \n";
    open(my $out_fh, '>', 'run_job')
        or die "Failed to open run_job for writing: $!\n";
    print $out_fh $command;
    print $command;
    close($out_fh)
        or die "Failed to write run_job: $!\n";

    if (system("batch -q a -f run_job") != 0) {
        warn "batch submission failed for run $line on node $node.\n";
    }

    #
    # give the job time to get going before run_job is overwritten for the
    # next one; copying data to the slave needs a much longer head start.
    #
    if ($use_existing_data == 1) {
        print "Sleep for 15 seconds.\n";
        sleep 15;
    }
    else {
        print "Sleep for 120 seconds.\n";
        sleep 120;
    }
    print "Submit on node $node run $line for analysis.\n";

    #
    # go to the next job.
    #
    $job++;
}
The script below (run_root_on_node.pl) is called by the one above to actually run the jobs on a node.
#!/usr/bin/perl
#
# script for running on a slave node of the cluster.
# called from submit_eod3d.pl and submit_eod3c.pl.
# - gpg 12/03/02
#
# usage: run_root_on_node.pl NODE RUNNO [DELETE_DATA [USE_EXISTING_DATA]]
#
#   NODE               slave node number handed to bpsh/bpcp.
#   RUNNO              run number; selects the file list and the scratch
#                      directory.
#   DELETE_DATA        1 = remove the scratch directory on the slave when
#                      the job is done (default 0).
#   USE_EXISTING_DATA  0 = copy the data files to the slave first; any
#                      other number = use what is already there
#                      (default 0).
#
use strict;
use warnings;

# set up the environment including the node and run
# number assignments that come from the arguments of the
# script.
@ARGV >= 2
    or die "usage: $0 NODE RUNNO [DELETE_DATA [USE_EXISTING_DATA]]\n";

my ($NODE, $RUNNO, $delete_data, $use_existing_data) = @ARGV;
$delete_data       = 0 unless defined $delete_data;
$use_existing_data = 0 unless defined $use_existing_data;

# the node and run number end up inside shell commands (including an
# 'rm -r'), so refuse anything that is not a plain token.
$NODE =~ /\A-?\d+\z/
    or die "Bad node number '$NODE'.\n";
$RUNNO =~ /\A\w+\z/
    or die "Bad run number '$RUNNO'.\n";
for my $flag ($delete_data, $use_existing_data) {
    $flag =~ /\A\d+\z/
        or die "Bad flag '$flag'; expected a non-negative integer.\n";
}

# data set tag. the file list directory and the root file directory must
# always be switched together; other tags that have been used here are
# 2.558n and 4.232.
my $dataset  = "2.558r";
my $RUNFILE  = "/home/gilfoyle/eod/run/files_$dataset/run${RUNNO}_files.dat";
my $data_dir = "/data2/e5/root/$dataset";

my $scratch_root = "/scratch/gilfoyle/e5";
my $workdir      = "$scratch_root/$RUNNO";
my $log          = "$scratch_root/eod3_log${NODE}_run${RUNNO}";
my $root_dir     = "/home/gilfoyle/eod/root";
my $run_dir      = "/home/gilfoyle/eod/run";

# run a shell command with stdout and stderr appended to the log file.
# a failure is reported but does not stop the script.
sub run_logged {
    my ($command) = @_;
    my $status = system("$command >> $log 2>&1");
    warn "Command failed (status $status): $command\n" if $status != 0;
    return $status;
}

# run a command without the shell (no redirection, no word splitting of
# the arguments). a failure is reported but does not stop the script.
sub run_direct {
    my @command = @_;
    my $status = system(@command);
    warn "Command failed (status $status): @command\n" if $status != 0;
    return $status;
}

# 'setenv' run through system() only changes a child process that exits
# right away, so set the variables in this process instead; the commands
# started below inherit them.
$ENV{ROOTSYS}  = "/usr/root/PRO";
$ENV{NO_LOCAL} = 1;

# the following statement was commented out early on because the CLAS
# software was not ready and it seems we do not need these environment
# variables.
#system("source /home/clas/builds/PRODUCTION/packages/cms/rich.cshrc PRODUCTION");

# Make a working directory and go to it
# NOTE: Make this on the master as well as a place holder
# no actual data goes to the placeholder on the master.
# the first command uses '>|' so that it starts a fresh log file.
if (system("bpsh $NODE mkdir -p $workdir/ >| $log 2>&1") != 0) {
    warn "Failed to make $workdir/ on node $NODE.\n";
}
run_logged("mkdir -p $workdir/");

# move the data from the raid disk to the slave node unless you're using files
# that are already on the slave.
if ($use_existing_data == 0) {

    # get the data filenames.
    run_logged("date");
    run_logged("echo 'Get the data.'");
    print "\nrun files: $RUNFILE, run number: $RUNNO.\n";
    open(my $files_fh, '<', $RUNFILE)
        or die "Failed to open filename file.\n";
    my @filenames = <$files_fh>;
    close($files_fh);

    # copy the data to the slave directory made above. do NOT use bpcp
    # because it apparently copies the data twice according to steven james.
    foreach my $data_filename (@filenames) {
        chomp($data_filename);
        next if $data_filename =~ /^\s*$/;

        my $source = "$data_dir/$data_filename";
        my $target = "$workdir/$data_filename";
        print "copy $source to ${NODE}:$target.\n";
        run_direct("bpsh", $NODE, "cp", $source, $target);
    }
}

# copy other necessary files to slave directory. these files are small so
# bpcp is not a bottleneck (see previous comment).
run_direct("bpcp", $RUNFILE, "${NODE}:$workdir/data_filenames.dat");
foreach my $macro_file ("eod3.C", "eod3.h", "user_eod3.h", "run_eod3.C") {
    run_direct("bpcp", "$root_dir/$macro_file", "${NODE}:$workdir/");
}

# More housekeeping and monitoring information.
chdir("$workdir/")
    or warn "Failed to change directory to $workdir/: $!\n";
run_logged("date");
run_logged("pwd");

# run my root.
run_logged("echo 'run my root.'");
run_logged("bpsh $NODE $root_dir/gpgroot -b -q $workdir/run_eod3.C");
run_logged("date");

# Copy histo file back to the master and clean up the slave directory.
# Do this here instead of in submit_eod3c.pl so that root is done before
# we start cleaning up.
run_direct("bpcp", "${NODE}:$workdir/eod3_hists.root",
           "$run_dir/results/run${RUNNO}_results.root");
chdir("$run_dir/")
    or warn "Failed to change directory to $run_dir/: $!\n";

if ($delete_data == 1) {
    run_direct("bpsh", $NODE, "rm", "-r", "$workdir/");
    print("Deleting files on slave node.\n");
}