#!/usr/bin/perl
#
# script to analyze E5 root files on the Richmond cluster. it 
# copies data to a slave node and can leave the data there if you
# desire. it does NOT use beomap to allocate node numbers; it
# just picks them in numerical order and skips any down nodes.
#                                          - gpg
#
# run this by executing 'submit_eod3c.pl' at the 
# command line.
#
# do housekeeping on the master: remove the results of the previous
# submission, the files matching eod* in /scratch/gilfoyle/e5 and the
# directories matching 24* there (presumably the old logs and the per-run
# directories - confirm before changing the patterns).
#
system("rm /home/gilfoyle/eod/run/results/*");
system("rm /scratch/gilfoyle/e5/eod*");
system("rm -r /scratch/gilfoyle/e5/24*");
#
# read the file with the run numbers and read the numbers into an array,
# one line of the file per element. the trailing newlines are still
# attached here; they are removed when the array is used.
#
$run_numbers_file="E5_run_numbers.inp";
open(my $runs_fh, "<", $run_numbers_file)
    || die "Failed to open run numbers file $run_numbers_file: $!\n";
@runno=<$runs_fh>;
close($runs_fh);
#
# settings for picking the static slave node: the first job goes to
# $first_node and node numbers are taken modulo $number_of_nodes.
#
($first_node, $number_of_nodes) = (0, 49);
#
# nodes that are down and must not be given a job, and how many there are.
#
@dead_nodes = (30, 36);
$number_of_dead_nodes = scalar(@dead_nodes);
#
# flags passed on to the per-node script. if $use_existing_data is NOT
# zero, then you have to be sure the run number and the slave node are
# correctly matched.
#
($delete_data, $use_existing_data) = (0, 1);
#
# running counters: jobs submitted so far and dead nodes skipped so far.
#
$job = $skipped_nodes = 0;
#
# get the run number and select the slave node.
#
# build a lookup of the down nodes so the test below does not depend on
# the order of @dead_nodes, and refuse to start if every node is down
# (otherwise the skip loop would never end).
#
my %is_dead_node = map { $_ => 1 } @dead_nodes;
my @live_nodes = grep { !$is_dead_node{$_} } (0 .. $number_of_nodes - 1);
die "No live nodes among the $number_of_nodes configured nodes.\n" unless @live_nodes;

foreach $line (@runno) {
    chomp($line);
#
# ignore blank lines in the run numbers file; submitting one would start
# a job with an empty run number.
#
    next unless $line =~ /\S/;
#
# get the node number and be careful about skipping the bad ones. the
# node is always recomputed modulo $number_of_nodes, so skipping a dead
# node at the top of the range wraps around to node 0 instead of running
# off the end.
#
    $node=($first_node+$job+$skipped_nodes) % $number_of_nodes;
    while ($is_dead_node{$node}) {
        print("Skipping node $node.\n");
        $skipped_nodes++;
        $node=($first_node+$job+$skipped_nodes) % $number_of_nodes;
    }
    print "\n Selected slave node $node.\n";
#
# get the batch command file ready to submit the job and let 'er rip.
# clean up is done in run_root_on_node.pl since we have to wait for
# root to get done on the slave. stop if the command file cannot be
# written, because batch would otherwise resubmit whatever run_job was
# left over from the previous job.
#
    open(OUT,">run_job") || die "Failed to open run_job for writing: $!\n";
    print OUT "./run_root_on_node.pl $node $line $delete_data $use_existing_data \n";
    print     "./run_root_on_node.pl $node $line $delete_data $use_existing_data \n";
    close(OUT) || die "Failed to write run_job: $!\n";
    system("batch -q a -f run_job") == 0
        or warn "batch submission failed for run $line (status $?).\n";
#
# pause between submissions; the pause is longer when the job has to copy
# its data first.
#
    if ($use_existing_data == 1) {
        print "Sleep for 15 seconds.\n";
        sleep 15;
    }
    else {
        print "Sleep for 120 seconds.\n";
        sleep 120;
    }
    print "Submit on node $node run $line for analysis.\n";
#
# go to the next job.
#
    $job++;
}


Script (run_root_on_node.pl) called by the one above to actually run jobs on a node.


#!/usr/bin/perl
# 
# script for running on a slave node of the cluster.
# called from submit_eod3d.pl and submit_eod3c.pl. 
#                                      - gpg 12/03/02
#
# set up the environment including the node and run 
# number assignments that come from the arguments of the
# script.
#
# usage: run_root_on_node.pl node run_number [delete_data [use_existing_data]]
#
#   node               slave node number handed to bpsh/bpcp.
#   run_number         run to analyze; names the working directory and log.
#   delete_data        1 = remove the slave directory when done.
#   use_existing_data  0 = copy the data files to the slave first.

@option  = @ARGV;
$options = @option;

# the node and run number are built into every path and command below, so
# refuse to run without them.

die "Usage: $0 node run_number [delete_data [use_existing_data]]\n"
    unless $options >= 2 && length($option[0]) && length($option[1]);

$NODE =  $option[0];
$RUNNO =  $option[1];

# a missing flag is treated as 0, which is how the numeric tests below
# already behaved for an undefined value.

$delete_data =  defined($option[2]) ? $option[2] : 0;
$use_existing_data =  defined($option[3]) ? $option[3] : 0;
$RUNFILE = "/home/gilfoyle/eod/run/files_2.558r/run$RUNNO\_files.dat";
#$RUNFILE = "/home/gilfoyle/eod/run/files_2.558n/run$RUNNO\_files.dat";
#$RUNFILE = "/home/gilfoyle/eod/run/files_4.232/run$RUNNO\_files.dat";

# set the variables in %ENV so that the commands started with system()
# later in this script inherit them. running 'setenv' through system()
# cannot do this: the command runs in a separate child shell, and any
# change it makes is gone when that shell exits.

$ENV{ROOTSYS} = "/usr/root/PRO";
$ENV{NO_LOCAL} = 1;

# the following statement was commented out early on because the CLAS
# software was not ready and it seems we do not need these environment 
# variables.

#system("source /home/clas/builds/PRODUCTION/packages/cms/rich.cshrc PRODUCTION");

# Make a working directory and go to it
# NOTE: Make this on the master as well as a place holder
# no actual data goes to the placeholder on the master.
# The first command starts a fresh log file (>|); everything after it
# appends to that log.

system("bpsh $NODE mkdir -p /scratch/gilfoyle/e5/$RUNNO/ >| /scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO} 2>&1");
system("mkdir -p /scratch/gilfoyle/e5/$RUNNO/ >> /scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO} 2>&1");

# use chdir so that this script itself moves to the directory; a 'cd' run
# through system() only changes the directory of a short-lived child shell.

chdir("/scratch/gilfoyle/e5/$RUNNO/")
    || warn "Could not chdir to /scratch/gilfoyle/e5/$RUNNO/: $!\n";

# move the data from the raid disk to the slave node unless you're using files
# that are already on the slave.

if ($use_existing_data == 0) {

# get the data filenames, one per line of $RUNFILE.

    system("date >> /scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO} 2>&1");
    system("echo 'Get the data.'>> /scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO} 2>&1");
    print "\nrun files: $RUNFILE, run number: $RUNNO.\n";
    open(my $files_fh, "<", $RUNFILE)
        || die "Failed to open filename file $RUNFILE: $!\n";
    @filenames=<$files_fh>;
    close($files_fh);

# directory on the raid disk that holds the data files. keep this in step
# with the data set chosen for $RUNFILE.

    my $data_source_dir = "/data2/e5/root/2.558r";
#   my $data_source_dir = "/data2/e5/root/2.558n";
#   my $data_source_dir = "/data2/e5/root/4.232";

# make the slave directory and copy the data to it. do NOT use bpcp because it
# apparently copies the data twice according to steven james.

    system("bpsh $NODE mkdir -p /scratch/gilfoyle/e5/$RUNNO/");
    foreach $data_filename (@filenames) {
        chomp($data_filename);

# skip blank lines; an empty name would turn the copy into a copy of the
# whole source directory, which cp refuses.

        next unless $data_filename =~ /\S/;
        print "copy $data_source_dir/$data_filename to $NODE:/scratch/gilfoyle/e5/$RUNNO/$data_filename.\n";
        system("bpsh $NODE cp $data_source_dir/$data_filename /scratch/gilfoyle/e5/$RUNNO/$data_filename") == 0
            or warn "Copy of $data_filename to node $NODE failed (status $?).\n";
    }
}

# send the small support files to the run directory on the slave: the list
# of data filenames first, then the root macro sources. these files are
# small so bpcp is not a bottleneck (see previous comment).

my $slave_run_dir = "$NODE:/scratch/gilfoyle/e5/$RUNNO";
system("bpcp $RUNFILE $slave_run_dir/data_filenames.dat");
foreach my $root_source (qw(eod3.C eod3.h user_eod3.h run_eod3.C)) {
    system("bpcp /home/gilfoyle/eod/root/$root_source $slave_run_dir/");
}
 
# More housekeeping and monitoring information.
# Move this script to the run directory on the master with chdir (a 'cd'
# run through system() would only affect a child shell), then record the
# time and the working directory in the log.

my $eod3_run_dir = "/scratch/gilfoyle/e5/$RUNNO/";
my $eod3_log = "/scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO}";

chdir($eod3_run_dir) || warn "Could not chdir to $eod3_run_dir: $!\n";
#system("rm /scratch/gilfoyle/e5/eod3_log${NODE}_run${RUNNO}");
system("date >> $eod3_log 2>&1");
system("pwd  >>  $eod3_log 2>&1");

# run my root on the slave in batch mode; its output goes to the log. a
# non-zero exit status is reported but does not stop the script, so the
# steps that follow still run.

system("echo 'run my root.'>> $eod3_log 2>&1");
system("bpsh $NODE /home/gilfoyle/eod/root/gpgroot -b -q /scratch/gilfoyle/e5/$RUNNO/run_eod3.C >> $eod3_log 2>&1") == 0
    or warn "root on node $NODE for run $RUNNO exited with status $?.\n";
system("date >> $eod3_log 2>&1");

# Copy histo file back to the master and clean up the slave directory.
# Do this here instead of in submit_eod3c.pl so that root is done before
# we start cleaning up.

my $results_copy_status = system("bpcp ${NODE}:/scratch/gilfoyle/e5/${RUNNO}/eod3_hists.root /home/gilfoyle/eod/run/results/run${RUNNO}_results.root");
if ($results_copy_status != 0) {
    warn "Copy of eod3_hists.root from node $NODE for run $RUNNO failed (status $results_copy_status).\n";
}
chdir("/home/gilfoyle/eod/run/")
    || warn "Could not chdir to /home/gilfoyle/eod/run/: $!\n";
if ($delete_data == 1) {
    if ($results_copy_status != 0) {

# the histogram file on the slave is the only copy at this point, so keep
# the directory when it could not be brought back to the master.

        warn "Leaving files on slave node $NODE so the results can be recovered.\n";
    }
    elsif (!defined($RUNNO) || $RUNNO !~ /\S/) {

# with an empty run number the path below would be /scratch/gilfoyle/e5/
# itself, i.e. every run on the node.

        warn "Run number is empty; not deleting anything on slave node $NODE.\n";
    }
    else {
        system("bpsh ${NODE} rm -r /scratch/gilfoyle/e5/$RUNNO/");
        print("Deleting files on slave node.\n");
    }
}