Changeset 27621


Ignore:
Timestamp:
06/13/13 20:55:14 (8 years ago)
Author:
ak19
Message:

Ported most of the task.sh functionality across to task.pl. The email and attachment functions still need to be added, as also the main function. And there's also some issue with collection subfolders not getting deleted during a build which results in unnecessary differences between the model and test collections.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/task.pl

    r27556 r27621  
    1 #!/usr/bin/perl -w
    2 
    3 
    4 # Invoke as: sjmc@br:/research/sjm84/envi/bin$ ./envi diffcol summarise
    5 # Doing so will call this pl file and pass in "summarise" in ARGV
    6 # This pl file will in turn call the task executable in this folder
    7 # passing in "summarise" as a parameter.
    8 system("/bin/bash -c \"../etc/tasks/diffcol/task @ARGV\"");
    9 
    10 #system("/bin/bash -c \"./task @ARGV\"");
    11 #print STDERR "/bin/bash -c ../etc/tasks/diffcol/task @ARGV"
     1#!/usr/bin/perl -w 
     2
     3# This program is meant to run the nightly diffcol
     4# It is meant to be an equivalent for the existing task bash script
     5# But it is intended to be expanded to work for Windows and GS3
     6# For windows, need to REMEMBER to set the correct shebangs at the top
     7
     8
     9use Cwd;
     10use File::Path; # for rmdir and mkdir type functions
     11use File::Copy; # for recursive copying of folders but skipping .svn
     12use File::Basename;
     13
     14
     15my $isWin = ($^O =~ m/mswin/i) ? 1 : 0;
     16my $sep = $isWin ? "\\" : "/";
     17my $pathsep = $isWin ? ";" : ":";
     18#my $script_ext = $isWin ? ".bat" : ".bash";
     19my $setup_script = "setup"; # needs to become gs3-setup for GS3
     20
     21# TASK_HOME should be the toplevel diffcol folder
     22$ENV{'TASK_HOME'} = getcwd unless defined $ENV{'TASK_HOME'};
     23##    print STDERR "@@@ TASK_HOME: ".$ENV{'TASK_HOME'}."\n";
     24
     25
     26$ENV{'DATA_DIR'} = filename_concat($ENV{'TASK_HOME'}, "diffcol-data");
     27$ENV{'UPLOAD_DIR'} = filename_concat($ENV{'TASK_HOME'}, "diffcol-reports");
     28$ENV{'MONITOR_EMAIL'} = "greenstone_team\@cs.waikato.ac.nz"; # need to escape @ sign
     29##print STDERR "@@@ email: ".$ENV{'MONITOR_EMAIL'}."\n";
     30
     31# control if an existing compiled greenstone is used
     32# or, if one should be checked out, which revision to checkout from svn
     33$ENV{'SVN_OPT_REV'} = "-r head";
     34#export GSDLHOME=
     35#export GSDL3SRCHOME=
     36
     37#check key environment vars are set
     38if(!defined $ENV{'UPLOAD_DIR'}) {
     39    print STDOUT "Please set a UPLOAD_DIR for the test in an environment.sh file\n";
     40    #return 1;
     41}
     42if(!defined $ENV{'DATA_DIR'}) {
     43    print STDOUT "Please set a DATA_DIR for the test in an environment.sh file\n";
     44    #return 1;
     45}
     46if(!defined $ENV{'MONITOR_EMAIL'}) {
     47    print STDOUT "Please set a MONITOR_EMAIL for the test in an environment.sh file\n";
     48    #return 1;
     49}
     50
     51if($ENV{'DATA_DIR'} eq "/") {
     52    print STDOUT "DATA_DIR should not be the fs root\n";
     53    #return 1;
     54}
     55
     56print STDOUT "DATA_DIR: ".$ENV{'DATA_DIR'}."\n";
     57print STDOUT "UPLOAD_DIR: ".$ENV{'UPLOAD_DIR'}."\n";
     58
     59
     60#create an id for this test
     61my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
     62$year += 1900;
     63$mon += 1;
     64$mon = "0$mon" if ($mon < 10);
     65$mday = "0$mday" if ($mday < 10);
     66my $dateid="$year-$mon-$mday"; #my $dateid=($year+1900)."-".($mon+1)."-$mday";
     67
     68print STDOUT "Starting test '$dateid'\n";
     69
     70
     71# http://stackoverflow.com/questions/2149368/how-can-i-loop-through-files-in-a-directory-in-perl
     72$ENV{'CLASSPATH'} = "";
     73my $jar_lib_path = $ENV{'TASK_HOME'}.$sep."lib";
     74##print STDERR "@@@ $jar_lib_path\n";
     75my @files = <$jar_lib_path/*.jar>; # /full/path/to/diffcol/lib/*jar
     76foreach $file (@files) {
     77    $ENV{'CLASSPATH'}=$file.$pathsep.$ENV{'CLASSPATH'};
     78}
     79##print STDERR "**** classpath: ".$ENV{'CLASSPATH'}."\n";
     80
     81
     82#set the location of the full report                                               
     83my $xmlout=filename_concat($ENV{'DATA_DIR'}, "full-report-$dateid.xml");
     84##print STDERR "XML: $xmlout\n";
     85
     86# the toplevel folder of the greenstone installation being used
     87my $greenstone_home="";
     88# gsdl is the checkout folder and can be greenstone2 or greenstone3
     89my $gsdl="greenstone2";
     90
     91
     92# Check if using existing compiled-up greenstone installation
     93# and set the greenstone_home location accordingly
     94
     95if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
     96    print STDOUT "Found existing Greenstone home, will use that instead\n";
     97    $greenstone_home=$ENV{'GSDLHOME'};
     98} else {
     99    $greenstone_home=filename_concat($ENV{'DATA_DIR'}, $gsdl);
     100}
     101##print STDERR "GSHOME: $greenstone_home\n";
     102
     103#&setup_greenstone;
     104&run_test;
     105&summarise;
     106&upload;
     107
     108##********************************
     109
     110
     111
     112#http://stackoverflow.com/questions/7427262/read-a-file-and-save-it-in-variable-using-shell-script
     113
     114sub setup_greenstone
     115{
     116    #clean up from previous tests
     117    print STDOUT "about to clean up any old tests (Ctrl-C to cancel)"; # no newline
     118    for my $i ( 1..5 ) {
     119    sleep 1; # 1 second
     120    print STDOUT ".";
     121    }
     122    print STDOUT "\n";
     123
     124    # http://perldoc.perl.org/File/Path.html
     125    print STDOUT "cleaning up previous tests\n";
     126    &File::Path::remove_tree($ENV{'DATA_DIR'});
     127
     128    print STDOUT "creating the data dir\n";
     129    &File::Path::make_path($ENV{'DATA_DIR'}); # works like mkdir -p
     130
     131    chdir($ENV{'DATA_DIR'});
     132
     133    # use existing compiled-up greenstone installation, if a GSDLHOME set
     134    if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
     135    print STDOUT "Found existing Greenstone home, will use that instead";
     136    return;
     137    }
     138
     139    # Else checkout a GS from svn into DATA_DIR
     140
     141    #svn checkout of main gsdl directory                                       
     142    print STDOUT "checkout $gsdl:\n";
     143    my $cmd = "svn co ".$ENV{'SVN_OPT_REV'}." http://svn.greenstone.org/main/trunk/greenstone2 $gsdl";
     144    ##print STDERR "Checkout CMD: $cmd\n";
     145
     146    # # unlike backticks operator, system() will print the output of the command to the screen as it executes
     147    # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
     148    my $status = system "$cmd"; #my $status = `$cmd`;
     149    print STDOUT "done\n";
     150
     151    ##print STDERR "$ENV{'DATA_DIR'}$sep$gsdl\n";
     152
     153    chdir("$ENV{'DATA_DIR'}$sep$gsdl");
     154
     155    ##print STDERR "@@@ OS:  $^O.|".$Config{'archname64'}."|\n";
     156
     157    # if we're on linux/darwin, need gnome-lib for the correct architecture.
     158    if(!$isWin) {
     159
     160    print STDOUT "setting up gnome-lib-minimal for compilation\n";
     161
     162    # To get gnome-lib, need to determine bit architecture of the linux/darwin
     163    # http://stackoverflow.com/questions/8963400/the-correct-way-to-read-a-data-file-into-an-array
     164    # $Config{'archname64'} doesn't work on the Ubuntu and the Sys::Info package seems to not be supported
     165    # well on ActivePerl.
     166    # But since we know we're on a Linux/Darwin machine at this point, wecan just run `uname -m` and other linux cmds
     167
     168    my $gnome_lib_file = ($^O =~ m/macos/i) ? "darwin-intel" : "linux"; # assuming all darwin is intel, not ppc!!
     169
     170    my $bit_arch=`uname -m`;
     171    $gnome_lib_file .= "-x64" if($bit_arch =~ m/64$/);
     172
     173    #svn checkout gnome-lib for this linux/darwin
     174    chdir("$ENV{'DATA_DIR'}$sep$gsdl$sep"."ext"); #cd $DATA_DIR/$gsdl/ext
     175
     176    ##print STDERR "**** gnomelib: $gnome_lib_file\n";
     177
     178    # checkout and unpack gnome-lib-minimal
     179
     180    #svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-linux-x64.tar.gz gl.tar.gz
     181    $cmd = "svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-".$gnome_lib_file.".tar.gz gl.tar.gz";
     182    system $cmd;
     183    system ("tar -xvzf gl.tar.gz");
     184
     185    chdir("gnome-lib-minimal");
     186    ##print STDERR "*** ARCH: $bit_arch\n";
     187
     188    # need to run source devel.bash on gnome-lib followed by configure, make, make install
     189    # in one go, in order to preserve the compile environment set up by sourcing devel.bash
     190
     191    # http://stackoverflow.com/questions/7369145/activating-a-virtualenv-using-a-shell-script-doesnt-seem-to-work
     192    # http://ubuntuforums.org/showthread.php?t=1932504 linking /bin/sh to bash instead of dash
     193   
     194#   $cmd = "/bin/bash \"source devel.bash && cd ../.. && ./configure --enable-apache httpd && make && make install\"";
     195#   $cmd = "bash -c \"source ./devel.bash && cd ../.. && ./configure --enable-apache-httpd && make && make install\"";
     196
     197    $cmd = "bash -c \"";   
     198
     199    $cmd .= "source ./devel.bash";
     200    $cmd .= " && cd ../..";
     201
     202    #configure
     203    # $cmd .= " && ./configure";
     204    $cmd .= " && echo 'configure $gsdl: ' ";
     205        $cmd .= " && echo '<configure>' >> $xmlout";   
     206        $cmd .= " && ./configure 2>> $ENV{'DATA_DIR'}/compilation-errors"; # configure
     207        $cmd .= " && echo '</configure>' >> $xmlout";
     208        $cmd .= " && echo 'done'";
     209
     210    #make
     211    $cmd .= " && echo 'make $gsdl: '";
     212    $cmd .= " && echo '<make>' >> $xmlout";
     213        $cmd .= " && make 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make
     214        $cmd .= " && echo '</make>' >> $xmlout";
     215        $cmd .= " && echo 'done'";
     216
     217    #make install
     218    $cmd .= " && echo 'make install $gsdl: '";
     219        $cmd .= " && echo '<make-install>' >> $xmlout";
     220    $cmd .= " && make install 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make install
     221        $cmd .= " && echo '</make-install>' >> $xmlout";
     222        $cmd .= " && echo 'done'";
     223   
     224    $cmd .= "\""; # close off cmd to bash and run it
     225    system $cmd;   
     226    }
     227
     228    # set the path to the greenstone_home variable                             
     229    $greenstone_home="$ENV{'DATA_DIR'}$sep$gsdl";
     230
     231}
     232
     233# http://stackoverflow.com/questions/3377879/how-do-i-receive-command-output-immediately
     234sub run_test
     235{
     236    open (my $xml_fh, '>'.$xmlout) || die "Could not open xml file $xmlout for appending: $!\n";   
     237
     238    # perform the requested subcommands, outputting xml information
     239    print $xml_fh "<test time=\"$dateid\" id=\"$dateid\">\n";
     240   
     241    # make sure that diffcol/model-collect is up to date before copying it over to greenstone-home                                                                           
     242    print $xml_fh "Updating $ENV{'TASK_HOME'}/model-collect:\n";
     243    my $cmd = "svn up $ENV{'TASK_HOME'}/model-collect"; #chdir("$ENV{'TASK_HOME'}/model-collect");
     244    my $status = system "$cmd";
     245
     246    # go to whichever greenstone_home we're using
     247    chdir($greenstone_home);
     248
     249    # get svn info
     250    print STDOUT "getting svn info: $xmlout\n";
     251    print $xml_fh "<svn-info>\n";
     252    run_and_print_cmd("svn info", $xml_fh);
     253    print $xml_fh "</svn-info>\n";
     254    print STDOUT "done\n";
     255
     256    #make two copies of the model-collect directory in gsdl
     257    #one to be rebuilt and one as the basis for comparison
     258    #strip both of all .svn directories
     259   
     260    #copy the model collections to the collect folder to be rebuilt
     261    print STDOUT "installing test collections and model collections to new $gsdl installation... ";
     262   
     263    #clean up
     264    if(-d "collect") {
     265    &File::Path::remove_tree("collect") || die "Error could not delete collect: $!";
     266    }
     267    &File::Path::remove_tree("model-collect");
     268
     269    #copy to collect and strip .svn subfolders
     270    &File::Path::make_path("collect"); # create the folder and copy contents across
     271    &copy_recursively(&filename_concat("$ENV{'TASK_HOME'}","model-collect"), "collect", ".svn");
     272
     273    #make the model copy
     274    &File::Path::make_path("model-collect");
     275    &copy_recursively("collect", "model-collect"); # copy contents across
     276   
     277    print STDOUT "done\n";   
     278   
     279
     280    #for each collection, import, build and diff with its model counterpart
     281    opendir my($collect_handle), "collect" or die "Could not open dir $greenstone_home/collect: $!";
     282    for my $collection (readdir $collect_handle) {
     283    next if ($collection eq "." || $collection eq "..");
     284    next if ($collection ne "Small-HTML"); ## TEMPORARY, FOR TESTING THIS SCRIPT
     285
     286    #escape the filename (in case of space)
     287    $collection =~ s@ @\\ @g;
     288    #getting just the basename of the collection would have been necessary had we not cd-ed into $gsdl
     289
     290    print STDERR "*** Found collection $collection\n";
     291    print $xml_fh "<collection-test name=\"$collection\">\n";
     292
     293    #import
     294# Ensure the OIDtype for importing is hash_on_full_filename
     295# "to make document identifiers more stable across upgrades of the software,
     296# although it means that duplicate documents contained in the collection are
     297# no longer detected automatically."
     298    print STDOUT "$collection - Importing:\n";
     299    print $xml_fh "<import>\n";
     300    $cmd = "import.pl -OIDtype hash_on_full_filename $collection -removeold";
     301    &run_build_script($cmd);
     302    print $xml_fh "</import>\n";
     303    print STDOUT "done\n"; 
     304
     305    #build
     306    print STDOUT "$collection - Building:\n";
     307    print $xml_fh "<build>\n";
     308    $cmd = "buildcol.pl $collection -removeold";
     309    &run_build_script($cmd);
     310    print $xml_fh "</build>\n";
     311    print STDOUT "done\n";
     312   
     313    #rename the intermediate 'building' directory 'index'
     314    print STDOUT "$collection - Move \"building\" to \"index\"... ";
     315    my $index = &filename_concat("collect", $collection, "index");
     316    my $building = &filename_concat("collect", $collection, "building");
     317    &File::Path::remove_tree($index);
     318    # Renaming Directories, http://www.perlmonks.org/?node_id=177421
     319    move($building, $index) or die "copy failed: $!"; # File::Copy::move
     320    print STDOUT "done\n";
     321
     322    #diffcol
     323    print STDOUT "$collection - Diffing:\n";
     324    my $diffcol_dir = &filename_concat($ENV{'TASK_HOME'},"diffcol");
     325    $cmd = "./diffcol.pl -output xml -verbosity 10 $collection";   
     326    &run_diff_script($cmd, $xml_fh, $diffcol_dir);
     327
     328    chdir($greenstone_home); # this is actually where we are
     329    print STDOUT "done\n";
     330    print $xml_fh "</collection-test>\n";
     331    }
     332    closedir $collect_handle; # close handle to collect dir
     333
     334    print $xml_fh "</test>\n";
     335    close($xml_fh);
     336
     337    print STDOUT "done\n";
     338}
     339
     340##***************************************************************
     341# runs setup in greenstone_home before running the diff command
     342sub run_diff_script {   
     343    my ($cmd, $fh, $diffcol_dir) = @_;
     344
     345    # we're in greenstone_home now
     346    if(!$isWin) {   
     347    $cmd = "bash -c \"export GSDLHOME=&& source $setup_script.bash && cd $diffcol_dir && $cmd\"";
     348
     349    } else { # Need to prefix cmd -c/-k as necessary
     350    $cmd = "cd $greenstone_home && set GSDLHOME=&& source $setup_script.bat && cd $diffcol_dir && $cmd";
     351    }
     352
     353    return &run_and_print_cmd($cmd, $fh);
     354}
     355
     356# runs setup in greenstone_home before running the given build command
     357sub run_build_script {   
     358    my ($cmd, $fh) = @_;
     359
     360#    chdir($greenstone_home);
     361
     362    # we are in $greenstone_home, can directly run the build cmd on the collection
     363    if(!$isWin) {
     364    $cmd = "bash -c \"export GSDLHOME=&& source $setup_script.bash && $cmd\"";
     365
     366    } else { # Need to prefix cmd -c/-k as necessary
     367    $cmd = "set GSDLHOME=&& source $setup_script.bat && $cmd";
     368    }
     369
     370    return &run_and_print_cmd($cmd, $fh);
     371}
     372#ERROR (36096) running bash -c "export GSDLHOME=&& source setup.bash && buildcol.pl Small-HTML -removeold": Illegal seek
     373
     374
     375# http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1activeperl%20sys::info
     376# http://stackoverflow.com/questions/1477500/how-do-i-get-the-output-of-an-external-command-in-perl
     377sub run_and_print_cmd {
     378    my ($cmd, $fh) = @_;
     379   
     380    open my $pin, "$cmd|" or die "unable to run cmd $cmd: $!"; # open(my $fh, '-|', 'powercfg -l') or die $!;
     381
     382    if(defined $fh) { # print cmd output both to the filehandle and to stdout
     383    while (my $line = <$pin>) {
     384        print $fh $line;
     385#       print STDOUT $line; # if also printing cmd output to STDOUT
     386    }
     387    }
     388    else { # no filehandle, so just need to print to stdout
     389
     390    # unlike backticks operator, system() will print the output of the command to the screen as it executes
     391    # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
     392       
     393    my $status = system $cmd;
     394    if($status != 0) {
     395        print STDERR "ERROR ($status) running $cmd: $!\n";
     396    }
     397    }
     398    close($pin);
     399}
     400
     401sub filename_concat {
     402    my $first_file = shift(@_);
     403    my (@filenames) = @_;
     404
     405    # If first_file is not null or empty, then add it back into the list
     406    if (defined $first_file && $first_file =~ /\S/)
     407    {
     408    unshift(@filenames, $first_file);
     409    }
     410
     411    my $filename = join($sep, @filenames);
     412    $filename =~ s/[\\\/]$//; # remove trailing slashes if any
     413    return $filename;
     414}
     415
     416
     417# The following code is from
     418# http://stackoverflow.com/questions/227613/how-can-i-copy-a-directory-recursively-and-filter-filenames-in-perl
     419# It also states that "Perl's File::Copy is a bit broken (it doesn't copy permissions on Unix systems, for example)"
     420sub copy_recursively {
     421    my ($from_dir, $to_dir, $regex) = @_;
     422    opendir my($dh), $from_dir or die "Could not open dir '$from_dir': $!";
     423
     424#    if(-d !$to_dir) {
     425#   mkdir $to_dir or die "mkdir '$to_dir' failed: $!" if not -e $to_dir;
     426#    }
     427
     428    for my $entry (readdir $dh) {
     429    next if ($entry eq "." || $entry eq "..");
     430        next if (defined $regex && $entry =~ /$regex/);
     431        my $source = "$from_dir/$entry";
     432        my $destination = "$to_dir/$entry";
     433        if (-d $source) {
     434            mkdir $destination or die "mkdir '$destination' failed: $!" if not -e $destination;
     435            copy_recursively($source, $destination, $regex);
     436        } else {
     437            copy($source, $destination) or die "copy failed: $!";
     438        }
     439    }
     440    closedir $dh;
     441    return;
     442}
     443
     444sub summarise {
     445
     446    # make a summarised Xml report
     447    print STDOUT "Summarizing the xml report... ";
     448    my $cmd = "java org.apache.xalan.xslt.Process -IN $xmlout -XSL $ENV{'TASK_HOME'}/xsl/xml-report.xsl -OUT $ENV{'DATA_DIR'}/report-$dateid.xml";
     449    my $status = system($cmd);
     450    print STDOUT "done\n";
     451
     452    # make a summarised HTMl report
     453    print STDOUT "Creating an html summary report... ";
     454    $cmd = "java org.apache.xalan.xslt.Process -IN $ENV{'DATA_DIR'}/report-$dateid.xml -XSL $ENV{'TASK_HOME'}/xsl/html-report.xsl -OUT $ENV{'DATA_DIR'}/report-$dateid.html";
     455    $status = system($cmd);
     456    print STDOUT "done\n";
     457}
     458
     459sub upload {
     460    # ensure the upload directory exists before trying to transfer the xml and html files across
     461    # if it already existed, clear it of contents
     462    if (-d $ENV{'UPLOAD_DIR'}) {
     463    &File::Path::make_path($ENV{'UPLOAD_DIR'});
     464    } # else rm $UPLOAD_DIR/*   
     465
     466    # copy all *.xml and *.html files across to UPLOAD_DIR
     467    opendir my($dh), $ENV{'DATA_DIR'} or die "Could not open DATA_DIR: $!";
     468    for my $entry (readdir $dh) {
     469    next if ($entry !~ m/(\.xml|\.html)$/);
     470    copy($entry, $ENV{'UPLOAD_DIR'});
     471    }
     472    closedir $dh;
     473
     474    print STDOUT "Finished uploading\n";
     475}
     476
     477
     478# TO DO:
     479# Sending emails with perl: http://learn.perl.org/examples/email.html
     480# Sending email attachments with perl: http://www.perlmonks.org/?node_id=19430
     481
     482
     483# The old version of this program contained the following, consisting of 1 line of active code:
     484
     485  # Invoke as: sjmc@br:/research/sjm84/envi/bin$ ./envi diffcol summarise
     486  # Doing so will call this pl file and pass in "summarise" in ARGV
     487  # This pl file will in turn call the task executable in this folder
     488  # passing in "summarise" as a parameter.
     489#system("/bin/bash -c \"../etc/tasks/diffcol/task @ARGV\"");
     490
     491  ##system("/bin/bash -c \"./task @ARGV\"");
     492  ##print STDERR "/bin/bash -c ../etc/tasks/diffcol/task @ARGV"
Note: See TracChangeset for help on using the changeset viewer.