source: other-projects/nightly-tasks/diffcol/trunk/task.pl@ 27621

Last change on this file since 27621 was 27621, checked in by ak19, 11 years ago

Ported most of the task.sh functionality across to task.pl. The email and attachment functions still need to be added, as also the main function. And there's also some issue with collection subfolders not getting deleted during a build which results in unnecessary differences between the model and test collections.

  • Property svn:executable set to *
File size: 17.6 KB
Line 
1#!/usr/bin/perl -w
2
3# This program is meant to run the nightly diffcol
4# It is meant to be an equivalent for the existing task bash script
5# But it is intended to be expanded to work for Windows and GS3
6# For windows, need to REMEMBER to set the correct shebangs at the top
7
8
9use Cwd;
10use File::Path; # for rmdir and mkdir type functions
11use File::Copy; # for recursive copying of folders but skipping .svn
12use File::Basename;
13
14
15my $isWin = ($^O =~ m/mswin/i) ? 1 : 0;
16my $sep = $isWin ? "\\" : "/";
17my $pathsep = $isWin ? ";" : ":";
18#my $script_ext = $isWin ? ".bat" : ".bash";
19my $setup_script = "setup"; # needs to become gs3-setup for GS3
20
21# TASK_HOME should be the toplevel diffcol folder
22$ENV{'TASK_HOME'} = getcwd unless defined $ENV{'TASK_HOME'};
23## print STDERR "@@@ TASK_HOME: ".$ENV{'TASK_HOME'}."\n";
24
25
26$ENV{'DATA_DIR'} = filename_concat($ENV{'TASK_HOME'}, "diffcol-data");
27$ENV{'UPLOAD_DIR'} = filename_concat($ENV{'TASK_HOME'}, "diffcol-reports");
28$ENV{'MONITOR_EMAIL'} = "greenstone_team\@cs.waikato.ac.nz"; # need to escape @ sign
29##print STDERR "@@@ email: ".$ENV{'MONITOR_EMAIL'}."\n";
30
31# control if an existing compiled greenstone is used
32# or, if one should be checked out, which revision to checkout from svn
33$ENV{'SVN_OPT_REV'} = "-r head";
34#export GSDLHOME=
35#export GSDL3SRCHOME=
36
37#check key environment vars are set
38if(!defined $ENV{'UPLOAD_DIR'}) {
39 print STDOUT "Please set a UPLOAD_DIR for the test in an environment.sh file\n";
40 #return 1;
41}
42if(!defined $ENV{'DATA_DIR'}) {
43 print STDOUT "Please set a DATA_DIR for the test in an environment.sh file\n";
44 #return 1;
45}
46if(!defined $ENV{'MONITOR_EMAIL'}) {
47 print STDOUT "Please set a MONITOR_EMAIL for the test in an environment.sh file\n";
48 #return 1;
49}
50
51if($ENV{'DATA_DIR'} eq "/") {
52 print STDOUT "DATA_DIR should not be the fs root\n";
53 #return 1;
54}
55
56print STDOUT "DATA_DIR: ".$ENV{'DATA_DIR'}."\n";
57print STDOUT "UPLOAD_DIR: ".$ENV{'UPLOAD_DIR'}."\n";
58
59
60#create an id for this test
61my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
62$year += 1900;
63$mon += 1;
64$mon = "0$mon" if ($mon < 10);
65$mday = "0$mday" if ($mday < 10);
66my $dateid="$year-$mon-$mday"; #my $dateid=($year+1900)."-".($mon+1)."-$mday";
67
68print STDOUT "Starting test '$dateid'\n";
69
70
71# http://stackoverflow.com/questions/2149368/how-can-i-loop-through-files-in-a-directory-in-perl
72$ENV{'CLASSPATH'} = "";
73my $jar_lib_path = $ENV{'TASK_HOME'}.$sep."lib";
74##print STDERR "@@@ $jar_lib_path\n";
75my @files = <$jar_lib_path/*.jar>; # /full/path/to/diffcol/lib/*jar
76foreach $file (@files) {
77 $ENV{'CLASSPATH'}=$file.$pathsep.$ENV{'CLASSPATH'};
78}
79##print STDERR "**** classpath: ".$ENV{'CLASSPATH'}."\n";
80
81
82#set the location of the full report
83my $xmlout=filename_concat($ENV{'DATA_DIR'}, "full-report-$dateid.xml");
84##print STDERR "XML: $xmlout\n";
85
86# the toplevel folder of the greenstone installation being used
87my $greenstone_home="";
88# gsdl is the checkout folder and can be greenstone2 or greenstone3
89my $gsdl="greenstone2";
90
91
92# Check if using existing compiled-up greenstone installation
93# and set the greenstone_home location accordingly
94
95if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
96 print STDOUT "Found existing Greenstone home, will use that instead\n";
97 $greenstone_home=$ENV{'GSDLHOME'};
98} else {
99 $greenstone_home=filename_concat($ENV{'DATA_DIR'}, $gsdl);
100}
101##print STDERR "GSHOME: $greenstone_home\n";
102
103#&setup_greenstone;
104&run_test;
105&summarise;
106&upload;
107
108##********************************
109
110
111
112#http://stackoverflow.com/questions/7427262/read-a-file-and-save-it-in-variable-using-shell-script
113
114sub setup_greenstone
115{
116 #clean up from previous tests
117 print STDOUT "about to clean up any old tests (Ctrl-C to cancel)"; # no newline
118 for my $i ( 1..5 ) {
119 sleep 1; # 1 second
120 print STDOUT ".";
121 }
122 print STDOUT "\n";
123
124 # http://perldoc.perl.org/File/Path.html
125 print STDOUT "cleaning up previous tests\n";
126 &File::Path::remove_tree($ENV{'DATA_DIR'});
127
128 print STDOUT "creating the data dir\n";
129 &File::Path::make_path($ENV{'DATA_DIR'}); # works like mkdir -p
130
131 chdir($ENV{'DATA_DIR'});
132
133 # use existing compiled-up greenstone installation, if a GSDLHOME set
134 if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
135 print STDOUT "Found existing Greenstone home, will use that instead";
136 return;
137 }
138
139 # Else checkout a GS from svn into DATA_DIR
140
141 #svn checkout of main gsdl directory
142 print STDOUT "checkout $gsdl:\n";
143 my $cmd = "svn co ".$ENV{'SVN_OPT_REV'}." http://svn.greenstone.org/main/trunk/greenstone2 $gsdl";
144 ##print STDERR "Checkout CMD: $cmd\n";
145
146 # # unlike backticks operator, system() will print the output of the command to the screen as it executes
147 # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
148 my $status = system "$cmd"; #my $status = `$cmd`;
149 print STDOUT "done\n";
150
151 ##print STDERR "$ENV{'DATA_DIR'}$sep$gsdl\n";
152
153 chdir("$ENV{'DATA_DIR'}$sep$gsdl");
154
155 ##print STDERR "@@@ OS: $^O.|".$Config{'archname64'}."|\n";
156
157 # if we're on linux/darwin, need gnome-lib for the correct architecture.
158 if(!$isWin) {
159
160 print STDOUT "setting up gnome-lib-minimal for compilation\n";
161
162 # To get gnome-lib, need to determine bit architecture of the linux/darwin
163 # http://stackoverflow.com/questions/8963400/the-correct-way-to-read-a-data-file-into-an-array
164 # $Config{'archname64'} doesn't work on the Ubuntu and the Sys::Info package seems to not be supported
165 # well on ActivePerl.
166 # But since we know we're on a Linux/Darwin machine at this point, wecan just run `uname -m` and other linux cmds
167
168 my $gnome_lib_file = ($^O =~ m/macos/i) ? "darwin-intel" : "linux"; # assuming all darwin is intel, not ppc!!
169
170 my $bit_arch=`uname -m`;
171 $gnome_lib_file .= "-x64" if($bit_arch =~ m/64$/);
172
173 #svn checkout gnome-lib for this linux/darwin
174 chdir("$ENV{'DATA_DIR'}$sep$gsdl$sep"."ext"); #cd $DATA_DIR/$gsdl/ext
175
176 ##print STDERR "**** gnomelib: $gnome_lib_file\n";
177
178 # checkout and unpack gnome-lib-minimal
179
180 #svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-linux-x64.tar.gz gl.tar.gz
181 $cmd = "svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-".$gnome_lib_file.".tar.gz gl.tar.gz";
182 system $cmd;
183 system ("tar -xvzf gl.tar.gz");
184
185 chdir("gnome-lib-minimal");
186 ##print STDERR "*** ARCH: $bit_arch\n";
187
188 # need to run source devel.bash on gnome-lib followed by configure, make, make install
189 # in one go, in order to preserve the compile environment set up by sourcing devel.bash
190
191 # http://stackoverflow.com/questions/7369145/activating-a-virtualenv-using-a-shell-script-doesnt-seem-to-work
192 # http://ubuntuforums.org/showthread.php?t=1932504 linking /bin/sh to bash instead of dash
193
194# $cmd = "/bin/bash \"source devel.bash && cd ../.. && ./configure --enable-apache httpd && make && make install\"";
195# $cmd = "bash -c \"source ./devel.bash && cd ../.. && ./configure --enable-apache-httpd && make && make install\"";
196
197 $cmd = "bash -c \"";
198
199 $cmd .= "source ./devel.bash";
200 $cmd .= " && cd ../..";
201
202 #configure
203 # $cmd .= " && ./configure";
204 $cmd .= " && echo 'configure $gsdl: ' ";
205 $cmd .= " && echo '<configure>' >> $xmlout";
206 $cmd .= " && ./configure 2>> $ENV{'DATA_DIR'}/compilation-errors"; # configure
207 $cmd .= " && echo '</configure>' >> $xmlout";
208 $cmd .= " && echo 'done'";
209
210 #make
211 $cmd .= " && echo 'make $gsdl: '";
212 $cmd .= " && echo '<make>' >> $xmlout";
213 $cmd .= " && make 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make
214 $cmd .= " && echo '</make>' >> $xmlout";
215 $cmd .= " && echo 'done'";
216
217 #make install
218 $cmd .= " && echo 'make install $gsdl: '";
219 $cmd .= " && echo '<make-install>' >> $xmlout";
220 $cmd .= " && make install 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make install
221 $cmd .= " && echo '</make-install>' >> $xmlout";
222 $cmd .= " && echo 'done'";
223
224 $cmd .= "\""; # close off cmd to bash and run it
225 system $cmd;
226 }
227
228 # set the path to the greenstone_home variable
229 $greenstone_home="$ENV{'DATA_DIR'}$sep$gsdl";
230
231}
232
233# http://stackoverflow.com/questions/3377879/how-do-i-receive-command-output-immediately
234sub run_test
235{
236 open (my $xml_fh, '>'.$xmlout) || die "Could not open xml file $xmlout for appending: $!\n";
237
238 # perform the requested subcommands, outputting xml information
239 print $xml_fh "<test time=\"$dateid\" id=\"$dateid\">\n";
240
241 # make sure that diffcol/model-collect is up to date before copying it over to greenstone-home
242 print $xml_fh "Updating $ENV{'TASK_HOME'}/model-collect:\n";
243 my $cmd = "svn up $ENV{'TASK_HOME'}/model-collect"; #chdir("$ENV{'TASK_HOME'}/model-collect");
244 my $status = system "$cmd";
245
246 # go to whichever greenstone_home we're using
247 chdir($greenstone_home);
248
249 # get svn info
250 print STDOUT "getting svn info: $xmlout\n";
251 print $xml_fh "<svn-info>\n";
252 run_and_print_cmd("svn info", $xml_fh);
253 print $xml_fh "</svn-info>\n";
254 print STDOUT "done\n";
255
256 #make two copies of the model-collect directory in gsdl
257 #one to be rebuilt and one as the basis for comparison
258 #strip both of all .svn directories
259
260 #copy the model collections to the collect folder to be rebuilt
261 print STDOUT "installing test collections and model collections to new $gsdl installation... ";
262
263 #clean up
264 if(-d "collect") {
265 &File::Path::remove_tree("collect") || die "Error could not delete collect: $!";
266 }
267 &File::Path::remove_tree("model-collect");
268
269 #copy to collect and strip .svn subfolders
270 &File::Path::make_path("collect"); # create the folder and copy contents across
271 &copy_recursively(&filename_concat("$ENV{'TASK_HOME'}","model-collect"), "collect", ".svn");
272
273 #make the model copy
274 &File::Path::make_path("model-collect");
275 &copy_recursively("collect", "model-collect"); # copy contents across
276
277 print STDOUT "done\n";
278
279
280 #for each collection, import, build and diff with its model counterpart
281 opendir my($collect_handle), "collect" or die "Could not open dir $greenstone_home/collect: $!";
282 for my $collection (readdir $collect_handle) {
283 next if ($collection eq "." || $collection eq "..");
284 next if ($collection ne "Small-HTML"); ## TEMPORARY, FOR TESTING THIS SCRIPT
285
286 #escape the filename (in case of space)
287 $collection =~ s@ @\\ @g;
288 #getting just the basename of the collection would have been necessary had we not cd-ed into $gsdl
289
290 print STDERR "*** Found collection $collection\n";
291 print $xml_fh "<collection-test name=\"$collection\">\n";
292
293 #import
294# Ensure the OIDtype for importing is hash_on_full_filename
295# "to make document identifiers more stable across upgrades of the software,
296# although it means that duplicate documents contained in the collection are
297# no longer detected automatically."
298 print STDOUT "$collection - Importing:\n";
299 print $xml_fh "<import>\n";
300 $cmd = "import.pl -OIDtype hash_on_full_filename $collection -removeold";
301 &run_build_script($cmd);
302 print $xml_fh "</import>\n";
303 print STDOUT "done\n";
304
305 #build
306 print STDOUT "$collection - Building:\n";
307 print $xml_fh "<build>\n";
308 $cmd = "buildcol.pl $collection -removeold";
309 &run_build_script($cmd);
310 print $xml_fh "</build>\n";
311 print STDOUT "done\n";
312
313 #rename the intermediate 'building' directory 'index'
314 print STDOUT "$collection - Move \"building\" to \"index\"... ";
315 my $index = &filename_concat("collect", $collection, "index");
316 my $building = &filename_concat("collect", $collection, "building");
317 &File::Path::remove_tree($index);
318 # Renaming Directories, http://www.perlmonks.org/?node_id=177421
319 move($building, $index) or die "copy failed: $!"; # File::Copy::move
320 print STDOUT "done\n";
321
322 #diffcol
323 print STDOUT "$collection - Diffing:\n";
324 my $diffcol_dir = &filename_concat($ENV{'TASK_HOME'},"diffcol");
325 $cmd = "./diffcol.pl -output xml -verbosity 10 $collection";
326 &run_diff_script($cmd, $xml_fh, $diffcol_dir);
327
328 chdir($greenstone_home); # this is actually where we are
329 print STDOUT "done\n";
330 print $xml_fh "</collection-test>\n";
331 }
332 closedir $collect_handle; # close handle to collect dir
333
334 print $xml_fh "</test>\n";
335 close($xml_fh);
336
337 print STDOUT "done\n";
338}
339
340##***************************************************************
341# runs setup in greenstone_home before running the diff command
342sub run_diff_script {
343 my ($cmd, $fh, $diffcol_dir) = @_;
344
345 # we're in greenstone_home now
346 if(!$isWin) {
347 $cmd = "bash -c \"export GSDLHOME=&& source $setup_script.bash && cd $diffcol_dir && $cmd\"";
348
349 } else { # Need to prefix cmd -c/-k as necessary
350 $cmd = "cd $greenstone_home && set GSDLHOME=&& source $setup_script.bat && cd $diffcol_dir && $cmd";
351 }
352
353 return &run_and_print_cmd($cmd, $fh);
354}
355
356# runs setup in greenstone_home before running the given build command
357sub run_build_script {
358 my ($cmd, $fh) = @_;
359
360# chdir($greenstone_home);
361
362 # we are in $greenstone_home, can directly run the build cmd on the collection
363 if(!$isWin) {
364 $cmd = "bash -c \"export GSDLHOME=&& source $setup_script.bash && $cmd\"";
365
366 } else { # Need to prefix cmd -c/-k as necessary
367 $cmd = "set GSDLHOME=&& source $setup_script.bat && $cmd";
368 }
369
370 return &run_and_print_cmd($cmd, $fh);
371}
372#ERROR (36096) running bash -c "export GSDLHOME=&& source setup.bash && buildcol.pl Small-HTML -removeold": Illegal seek
373
374
375# http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1activeperl%20sys::info
376# http://stackoverflow.com/questions/1477500/how-do-i-get-the-output-of-an-external-command-in-perl
377sub run_and_print_cmd {
378 my ($cmd, $fh) = @_;
379
380 open my $pin, "$cmd|" or die "unable to run cmd $cmd: $!"; # open(my $fh, '-|', 'powercfg -l') or die $!;
381
382 if(defined $fh) { # print cmd output both to the filehandle and to stdout
383 while (my $line = <$pin>) {
384 print $fh $line;
385# print STDOUT $line; # if also printing cmd output to STDOUT
386 }
387 }
388 else { # no filehandle, so just need to print to stdout
389
390 # unlike backticks operator, system() will print the output of the command to the screen as it executes
391 # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
392
393 my $status = system $cmd;
394 if($status != 0) {
395 print STDERR "ERROR ($status) running $cmd: $!\n";
396 }
397 }
398 close($pin);
399}
400
401sub filename_concat {
402 my $first_file = shift(@_);
403 my (@filenames) = @_;
404
405 # If first_file is not null or empty, then add it back into the list
406 if (defined $first_file && $first_file =~ /\S/)
407 {
408 unshift(@filenames, $first_file);
409 }
410
411 my $filename = join($sep, @filenames);
412 $filename =~ s/[\\\/]$//; # remove trailing slashes if any
413 return $filename;
414}
415
416
417# The following code is from
418# http://stackoverflow.com/questions/227613/how-can-i-copy-a-directory-recursively-and-filter-filenames-in-perl
419# It also states that "Perl's File::Copy is a bit broken (it doesn't copy permissions on Unix systems, for example)"
420sub copy_recursively {
421 my ($from_dir, $to_dir, $regex) = @_;
422 opendir my($dh), $from_dir or die "Could not open dir '$from_dir': $!";
423
424# if(-d !$to_dir) {
425# mkdir $to_dir or die "mkdir '$to_dir' failed: $!" if not -e $to_dir;
426# }
427
428 for my $entry (readdir $dh) {
429 next if ($entry eq "." || $entry eq "..");
430 next if (defined $regex && $entry =~ /$regex/);
431 my $source = "$from_dir/$entry";
432 my $destination = "$to_dir/$entry";
433 if (-d $source) {
434 mkdir $destination or die "mkdir '$destination' failed: $!" if not -e $destination;
435 copy_recursively($source, $destination, $regex);
436 } else {
437 copy($source, $destination) or die "copy failed: $!";
438 }
439 }
440 closedir $dh;
441 return;
442}
443
444sub summarise {
445
446 # make a summarised Xml report
447 print STDOUT "Summarizing the xml report... ";
448 my $cmd = "java org.apache.xalan.xslt.Process -IN $xmlout -XSL $ENV{'TASK_HOME'}/xsl/xml-report.xsl -OUT $ENV{'DATA_DIR'}/report-$dateid.xml";
449 my $status = system($cmd);
450 print STDOUT "done\n";
451
452 # make a summarised HTMl report
453 print STDOUT "Creating an html summary report... ";
454 $cmd = "java org.apache.xalan.xslt.Process -IN $ENV{'DATA_DIR'}/report-$dateid.xml -XSL $ENV{'TASK_HOME'}/xsl/html-report.xsl -OUT $ENV{'DATA_DIR'}/report-$dateid.html";
455 $status = system($cmd);
456 print STDOUT "done\n";
457}
458
459sub upload {
460 # ensure the upload directory exists before trying to transfer the xml and html files across
461 # if it already existed, clear it of contents
462 if (-d $ENV{'UPLOAD_DIR'}) {
463 &File::Path::make_path($ENV{'UPLOAD_DIR'});
464 } # else rm $UPLOAD_DIR/*
465
466 # copy all *.xml and *.html files across to UPLOAD_DIR
467 opendir my($dh), $ENV{'DATA_DIR'} or die "Could not open DATA_DIR: $!";
468 for my $entry (readdir $dh) {
469 next if ($entry !~ m/(\.xml|\.html)$/);
470 copy($entry, $ENV{'UPLOAD_DIR'});
471 }
472 closedir $dh;
473
474 print STDOUT "Finished uploading\n";
475}
476
477
478# TO DO:
479# Sending emails with perl: http://learn.perl.org/examples/email.html
480# Sending email attachments with perl: http://www.perlmonks.org/?node_id=19430
481
482
483# The old version of this program contained the following, consisting of 1 line of active code:
484
485 # Invoke as: sjmc@br:/research/sjm84/envi/bin$ ./envi diffcol summarise
486 # Doing so will call this pl file and pass in "summarise" in ARGV
487 # This pl file will in turn call the task executable in this folder
488 # passing in "summarise" as a parameter.
489#system("/bin/bash -c \"../etc/tasks/diffcol/task @ARGV\"");
490
491 ##system("/bin/bash -c \"./task @ARGV\"");
492 ##print STDERR "/bin/bash -c ../etc/tasks/diffcol/task @ARGV"
Note: See TracBrowser for help on using the repository browser.