source: other-projects/nightly-tasks/diffcol/trunk/task.pl@ 28606

Last change on this file since 28606 was 28606, checked in by ak19, 10 years ago

Related to previous commit. Need Leopard's IsisGdl binary on Lion machine. Turns out Leopard is 32 bit after all.

  • Property svn:executable set to *
File size: 35.4 KB
Line 
1#!/usr/bin/perl -w
2
3# This program is meant to run the nightly diffcol
4# It is meant to be an equivalent for the existing task bash script
5# But it is intended to be expanded to work for Windows and GS3
6# For windows, need to REMEMBER to set the correct shebangs at the top
7
8
9# TODO:
10# Have a caveat mode and a stable mode (as in snapshot/task.pl)
11#
12#} elsif ( $ENV{'TASK_NAME'} =~ "gs2-diffcol-(caveat|stable)" ) {
13# $major_version = 2;
14# $prefix="2t";
15# $rk="tk2"; # test kit
16#} elsif ( $ENV{'TASK_NAME'} =~ "gs3-diffcol-(caveat|stable)" ) {
17# $major_version = 3;
18# $prefix="3t";
19# $rk="tk3"; # test kit
20
21package diffcoltask;
22
23use Cwd;
24use Switch; # for switch(val) { case: ; ...}
25use File::Path; # for rmdir and mkdir type functions
26use File::Copy; # for recursive copying of folders but skipping .svn
27use File::Basename;
28
29use strict;
30no strict 'subs'; # allow barewords (eg STDERR) as function arguments
31
32
# ---- Platform detection -------------------------------------------------
my $isWin = 0;
$isWin = 1 if ($^O =~ m/mswin/i);
my $isMac = 0;
$isMac = 1 if ($^O =~ m/macos|darwin/i);

# Need to get the correct gnome-lib-minimal for the OS.
# darwin11* is Lion, darwin12* Mountain Lion; darwin9* and darwin10* are Leopard and Snow Leopard.
# $osversion ends up as "Lion-" for kernel releases 11-19 on darwin and "" everywhere else.
my $osversion = "";
if ($^O eq "darwin") {
    my $kernel_release = `uname -r`; # e.g. 12.x.x
    if ($kernel_release =~ m@^1[1-9](\.)?@i) {
        $osversion = "Lion-";
    }
}

# ---- Path conventions and feature switches --------------------------------
my ($sep, $pathsep) = $isWin ? ("\\", ";") : ("/", ":");
#my $script_ext = $isWin ? ".bat" : ".bash";
my $setup_script = "setup"; # needs to become gs3-setup for GS3
my $use_blat = 0;           # if we ever get blat to send mail/attachments on Windows working, set this to 1

# set to 1 (true) if just diffing, so the model-collection is neither copied over to the
# test collection again nor rebuilt (useful when the collection was built locally once before)
my $use_local_rebuild = 0;
# set to 1 (true) if working with a non-svn model-collection
my $use_static_model = 0;

# a local rebuild implies a static model
$use_static_model = 1 if ($use_local_rebuild and not $use_static_model);

# OS on which the test collections get built (this machine)
my $test_os;
if ($isWin) {
    $test_os = "windows";
}
elsif ($isMac) {
    $test_os = "darwin";
}
else {
    $test_os = "linux";
}
my $model_os = "linux"; # default OS on which the model collections were built

# ---- Environment ------------------------------------------------------------
# TASK_HOME should be the toplevel diffcol folder
if (!defined $ENV{'TASK_HOME'}) {
    $ENV{'TASK_HOME'} = getcwd();
}
if ($isWin) {
    $ENV{'TASK_HOME'} =~ s@\/@\\@g;
    # Need to convert the TASK_HOME path name to resolve a very subtle bug when running task.pl via
    # run-gs2-diffcol.bat, which uses environment.pl's TASK_HOME setting via envi.
    # At that point TASK_HOME is already defined but ends up lowercase, so that entries in
    # archiveinf-doc end up sorted differently when db2txt -sort is applied, compared to
    # when TASK_HOME keeps its case.
    require Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
    $ENV{'TASK_HOME'} = Win32::GetLongPathName($ENV{'TASK_HOME'});
}
## print STDERR "@@@ TASK_HOME: ".$ENV{'TASK_HOME'}."\n";

$ENV{'BIN_DIR'} = filename_concat($ENV{'TASK_HOME'}, "bin");

# we'll be using BLAT to send mail attachments on Windows
my $blat = 0;
if ($use_blat) {
    $blat = filename_concat($ENV{'BIN_DIR'}, "blat", "full", "blat.exe");
}
if ($isWin && $use_blat && ! -e $blat) {
    print STDERR "\n***********************************\n",
                 "No blat.exe found in $blat.\n",
                 "Blat needed to send mail with attachments on Windows.\n",
                 "Extract the blat zip file found in $ENV{'BIN_DIR'}\n",
                 "for your bit architecture and name the folder 'blat'\n",
                 "***********************************\n\n";
    $blat = 0;
}

$ENV{'DATA_DIR'}      = filename_concat($ENV{'TASK_HOME'}, "diffcol-data");
$ENV{'UPLOAD_DIR'}    = filename_concat($ENV{'TASK_HOME'}, "diffcol-reports");
$ENV{'MONITOR_EMAIL'} = "greenstone_team\@cs.waikato.ac.nz"; # need to escape @ sign
$ENV{'GSDL_SMTP'}     = ""; #"smtp.gmail.com";
##print STDERR "@@@ email: ".$ENV{'MONITOR_EMAIL'}."\n";

# control if an existing compiled greenstone is used
# or, if one should be checked out, which revision to checkout from svn
$ENV{'SVN_OPT_REV'} = "-r head";
#$ENV{'GSDLHOME'}=
#$ENV{'GSDL3SRCHOME'}=
103
104
# If the first arg is purely numeric, it's the new envi verbosity param: take it off the array.
# The declaration is kept separate from the conditional assignment, because the behaviour of
# "my $x = ... if COND;" is undefined in Perl.
my $envi_verbose;
if (@ARGV && $ARGV[0] =~ m/^\d+$/) {
    $envi_verbose = shift(@ARGV);
}

# parse arguments
my $action = "all";
my $subaction = ""; # run_test can take subactions: --justdiff and --nosvn
my @collections = ();

# only run_test can take more than one arg
if (scalar(@ARGV) > 1 && $ARGV[0] ne "run_test") {
    print STDERR "**** Wrong number of arguments\n";
    &printusage();
    exit -1;
}

if (scalar(@ARGV) == 0) {
    $action = "all";
}
else {
    # plain regex tests rather than the Switch source filter
    if ($ARGV[0] =~ m/^(-h|--?help|help)$/i) {
        &printusage();
        exit 0;
    }
    elsif ($ARGV[0] =~ m/^(setup_greenstone|run_test|summarise|upload|all)$/) {
        $action = $ARGV[0];
    }
    else {
        print STDERR "**** Bad subcommand.\n";
        &printusage();
        exit -1;
    }

    # run_test action can take a subaction: nosvn|justdiff. It can also take --modelOS (windows|linux|darwin)
    # nosvn: uses the model-collect as static and copies it over to collect, rebuilding what's currently
    #   in model-collect instead of copying it out from the svn model-collect again.
    # justdiff: same as nosvn, but doesn't copy over model-collection to collect, and doesn't rebuild
    #   either of them. Just does the diff part.
    # Every remaining argument that is not a flag (or a flag's value) is a collection name.
    # Arguments are classified one by one, so flags and collection names may come in any order.
    if ($action eq "run_test" && scalar(@ARGV) >= 2) {
        for (my $i = 1; $i < scalar(@ARGV); $i++) { # index 0 is the action itself
            my $arg = $ARGV[$i];

            if ($arg !~ m@^--@) {
                push(@collections, $arg);
                next;
            }

            $subaction = $arg;
            if ($subaction eq "--justdiff") {
                $use_local_rebuild = $use_static_model = 1;
            } elsif ($subaction eq "--nosvn") {
                $use_static_model = 1;
            } elsif ($subaction =~ m/\-\-modelOS/i && defined $ARGV[$i+1] && $ARGV[$i+1] =~ m/windows|linux|darwin/i) {
                $model_os = $ARGV[$i+1];
                $i++; # skip over the model_os value, so it isn't treated as a collection name
                #print STDERR "Model_os specified: $model_os\n";
            } else {
                print STDERR "**** Bad subaction/value: ".$arg."\n";
                &printusage();
                exit -1;
            }
        }

#       foreach my $col (@collections) {
#           print STDERR "Collection: $col\n";
#       }
    }
}
173
174
# check key environment vars are set (only warns: the script carries on regardless)
for my $required_var ('UPLOAD_DIR', 'DATA_DIR', 'MONITOR_EMAIL') {
    next if defined $ENV{$required_var};
    print STDERR "Please set a $required_var for the test in an environment.sh file\n";
    #return 1;
}

if ($ENV{'DATA_DIR'} eq "/") {
    print STDERR "DATA_DIR should not be the fs root\n";
    #return 1;
}

print STDERR "DATA_DIR: $ENV{'DATA_DIR'}\n";
print STDERR "UPLOAD_DIR: $ENV{'UPLOAD_DIR'}\n";

# create an id for this test: the current local date as YYYY.MM.DD
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
$year += 1900;                       # localtime's year counts from 1900
$mon  = sprintf("%02d", $mon + 1);   # localtime's month is 0-based; zero-pad to 2 digits
$mday = sprintf("%02d", $mday);
my $dateid = join(".", $year, $mon, $mday);

print STDERR "Starting test '$dateid'\n";


# http://stackoverflow.com/questions/2149368/how-can-i-loop-through-files-in-a-directory-in-perl
# Build the CLASSPATH from every jar in TASK_HOME/lib. Each jar is prepended, so the final
# classpath lists the jars in reverse glob order, each one followed by the path separator.
my $jar_lib_path = $ENV{'TASK_HOME'}.$sep."lib";
my @files = glob("$jar_lib_path/*.jar"); # /full/path/to/diffcol/lib/*jar
if ($isWin) {
    s@\/@\\@g for @files;
}
$ENV{'CLASSPATH'} = join("", map { $_.$pathsep } reverse @files);
##print STDERR "**** classpath: ".$ENV{'CLASSPATH'}."\n";


# set the location of the full report
my $xmlout = filename_concat($ENV{'DATA_DIR'}, "full-report-$dateid.xml");
##print STDERR "XML: $xmlout\n";
222
# the toplevel folder of the greenstone installation being used
my $greenstone_home="";
# gsdl is the checkout folder and can be greenstone2 or greenstone3
my $gsdl="greenstone2";


# Check if using an existing compiled-up greenstone installation
# and set the greenstone_home location accordingly

if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
    print STDERR "Found existing Greenstone home, will use that instead\n";
    # Prefer GSDLHOME. If only GSDL3SRCHOME is set, fall back to that rather than leaving
    # greenstone_home undefined, since it is later passed to chdir() and filename_concat().
    # NOTE(review): for a GS3 installation the folder to work in may need to be a subfolder
    # of GSDL3SRCHOME - confirm once GS3 support is added.
    $greenstone_home = defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : $ENV{'GSDL3SRCHOME'};
} else {
    $greenstone_home=filename_concat($ENV{'DATA_DIR'}, $gsdl);
}
##print STDERR "GSHOME: $greenstone_home\n";
239
# do the requested action: each action maps onto the ordered list of steps it consists of
my %steps_for_action = (
    'setup_greenstone' => [ \&setup_greenstone ],
    'run_test'         => [ \&run_test ],
    'summarise'        => [ \&summarise ],
    'upload'           => [ \&upload, \&mail_with_report_attached ],
    'all'              => [ \&setup_greenstone, \&run_test, \&summarise,
                            \&upload, \&mail_with_report_attached ],
);

if (exists $steps_for_action{$action}) {
    for my $step (@{ $steps_for_action{$action} }) {
        $step->();
    }
}
261
262##********************************
263
# Prints the command-line usage of this script to STDERR.
sub printusage
{
#   "Run as: $0 (help|setup_greenstone|run_test <--modelOS windows|darwin|linux> <--justdiff|--nosvn> <col1 col2 ...> |summarise|upload|all)\n"
    my @usage_lines = (
        "Run as: $0 (help|setup_greenstone|run_test|summarise|upload|all)\n",
        "where run_test can further take the following optional parameters:\n",
        "\t--modelOS (windows|darwin|linux)\n",
        "\t--justdiff|--nosvn\n",
        "\t<col1 col2 ...>\n",
    );
    print STDERR join("", @usage_lines);
}
273
274#http://stackoverflow.com/questions/7427262/read-a-file-and-save-it-in-variable-using-shell-script
275
# Sets up the Greenstone installation that the tests will run against.
#
# Always wipes and recreates DATA_DIR first (after a 5 second grace period).
# If GSDLHOME or GSDL3SRCHOME is set, the existing installation is used and nothing further
# happens. Otherwise Greenstone 2 is checked out from svn into DATA_DIR/$gsdl and compiled:
#  - on Windows by running makegs2.bat in silent mode,
#  - on Linux/Mac by fetching imagemagick (and, on Mac, ghostscript) binaries and
#    gnome-lib-minimal, then running configure, make and make install in a single bash
#    session that has sourced gnome-lib's devel.bash.
# Compilation errors are appended to DATA_DIR/compilation-errors.
# Exits the script with -1 if the svn checkout or the compilation fails.
# On success, sets the file-level $greenstone_home to DATA_DIR/$gsdl.
sub setup_greenstone
{
    #clean up from previous tests
    print STDERR "about to clean up any old tests (Ctrl-C to cancel)"; # no newline
    for my $i ( 1..5 ) {
        sleep 1; # 1 second
        print STDERR ".";
    }
    print STDERR "\n";

    # http://perldoc.perl.org/File/Path.html
    print STDERR "cleaning up previous tests\n";
    &File::Path::remove_tree($ENV{'DATA_DIR'});

    print STDERR "creating the data dir\n";
    &File::Path::make_path($ENV{'DATA_DIR'}); # works like mkdir -p

    chdir($ENV{'DATA_DIR'});

    # use existing compiled-up greenstone installation, if a GSDLHOME set
    if(defined $ENV{'GSDL3SRCHOME'} || defined $ENV{'GSDLHOME'}) {
        print STDERR "Found existing Greenstone home, will use that instead\n";
        return;
    }

    # Else checkout a GS from svn into DATA_DIR

    #svn checkout of main gsdl directory
    print STDERR "checkout $gsdl:\n";
    my $cmd = "svn co ".$ENV{'SVN_OPT_REV'}." http://svn.greenstone.org/main/trunk/greenstone2 $gsdl";
    ##print STDERR "Checkout CMD: $cmd\n";

    # unlike backticks operator, system() will print the output of the command to the screen as it executes
    # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
    my $status = system "$cmd"; #my $status = `$cmd`;
    if($status != 0) {
        print STDERR "@@@ SVN checkout of $gsdl failed\n";
        exit -1;
    }
    print STDERR "done\n";

    ##print STDERR "$ENV{'DATA_DIR'}$sep$gsdl\n";

    chdir("$ENV{'DATA_DIR'}$sep$gsdl");

    ##print STDERR "@@@ OS: $^O.|".$Config{'archname64'}."|\n";

    if($isWin) {
        print STDERR "Compiling $gsdl using makegs2.bat running in auto (silent) mode\n";

        # we're now in the GS2 folder, call makegs2 with silent param
        $cmd = "makegs2.bat silent 2>> $ENV{'DATA_DIR'}/compilation-errors"; # STDERR is sent to compilation-errors file
        $status = system $cmd;
        if($status != 0) {
            print STDERR "Greenstone compilation on Windows failed\n";
            exit -1;
        }

    } else { # if we're on linux/darwin, need gnome-lib for the correct architecture. And need imagemagick to build imgs in collections

        my $bit_arch=`uname -m`;

        # imagemagick binary
        print STDERR "Getting imagemagick binary\n";

        my $os = $isMac ? "darwin" : "linux";
        my $imagickzip = "imagemagick-$os";

        if($isMac) {
#           $imagickzip .= "-10.5.tar.gz";
            # at present, only the Imagemagick binaries created by Max for darwin work on the Macs

            # Need to ensure gsdl/bin/darwin exists, since the svn exports below write into it.
            # (The "bin" component is required: without it, gsdl/darwin would get created instead.)
            &File::Path::make_path(&filename_concat($ENV{'DATA_DIR'}, $gsdl, "bin", $os));
            $cmd = "svn export http://svn.greenstone.org/main/trunk/binaries/mac/intel/imagemagick bin/darwin/imagemagick";
            $status = system($cmd);
            if($status != 0) {
                print STDERR "@@@ Unable to get imagemagick for darwin\n";
            }

            # need ghostscript mac binary too for pdf to img conversions on mac
            $cmd = "svn export http://svn.greenstone.org/main/trunk/binaries/mac/intel/ghostscript bin/darwin/ghostscript";
            $status = system($cmd);
            if($status != 0) {
                print STDERR "@@@ Unable to get ghostscript for darwin\n";
            }

            # the imagemagick and ghostscript binaries have been set to executable on svn trac now
#           system("chmod -R u+x $ENV{'DATA_DIR'}/$gsdl/bin/darwin/imagemagick/bin/*");
#           system("chmod -R u+x $ENV{'DATA_DIR'}/$gsdl/bin/darwin/ghostscript/bin/*");
        } else { # linux
            my $extension64 = ($bit_arch =~ m/64$/) ? "-x64" : "";
            $imagickzip .= "$extension64.tar.gz";

            # now these next imagemagick steps (and those near the end of this sub) are just for linux, no longer also for mac
            $cmd = "svn export http://svn.greenstone.org/gs2-extensions/imagemagick/trunk/$imagickzip ext/$imagickzip";
            $status = system ($cmd);
            if($status != 0) { # report it, as is done for darwin, but carry on
                print STDERR "@@@ Unable to get imagemagick for linux\n";
            }
            system("cd ext && tar -xvzf $imagickzip");
        }

        # gnomelib binary
        print STDERR "setting up gnome-lib-minimal for compilation\n";

        # To get gnome-lib, need to determine bit architecture of the linux/darwin
        # http://stackoverflow.com/questions/8963400/the-correct-way-to-read-a-data-file-into-an-array
        # $Config{'archname64'} doesn't work on the Ubuntu and the Sys::Info package seems to not be supported
        # well on ActivePerl.
        # But since we know we're on a Linux/Darwin machine at this point, we can just run `uname -m` and other linux cmds

        # osversion will be "Lion-" or ""
        # and assuming all darwin is intel, not ppc!!
        my $gnome_lib_file = $isMac ? "darwin-".$osversion."intel" : "linux";

        $gnome_lib_file .= "-x64" if($bit_arch =~ m/64$/ && !$isMac); # linux only case

        #svn checkout gnome-lib for this linux/darwin
        chdir("$ENV{'DATA_DIR'}$sep$gsdl$sep"."ext"); #cd $DATA_DIR/$gsdl/ext

        ##print STDERR "**** gnomelib: $gnome_lib_file\n";

        # checkout and unpack gnome-lib-minimal

        #svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-linux-x64.tar.gz gl.tar.gz
        $cmd = "svn export http://svn.greenstone.org/gs2-extensions/gnome-lib/trunk/gnome-lib-minimal-".$gnome_lib_file.".tar.gz gl.tar.gz";
        if(system($cmd) != 0) { # compilation below is still attempted and reports its own failure
            print STDERR "@@@ Unable to get gnome-lib-minimal-$gnome_lib_file\n";
        }
        system ("tar -xvzf gl.tar.gz");

        chdir("gnome-lib-minimal");
        ##print STDERR "*** ARCH: $bit_arch\n";

        # need to run source devel.bash on gnome-lib followed by configure, make, make install
        # in one go, in order to preserve the compile environment set up by sourcing devel.bash

        # http://stackoverflow.com/questions/7369145/activating-a-virtualenv-using-a-shell-script-doesnt-seem-to-work
        # http://ubuntuforums.org/showthread.php?t=1932504 linking /bin/sh to bash instead of dash

#       $cmd = "bash -c \"source ./devel.bash && cd ../.. && ./configure --enable-apache-httpd && make && make install\"";
        $cmd = "bash -c \"";

        $cmd .= "source ./devel.bash";
        $cmd .= " && cd ../..";

        #configure
        # $cmd .= " && ./configure";
        $cmd .= " && echo 'configure $gsdl: ' ";
        $cmd .= " && echo '<configure>' >> $xmlout";
        $cmd .= " && ./configure 2>> $ENV{'DATA_DIR'}/compilation-errors"; # configure
        $cmd .= " && echo '</configure>' >> $xmlout";
        $cmd .= " && echo 'done'";

        #make
        $cmd .= " && echo 'make $gsdl: '";
        $cmd .= " && echo '<make>' >> $xmlout";
        $cmd .= " && make 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make
        $cmd .= " && echo '</make>' >> $xmlout";
        $cmd .= " && echo 'done'";

        #make install
        $cmd .= " && echo 'make install $gsdl: '";
        $cmd .= " && echo '<make-install>' >> $xmlout";
        $cmd .= " && make install 2>> $ENV{'DATA_DIR'}/compilation-errors"; # make install
        $cmd .= " && echo '</make-install>' >> $xmlout";
        $cmd .= " && echo 'done'";

        $cmd .= "\""; # close off cmd to bash and run it
        $status = system $cmd; # inspected after this if-else, so that the imagemagick cleanup still happens

        if(!$isMac) { # Linux
            # Moving imagemagick after instead of before compilation, since bin/darwin gets overwritten during compilation
            move("$ENV{'DATA_DIR'}$sep$gsdl$sep"."ext/imagemagick/$os", "$ENV{'DATA_DIR'}$sep$gsdl$sep"."bin/$os/imagemagick"); # http://www.perlmonks.org/?node_id=586537
            unlink "$ENV{'DATA_DIR'}/$gsdl"."/ext/$imagickzip" or warn "Could not unlink ext/$imagickzip: $!";
            &File::Path::remove_tree("$ENV{'DATA_DIR'}$sep$gsdl$sep"."ext/imagemagick"); # the untarred parent folder
        }

    }

    if($status != 0) {
        print STDERR "@@@ Compilation of Greenstone on Linux/Mac failed\n";
        exit -1;
    }

    &getIsisGdl("$ENV{'DATA_DIR'}/$gsdl");

    # set the path to the greenstone_home variable
    $greenstone_home="$ENV{'DATA_DIR'}$sep$gsdl";

}
461
# Downloads the PDFBox extension from trac and unpacks it into the ext folder of
# $greenstone_home. Uses wget + unzip on Windows, curl + tar on Mac and wget + tar on Linux.
# Exits the script with -1 if the download/unpack command fails.
sub getPDFBox
{
    # current revision is 27763, but using "head" works
    # (the same base URL serves both the .zip and the .tar.gz)
    my $pdfbox_url = "http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java";
    #"http://trac.greenstone.org/export/".$ENV{'SVN_OPT_REV'}."/gs2-extensions/pdf-box/trunk/pdf-box-java";

    # now get the PDFBox extension for PDFBox tutorial
    print STDERR "Getting pdfbox from $pdfbox_url:\n";

    chdir($greenstone_home);

    my $fetch_cmd;
    if ($isWin) {
        $fetch_cmd = "setup.bat && cd ext && wget $pdfbox_url.zip && unzip pdf-box-java.zip";
    }
    elsif ($isMac) {
        # need to use curl not wget
        $fetch_cmd = "cd ext && curl $pdfbox_url.tar.gz > pdf-box-java.tar.gz && tar -xzf pdf-box-java.tar.gz";
    }
    else {
        # linux
        $fetch_cmd = "bash -c \"export GSDLHOME=&& source setup.bash && cd ext && wget $pdfbox_url.tar.gz && tar -xzf pdf-box-java.tar.gz\"";
    }

    my $exit_status = system($fetch_cmd);
    unless ($exit_status == 0) {
        print STDERR "@@@ Failed to set up PDFBox\n";
        exit -1; # or proceed to testing other tutorials?
    }
}
488
# Downloads a replacement IsisGdl binary from the caveat-emptor page into the bin/<os>
# folder of the given Greenstone folder and makes it executable.
# Only does anything on non-Windows machines for which `uname -m` ends in "64".
# A failed download is only reported on STDERR.
#   $gsfolder - toplevel folder of the Greenstone installation
sub getIsisGdl {
    my ($gsfolder) = @_;

    return if $isWin;

    my $machine_arch = `uname -m`;
    return unless ($machine_arch =~ m/64$/);

    my $download_cmd;
    if ($isMac) {
        $download_cmd = "cd $gsfolder/bin/darwin && curl http://www.greenstone.org/caveat-emptor/IsisGdl.macleopard > IsisGdl && chmod u+x IsisGdl";
    }
    else { # linux
        $download_cmd = "cd $gsfolder/bin/linux && wget http://www.greenstone.org/caveat-emptor/IsisGdl.bin32 && mv IsisGdl.bin32 IsisGdl && chmod u+x IsisGdl";
    }

    if (system($download_cmd) != 0) {
        print STDERR "Unable to get IsisGdl from caveat page\n";
    }
}
508
509# http://stackoverflow.com/questions/3377879/how-do-i-receive-command-output-immediately
# Imports, builds and diffs the test collections against their model counterparts,
# writing the results as XML into $xmlout.
#
# Works on the collections listed in the file-level @collections, or on every folder found
# in greenstone_home/collect (except "modelcol") when that list is empty.
# Unless $use_local_rebuild is set, the model collections are first svn updated and copied
# into greenstone_home/collect (and greenstone_home/model-collect), and each collection is
# reimported and rebuilt before being diffed.
# Dies if the XML report can't be opened, if the collect folder can't be read, or if a
# collection's building folder can't be moved to index.
sub run_test
{
    my $num_cols = scalar(@collections); # remember the empty case

    if($num_cols == 0) { # deal with all collections
        push (@collections, "");
        # putting the empty string in the array so that the "all collections" case
        # can be handled similarly to how the case of user-specified collections is handled

    } else { # deal with user specified set of collections
        # prefix the directory separator to each collection name
        @collections = map { $sep.$_ } @collections;
    }

    my $pdfbox = &filename_concat($greenstone_home, "ext", "pdf-box");
    if(!-d $pdfbox) {
        &getPDFBox();
    }

    &getIsisGdl("$greenstone_home");

    # The report is (re)created here: 3-argument open, so that the filename is never
    # interpreted as part of the open mode.
    open(my $xml_fh, '>', $xmlout) or die "Could not open xml file $xmlout for writing: $!\n";

    # perform the requested subcommands, outputting xml information
    my $root_element = "test"; # name of the XML report's root element
    print $xml_fh "<$root_element time=\"$dateid\" id=\"$dateid\">\n";

    my ($cmd, $status);
    # make sure that diffcol/model-collect is up to date before copying it over to greenstone-home

    if(!$use_local_rebuild) {
        print $xml_fh "Updating $ENV{'TASK_HOME'}/model-collect:\n";
        for my $col (@collections) {
            $cmd = "svn up $ENV{'TASK_HOME'}/model-collect$col"; #chdir("$ENV{'TASK_HOME'}/model-collect");
            $status = system "$cmd";
        }
    }

    # go to whichever greenstone_home we're using
    chdir($greenstone_home);

    # get svn info
    print STDERR "getting svn info: $xmlout\n";
    print $xml_fh "<svn-info>\n";
    &run_and_print_cmd("svn info", $xml_fh);
    print $xml_fh "</svn-info>\n";
    print STDERR "done\n";

    if(!$use_local_rebuild) {

        #make two copies of the model-collect directory in gsdl
        #one to be rebuilt and one as the basis for comparison
        #strip both of all .svn directories

        #copy the model collections to the collect folder to be rebuilt
        print STDERR "installing test collections and model collections to new $gsdl installation... ";

        #clean up
        if(-d "collect") {
            for my $col (@collections) {
                if(-d "collect$col") {
                    &File::Path::remove_tree("collect$col") || die "Error could not delete collect: $!";
                }
            }
        }

        if($use_static_model) {
            for my $col (@collections) {
                #copy to collect and strip .svn subfolders
                &File::Path::make_path("collect$col"); # create the collect folder and copy contents from static model-collection across
                &copy_recursively("model-collect$col", "collect$col", ".svn");
            }

        } else { # the default situation: where we check out the model-collect from svn
            for my $col (@collections) {
                &File::Path::remove_tree("model-collect$col");

                #copy to collect and strip .svn subfolders
                &File::Path::make_path("collect$col"); # create the folder and copy contents across
                &copy_recursively(&filename_concat("$ENV{'TASK_HOME'}","model-collect$col"), "collect$col", ".svn");

                #make the model copy
                &File::Path::make_path("model-collect$col");
                &copy_recursively("collect$col", "model-collect$col"); # copy contents across
            }
        }

        print STDERR "done\n";
    }

    #for each collection, import, build and diff with its model counterpart

    # if working with all collections, read the list of collections from the folders in collect
    if($num_cols == 0) {
        @collections = (); # get rid of the empty string put in the array to represent "all collections"

        opendir my($collect_handle), "collect" or die "Could not open dir $greenstone_home/collect: $!";

        for my $collection (readdir $collect_handle) {
            next if ($collection eq "." || $collection eq "..");
            next if ($collection eq "modelcol");
            push(@collections, $collection);
        }
        closedir $collect_handle; # close handle to collect dir
    }

    for my $collection (@collections) {

        # next if ($collection ne "Demo-Lucene"); ## TEMPORARY, FOR TESTING THIS SCRIPT
        # next if ($collection !~ m/OAI|METS|DSpace|MGPP|Lucene/); ## TEMPORARY, FOR TESTING THIS SCRIPT

        #getting just the basename of the collection would have been necessary had we not cd-ed into $gsdl
        $collection =~ s@^[\\/]@@g; # take the dir-sep prefix away again for user-specified collection names

        # Keep two forms of the name. The raw name is for Perl's own filesystem calls
        # (-d, remove_tree, move), which take the name literally. The space-escaped
        # name is only for the commands that get passed through a shell.
        my $shell_collection = $collection;
        $shell_collection =~ s@ @\\ @g; #escape the filename (in case of space)

        if (! -d "collect$sep$collection") {
            print STDERR "Collection $collection does not exist\n";
            next;
        }

        print STDERR "*** Found collection $collection\n";
        print $xml_fh "<collection-test name=\"$collection\">\n";

        if(!$use_local_rebuild) {
            #import
            # Ensure the OIDtype for importing is hash_on_full_filename
            # "to make document identifiers more stable across upgrades of the software,
            # although it means that duplicate documents contained in the collection are
            # no longer detected automatically."
            print STDERR "$collection - Importing:\n";
            print $xml_fh "<import>\n";
            &run_build_script("import.pl -removeold $shell_collection"); #-OIDtype hash_on_full_filename
            print $xml_fh "</import>\n";
            print STDERR "done\n";

            #build
            print STDERR "$collection - Building:\n";
            print $xml_fh "<build>\n";
            &run_build_script("buildcol.pl -removeold $shell_collection");
            print $xml_fh "</build>\n";
            print STDERR "done\n";

            #rename the intermediate 'building' directory 'index'
            print STDERR "$collection - Move \"building\" to \"index\"... ";
            my $index = &filename_concat("collect", $collection, "index");
            my $building = &filename_concat("collect", $collection, "building");
            &File::Path::remove_tree($index);
            # Renaming Directories, http://www.perlmonks.org/?node_id=177421
            move($building, $index) or die "copy failed: $!"; # File::Copy::move
            print STDERR "done\n";
        }
        #diffcol
        print STDERR "$collection - Diffing:\n";
        my $diffcol_dir = &filename_concat($ENV{'TASK_HOME'},"diffcol");

        # help diffcol to know on what os the model cols were generated
        # and what os this test machine is (on which the test cols will be generated)
        $cmd = "diffcol.pl -testos $test_os -modelos $model_os -output xml -verbosity 10 $shell_collection"; # need to run with ./diffcol.pl if bash script
        &run_diff_script($cmd, $xml_fh, $diffcol_dir);

        chdir($greenstone_home); # this is actually where we are
        print STDERR "done\n";
        print $xml_fh "</collection-test>\n";
    }

    print $xml_fh "</$root_element>\n";
    close($xml_fh);

    print STDERR "done\n";
}
681
682##***************************************************************
# Wraps the given diff command so that, in a single shell session, GSDLHOME is cleared, the
# Greenstone setup script in the current directory (greenstone_home) is run and the diff
# command is then launched from inside $diffcol_dir. The wrapped command's output is
# passed on to the filehandle through run_and_print_cmd, whose result is returned.
#   $cmd         - the diff command line (script name followed by its arguments)
#   $fh          - filehandle that the command's output is written to
#   $diffcol_dir - folder containing the diff script
sub run_diff_script {
    my ($diff_cmd, $out_fh, $diffcol_dir) = @_;

    # we're in greenstone_home now
    my $wrapped_cmd = $isWin
        # Windows: need to prefix cmd /c (or /k) as necessary
        ? qq{cmd /c "set GSDLHOME=&& $setup_script.bat && cd $diffcol_dir && perl -S $diff_cmd"}
        : qq{bash -c "export GSDLHOME=&& source $setup_script.bash && cd $diffcol_dir && ./$diff_cmd"};
##  print STDERR "@@@@ Going to call command: $wrapped_cmd\n";

    return &run_and_print_cmd($wrapped_cmd, $out_fh);
}
698
# Wraps the given build command so that, in a single shell session, GSDLHOME is cleared and
# the Greenstone setup script in the current directory (greenstone_home) is run before the
# build command itself. Runs it with system() and returns system's status.
#   $cmd - the build command line, e.g. an import.pl or buildcol.pl invocation
#   $fh  - accepted but not used: the command's output is not captured
sub run_build_script {
    my ($build_cmd, $unused_fh) = @_;

#   chdir($greenstone_home);
    # we are in $greenstone_home already, can directly run the build cmd on the collection
    my $wrapped_cmd;
    if ($isWin) {
        # Need to prefix cmd /c (or /k) as necessary
        $wrapped_cmd = qq{cmd /c "set GSDLHOME=&& $setup_script.bat && perl -S $build_cmd"};
    }
    else {
        $wrapped_cmd = qq{bash -c "export GSDLHOME=&& source $setup_script.bash && $build_cmd"};
    }
##  print STDERR "@@@@ Going to call command: $wrapped_cmd\n";

    #return &run_and_print_cmd($wrapped_cmd, $unused_fh); # doesn't work on cmds chained with bash -c
    return system($wrapped_cmd);
}
716
717
718# http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1activeperl%20sys::info
719# http://stackoverflow.com/questions/1477500/how-do-i-get-the-output-of-an-external-command-in-perl
# Runs the given command exactly once.
#   $cmd - the command line to run (passed through the shell)
#   $fh  - optional filehandle. If given, everything the command prints to its STDOUT is
#          copied to this filehandle. If not given, the command is run with system(), so
#          that its output appears on the screen as it executes, and a non-zero status is
#          reported on STDERR.
# Returns true if the command exited successfully, false otherwise.
# Dies if a filehandle was given but the command could not be started.
sub run_and_print_cmd {
    my ($cmd, $fh) = @_;

    if(!defined $fh) { # no filehandle, so just need to print to stdout

        # unlike backticks operator, system() will print the output of the command to the screen as it executes
        # http://stackoverflow.com/questions/758611/how-to-flush-output-in-backticks-in-perl?rq=1
        my $status = system $cmd;
        if($status != 0) {
            print STDERR "ERROR ($status) running $cmd: $!\n";
        }
        return ($status == 0);
    }

    # Only open the pipe when there's a filehandle to copy the output to: opening the pipe
    # starts the command, so doing it in the no-filehandle case too would run the command twice.
    open(my $pin, '-|', $cmd) or die "unable to run cmd $cmd: $!";
    while (my $line = <$pin>) {
        print $fh $line;
#       print STDOUT $line; # if also printing cmd output to STDOUT
    }

    # closing a pipe waits for the command to finish and returns false if it exited with non-zero status
    return close($pin);
}
743
# Joins the given path components with the platform's directory separator ($sep).
# The first component is left out if it is undefined or contains no non-whitespace
# character. A single trailing slash or backslash is removed from the result.
sub filename_concat {
    my ($head, @rest) = @_;

    # only keep the first component if it is defined and not blank
    my $keep_head = (defined $head && $head =~ /\S/);
    my @components = $keep_head ? ($head, @rest) : @rest;

    my $joined = join($sep, @components);
    $joined =~ s/[\\\/]$//; # remove a trailing slash if any
    return $joined;
}
758
759
760# The following code is from
761# http://stackoverflow.com/questions/227613/how-can-i-copy-a-directory-recursively-and-filter-filenames-in-perl
762# It also states that "Perl's File::Copy is a bit broken (it doesn't copy permissions on Unix systems, for example)"
# Copies the contents of $from_dir into the already existing $to_dir, recursing into
# subfolders (which are created in the destination where they don't exist yet).
#   $from_dir - the folder to copy from
#   $to_dir   - the folder to copy into
#   $regex    - optional: entries whose name matches this pattern are skipped
# Dies if a folder can't be opened or created, or if a file can't be copied.
sub copy_recursively {
    my ($from_dir, $to_dir, $regex) = @_;

    opendir(my $dir_handle, $from_dir) or die "Could not open dir '$from_dir': $!";

#   if(-d !$to_dir) {
#       mkdir $to_dir or die "mkdir '$to_dir' failed: $!" if not -e $to_dir;
#   }

    my @names = readdir($dir_handle);

    ENTRY:
    foreach my $name (@names) {
        next ENTRY if ($name eq "." or $name eq "..");
        next ENTRY if (defined $regex and $name =~ /$regex/);

        my ($src, $dst) = ("$from_dir/$name", "$to_dir/$name");

        # plain files are simply copied across
        unless (-d $src) {
            copy($src, $dst) or die "copy failed: $!";
            next ENTRY;
        }

        # folders are created at the destination if need be, then descended into
        if (!-e $dst) {
            mkdir($dst) or die "mkdir '$dst' failed: $!";
        }
        copy_recursively($src, $dst, $regex);
    }

    closedir($dir_handle);
    return;
}
786
# Transforms the full XML report ($xmlout) with xalan into
#  - a summarised XML report, DATA_DIR/report-$dateid.xml
#  - a summarised HTML report, DATA_DIR/report-$dateid.html (generated from the XML summary)
# and prints to STDERR whether the tests passed, as worked out by passed-or-not.xsl.
# A failing transformation is reported on STDERR rather than being announced as "done".
sub summarise {

    my $xalan = "java org.apache.xalan.xslt.Process";
    my $xsl_dir = "$ENV{'TASK_HOME'}/xsl";
    my $report_base = "$ENV{'DATA_DIR'}/report-$dateid";

    # make a summarised Xml report
    print STDERR "Summarizing the xml report... ";
    my $cmd = "$xalan -IN $xmlout -XSL $xsl_dir/xml-report.xsl -OUT $report_base.xml";
    my $status = system($cmd);
    print STDERR ($status == 0 ? "done\n" : "FAILED (status $status)\n");

    # make a summarised HTMl report
    print STDERR "Creating an html summary report... ";
    $cmd = "$xalan -IN $report_base.xml -XSL $xsl_dir/html-report.xsl -OUT $report_base.html";
    $status = system($cmd);
    print STDERR ($status == 0 ? "done\n" : "FAILED (status $status)\n");

    # Print whether the tests passed or failed
    print STDERR "*******************************************\n";
    print STDERR "Checking if successful... \n";
    $cmd = "$xalan -IN $xmlout -XSL $xsl_dir/passed-or-not.xsl";
    my $result = `$cmd`; # the stylesheet's output is the verdict itself
    if ($? != 0 || !defined $result) {
        print STDERR "*** Could not work out the result: xalan failed (status $?)\n";
        $result = "" unless defined $result;
    }
    print STDERR "result: $result\n";
    print STDERR "*******************************************\n";
}
809
# Stage this run's reports in UPLOAD_DIR and send the html report(s) to nzdl.
#
# Steps:
#   1. UPLOAD_DIR is wiped (if present) and recreated, so that only the reports
#      of this run are ever uploaded.
#   2. Every *.xml, *.htm and *.html file directly inside DATA_DIR is copied to
#      UPLOAD_DIR as "diffcol-<os>-[<osversion>]<name>". On Windows a .html
#      extension is shortened to .htm on the way.
#   3. If the identity file IDENTITY_DIR/upload-<SNAPSHOT_MODE>[.ppk] exists,
#      the *.htm* files in UPLOAD_DIR are tarred and piped over ssh (plink on
#      Windows) to nzdl@puka.cs.waikato.ac.nz.
#
# IDENTITY_DIR defaults to HOME/.ssh and SNAPSHOT_MODE to "caveat" when they are
# not already in the environment; both, and IDENTITY_FILE, are written to %ENV.
# Dies only if DATA_DIR cannot be opened; copy and upload failures are reported
# on STDERR. Takes no arguments; the return value is not meaningful.
sub upload {
    my $upload_dir = $ENV{'UPLOAD_DIR'};
    my $data_dir   = $ENV{'DATA_DIR'};

    # If the upload dir already existed, clear it of contents: we don't want to
    # keep previous days' reports (else they would have to be cleared manually
    # at some point). Just generate the set of reports for this run of
    # task.pl upload and upload those.
    if (-d $upload_dir) {
        File::Path::remove_tree($upload_dir);
    }
    # recreate the upload directory
    File::Path::make_path($upload_dir);

    # Reports are uploaded under names that identify the OS they came from,
    # e.g. diffcol-linux-..., or diffcol-darwin-Lion-... for Lion/Mountain Lion
    my $os_prefix = "diffcol-" . $^O . "-" . ($isMac ? $osversion : "");

    # copy all *.xml and *.html files across to UPLOAD_DIR
    opendir my($dh), $data_dir or die "Could not open DATA_DIR: $!";
    for my $entry (readdir $dh) {
        next if ($entry !~ m/(\.xml|\.html?)$/);

        # html files uploaded from windows to nzdl are empty for no reason.
        # Uploading as htm seems to work, so rename the extension on Windows.
        my $os_entry = $entry;
        $os_entry =~ s/\.html$/.htm/ if $isWin;
        $os_entry = $os_prefix . $os_entry;

        # get the absolute path to the original file before copying it over
        my $source = &filename_concat($data_dir, $entry);
        my $target = "$upload_dir$sep$os_entry";

        # copy it over with its new name
##      print STDERR "@@@@ copying across $source to $upload_dir as $os_entry\n";
        copy($source, $target)
            or print STDERR "*** Failed to copy $source to $target: $!\n";
    }
    closedir $dh;


    # Upload the html file to puka
    # default identity dir
    if ( ! exists $ENV{'IDENTITY_DIR'} ) {
        $ENV{'IDENTITY_DIR'} = "$ENV{'HOME'}${sep}.ssh"; # "C:\\Research\\Nightly\\tools\\keys" on windows, see environment.pl
    }
    if ( ! exists $ENV{'SNAPSHOT_MODE'} ) {
        $ENV{'SNAPSHOT_MODE'} = "caveat";
    }

    # use the correct key for uploading (plink on Windows needs a .ppk key)
    $ENV{'IDENTITY_FILE'} = "$ENV{'IDENTITY_DIR'}${sep}upload-" . $ENV{'SNAPSHOT_MODE'} . ($^O eq "MSWin32" ? ".ppk" : "");
    if (-f $ENV{'IDENTITY_FILE'}) {
        # if you need to touch the file on windows: http://stackoverflow.com/questions/51435/windows-version-of-the-unix-touch-command

        # the report we want to upload is actually just os-diffcol-report-$dateid.html
        my $command = "cd \"$upload_dir\" && tar -c *.htm* | ";
        $command .= ($^O eq "MSWin32" ? "plink" : "ssh");
        $command .= " -T -i \"$ENV{'IDENTITY_FILE'}\" nzdl\@puka.cs.waikato.ac.nz";
        #print "$command\n";
        my $status = system("$command");
        if ($status != 0) {
            print STDERR "*** Failed to upload test report to nzdl $status\n";
        }
    } else {
        print STDERR "*** Cannot upload the test report to nzdl from this machine\n";
    }

    print STDERR "Finished uploading\n";
}
876
877# Sending emails with perl: http://learn.perl.org/examples/email.html
878# Sending email attachments with perl: http://www.perlmonks.org/?node_id=19430
879# Sadly none of the packages are installed by default and use of MIME::Lite is discouraged
# Mail the html summary report to MONITOR_EMAIL if the regression tests failed.
#
# The pass/fail verdict comes from running $xmlout through xsl/passed-or-not.xsl;
# anything other than "yes" (ignoring surrounding whitespace) counts as failure.
# On failure:
#   - Windows: blat is used, but only when $use_blat, $blat and GSDL_SMTP are
#     all set; otherwise nothing is sent and the report location is printed.
#   - elsewhere: mutt sends DATA_DIR/report-$dateid.html as an attachment; when
#     mutt is not installed a plain message without attachment is sent with mail.
#
# Uses the file-level $xmlout, $gsdl, $dateid, $isWin, $use_blat and $blat and
# the TASK_HOME, DATA_DIR, MONITOR_EMAIL and GSDL_SMTP environment variables.
# Takes no arguments; the return value is not meaningful.
sub mail_with_report_attached
{
    # email out with report attached, if the tests failed
    print STDERR "Checking if successful... \n";
    my $cmd = "java org.apache.xalan.xslt.Process -IN $xmlout -XSL $ENV{'TASK_HOME'}/xsl/passed-or-not.xsl";
    my $result = `$cmd`;
    $result = "" unless defined $result; # backticks give undef if java could not be started

    print STDERR "result: $result\n";

    # Compare without surrounding whitespace, so that a trailing newline in
    # the xalan output is not mistaken for a test failure
    my $verdict = $result;
    $verdict =~ s/^\s+//;
    $verdict =~ s/\s+$//;

    if ($verdict ne "yes") {
        my $msg = "$gsdl regression test for $dateid failed";
        my $subject = "Regression Test Failed";
        my $attach_file = &filename_concat($ENV{'DATA_DIR'}, "report-$dateid.html");

        my $mail_status; # 0 if the mail command succeeded, non-zero otherwise
        my $attached = 1; # whether the report went out as an attachment

        if ($isWin) {
            if ($use_blat && $blat && $ENV{'GSDL_SMTP'}) {
                # http://stackoverflow.com/questions/709635/sending-mail-from-batch-file
                #blat -to user@example.com -server smtp.example.com -f user@example.com -subject "subject" -body "body"

                # need to install blat on windows
                $cmd = "$blat -to $ENV{'MONITOR_EMAIL'} -server $ENV{'GSDL_SMTP'} -f $ENV{'MONITOR_EMAIL'} -attach \"$attach_file\" -subject \"$subject\" -body \"$msg\"";
                $mail_status = system($cmd);
            }
            else {
                $mail_status = 1; # nothing was sent, so treat it like a failed mail command
                print STDERR "********************************************\n";
                if ($use_blat) {
                    print STDERR "Need blat and SMTP set to send mail attachment\n";
                } else {
                    print STDERR "Not set up to send mail on Windows\n";
                }
                print STDERR "Inspect report at: $attach_file\n";
                print STDERR "********************************************\n";
            }
        } else { # linux
            my $status = system("command -v mutt > /dev/null 2>&1;"); #better way of doing "which mutt"

            if ($status != 0) { # mutt doesn't exist, can't send attachments, so send simple email
                $attached = 0;
                $cmd = "echo '$msg.' | mail -s '$subject' $ENV{'MONITOR_EMAIL'}";

                print STDERR "********************************************\n";
                print STDERR "No mutt installed, unable to mail attachment\n";
                print STDERR "Inspect report at: $attach_file\n";
                print STDERR "********************************************\n";
            } else {
                $cmd = "echo '$msg' | mutt -a '$attach_file' -s '$subject' -- $ENV{'MONITOR_EMAIL'}";
            }

            # run the mail command
            $mail_status = system($cmd);
        }

        if ($mail_status != 0) {
            # report the status of the mail attempt itself, not a stale $?
            print STDERR "*** Unable to send email: $mail_status\n";
        }
        elsif ($attached) {
            print STDERR "Sent mail with report attached.\n";
        }
        else {
            print STDERR "Sent mail without the report attached.\n";
        }
    } else {
        print STDERR "********************************************\n";
        print STDERR "Tests were successful. Not sending mail.\n";
        print STDERR "********************************************\n";
    }
}
947
948# The old version of this program contained the following, consisting of 1 line of active code:
949
950 # Invoke as: sjmc@br:/research/sjm84/envi/bin$ ./envi diffcol summarise
951 # Doing so will call this pl file and pass in "summarise" in ARGV
952 # This pl file will in turn call the task executable in this folder
953 # passing in "summarise" as a parameter.
954#system("/bin/bash -c \"../etc/tasks/diffcol/task @ARGV\"");
955
956 ##system("/bin/bash -c \"./task @ARGV\"");
957 ##print STDERR "/bin/bash -c ../etc/tasks/diffcol/task @ARGV"
958
Note: See TracBrowser for help on using the repository browser.