source: gs2-extensions/parallel-building/trunk/src/perllib/ParallelInexport.pm@ 27179

Last change on this file since 27179 was 27179, checked in by davidb, 11 years ago

Mods to allow code to run with Greenstone3

File size: 3.9 KB
Line 
1###########################################################################
2#
3# ParallelInexport.pm -- useful class to support parallel_import.pl
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ParallelInexport;
27
28use strict;
29
30# Randomize the order of files in the filelist
31use List::Util qw( shuffle );
32
# Index the files in parallel, using the MPI farmer to farm the work off to
# multiple processes.
# [hs, 1 july 2010]
#
# Arguments:
#   $jobs       - number of processes; handed to mpirun as its "-n" value
#   $epoch      - passed through unchanged to mpiimport
#   $importdir  - directory prefixed to each filename when it is checked
#                 against $block_hash->{'file_blocks'}
#   $block_hash - hash ref; the keys of {'all_files'} are the candidate
#                 files, the keys of {'file_blocks'} are files to leave out
#   $collection - passed through unchanged to mpiimport
#   $site       - optional; when defined GSDL3HOME is used instead of
#                 GSDLHOME and (if non-empty) it is passed as the final
#                 argument to mpiimport
#
# Everything mpirun writes to its STDOUT is echoed to our STDOUT.
#
# Dies if the tmp directory or the file list cannot be written, if the
# relevant home environment variable is not set, or if mpirun cannot be
# started. An unsuccessful mpirun exit is only reported on STDERR.
sub farm_out_processes
{
    my ($jobs, $epoch, $importdir, $block_hash, $collection, $site) = @_;

    my $tmp_dir_path = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, 'tmp');
    if (!-d $tmp_dir_path)
    {
        if (!mkdir($tmp_dir_path, 0777))
        {
            # Remember the mkdir error before -d overwrites $!. The second
            # -d test tolerates another process having created the
            # directory in the meantime.
            my $mkdir_error = $!;
            if (!-d $tmp_dir_path)
            {
                die("Couldn't create directory $tmp_dir_path: $mkdir_error");
            }
        }
    }

    # create the list of files to import
    my $overwrite = 1;
    my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt");
    # - if the file is already there (which it should be during testing) then
    #   don't regenerate. This is especially important for imports of 1 million
    #   documents as just the directory scan can take several hours.
    # - note that $overwrite is currently hard-wired on, so the list is
    #   always regenerated; set it to 0 to reuse an existing list.
    if ($overwrite || !-f $tmp_filelist)
    {
        open(my $filelist, '>', $tmp_filelist)
            or die("Couldn't write file list $tmp_filelist: $!");
        # Randomize the order of the files in the list
        foreach my $filename (shuffle(keys %{$block_hash->{'all_files'}}))
        {
            my $full_filename = &util::filename_cat($importdir, $filename);
            if ((! exists $block_hash->{'file_blocks'}->{$full_filename})
                && ($filename !~ m/metadata\.xml$/))
            {
                print $filelist "$filename\n";
            }
        }
        # buffered write errors (e.g. disk full) only surface at close
        close($filelist)
            or die("Couldn't finish writing file list $tmp_filelist: $!");
    }

    # Determine if we've been provided a mpi.conf file to indicate the other
    # machines (slave nodes) this parallelizable process should run on
    my $mpi_conf_path = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, 'mpi.conf');
    my @mpi_flags = ();
    if (-f $mpi_conf_path)
    {
        print STDERR " ***** CLUSTER MODE *****\n";
        push(@mpi_flags, '-machinefile', $mpi_conf_path);
        #push(@mpi_flags, '-nolocal');
    }
    else
    {
        print STDERR " ***** SINGLE COMPUTER MODE *****\n";
    }
    push(@mpi_flags, '--show-progress', '--timestamp-output', '--verbose');

    # invoke the farmer to start processing the files
    my $home_var = (defined $site) ? 'GSDL3HOME' : 'GSDLHOME';
    my $gsdlhome = $ENV{$home_var};
    if (!defined $gsdlhome)
    {
        # Without this the remaining arguments would silently shift along
        die("Environment variable $home_var is not set");
    }

    # Both executables are expected to be found through PATH
    my $farmer_exe = 'mpiimport';

    # The command is kept as a list so that no shell is involved and paths
    # containing spaces or shell metacharacters reach mpirun intact.
    my @mpi_cmd = ('mpirun', @mpi_flags, '-n', $jobs, $farmer_exe,
                   $tmp_filelist, $epoch, $gsdlhome, $collection);
    if (defined $site && $site ne '')
    {
        push(@mpi_cmd, $site);
    }
    print STDERR "MPI Command: \"" . join(' ', @mpi_cmd) . "\"\n";

    open(my $mpi_out, '-|', @mpi_cmd) or die("Couldn't Execute MPI: $!");
    while ( defined( my $line = <$mpi_out> ) )
    {
        chomp($line);
        print "$line\n";
    }
    # close() on a piped handle waits for the child. It returns false on a
    # system error ($! set) or when the child did not exit with status zero
    # ($! is 0 and $? holds the wait status).
    if (!close($mpi_out))
    {
        my $reason = $! ? "error closing pipe: $!" : "wait status $?";
        print STDERR "Warning: mpirun did not finish cleanly ($reason)\n";
    }
}
110
1111;
Note: See TracBrowser for help on using the repository browser.