source: gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl@ 28764

Last change on this file since 28764 was 28764, checked in by jmt12, 10 years ago

Adding microsecond timing messages

  • Property svn:executable set to *
File size: 6.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# Enhance the DSpace media filter component with parallel processing
6# capability. Currently only processes media of dc.type "Video" (which will
7# be manually assigned to the *.TS files produced by ReplayMe!)
8#
9# Copyright (C) 2012 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package parallel_dspace_filtermedia;
28
29# Pragma
30use strict;
31
# Compile-time setup: verify the Greenstone environment is configured and
# put the Greenstone library directories (and those of any installed
# extensions listed in GSDLEXTS) at the front of @INC.
BEGIN
{
  # Both variables must be present before anything else is attempted
  foreach my $required_variable ('GSDLHOME', 'GSDLOS')
  {
    die $required_variable . " not set\n" unless defined $ENV{$required_variable};
  }

  # The core installation comes first, followed by each extension in the
  # order listed. Every directory is unshifted, so later entries end up
  # ahead of earlier ones in @INC.
  my @library_roots = ($ENV{'GSDLHOME'});
  if (defined $ENV{'GSDLEXTS'})
  {
    foreach my $extension (split(/:/, $ENV{'GSDLEXTS'}))
    {
      push(@library_roots, "$ENV{'GSDLHOME'}/ext/$extension");
    }
  }

  foreach my $library_root (@library_roots)
  {
    foreach my $subdirectory ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts')
    {
      unshift(@INC, "$library_root/$subdirectory");
    }
  }
}
52
53# Modules
54use Time::HiRes qw( gettimeofday tv_interval );
55
# Print the command line usage to STDERR and terminate the script.
#
# @param $message  optional description of what was wrong with the command
#                  line; when given it is printed (prefixed with "Error! ")
#                  ahead of the usage text
#
# Never returns. The exit status is 1 when an error message was supplied, so
# calling shells and scripts can detect the failure, and 0 when the usage
# text was requested without an error.
sub printUsage
{
  my ($message) = @_;
  my $exit_status = 0;
  if (defined $message)
  {
    print STDERR 'Error! ' . $message . "\n";
    $exit_status = 1;
  }
  print STDERR 'Usage: parallel_dspace_filtermedia.pl -workers <int> -dspacehome <path> [-debug]' . "\n\n";
  exit($exit_status);
}
66
# Format a gettimeofday() pair as "seconds.microseconds". The microsecond
# part is zero padded to six digits so that, for example, 50 microseconds
# past the second reads as ".000050" rather than ".50".
sub _formatTimestamp
{
  my ($seconds, $microseconds) = @_;
  return sprintf('%d.%06d', $seconds, $microseconds);
}

# Run an external command through the shell, discarding whatever it writes
# to standard output. Echoes the command first when $debug is true, and
# warns on STDERR if the command could not be started, was killed by a
# signal, or exited with a non-zero status. Returns 1 on success, 0 otherwise.
sub _runCommand
{
  my ($command, $debug) = @_;
  if ($debug)
  {
    print '[DEBUG: ' . $command . "]\n";
  }
  `$command`;
  # Capture both immediately - later calls may overwrite them
  my ($status, $os_error) = ($?, "$!");
  if (-1 == $status)
  {
    print STDERR 'Warning! Failed to execute command (' . $os_error . '): ' . $command . "\n";
    return 0;
  }
  if (0 != $status)
  {
    my $reason = ($status & 127) ? 'was killed by signal ' . ($status & 127) : 'exited with status ' . ($status >> 8);
    print STDERR 'Warning! Command ' . $reason . ': ' . $command . "\n";
    return 0;
  }
  return 1;
}

# Entry point. Parses the command line, asks DSpace (via metadata-export)
# for the handles of all items whose exported metadata row mentions "Video",
# then runs the media filter over those items - through mpirun when one or
# more workers were requested, or one item at a time otherwise. Start/end
# timestamps and the elapsed time are printed to STDOUT.
#
# Arguments are read from @ARGV:
#   -dspacehome <path>  required; must be an existing directory
#   -workers <int>      optional; number of workers, defaults to 0 (serial)
#   -debug              optional; echo each external command before it runs
#
# Invalid arguments end the script via printUsage(). Dies if the exported
# metadata cannot be read or the file list cannot be written.
sub main
{
  my ($start_s, $start_u) = gettimeofday();
  print '[' . _formatTimestamp($start_s, $start_u) . '] Parallel Media Filter started: ' . `date` . "\n";

  # 1. Initialization
  my $dspace_home = '';
  my $worker_count = 0;
  my $debug = 0;
  my @arguments = @ARGV;
  while (@arguments)
  {
    my $argument = shift(@arguments);
    if ('-dspacehome' eq $argument)
    {
      # The value may be missing altogether if the flag came last
      $dspace_home = shift(@arguments);
      if (!defined $dspace_home || !-d $dspace_home)
      {
        printUsage('DSpace home path given doesn\'t exist or isn\'t a directory');
      }
    }
    elsif ('-workers' eq $argument)
    {
      $worker_count = shift(@arguments);
      if (!defined $worker_count || $worker_count !~ /^\d+$/)
      {
        printUsage('Worker count must be an integer');
      }
    }
    elsif ('-debug' eq $argument)
    {
      $debug = 1;
    }
    else
    {
      printUsage('Unrecognized argument: ' . $argument);
    }
  }
  if ('' eq $dspace_home)
  {
    printUsage('Path to dspace home required!');
  }

  # 2. Run metadata export to get the identifiers of video items in the
  #    collection
  print ' * Retrieving the list of video item identifiers...' . "\n";
  my @video_item_identifiers;
  my $metadata_file_path = $dspace_home . '/log/metadata-' . time() . '.csv';
  my $metadata_export_cmd = '"' . $dspace_home . '/bin/dspace" metadata-export -f "' . $metadata_file_path . '"';
  _runCommand($metadata_export_cmd, $debug);
  open(my $metadata_fh, '<:utf8', $metadata_file_path)
    or die('Error! Failed to open metadata file for reading: ' . $metadata_file_path . ' (' . $! . ')');
  while (my $line = <$metadata_fh>)
  {
    # A row is of interest if it carries a handle URL followed, later on the
    # same line, by a quoted "Video" value; the handle is the identifier
    if ($line =~ m{"http://hdl\.handle\.net/([^"]+)",.*"Video"})
    {
      my $identifier = $1;
      push(@video_item_identifiers, $identifier);
      if ($debug)
      {
        print '[DEBUG: parsed identifier: ' . $identifier . "]\n";
      }
    }
  }
  close($metadata_fh);
  unlink($metadata_file_path);

  if ($worker_count > 0)
  {
    print " - Parallel processing video files...\n";
    # 3. Write the identifiers to the file list
    my $file_list_path = $dspace_home . '/log/filelist' . time() . '.txt';
    open(my $file_list_fh, '>:utf8', $file_list_path)
      or die('Error! Failed to open file for writing: ' . $file_list_path . ' (' . $! . ')');
    foreach my $video_item_identifier (@video_item_identifiers)
    {
      print $file_list_fh $video_item_identifier . "\n";
    }
    # Buffered write errors only surface at close, and a truncated list
    # would silently drop items from processing
    close($file_list_fh)
      or die('Error! Failed to write file: ' . $file_list_path . ' (' . $! . ')');

    # 4. Invoke mpidspacemediafilter (via mpirun) with the file list created
    #    above. mpirun is told how many processes to spawn: one per worker
    #    plus one more

    # Determine if we've been provided a mpi.conf file to indicate the other
    # machines (slave nodes) this parallelizable process should run on
    my $mpi_conf_path = $dspace_home . '/mpi.conf';
    my $mpi_flags = '--show-progress --timestamp-output ';
    if (-f $mpi_conf_path)
    {
      print STDERR " ***** CLUSTER MODE *****\n";
      $mpi_flags .= '-machinefile "' . $mpi_conf_path . '" ';
      #$mpi_flags .= '-nolocal ';
    }
    else
    {
      print STDERR " ***** SINGLE COMPUTER MODE *****\n";
    }
    # Excessive force! Ensure we bind to the correct network interface
    $mpi_flags .= '--mca btl tcp,sm,self --mca btl_tcp_if_include eth0 ';

    my $mpi_cmd = 'mpirun ' . $mpi_flags . ' -np ' . ($worker_count + 1) . ' "' . $dspace_home . '/bin/mpidspacemediafilter" "' . $ENV{'GSDLHOME'} . '" "' . $dspace_home . '" "' . $file_list_path . '"';
    _runCommand($mpi_cmd, $debug);
    unlink($file_list_path);
  }
  # 5. With 0 worker threads we run a serial process calling media filter
  #    directly
  else
  {
    print " - Serial processing video files...\n";
    foreach my $video_item_identifier (@video_item_identifiers)
    {
      print ' - processing item: ' . $video_item_identifier . "\n";
      my $serial_mediafilter_cmd = '"' . $dspace_home . '/bin/dspace" filter-media -f -i "' . $video_item_identifier . '"';
      _runCommand($serial_mediafilter_cmd, $debug);
    }
  }

  # 6. Complete!
  my ($end_s, $end_u) = gettimeofday();
  print '[' . _formatTimestamp($end_s, $end_u) . '] Parallel Media Filter Complete: ' . `date` . "\n";
  print 'Elapsed Time: ' . sprintf('%0.6f', tv_interval([$start_s, $start_u], [$end_s, $end_u])) . " seconds\n";
}
203
# Run the filter, then terminate with a success status
main();

exit(0);

# True value for the enclosing package; never reached because of the exit
1;
Note: See TracBrowser for help on using the repository browser.