source: gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl@ 25809

Last change on this file since 25809 was 25809, checked in by jmt12, 12 years ago

Script to parallel media filter DSpace

  • Property svn:executable set to *
File size: 5.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# Enhance the DSpace media filter component with parallel processing
6# capability. Currently only processes media of dc.type "Video" (which will
7# be manually assigned to the *.TS files produced by ReplayMe!)
8#
9# Copyright (C) 2012 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package parallel_dspace_filtermedia;
28
# Compile-time setup: verify the Greenstone environment variables this script
# relies on, then put the Greenstone library directories (and those of every
# extension listed in GSDLEXTS) at the front of @INC.
BEGIN {
  # Both variables must be present before anything else is attempted.
  foreach my $required_variable ('GSDLHOME', 'GSDLOS') {
    die "$required_variable not set\n" unless defined $ENV{$required_variable};
  }

  # Library directories to register beneath each root, in unshift order (so
  # the last entry of this list ends up earliest in @INC for a given root).
  my @library_subdirs = ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts');

  # The core installation is registered first, followed by each extension in
  # the order given by the colon-separated GSDLEXTS list.
  my @library_roots = ($ENV{'GSDLHOME'});
  if (defined $ENV{'GSDLEXTS'}) {
    push(@library_roots, map { "$ENV{'GSDLHOME'}/ext/$_" } split(/:/, $ENV{'GSDLEXTS'}));
  }

  foreach my $library_root (@library_roots) {
    foreach my $library_subdir (@library_subdirs) {
      unshift(@INC, "$library_root/$library_subdir");
    }
  }
}
49
50use strict;
51
# Print an optional error message followed by the usage summary to STDERR,
# then terminate the script. This subroutine never returns.
#
# @param $message  (optional) description of what was wrong with the
#                  arguments; printed with an 'Error! ' prefix when defined
#
# The exit status is 1 when an error message was supplied, so that calling
# shells and job schedulers can detect the failed invocation, and 0 when the
# usage text is printed without an error.
sub printUsage
{
  my ($message) = @_;
  my $exit_status = 0;
  if (defined $message)
  {
    print STDERR 'Error! ' . $message . "\n";
    $exit_status = 1;
  }
  print STDERR 'Usage: parallel_dspace_filtermedia.pl -workers <int> -dspacehome <path> [-debug]' . "\n\n";
  exit($exit_status);
}
62
# Run a command line through the shell (via backticks), discarding whatever
# it writes to standard output, and warn on STDERR if it did not succeed.
#
# @param $command  the complete command line to execute
# @param $debug    when true, echo the command to STDOUT before running it
# @return 1 when the command exited with status 0, otherwise 0
sub _runCommand
{
  my ($command, $debug) = @_;
  if ($debug)
  {
    print '[DEBUG: ' . $command . "]\n";
  }
  `$command`;
  # $? holds the wait status of the backtick command; -1 means it could not
  # be started at all.
  if ($? == -1)
  {
    print STDERR 'Warning! Failed to execute command: ' . $command . ' (' . $! . ")\n";
    return 0;
  }
  if ($? != 0)
  {
    print STDERR 'Warning! Command failed (exit code ' . ($? >> 8) . ', signal ' . ($? & 127) . '): ' . $command . "\n";
    return 0;
  }
  return 1;
}

# Entry point. Parses the command line (-dspacehome <path>, -workers <int>,
# -debug), exports the DSpace metadata to a temporary CSV file under
# <dspacehome>/log, collects the handle identifiers found on lines that also
# contain a "Video" field, and then runs the media filter over those items:
# through mpirun when the worker count is greater than zero, or one item at
# a time through 'dspace filter-media' otherwise.
#
# Invalid or missing arguments end the script through printUsage(). Failure
# to open the metadata file or the file list is fatal; a failing external
# command only produces a warning so remaining items are still processed.
sub main
{
  print 'Parallel Media Filter started: ' . `date` . "\n";

  # 1. Initialization
  my $dspace_home = '';
  my $worker_count = 0;
  my $debug = 0;
  for (my $i = 0; $i < scalar(@ARGV); $i++)
  {
    my $argument = $ARGV[$i];
    if ('-dspacehome' eq $argument)
    {
      $i++;
      # Guard against the flag being the last thing on the command line
      if (!defined $ARGV[$i])
      {
        printUsage('Missing path after -dspacehome');
      }
      $dspace_home = $ARGV[$i];
      if (!-d $dspace_home)
      {
        printUsage('DSpace home path given doesn\'t exist or isn\'t a directory');
      }
    }
    elsif ('-workers' eq $argument)
    {
      $i++;
      if (!defined $ARGV[$i])
      {
        printUsage('Missing worker count after -workers');
      }
      $worker_count = $ARGV[$i];
      if ($worker_count !~ /^\d+$/)
      {
        printUsage('Worker count must be an integer');
      }
    }
    elsif ('-debug' eq $argument)
    {
      $debug = 1;
    }
    else
    {
      printUsage('Unrecognized argument: ' . $argument);
    }
  }
  if ('' eq $dspace_home)
  {
    printUsage('Path to dspace home required!');
  }

  # 2. Run metadata export to get the identifiers of video items in the
  #    collection
  print ' * Retrieving the list of video item identifiers...' . "\n";
  my @video_item_identifiers;
  my $metadata_file_path = $dspace_home . '/log/metadata-' . time() . '.csv';
  my $metadata_export_cmd = '"' . $dspace_home . '/bin/dspace" metadata-export -f "' . $metadata_file_path . '"';
  _runCommand($metadata_export_cmd, $debug);
  open(my $metadata_fh, "<:utf8", $metadata_file_path) or die('Error! Failed to open metadata file for reading: ' . $metadata_file_path . ' (' . $! . ')');
  while (my $line = <$metadata_fh>)
  {
    # A line is taken to describe a video item when it holds a handle URL
    # field followed, later on the same line, by a field that is exactly
    # "Video"; the part of the URL after the host is the item identifier.
    if ($line =~ m{"http://hdl\.handle\.net/([^"]+)",.*"Video"})
    {
      my $identifier = $1;
      push(@video_item_identifiers, $identifier);
      if ($debug)
      {
        print '[DEBUG: parsed identifier: ' . $identifier . "]\n";
      }
    }
  }
  close($metadata_fh);
  unlink($metadata_file_path);

  if ($worker_count > 0)
  {
    print " - Parallel processing video files...\n";
    # 3. Write the identifiers to the file list
    my $file_list_path = $dspace_home . '/log/filelist' . time() . '.txt';
    open(my $file_list_fh, ">:utf8", $file_list_path) or die('Error! Failed to open file for writing: ' . $file_list_path . ' (' . $! . ')');
    foreach my $video_item_identifier (@video_item_identifiers)
    {
      print {$file_list_fh} $video_item_identifier . "\n";
    }
    # Buffered write errors only surface at close, so check it
    close($file_list_fh) or die('Error! Failed to write file: ' . $file_list_path . ' (' . $! . ')');
    # 4. Invoke mpidspacemediafilter (via mpirun) with the filelist created
    #    above. mpirun is asked for one process more than the requested
    #    number of workers (presumably the extra one coordinates the
    #    workers - confirm against mpidspacemediafilter).
    my $mpi_cmd = 'mpirun -np ' . ($worker_count + 1) . ' "' . $dspace_home . '/bin/mpidspacemediafilter" "' . $dspace_home . '" "' . $file_list_path . '"';
    _runCommand($mpi_cmd, $debug);
    unlink($file_list_path);
  }
  # 5. With 0 worker threads we run a serial process calling media filter
  #    directly
  else
  {
    print " - Serial processing video files...\n";
    foreach my $video_item_identifier (@video_item_identifiers)
    {
      print ' - processing item: ' . $video_item_identifier . "\n";
      my $serial_mediafilter_cmd = '"' . $dspace_home . '/bin/dspace" filter-media -f -i "' . $video_item_identifier . '"';
      _runCommand($serial_mediafilter_cmd, $debug);
    }
  }
  # 6. Complete!
  print 'Parallel Media Filter Complete: ' . `date` . "\n";
}
177
# Script entry: run the parallel media filter with the arguments in @ARGV.
main();
Note: See TracBrowser for help on using the repository browser.