source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 26948

Last change on this file since 26948 was 26948, checked in by jmt12, 11 years ago

Adding 'fixedCore' argument to allow restricting HandbrakeCLI to a single CPU (threads=1 only affects encoding). Replace all built-in perl file tests with calls to util library to allow plugin to process files in HDFS. Coincidentally HDFS *only* supported by 'separateIO' functionality.

File size: 13.8 KB
Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35
36use strict;
37use warnings;
38no strict 'refs'; # allow filehandles to be variables and viceversa
39
# Set up inheritance at compile time. Method lookup tries MetadataRead
# before BasePlugin.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
44
# Plugin-specific arguments. new() appends this list to the shared "ArgList"
# before constructing the BasePlugin part of the object.
my $arguments = [
  # Which files this plugin claims (default comes from get_default_process_exp)
  {
    'name' => 'process_exp',
    'desc' => '{BasePlugin.process_exp}',
    'type' => 'regexp',
    'reqd' => 'no',
    'deft' => get_default_process_exp(),
  },
  # Output width handed to HandBrakeCLI's -w option ("fullsize" or an empty
  # value selects strict anamorphic instead - see process())
  {
    'name' => 'streamingHQsize',
    'desc' => '{VideoPlugin.streamingsize}',
    'type' => 'int',
    'deft' => '720',
    'reqd' => 'no',
  },
  # Video bitrate handed to HandBrakeCLI's -b option
  {
    'name' => 'streamingHQVideoBitrate',
    'desc' => '{VideoPlugin.streamingbitrate}',
    'type' => 'int',
    'deft' => '496',
    'reqd' => 'no',
  },
  # Audio bitrate handed to HandBrakeCLI's -B option
  {
    'name' => 'streamingHQAudioBitrate',
    'desc' => '{VideoPlugin.streamingbitrate}',
    'type' => 'int',
    'deft' => '80',
    'reqd' => 'no',
  },
  # "true" adds --decomb to the HandBrakeCLI command line
  {
    'name' => 'videoDeinterlacingFilter',
    'desc' => 'Activate a deinterlacing filter to increase the quality of TV footage',
    'type' => 'enum',
    'list' => [
      { 'name' => 'true',  'desc' => '{common.true}' },
      { 'name' => 'false', 'desc' => '{common.false}' },
    ],
    'deft' => 'false',
    'reqd' => 'no',
  },
  # isParallel=true together with isCluster=false limits x264 to one thread
  {
    'name' => 'isParallel',
    'desc' => 'Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)',
    'type' => 'enum',
    'list' => [
      { 'name' => 'true',  'desc' => '{common.true}' },
      { 'name' => 'false', 'desc' => '{common.false}' },
    ],
    'deft' => 'true',
    'reqd' => 'no',
  },
  {
    'name' => 'isCluster',
    'desc' => 'Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)',
    'type' => 'enum',
    'list' => [
      { 'name' => 'true',  'desc' => '{common.true}' },
      { 'name' => 'false', 'desc' => '{common.false}' },
    ],
    'deft' => 'false',
    'reqd' => 'no',
  },
  # "true" makes process() work on a copy of the video under /tmp
  {
    'name' => 'separateIO',
    'desc' => 'copy and process the file locally (good for segregating IO cost)',
    'type' => 'enum',
    'list' => [
      { 'name' => 'true',  'desc' => '{common.true}' },
      { 'name' => 'false', 'desc' => '{common.false}' },
    ],
    'deft' => 'false',
    'reqd' => 'no',
  },
  # A value > 0 prefixes the HandBrakeCLI command with "taskset -c <value-1>"
  {
    'name' => 'fixedCore',
    'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
    'type' => 'int',
    'deft' => '0',
    'reqd' => 'no',
  },
];
99
# Plugin description record. new() appends it to the shared "OptList".
# NOTE(review): 'name' says "BasicVideoPlugin" although this package is
# SimpleVideoPlugin - looks like a leftover from the plugin this one was
# derived from; confirm before changing as other tools may read this value.
my $options = {
  'name'     => 'BasicVideoPlugin',
  'desc'     => '',
  'abstract' => 'no',
  'inherits' => 'yes',
  'args'     => $arguments,
};
105
# Constructor.
#
# @param  $class           the class name to bless the new object into
# @param  $pluginlist      array reference; this class name is appended to it
# @param  $inputargs       passed through untouched to the BasePlugin constructor
# @param  $hashArgOptLists hash reference; this plugin's arguments are appended
#                          to its "ArgList" entry and its options record to
#                          its "OptList" entry
# @return the object built by BasePlugin, reblessed into $class
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

  # Register ourselves and our arguments before the parent constructor runs
  push(@{$pluginlist}, $class);
  push(@{$hashArgOptLists->{'ArgList'}}, @{$arguments});
  push(@{$hashArgOptLists->{'OptList'}}, $options);

  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($self, $class);
}
117
# Default value for the process_exp argument.
#
# @return a regular expression (as a string) that matches, ignoring case,
#         any name ending in ".ts"
sub get_default_process_exp
{
  my $ts_extension_exp = '(?i)\.ts$';
  return $ts_extension_exp;
}
122
# Name of the hashing scheme used for this plugin's document identifiers.
#
# @param  $self the plugin object (not consulted)
# @return the constant string "hash_on_ga_xml"
sub get_oid_hash_type
{
  my ($self) = @_;
  return 'hash_on_ga_xml';
}
128
# Process a single transport stream video: extract its metadata with
# MediaInfo, convert it to a streamable MP4 with HandBrakeCLI, extract
# keyframe images with Hive2, and attach the results to the document.
#
# Progress markers ([A:...] to [G:...], each with a timestamp) are written to
# STDERR; informational messages are written to STDOUT.
#
# @param  $pluginfo not used by this plugin
# @param  $base_dir directory that, joined with $file, locates the video
# @param  $file     path of the video relative to $base_dir
# @param  $metadata not used by this plugin
# @param  $doc_obj  document object that receives the dummy text, metadata
#                   and associated files
# @param  $gli      not used by this plugin
# @return 1 on completion
# @throws dies if the converted video or the keyframe listing does not exist
#         after the corresponding external command has been run
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  print STDERR "[A:" . time() . "] SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Derive a working name from the file name minus its .ts extension. The
  # match ignores case (as the default process_exp does) and is checked, so
  # that a stale $1 from some earlier match can never be used as the name.
  my $filename;
  if ($file =~ /([^\/]+)\.ts$/i)
  {
    $filename = $1;
  }
  else
  {
    # Not a .ts file (process_exp may have been overridden): fall back to the
    # file name with any leading directories removed
    ($filename = $file) =~ s/^.*\///;
    print STDERR "Warning! SimpleVideoPlugin expected a .ts file but was given: " . $file . "\n";
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  # - note that the name is only sanitized when a date is found in it
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&util::isHDFS($file))
  {
    $separate_io = 'true';
  }

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &util::filename_cat('/tmp', 'gsimport-' . $filename);
    _create_dir_if_missing($process_dir);
  }
  my $logs_dir = &util::filename_cat($process_dir, "logs");
  _create_dir_if_missing($logs_dir);
  my $convert_log_path = &util::filename_cat($logs_dir, 'convert-' . $filename . '.log');
  my $tmp_dir = &util::filename_cat($process_dir, "cached");
  _create_dir_if_missing($tmp_dir);
  $tmp_dir = &util::filename_cat($tmp_dir, $filename);
  _create_dir_if_missing($tmp_dir);

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  my $ivideo_path = &util::filename_cat($base_dir, $file);
  if ($separate_io eq 'true')
  {
    print STDERR "[B1:" . time() . "] Creating local copy of file\n";
    my $local_ivideo_path = &util::filename_cat($process_dir, $filename . ".ts");
    &util::cp($ivideo_path, $local_ivideo_path);
    $ivideo_path = $local_ivideo_path;
    print STDERR "[B2:" . time() . "] Complete\n";
  }

  # 1. Use MediaInfo to extract important metadata
  print STDERR "[C1:" . time() . "] Extracting metadata\n";
  print " - Extracting metadata using MediaInfo\n";
  my $mi_metadata = $self->getMetadata($ivideo_path);
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  $doc_obj->set_metadata_element($topsection,"FileSize",$mi_metadata->{'General'}->{'File_size'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "[C2:" . time() . "] Complete\n";

  # 2. Convert into MP4 and associate
  # - generate a path for our temporary converted video file
  print STDERR "[D1:" . time() . "] Converting video to streamble format\n";
  my $ovideo_path = &util::filename_cat($tmp_dir, 'gsv.mp4');
  # - remembered so that a failure can be reported accurately below ($! says
  #   nothing useful about a command run through backticks)
  my $convert_outcome = 'no conversion command was run';
  if (&util::file_exists($ovideo_path))
  {
    print " - Found existing converted video in cache\n";
  }
  else
  {
    print " - Convert using Handbrake\n";
    my $streaming_HQ_size = $self->{'streamingHQsize'};
    my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
    my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
    my $deinterlace = $self->{'videoDeinterlacingFilter'};
    my $video_processing_parameters;
    if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
    {
      $video_processing_parameters = "--strict-anamorphic";
    }
    else
    {
      $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
    }
    if ($deinterlace eq "true")
    {
      $video_processing_parameters .= " --decomb";
    }
    # Default MenCoder options for x264
    my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
    my $is_cluster = $self->{'isCluster'};
    my $is_parallel = $self->{'isParallel'};
    # If we are parallel processing on a single (presumably) multicore computer
    # then we need to limit the number of threads (and hence CPUs) HandBrake
    # will utilize in order to emulate true parallel processing (otherwise the
    # first thread to get to HandBrake conversion will take up most of the CPUs
    # causing all other threads to wait anyway). It will be interesting to test
    # whether parallel processing or serial processing (with HandBrake parallel
    # processing) is faster. *update* threads=1 *only* controls the encoding and
    # several other parts of Handbrake can run in parallel (demuxing etc). I've
    # had to include a 'taskset' command to truly make Handbrake serial
    if ($is_parallel eq 'true' && $is_cluster eq 'false')
    {
      $mencoder_options .= ':threads=1';
    }
    # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
    my $cmd = '';
    if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
    {
      $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
    }
    # NOTE(review): paths are interpolated into a shell command inside double
    # quotes only - a file name containing ", $ or ` would break (or subvert)
    # the command. Names containing a date are sanitized above, others are not.
    $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
    print "[DEBUG: " . $cmd . "]\n";
    `$cmd`;
    $convert_outcome = _describe_command_status($?, $!);
  }
  if (!&util::file_exists($ovideo_path))
  {
    die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $convert_outcome . " (see log: " . $convert_log_path . ")\n");
  }
  print STDERR "[D2:" . time() . "] Complete\n";

  # 3. Extract keyframes using hive
  print STDERR "[E1:" . time() . "] Extract keyframes\n";
  my $oshots_path = &util::filename_cat($tmp_dir, 'shots.xml');
  my $keyframe_outcome = 'no keyframe extraction command was run';
  if (&util::file_exists($oshots_path))
  {
    print " - Found existing keyframe images in cache\n";
  }
  else
  {
    print " - Generating keyframe images using Hive2\n";
    my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
    ###print "[cmd: " . $cmd . "]\n";
    `$cmd`;
    $keyframe_outcome = _describe_command_status($?, $!);
  }
  if (!&util::file_exists($oshots_path))
  {
    die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $keyframe_outcome . " (see log: " . $convert_log_path . ")\n");
  }
  print STDERR "[E2:" . time() . "] Complete\n";

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR "[F1:" . time() . "] Associate derived files to doc_obj\n";
  # - associate streamable video
  $doc_obj->associate_file($ovideo_path,'gsv.mp4','video/mp4',$topsection);
  # - associate all of the JPGs found in the temp directory
  my @shots = ();
  if (opendir(my $dh, $tmp_dir))
  {
    @shots = readdir($dh);
    closedir($dh);
  }
  else
  {
    print STDERR "Warning! SimpleVideoPlugin failed to list keyframe directory: " . $tmp_dir . " (" . $! . ")\n";
  }
  my $thumbnail = 0;
  foreach my $shot (sort @shots)
  {
    # - only files with a literal ".jpg" extension are keyframes
    if ($shot =~ /\.jpg$/)
    {
      my $shot_path = &util::filename_cat($tmp_dir, $shot);
      # - the first keyframe (in sorted order) doubles as the thumbnail
      if (!$thumbnail)
      {
        $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
        $thumbnail = 1;
      }
      $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      $doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
    }
  }
  print STDERR "[F2:" . time() . "] Complete\n";

  # 5. Done!
  print STDERR "[G:" . time() . "] SimpleVideoPlugin: Complete!\n";
  return 1;
}

# Create a directory (mode 0775) unless it already exists. Failure is
# reported on STDERR but is not fatal.
#
# @param  $dir path of the directory
# @return 1 if the directory exists on return, 0 otherwise
sub _create_dir_if_missing
{
  my ($dir) = @_;
  return 1 if &util::dir_exists($dir);
  return 1 if mkdir($dir, 0775);
  my $reason = $!;
  # Another import process may have created the directory between the test
  # and the mkdir above - that is not an error
  return 1 if &util::dir_exists($dir);
  print STDERR "Warning! SimpleVideoPlugin failed to create directory: " . $dir . " (" . $reason . ")\n";
  return 0;
}

# Turn the status of a command run via backticks into readable text.
#
# @param  $status   the value of $? immediately after the command
# @param  $os_error the value of $! immediately after the command (only
#                   meaningful when $status is -1)
# @return a short description of how the command ended
sub _describe_command_status
{
  my ($status, $os_error) = @_;
  if ($status == -1)
  {
    return 'command could not be run (' . $os_error . ')';
  }
  if ($status & 127)
  {
    return 'command was killed by signal ' . ($status & 127);
  }
  return 'command exited with status ' . ($status >> 8);
}
335
# Run MediaInfo on a video and collect the fields it reports.
#
# The XML is read line by line rather than with an XML parser: a line holding
# a <track type="..."> tag selects the current track type, and any other line
# of the form <tag>value</tag> is stored under that type. If the same tag
# occurs more than once for a type, the last value wins.
#
# @param  $self        the plugin object (not consulted)
# @param  $ivideo_path path to the video file to inspect
# @return a hash reference mapping track type to a hash reference of tag
#         name => value. Fields seen before the first track tag are stored
#         under 'Unknown', which is always present.
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  # NOTE(review): the path is interpolated into a shell command inside double
  # quotes only - a path containing ", $ or ` would break the command
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  ###print "Cmd: " . $cmd . "\n";
  my $metadata_xml = `$cmd`;
  # - backticks give undef if the command could not be started at all
  if (!defined $metadata_xml)
  {
    $metadata_xml = '';
  }
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (split(/\r?\n/, $metadata_xml))
  {
    # - capture only the value of the type attribute, so that a tag carrying
    #   further attributes (e.g. <track type="Audio" streamid="1">) is still
    #   filed under "Audio"
    if ($line =~ /<track type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
364
3651;
366
367
368
369
370
371
372
373
374
375
376
Note: See TracBrowser for help on using the repository browser.