source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 29179

Last change on this file since 29179 was 29179, checked in by jmt12, 10 years ago

Changes to support low and med cpu loads and to avoid excessive IO in high CPU mode

File size: 17.3 KB
Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35use FileUtils;
36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Establish the inheritance chain at compile time: method lookups try
# MetadataRead first and then fall back to BasePlugin.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Command line / configuration arguments understood by this plugin. Each
# entry gives the argument name, a description (a "{...}" value is a key
# into the shared string resources), its type, whether it is required and,
# where applicable, a default value.
my $arguments = do
{
  # Every boolean-style enum offers the same two choices. A fresh list is
  # built for each argument so that no two arguments share one array.
  my $true_false_choices = sub
  {
    return [ { 'name' => 'true',  'desc' => '{common.true}' },
             { 'name' => 'false', 'desc' => '{common.false}' } ];
  };

  [
    # - which files this plugin claims
    { 'name' => 'process_exp',
      'desc' => '{BasePlugin.process_exp}',
      'type' => 'regexp',
      'reqd' => 'no',
      'deft' => get_default_process_exp() },

    # - parameters of the streamable (converted) video
    { 'name' => 'streamingHQsize',
      'desc' => '{VideoPlugin.streamingsize}',
      'type' => 'int',
      'deft' => '720',
      'reqd' => 'no' },
    { 'name' => 'streamingHQVideoBitrate',
      'desc' => '{VideoPlugin.streamingbitrate}',
      'type' => 'int',
      'deft' => '496',
      'reqd' => 'no' },
    { 'name' => 'streamingHQAudioBitrate',
      'desc' => '{VideoPlugin.streamingbitrate}',
      'type' => 'int',
      'deft' => '80',
      'reqd' => 'no' },
    { 'name' => 'videoDeinterlacingFilter',
      'desc' => 'Activate a deinterlacing filter to increase the quality of TV footage',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },

    # - how and where the import is being run
    { 'name' => 'isParallel',
      'desc' => 'Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'true',
      'reqd' => 'no' },
    { 'name' => 'isCluster',
      'desc' => 'Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },
    { 'name' => 'separateIO',
      'desc' => 'copy and process the file locally (good for segregating IO cost)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },
    { 'name' => 'fixedCore',
      'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
      'type' => 'int',
      'deft' => '0',
      'reqd' => 'no' },

    # - optional processing stages
    { 'name' => 'no_keyframes',
      'desc' => 'Disable keyframe extraction',
      'type' => 'flag',
      'reqd' => 'no' },
  ];
};
104
# Option descriptor for this plugin; it is appended to the "OptList" passed
# to the constructor and carries the argument list declared above.
# NOTE(review): 'name' reads "BasicVideoPlugin" although this package is
# SimpleVideoPlugin - possibly a leftover from a rename; confirm before
# changing, as tools may depend on the current value.
my $options = {
  'name'     => 'BasicVideoPlugin',
  'desc'     => '',
  'abstract' => 'no',
  'inherits' => 'yes',
  'args'     => $arguments,
};
110
## @function new()
#
# Constructor. Registers this class and its argument/option descriptors in
# the structures supplied by the caller, builds the object using the
# BasePlugin constructor and then reblesses it into the requested class.
#
# @param  $class            the class being instantiated
# @param  $pluginlist       array reference; $class is appended to it
# @param  $inputargs        passed through to BasePlugin unchanged
# @param  $hashArgOptLists  hash reference; this plugin's arguments are
#                           appended to its "ArgList" entry and its option
#                           descriptor to its "OptList" entry
# @return the new plugin object
#
sub new
{
  my ($class) = shift (@_);
  my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
  push(@$pluginlist, $class);

  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  # Explicit class-method call rather than the indirect object form
  # ("new BasePlugin (...)"), which Perl has to guess how to parse - a
  # particular hazard inside a package that itself defines a sub named new.
  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($self, $class);
}
122
## @function get_default_process_exp()
#
# @return the default regular expression (as a string) selecting the files
#         this plugin processes: any name ending in ".ts", in any case
#
sub get_default_process_exp
{
  my $ts_suffix_exp = '(?i)\.ts$';
  return $ts_suffix_exp;
}
127
## @function get_oid_hash_type()
#
# @param  $self  the plugin object (not consulted)
# @return the name of the hashing scheme used for object identifiers; this
#         plugin always answers "hash_on_ga_xml"
#
sub get_oid_hash_type
{
  my ($self) = @_;
  return 'hash_on_ga_xml';
}
133
## @function process()
#
# Processes one transport stream (.ts) video:
#   1. extracts metadata with MediaInfo and records it on the document
#   2. converts the video to a streamable MP4 using HandBrakeCLI (skipped
#      when streamingHQsize is not greater than zero)
#   3. extracts keyframes using hive2_ffmpegsvn (skipped when the
#      no_keyframes flag is set)
#   4. associates the derived (or original) video with the document and
#      records the keyframe images as metadata
#
# When separateIO is 'true', or the file lives in HDFS, the video is first
# copied to a working directory under /tmp and processed there.
#
# Dies if the video name cannot be determined, if the local copy fails, if
# conversion or keyframe extraction produce no output, or if the keyframe
# directory cannot be read.
#
# @param  $self      the plugin object
# @param  $pluginfo  not used by this method
# @param  $base_dir  directory prepended to $file when $file is relative
# @param  $file      path of the video to process
# @param  $metadata  not used by this method
# @param  $doc_obj   document object receiving text, metadata and files
# @param  $gli       not used by this method
# @return 1 on completion
#
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR " * SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # The name of the video is the last path component without its suffix. The
  # match ignores case so it agrees with the default process expression, and
  # a failed match is reported rather than silently reusing a stale capture.
  my ($filename) = $file =~ /[\/]?([^\/]+)\.(?:ts)$/i;
  if (!defined $filename)
  {
    die('Error! Unable to determine video name from path: ' . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }
  ###rint STDERR "[DEBUG] separate_io:" . $separate_io . "\n";

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    if (!&FileUtils::directoryExists($process_dir))
    {
      mkdir($process_dir, 0775);
    }
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  if (!&FileUtils::directoryExists($logs_dir))
  {
    mkdir($logs_dir, 0775);
  }
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  if (!&FileUtils::directoryExists($tmp_dir))
  {
    mkdir($tmp_dir, 0775);
  }
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  if (!&FileUtils::directoryExists($tmp_dir))
  {
    mkdir($tmp_dir, 0775);
  }

  # $file has already been combined with $base_dir above; combining it a
  # second time would double the prefix whenever $base_dir is itself relative.
  my $ivideo_path = $file;

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  if ($separate_io eq 'true')
  {
    print " - creating local copy of file: " . $ivideo_path . " [IOS:" . time() . "]\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    my $remote_size = &FileUtils::fileSize($ivideo_path);
    my $local_size = &FileUtils::fileSize($local_ivideo_path);
    if ($remote_size != $local_size)
    {
      die('Error! File copied is not of same size as original: ' . $remote_size . 'b != ' . $local_size . "b\n");
    }
    elsif (&FileUtils::fileExists($local_ivideo_path))
    {
      $ivideo_path = $local_ivideo_path;
      print ' - copied! [IOE:' . time() . "]\n";
    }
    else
    {
      die('Error! Failed to copy file: ' . $ivideo_path . ' => ' . $local_ivideo_path . ' [IOE:' . time() . "]\n");
    }
  }

  # 1. Use MediaInfo to extract important metadata
  print ' - Extracting metadata using MediaInfo...';
  my $mi_metadata = $self->getMetadata($ivideo_path);
  my $input_video_duration = &parseDurationAsSeconds($mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "Done!\n";

  # 2. Convert into streamable MP4 and associate
  # - generate a path for our temporary converted video file
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  if ($self->{'streamingHQsize'} > 0)
  {
    print STDERR " - Converting video to streamble format...\n";
    if (&FileUtils::fileExists($ovideo_path))
    {
      print " - Found existing converted video in cache!\n";
    }
    else
    {
      # - first conversion pass
      print " - Convert using Handbrake\n";
      my $streaming_HQ_size = $self->{'streamingHQsize'};
      my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
      my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
      my $deinterlace = $self->{'videoDeinterlacingFilter'};
      my $video_processing_parameters;
      # NOTE(review): this branch cannot be taken here, as the enclosing test
      # already required streamingHQsize to be numerically greater than zero
      if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
      {
        $video_processing_parameters = "--strict-anamorphic";
      }
      else
      {
        $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
      }
      if ($deinterlace eq "true")
      {
        $video_processing_parameters .= " --decomb";
      }
      # Default MenCoder options for x264
      my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
      my $is_cluster = $self->{'isCluster'};
      my $is_parallel = $self->{'isParallel'};
      # If we are parallel processing on a single (presumably) multicore computer
      # then we need to limit the number of threads (and hence CPUs) HandBrake
      # will utilize in order to emulate true parallel processing (otherwise the
      # first thread to get to HandBrake conversion will take up most the CPUs
      # causing all other threads to wait anyway). It will be interesting to test
      # whether parallel processing or serial processing (with HandBrake parallel
      # processing) is faster. *update* threads=1 *only* controls the encoding and
      # several other parts of Handbrake can run parallel (demuxing etc). I've
      # had to include a 'taskset' command to truly make Handbrake serial
      if ($is_parallel eq 'true' && $is_cluster eq 'false')
      {
        $mencoder_options .= ':threads=1';
      }
      # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
      my $cmd = '';
      if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
      {
        $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
      }
      $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
      # Retry the conversion (at most five attempts) until an output file appears
      my $attempt_count = 0;
      do
      {
        $attempt_count++;
        ###rint "[DEBUG: Video conversion attempt #" . $attempt_count . ": |" . $cmd . "|]\n";
        `$cmd`;
      }
      while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
    }
    if (!&FileUtils::fileExists($ovideo_path))
    {
      die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
    }
    # Extra check - ensure the converted video is approximately the same duration
    # as the input video, give or take around 5 seconds
    my $output_raw_video_duration = &getDuration($ovideo_path);
    my $output_video_duration = &parseDurationAsSeconds($output_raw_video_duration);
    if (abs($input_video_duration - $output_video_duration) > 5)
    {
      print STDERR "!Warning! Output video does not have same duration as input video.\n";
    }
    print STDERR " - conversion done!\n";
  }
  else
  {
    print " - skipping web-streamable conversion\n";
  }

  # 3. Extract keyframes using hive
  my $generate_keyframes = 1;
  if (defined $self->{'no_keyframes'} && $self->{'no_keyframes'} == 1)
  {
    $generate_keyframes = 0;
    print " - skipping keyframe generation\n";
  }
  else
  {
    print STDERR " - extract keyframes...\n";
    # NOTE(review): keyframes go to one fixed directory rather than the
    # per-video cache directory, so a shots.xml left behind by an earlier
    # video will be taken as this video's cache - confirm this is intended
    $tmp_dir = '/tmp/ramdrive';
    my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
    if (&FileUtils::fileExists($oshots_path))
    {
      print " - found existing keyframe images in cache\n";
    }
    else
    {
      print " - generating keyframe images using Hive2\n";
      my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
      ###print "[cmd: " . $cmd . "]\n";
      `$cmd`;
    }
    if (!&FileUtils::fileExists($oshots_path))
    {
      die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
    }
    print STDERR " - keyframes extracted!\n";
  }

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR " - Associate derived files to doc_obj... ";
  # - associate streamable video
  if (-f $ovideo_path)
  {
    $doc_obj->associate_file($ovideo_path, 'gsv.mp4', 'video/mp4', $topsection);
  }
  else
  {
    $doc_obj->associate_file($ivideo_path, 'gsv.ts', 'video/ts', $topsection);
  }
  # - record all of the JPGs found in the keyframe directory as metadata
  #   (the image files themselves are currently not associated)
  if ($generate_keyframes)
  {
    opendir(my $dh, $tmp_dir) or die("Fatal Error! Failed to read keyframe directory: " . $tmp_dir . "\nReason:" . $! . "\n");
    my @shots = readdir($dh);
    closedir($dh);
    my $thumbnail = 0;
    foreach my $shot (sort @shots)
    {
      # The dot is escaped so only a real ".jpg" suffix qualifies
      if ($shot =~ /\.jpg$/)
      {
        # The first image (in sorted order) doubles as the thumbnail
        if (!$thumbnail)
        {
          $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
          $thumbnail = 1;
        }
        $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      }
    }
  }
  print STDERR "Done!\n";

  # 5. Done! Cleanup.
  print STDERR "SimpleVideoPlugin: Complete! [IOS:" . time() . "]\n";
  return 1;
}
411
412
## @function getDuration()
#
# Asks the mediainfo command line tool for the duration of a video.
#
# @param  $video_path  path of the video to inspect
# @return whatever the command printed (standard output and standard error
#         combined), unmodified - so normally the duration text followed by
#         a newline
#
sub getDuration
{
  my $video_path = shift (@_);
  my $inform_spec = 'General;%Duration/String%';
  my $command = sprintf('mediainfo --Inform="%s" "%s" 2>&1', $inform_spec, $video_path);
  return scalar(qx{$command});
}
## getDuration() ##
423
424
## @function getMetadata()
#
# Runs the mediainfo command line tool (XML output) over a video and
# collects the fields it reports, grouped by track type.
#
# The output is scanned line by line: a <track type="..."> line starts a new
# group named after the type attribute, and every following line of the form
# <Field>value</Field> is stored in the current group. Fields met before any
# track line are stored under 'Unknown'. If several tracks share a type,
# later values overwrite earlier ones.
#
# @param  $self         the plugin object (not consulted)
# @param  $ivideo_path  path of the video to inspect
# @return a hash reference: track type => { field name => value }
#
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  ###rint "[DEBUG] command: " . $cmd . "\n";
  my $metadata_xml = `$cmd`;
  ###rint "[DEBUG] result: " . $metadata_xml . "\n\n";
  my $metadata = {'Unknown'=>{}};
  # Nothing to parse if the command could not be started at all
  if (!defined $metadata_xml)
  {
    return $metadata;
  }
  my $metadata_type = 'Unknown';
  foreach my $line (split(/\r?\n/, $metadata_xml))
  {
    # Only the type attribute's own value names the group; any further
    # attributes on the element (e.g. typeorder="1") are ignored
    if ($line =~ /<track type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
## getMetadata() ##
457
458
## @function parseDurationAsSeconds()
#
# Converts a textual duration such as "1h 2mn", "2mn 30s 500ms" or
# "1 h 2 min" into a whole number of seconds. Each of the hour, minute and
# second parts is optional; milliseconds are ignored.
#
# @param  $duration_str  the duration text (may be undefined)
# @return the duration in seconds; 0 when the text is undefined or contains
#         no recognisable part
#
sub parseDurationAsSeconds
{
  my ($duration_str) = @_;
  my $seconds = 0;
  # No duration available (e.g. the metadata field was missing)
  if (!defined $duration_str)
  {
    return $seconds;
  }
  # Whitespace between a number and its unit is tolerated, and minutes may
  # be written "mn" or "min"
  # (presumably newer MediaInfo releases print "1 h 2 min" - TODO confirm)
  # h - hours
  if ($duration_str =~ /(\d+)\s*h/)
  {
    $seconds += $1 * 60 * 60;
  }
  # mn / min - minutes (neither spelling can match the "ms" unit)
  if ($duration_str =~ /(\d+)\s*m(?:n|in)/)
  {
    $seconds += $1 * 60;
  }
  # s - seconds (digits must be followed directly by "s", so "500ms" and
  # "500 ms" are not counted)
  if ($duration_str =~ /(\d+)\s*s/)
  {
    $seconds += $1;
  }
  # ms - milliseconds (ignore - I'll be lucky to match to the closest second)
  return $seconds;
}
## parseDurationAsSeconds() ##
484
4851;
486
487
488
489
490
491
492
493
494
495
496
Note: See TracBrowser for help on using the repository browser.