source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 29180

Last change on this file since 29180 was 29180, checked in by jmt12, 10 years ago

Removing RAMDRIVE override as it won't be available on other systems

File size: 17.3 KB
Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35use FileUtils;
36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Set up the inheritance chain at compile time: method lookups on this
# plugin consult MetadataRead first and then BasePlugin.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Declaration of the options this plugin accepts. Each entry gives the
# option name, a description (either literal text or a "{Key}" placeholder),
# its type, its default value (where one exists) and whether it is required.
my $arguments = do
{
  # All boolean-style options share the same pair of enum choices. A fresh
  # list is built for every option so that no two options share a reference.
  my $true_false_choices = sub
  {
    return [ { 'name' => 'true',  'desc' => '{common.true}' },
             { 'name' => 'false', 'desc' => '{common.false}' } ];
  };

  [
    # Which files this plugin claims
    { 'name' => 'process_exp',
      'desc' => '{BasePlugin.process_exp}',
      'type' => 'regexp',
      'deft' => get_default_process_exp(),
      'reqd' => 'no' },

    # Streaming conversion settings (read by process() when building the
    # HandBrakeCLI command line)
    { 'name' => 'streamingHQsize',
      'desc' => '{VideoPlugin.streamingsize}',
      'type' => 'int',
      'deft' => '720',
      'reqd' => 'no' },
    { 'name' => 'streamingHQVideoBitrate',
      'desc' => '{VideoPlugin.streamingbitrate}',
      'type' => 'int',
      'deft' => '496',
      'reqd' => 'no' },
    { 'name' => 'streamingHQAudioBitrate',
      'desc' => '{VideoPlugin.streamingbitrate}',
      'type' => 'int',
      'deft' => '80',
      'reqd' => 'no' },
    { 'name' => 'videoDeinterlacingFilter',
      'desc' => 'Activate a deinterlacing filter to increase the quality of TV footage',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },

    # Execution environment settings
    { 'name' => 'isParallel',
      'desc' => 'Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'true',
      'reqd' => 'no' },
    { 'name' => 'isCluster',
      'desc' => 'Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },
    { 'name' => 'separateIO',
      'desc' => 'copy and process the file locally (good for segregating IO cost)',
      'type' => 'enum',
      'list' => $true_false_choices->(),
      'deft' => 'false',
      'reqd' => 'no' },
    { 'name' => 'fixedCore',
      'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
      'type' => 'int',
      'deft' => '0',
      'reqd' => 'no' },

    # Keyframe extraction switch
    { 'name' => 'no_keyframes',
      'desc' => 'Disable keyframe extraction',
      'type' => 'flag',
      'reqd' => 'no' },
  ];
};
104
# Plugin description record registered alongside the argument list in new().
# The 'name' must identify this plugin; it previously read "BasicVideoPlugin",
# which does not match this package (SimpleVideoPlugin).
my $options = { 'name'     => "SimpleVideoPlugin",
                'desc'     => "",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };
110
# Constructor.
#
# Appends this class to the plugin list, registers this plugin's argument
# and option declarations in the shared lists, lets BasePlugin construct the
# object and finally re-blesses that object into the requested class.
#
# @param $class           the class being instantiated
# @param $pluginlist      array reference that receives $class
# @param $inputargs       passed through to BasePlugin's constructor
# @param $hashArgOptLists hash reference holding the "ArgList" and "OptList"
#                         arrays that are extended here
# @return the BasePlugin-built object blessed into $class
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

  push(@{$pluginlist}, $class);
  push(@{$hashArgOptLists->{'ArgList'}}, @{$arguments});
  push(@{$hashArgOptLists->{'OptList'}}, $options);

  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($self, $class);
}
122
# Default regular expression (as a string) selecting the files this plugin
# processes: any name ending in ".ts", matched case-insensitively.
sub get_default_process_exp
{
  my $default_expression = q{(?i)\.ts$};
  return $default_expression;
}
127
# Names the identifier-hashing strategy for documents from this plugin.
# The answer is constant and does not depend on the instance.
sub get_oid_hash_type
{
  my ($self) = @_;
  return 'hash_on_ga_xml';
}
133
## @function process()
#
# Import a single transport-stream (.ts) video into the given document.
#
# Steps, in order:
#  0. derive a base name (and optional Date metadata) from the file name and
#     prepare the working directories ("logs" and "cached/<name>"); when IO
#     is separated these live under /tmp and the input is copied there first
#  1. extract technical metadata using MediaInfo
#  2. convert the video to MP4 using HandBrakeCLI (skipped when
#     streamingHQsize is not greater than zero; a cached result is reused)
#  3. extract keyframes using hive2_ffmpegsvn (unless no_keyframes is set)
#  4. associate the video with, and record keyframe metadata on, the document
#
# @param $pluginfo  not used by this function
# @param $base_dir  directory prepended to $file when $file is not absolute
# @param $file      path of the .ts file to import
# @param $metadata  not used by this function
# @param $doc_obj   document object that receives text, metadata and files
# @param $gli       not used by this function
# @return 1 on success; dies when the base name cannot be derived, the local
#         copy fails, the conversion fails or keyframe extraction fails
#
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR " * SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Base name of the file without directory or extension. The match is case
  # insensitive to agree with the default process expression, and it is
  # checked so that a stale or undefined $1 is never used as the name.
  my $filename;
  if ($file =~ /([^\/]+)\.ts$/i)
  {
    $filename = $1;
  }
  else
  {
    die("Fatal Error! Unable to determine a base name (expected a .ts file): " . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  # NOTE(review): the base name is only sanitised when a date is found in
  # it - confirm that undated names are meant to be used verbatim.
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it.
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }

  # Create a directory if it is missing. A failure is reported but is not
  # fatal here; later steps die if they cannot produce their output.
  my $ensure_directory = sub
  {
    my ($dir) = @_;
    if (!&FileUtils::directoryExists($dir) && !mkdir($dir, 0775))
    {
      print STDERR "!Warning! Failed to create directory: " . $dir . " (" . $! . ")\n";
    }
  };

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    $ensure_directory->($process_dir);
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  $ensure_directory->($logs_dir);
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  $ensure_directory->($tmp_dir);
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  $ensure_directory->($tmp_dir);

  # $file has already been combined with $base_dir above; doing so a second
  # time would duplicate the prefix whenever $base_dir is itself relative.
  my $ivideo_path = $file;

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  if ($separate_io eq 'true')
  {
    print " - creating local copy of file: " . $ivideo_path . " [IOS:" . time() . "]\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    my $remote_size = &FileUtils::fileSize($ivideo_path);
    my $local_size = &FileUtils::fileSize($local_ivideo_path);
    if ($remote_size != $local_size)
    {
      die('Error! File copied is not of same size as original: ' . $remote_size . 'b != ' . $local_size . "b\n");
    }
    elsif (&FileUtils::fileExists($local_ivideo_path))
    {
      # From here on every tool works on the local copy
      $ivideo_path = $local_ivideo_path;
      print ' - copied! [IOE:' . time() . "]\n";
    }
    else
    {
      die('Error! Failed to copy file: ' . $ivideo_path . ' => ' . $local_ivideo_path . ' [IOE:' . time() . "]\n");
    }
  }

  # 1. Use MediaInfo to extract important metadata
  print ' - Extracting metadata using MediaInfo...';
  my $mi_metadata = $self->getMetadata($ivideo_path);
  my $input_video_duration = &parseDurationAsSeconds($mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    # MediaInfo did not report a size, so ask the file system instead
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "Done!\n";

  # 2. Convert into MP4 and associate
  # - generate a path for our temporary converted video file
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  if ($self->{'streamingHQsize'} > 0)
  {
    print STDERR " - Converting video to streamble format...\n";
    if (&FileUtils::fileExists($ovideo_path))
    {
      print " - Found existing converted video in cache!\n";
    }
    else
    {
      # - first conversion pass
      print " - Convert using Handbrake\n";
      my $streaming_HQ_size = $self->{'streamingHQsize'};
      my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
      my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
      my $deinterlace = $self->{'videoDeinterlacingFilter'};
      my $video_processing_parameters;
      if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
      {
        $video_processing_parameters = "--strict-anamorphic";
      }
      else
      {
        $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
      }
      if ($deinterlace eq "true")
      {
        $video_processing_parameters .= " --decomb";
      }
      # Default MenCoder options for x264
      my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
      my $is_cluster = $self->{'isCluster'};
      my $is_parallel = $self->{'isParallel'};
      # If we are parallel processing on a single (presumably) multicore
      # computer then we need to limit the number of threads (and hence CPUs)
      # HandBrake will utilize in order to emulate true parallel processing
      # (otherwise the first thread to get to HandBrake conversion will take
      # up most of the CPUs causing all other threads to wait anyway). It
      # will be interesting to test whether parallel processing or serial
      # processing (with HandBrake parallel processing) is faster. *update*
      # threads=1 *only* controls the encoding and several other parts of
      # Handbrake can run in parallel (demuxing etc). I've had to include a
      # 'taskset' command to truly make Handbrake serial
      if ($is_parallel eq 'true' && $is_cluster eq 'false')
      {
        $mencoder_options .= ':threads=1';
      }
      # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
      my $cmd = '';
      if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
      {
        $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
      }
      # All output of the conversion is redirected into the conversion log
      $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
      # Retry the conversion (at most five attempts) until the output exists
      my $attempt_count = 0;
      do
      {
        $attempt_count++;
        `$cmd`;
      }
      while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
    }
    if (!&FileUtils::fileExists($ovideo_path))
    {
      die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
    }
    # Extra check - ensure the converted video is approximately the same
    # duration as the input video, give or take around 5 seconds
    my $output_raw_video_duration = &getDuration($ovideo_path);
    my $output_video_duration = &parseDurationAsSeconds($output_raw_video_duration);
    if (abs($input_video_duration - $output_video_duration) > 5)
    {
      print STDERR "!Warning! Output video does not have same duration as input video.\n";
    }
    print STDERR " - conversion done!\n";
  }
  else
  {
    print " - skipping web-streamable conversion\n";
  }

  # 3. Extract keyframes using hive
  # NOTE(review): keyframes are always extracted from the converted video,
  # so when the conversion above is skipped and no cached shots.xml exists
  # this step dies - confirm whether the input video should be used instead.
  my $generate_keyframes = 1;
  if (defined $self->{'no_keyframes'} && $self->{'no_keyframes'} == 1)
  {
    $generate_keyframes = 0;
    print " - skipping keyframe generation\n";
  }
  else
  {
    print STDERR " - extract keyframes...\n";
    my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
    if (&FileUtils::fileExists($oshots_path))
    {
      print " - found existing keyframe images in cache\n";
    }
    else
    {
      print " - generating keyframe images using Hive2\n";
      # Output is appended to the same log the conversion wrote to
      my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
      `$cmd`;
    }
    if (!&FileUtils::fileExists($oshots_path))
    {
      die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
    }
    print STDERR " - keyframes extracted!\n";
  }

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR " - Associate derived files to doc_obj... ";
  # - associate streamable video, falling back to the input video when no
  #   converted file exists
  if (-f $ovideo_path)
  {
    $doc_obj->associate_file($ovideo_path, 'gsv.mp4', 'video/mp4', $topsection);
  }
  else
  {
    $doc_obj->associate_file($ivideo_path, 'gsv.ts', 'video/ts', $topsection);
  }
  # - record all of the JPGs found in the temp directory as metadata (the
  #   image files themselves are not associated with the document)
  if ($generate_keyframes)
  {
    my @shots = ();
    if (opendir(my $dh, $tmp_dir))
    {
      @shots = readdir($dh);
      closedir($dh);
    }
    else
    {
      print STDERR "!Warning! Failed to read keyframe directory: " . $tmp_dir . " (" . $! . ")\n";
    }
    my $thumbnail = 0;
    foreach my $shot (sort @shots)
    {
      # The dot is escaped so that only a real ".jpg" extension matches
      if ($shot =~ /\.jpg$/)
      {
        # The first keyframe (in sorted order) doubles as the thumbnail
        if (!$thumbnail)
        {
          $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
          $thumbnail = 1;
        }
        $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      }
    }
  }
  print STDERR "Done!\n";

  # 5. Done! Cleanup.
  print STDERR "SimpleVideoPlugin: Complete! [IOS:" . time() . "]\n";
  return 1;
}
## process() ##
411
412
## @function getDuration()
#
# Ask the external 'mediainfo' tool for the duration string of a video.
#
# The tool is started directly (list-form pipe open) rather than through a
# shell, so characters such as quotes, '$' or backticks in the path are
# passed through literally instead of being interpreted. Only the standard
# output of the tool is returned; its error output is not captured.
#
# @param $video_path  path of the video file to inspect
# @return the text printed by mediainfo (returned as printed, not trimmed),
#         or the empty string if the tool could not be started or printed
#         nothing
#
sub getDuration
{
  my ($video_path) = @_;
  my @cmd = ('mediainfo', '--Inform=General;%Duration/String%', $video_path);
  my $pipe;
  if (!open($pipe, '-|', @cmd))
  {
    print STDERR "!Warning! Failed to run mediainfo on: " . $video_path . " (" . $! . ")\n";
    return '';
  }
  # Read everything the tool printed in one go
  my $duration = do { local $/; <$pipe> };
  close($pipe);
  return defined $duration ? $duration : '';
}
## getDuration() ##
423
424
## @function getMetadata()
#
# Run 'mediainfo --Output=XML' on a video and collect its fields into a
# two-level hash: track type => { field name => value }.
#
# The output is scanned line by line. A '<track type="...">' line switches
# the current track type (any further attributes on that tag are ignored);
# a '<Field>value</Field>' line stores the value under the current track
# type. Fields seen before any track line are filed under 'Unknown'. When
# the same track type or field occurs more than once the last value wins.
#
# The tool is started directly (list-form pipe open) rather than through a
# shell, so special characters in the path are passed through literally.
#
# @param $self         the plugin object (not used by this function)
# @param $ivideo_path  path of the video file to inspect
# @return hash reference as described above; it contains only the empty
#         'Unknown' entry if the tool could not be started
#
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  my $pipe;
  if (!open($pipe, '-|', 'mediainfo', '--Output=XML', $ivideo_path))
  {
    print STDERR "!Warning! Failed to run mediainfo on: " . $ivideo_path . " (" . $! . ")\n";
    return $metadata;
  }
  while (my $line = <$pipe>)
  {
    $line =~ s/\r?\n$//;
    # Start of a new track: capture only the type value, stopping at its
    # closing quote so that extra attributes do not leak into the key
    if ($line =~ /<track type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    # A complete element on one line: remember its value for this track
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  close($pipe);
  return $metadata;
}
## getMetadata() ##
457
458
## @function parseDurationAsSeconds()
#
# Convert a MediaInfo style duration string into whole seconds.
#
# Both the compact form ("1h 2mn 3s") and the spaced form ("1 h 2 min 3 s")
# are understood. Hours, minutes and seconds are each optional; a
# milliseconds component is ignored.
#
# @param $duration_str  the duration text; may be undefined or empty
# @return the duration in seconds (0 when nothing could be parsed)
#
sub parseDurationAsSeconds
{
  my ($duration_str) = @_;
  my $seconds = 0;
  # Nothing to parse (e.g. MediaInfo reported no duration)
  if (!defined $duration_str || $duration_str eq '')
  {
    return $seconds;
  }
  # h - hours
  if ($duration_str =~ /(\d+)\s*h/)
  {
    $seconds += $1 * 60 * 60;
  }
  # mn / min - minutes
  if ($duration_str =~ /(\d+)\s*mi?n/)
  {
    $seconds += $1 * 60;
  }
  # s - seconds (a number followed by "ms" does not match, as the unit must
  # start with "s")
  if ($duration_str =~ /(\d+)\s*s/)
  {
    $seconds += $1;
  }
  # ms - milliseconds (ignore - I'll be lucky to match to the closest second)
  return $seconds;
}
## parseDurationAsSeconds() ##
484
4851;
486
487
488
489
490
491
492
493
494
495
496
Note: See TracBrowser for help on using the repository browser.