source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 28655

Last change on this file since 28655 was 28655, checked in by jmt12, 10 years ago

Added in some extra testing to check that the duration of the converted video is in the same ballpark as the input video - I wanted to detect an occasional issue where a 10 minute video got truncated down to a few seconds

File size: 16.4 KB
Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35use FileUtils;
36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Set up inheritance at compile time: method lookup tries MetadataRead
# first, then BasePlugin.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Argument descriptions for this plugin. Each entry is a hash with 'name',
# 'desc', 'type', 'deft' (default value) and 'reqd' keys; enum arguments
# additionally carry a 'list' of permitted values. The list is appended to
# the caller supplied "ArgList" by new().
my $arguments = do
{
  # Builds a fresh true/false choice list on every call so that no two
  # arguments share the same underlying array.
  my $true_false_choices = sub
  {
    return [ { 'name' => "true",  'desc' => "{common.true}" },
             { 'name' => "false", 'desc' => "{common.false}" } ];
  };

  # Optional integer argument
  my $int_argument = sub
  {
    my ($name, $desc, $default) = @_;
    return { 'name' => $name,
             'desc' => $desc,
             'type' => "int",
             'deft' => $default,
             'reqd' => "no" };
  };

  # Optional true/false (enum) argument
  my $flag_argument = sub
  {
    my ($name, $desc, $default) = @_;
    return { 'name' => $name,
             'desc' => $desc,
             'type' => "enum",
             'list' => $true_false_choices->(),
             'deft' => $default,
             'reqd' => "no" };
  };

  [
    { 'name' => "process_exp",
      'desc' => "{BasePlugin.process_exp}",
      'type' => "regexp",
      'reqd' => "no",
      'deft' => &get_default_process_exp() },
    $int_argument->("streamingHQsize",
                    "{VideoPlugin.streamingsize}",
                    "720"),
    $int_argument->("streamingHQVideoBitrate",
                    "{VideoPlugin.streamingbitrate}",
                    "496"),
    $int_argument->("streamingHQAudioBitrate",
                    "{VideoPlugin.streamingbitrate}",
                    "80"),
    $flag_argument->("videoDeinterlacingFilter",
                     "Activate a deinterlacing filter to increase the quality of TV footage",
                     "false"),
    $flag_argument->("isParallel",
                     "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
                     "true"),
    $flag_argument->("isCluster",
                     "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
                     "false"),
    $flag_argument->("separateIO",
                     "copy and process the file locally (good for segregating IO cost)",
                     "false"),
    $int_argument->("fixedCore",
                    "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
                    "0"),
  ];
};
100
# Plugin level options; new() appends this hash to the caller supplied
# "OptList".
# NOTE(review): 'name' says "BasicVideoPlugin" although this package is
# SimpleVideoPlugin - looks like a copy/paste leftover; confirm how the name
# is consumed before changing it.
my $options = do
{
  my %plugin_options = (
    'name'     => "BasicVideoPlugin",
    'desc'     => "",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
  );
  \%plugin_options;
};
106
## @function new()
#
# Constructor.
#
# @param $class            the class name to bless the new object into
# @param $pluginlist       array reference; this class name is appended to it
# @param $inputargs        passed through untouched to BasePlugin's constructor
# @param $hashArgOptLists  hash reference; this plugin's arguments are appended
#                          to its "ArgList" entry and its options to "OptList"
# @return the object built by BasePlugin's constructor, reblessed into $class
#
sub new
{
  my ($class) = shift (@_);
  my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
  push(@$pluginlist, $class);

  push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
  push(@{$hashArgOptLists->{"OptList"}},$options);
  # Use an explicit class method call: the indirect object form
  # ("new BasePlugin(...)") is ambiguous to the parser, especially inside a
  # package that defines its own new().
  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless $self, $class;
}
118
## @function get_default_process_exp()
#
# @return the default 'process_exp' pattern (as a string): a case
#         insensitive match on file names ending in ".ts"
#
sub get_default_process_exp
{
  my $transport_stream_exp = '(?i)\.ts$';
  return $transport_stream_exp;
}
123
## @function get_oid_hash_type()
#
# @param $self  the plugin object (not consulted)
# @return always the string "hash_on_ga_xml"
#
sub get_oid_hash_type
{
  my ($self) = @_;
  my $hash_type = "hash_on_ga_xml";
  return $hash_type;
}
129
## @function process()
#
# Processes a single transport stream (.ts) recording:
#   1. extracts technical metadata by running MediaInfo,
#   2. converts the video to a streamable MP4 by running HandBrakeCLI
#      (skipped when a converted copy is already in the cache directory),
#   3. extracts keyframe images by running hive2_ffmpegsvn (skipped when a
#      shots.xml is already in the cache directory),
#   4. associates the MP4 and every keyframe JPG with the document object.
#
# @param $pluginfo  not used by this function
# @param $base_dir  directory prepended to $file when $file is not absolute
# @param $file      path of the .ts file to process
# @param $metadata  not used by this function
# @param $doc_obj   document object that receives text, metadata and
#                   associated files
# @param $gli       not used by this function
# @return 1 on success; dies on any unrecoverable error
#
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR " * SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Base name of the file without its extension. The match is case
  # insensitive to agree with the default process_exp, and is checked so we
  # never carry on with an undefined (or stale) capture.
  my ($filename) = $file =~ /([^\/]+)\.ts$/i;
  if (!defined $filename)
  {
    die("Fatal Error! Unable to determine a .ts file name from: " . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  # NOTE(review): the file name is only sanitised when it contains a date -
  # confirm whether undated names were meant to be sanitised as well.
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }

  # Creates a directory unless it already exists. A failed mkdir is only
  # fatal if the directory is still missing afterwards, so that two workers
  # racing to create the same directory do not kill each other.
  my $ensure_dir = sub
  {
    my ($dir) = @_;
    return if (&FileUtils::directoryExists($dir));
    return if (mkdir($dir, 0775));
    my $reason = $!;
    return if (&FileUtils::directoryExists($dir));
    die("Fatal Error! Failed to create directory: " . $dir . "\nReason: " . $reason . "\n");
  };

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # When separating IO we don't want to be writing all the logs etc to the
  # shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    $ensure_dir->($process_dir);
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  $ensure_dir->($logs_dir);
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  $ensure_dir->($tmp_dir);
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  $ensure_dir->($tmp_dir);

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well. $file has already had
  # $base_dir prepended above, so it must not be prepended a second time.
  my $ivideo_path = $file;
  if ($separate_io eq 'true')
  {
    print " - creating local copy of file: " . $ivideo_path . " [IOS:" . time() . "]\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    my $remote_size = &FileUtils::fileSize($ivideo_path);
    my $local_size = &FileUtils::fileSize($local_ivideo_path);
    if ($remote_size != $local_size)
    {
      die('Error! File copied is not of same size as original: ' . $remote_size . 'b != ' . $local_size . "b\n");
    }
    elsif (&FileUtils::fileExists($local_ivideo_path))
    {
      $ivideo_path = $local_ivideo_path;
      print ' - copied! [IOE:' . time() . "]\n";
    }
    else
    {
      die('Error! Failed to copy file: ' . $ivideo_path . ' => ' . $local_ivideo_path . ' [IOE:' . time() . "]\n");
    }
  }

  # 1. Use MediaInfo to extract important metadata
  print ' - Extracting metadata using MediaInfo...';
  my $mi_metadata = $self->getMetadata($ivideo_path);
  my $input_video_duration = &parseDurationAsSeconds($mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "Done!\n";

  # 2. Convert into MP4 and associate
  # - generate a path for our temporary converted video file
  print STDERR " - Converting video to streamble format...\n";
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  # - child status ($?) of the last conversion attempt, for error reporting
  my $convert_status;
  if (&FileUtils::fileExists($ovideo_path))
  {
    print " - Found existing converted video in cache!\n";
  }
  else
  {
    # - first conversion pass
    print " - Convert using Handbrake\n";
    my $streaming_HQ_size = $self->{'streamingHQsize'};
    my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
    my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
    my $deinterlace = $self->{'videoDeinterlacingFilter'};
    my $video_processing_parameters;
    if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
    {
      $video_processing_parameters = "--strict-anamorphic";
    }
    else
    {
      $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
    }
    if ($deinterlace eq "true")
    {
      $video_processing_parameters .= " --decomb";
    }
    # Default encoder options, handed to HandBrakeCLI through its -x switch
    my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
    my $is_cluster = $self->{'isCluster'};
    my $is_parallel = $self->{'isParallel'};
    # If we are parallel processing on a single (presumably) multicore computer
    # then we need to limit the number of threads (and hence CPUs) HandBrake
    # will utilize in order to emulate true parallel processing (otherwise the
    # first thread to get to HandBrake conversion will take up most the CPUs
    # causing all other threads to wait anyway). It will be interesting to test
    # whether parallel processing or serial processing (with HandBrake parallel
    # processing) is faster. *update* threads=1 *only* controls the encoding and
    # several other parts of Handbrake can run parallel (demuxing etc). I've
    # had to include a 'taskset' command to truly make Handbrake serial
    if ($is_parallel eq 'true' && $is_cluster eq 'false')
    {
      $mencoder_options .= ':threads=1';
    }
    # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
    my $cmd = '';
    if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
    {
      $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
    }
    # NOTE(review): paths are only wrapped in double quotes, so a file name
    # containing ", $ or a backtick would be interpreted by the shell.
    $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
    # - try up to five times, stopping as soon as the output file appears
    my $attempt_count = 0;
    do
    {
      $attempt_count++;
      `$cmd`;
      $convert_status = $?;
    }
    while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
  }
  if (!&FileUtils::fileExists($ovideo_path))
  {
    # $! says nothing about a failed child process, so report the child
    # status and point at the log that captured the command's output
    die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason: command status " . (defined $convert_status ? $convert_status : 'unknown') . " - see log: " . $convert_log_path . "\n");
  }
  # Extra check - ensure the converted video is approximately the same duration
  # as the input video, give or take around 5 seconds
  my $output_raw_video_duration = &getDuration($ovideo_path);
  my $output_video_duration = &parseDurationAsSeconds($output_raw_video_duration);
  if (abs($input_video_duration - $output_video_duration) > 5)
  {
    print STDERR "!Warning! Output video does not have same duration as input video.\n";
  }
  print STDERR " - conversion done!\n";

  # 3. Extract keyframes using hive
  print STDERR " - extract keyframes...\n";
  my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
  # - child status ($?) of the keyframe extraction, for error reporting
  my $keyframe_status;
  if (&FileUtils::fileExists($oshots_path))
  {
    print " - found existing keyframe images in cache\n";
  }
  else
  {
    print " - generating keyframe images using Hive2\n";
    my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
    `$cmd`;
    $keyframe_status = $?;
  }
  if (!&FileUtils::fileExists($oshots_path))
  {
    die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason: command status " . (defined $keyframe_status ? $keyframe_status : 'unknown') . " - see log: " . $convert_log_path . "\n");
  }
  print STDERR " - keyframes extracted!\n";


  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR " - Associate derived files to doc_obj... ";
  # - associate streamable video
  $doc_obj->associate_file($ovideo_path,'gsv.mp4','video/mp4',$topsection);
  # - associate all of the JPGs found in the temp directory
  opendir(my $dh, $tmp_dir) or die("Fatal Error! Failed to read directory: " . $tmp_dir . "\nReason: " . $! . "\n");
  my @shots = readdir($dh);
  closedir($dh);
  my $thumbnail = 0;
  foreach my $shot (sort @shots)
  {
    my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
    # - literal ".jpg" extension only (the dot must be escaped)
    if ($shot =~ /\.jpg$/)
    {
      # - the first JPG (in sorted order) doubles as the thumbnail
      if (!$thumbnail)
      {
        $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
        $thumbnail = 1;
      }
      $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      $doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
    }
  }
  print STDERR "Done!\n";

  # 5. Done! Cleanup.
  print STDERR "SimpleVideoPlugin: Complete! [IOS:" . time() . "]\n";
  return 1;
}
381
382
## @function getDuration()
#
# Runs the mediainfo command line tool on a video and hands back whatever it
# printed for the general duration string.
#
# @param $video_path  path of the video file to inspect
# @return the raw command output (stdout and stderr combined, trailing
#         newline included)
#
sub getDuration
{
  my $video_path = shift(@_);
  my @command_parts = ('mediainfo',
                       '--Inform="General;%Duration/String%"',
                       '"' . $video_path . '"',
                       '2>&1');
  my $command_line = join(' ', @command_parts);
  my $raw_duration = qx{$command_line};
  return $raw_duration;
}
## getDuration() ##
393
394
## @function getMetadata()
#
# Runs 'mediainfo --Output=XML' on a video and turns the output into a two
# level hash: track type => { element name => element text }.
#
# The XML is scanned line by line with regular expressions rather than being
# parsed properly, so only elements that open and close on the same line are
# picked up. Elements seen before the first <track> are filed under
# 'Unknown'. When several tracks share a type, their fields are merged and
# later values replace earlier ones.
#
# @param $self         the plugin object (not consulted)
# @param $ivideo_path  path of the video file to inspect
# @return a hash reference as described above
#
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  ###rint "[DEBUG] command: " . $cmd . "\n";
  my $metadata_xml = `$cmd`;
  # - the command may fail to produce any output at all
  if (!defined $metadata_xml)
  {
    $metadata_xml = '';
  }
  ###rint "[DEBUG] result: " . $metadata_xml . "\n\n";
  my @lines = split(/\r?\n/, $metadata_xml);
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (@lines)
  {
    # - capture just the type value; a track tag may carry further
    #   attributes (e.g. streamid) which must not end up in the hash key
    if ($line =~ /<track type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
## getMetadata() ##
427
428
## @function parseDurationAsSeconds()
#
# Converts a human readable duration such as "1h 2mn", "10mn 2s" or
# "10 min 2 s" into a whole number of seconds. Each unit is optional and
# milliseconds are ignored.
#
# @param $duration_str  the duration text; may be undefined
# @return the duration in seconds, or 0 when nothing could be parsed
#
sub parseDurationAsSeconds
{
  my ($duration_str) = @_;
  my $seconds = 0;
  # nothing to parse (e.g. the metadata field was missing)
  if (!defined $duration_str)
  {
    return $seconds;
  }
  # h - hours
  if ($duration_str =~ /(\d+)\s*h/)
  {
    $seconds += $1 * 60 * 60;
  }
  # mn / min - minutes (the spaced "min" spelling is presumably what newer
  # MediaInfo releases print - TODO confirm against the installed version)
  if ($duration_str =~ /(\d+)\s*mi?n/)
  {
    $seconds += $1 * 60;
  }
  # s - seconds (digits followed by "ms" can never match here, as the unit
  # must start with 's')
  if ($duration_str =~ /(\d+)\s*s/)
  {
    $seconds += $1;
  }
  # ms - milliseconds (ignore - I'll be lucky to match to the closest second)
  return $seconds;
}
## parseDurationAsSeconds() ##
454
4551;
456
457
458
459
460
461
462
463
464
465
466
Note: See TracBrowser for help on using the repository browser.