source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 27527

Last change on this file since 27527 was 27527, checked in by jmt12, 11 years ago

Calling the isHDFS() in FileUtils rather than the non-existent one in utils

File size: 14.4 KB
RevLine 
[25780]1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
[27527]35use FileUtils;
[25780]36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Set up the inheritance chain at compile time. MetadataRead is listed first,
# so it is searched before BasePlugin during method resolution.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Argument specifications specific to this plugin. new() appends these to the
# shared "ArgList". Each entry is a hash of name/desc/type/deft/reqd, plus a
# 'list' of permitted values for the enum typed arguments.
my $arguments = do
{
  # Builds a fresh true/false choice list on every call so that no two
  # argument entries share the same array reference.
  my $true_false_choices = sub
  {
    return [ { 'name' => "true",  'desc' => "{common.true}" },
             { 'name' => "false", 'desc' => "{common.false}" } ];
  };

  my @specs = ();

  # - which files this plugin claims
  push(@specs, { 'name' => "process_exp",
                 'desc' => "{BasePlugin.process_exp}",
                 'type' => "regexp",
                 'reqd' => "no",
                 'deft' => &get_default_process_exp() });

  # - settings for the streamable version of the video
  push(@specs, { 'name' => "streamingHQsize",
                 'desc' => "{VideoPlugin.streamingsize}",
                 'type' => "int",
                 'deft' => "720",
                 'reqd' => "no" });
  push(@specs, { 'name' => "streamingHQVideoBitrate",
                 'desc' => "{VideoPlugin.streamingbitrate}",
                 'type' => "int",
                 'deft' => "496",
                 'reqd' => "no" });
  push(@specs, { 'name' => "streamingHQAudioBitrate",
                 'desc' => "{VideoPlugin.streamingbitrate}",
                 'type' => "int",
                 'deft' => "80",
                 'reqd' => "no" });
  push(@specs, { 'name' => "videoDeinterlacingFilter",
                 'desc' => "Activate a deinterlacing filter to increase the quality of TV footage",
                 'type' => "enum",
                 'list' => $true_false_choices->(),
                 'deft' => "false",
                 'reqd' => "no" });

  # - settings describing the environment the import runs in
  push(@specs, { 'name' => "isParallel",
                 'desc' => "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
                 'type' => "enum",
                 'list' => $true_false_choices->(),
                 'deft' => "true",
                 'reqd' => "no" });
  push(@specs, { 'name' => "isCluster",
                 'desc' => "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
                 'type' => "enum",
                 'list' => $true_false_choices->(),
                 'deft' => "false",
                 'reqd' => "no" });
  push(@specs, { 'name' => "separateIO",
                 'desc' => "copy and process the file locally (good for segregating IO cost)",
                 'type' => "enum",
                 'list' => $true_false_choices->(),
                 'deft' => "false",
                 'reqd' => "no" });
  push(@specs, { 'name' => "fixedCore",
                 'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
                 'type' => "int",
                 'deft' => "0",
                 'reqd' => "no" });

  \@specs;
};
100
# Plugin description record; new() appends it to the shared "OptList".
# NOTE(review): the name below is "BasicVideoPlugin" although this package is
# SimpleVideoPlugin - confirm whether the mismatch is intentional.
my $options = {};
$options->{'name'}     = "BasicVideoPlugin";
$options->{'desc'}     = "";
$options->{'abstract'} = "no";
$options->{'inherits'} = "yes";
$options->{'args'}     = $arguments;
106
# Constructor.
#
# Parameters:
#   $class           - the class name being instantiated
#   $pluginlist      - array reference; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasePlugin constructor
#   $hashArgOptLists - hash reference; this plugin's argument specs are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by BasePlugin, reblessed into $class.
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
  push(@$pluginlist, $class);

  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  # Explicit class-method call: the indirect object form (new BasePlugin ...)
  # is parsed heuristically and is fragile in a package that defines its own
  # new().
  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless $self, $class;
}
118
# Returns the default regular expression (as a string) selecting the files
# this plugin processes: any name ending in ".ts", matched case-insensitively.
sub get_default_process_exp
{
  my $transport_stream_exp = q{(?i)\.ts$};
  return $transport_stream_exp;
}
123
# Returns the name of the OID hashing scheme used for documents created by
# this plugin. The invocant is accepted but not consulted.
sub get_oid_hash_type
{
  my ($self) = @_;
  my $hash_type = "hash_on_ga_xml";
  return $hash_type;
}
129
# Processes a single video file.
#
# Adds placeholder text and MediaInfo-derived metadata to the document object,
# optionally copies the source to local scratch space first, transcodes it to
# MP4 with HandBrakeCLI, extracts keyframes with hive2_ffmpegsvn, and finally
# associates the converted video and every keyframe JPG with the document.
#
# Parameters:
#   $pluginfo, $metadata, $gli - accepted but not used by this method
#   $base_dir - prepended to $file when $file is not absolute (and $base_dir
#               is not the empty string)
#   $file     - path of the video to process
#   $doc_obj  - document object receiving text, metadata and associated files
#
# Returns 1 on success. Dies if no file name can be derived from $file, if
# the converted video is missing after five attempts, or if the keyframe
# index (shots.xml) is missing after extraction.
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  print STDERR "[A:" . time() . "] SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Derive the base name (no directory, no extension) used to name the log
  # and cache locations. The match is case-insensitive so it agrees with the
  # default process_exp, which also accepts ".TS".
  my $filename;
  if ($file =~ /[\/]?([^\/]+)\.(?:ts)$/i)
  {
    $filename = $1;
  }
  elsif ($file =~ /([^\/]+)$/)
  {
    # Not a .ts name (only reachable when process_exp has been overridden):
    # fall back to the last path component rather than using a stale $1.
    $filename = $1;
  }
  else
  {
    die("Fatal Error! Failed to determine a file name from: " . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  # NOTE(review): the file name is only sanitised when a date is found -
  # confirm that is intended.
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    if (!&FileUtils::directoryExists($process_dir))
    {
      mkdir($process_dir, 0775);
    }
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  if (!&FileUtils::directoryExists($logs_dir))
  {
    mkdir($logs_dir, 0775);
  }
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  # (not referenced anywhere below)
  my $pass_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '-pass');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  if (!&FileUtils::directoryExists($tmp_dir))
  {
    mkdir($tmp_dir, 0775);
  }
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  if (!&FileUtils::directoryExists($tmp_dir))
  {
    mkdir($tmp_dir, 0775);
  }

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  my $ivideo_path = $file;
  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $ivideo_path = &FileUtils::filenameConcatenate($base_dir, $file);
  }
  if ($separate_io eq 'true')
  {
    print STDERR "[B1:" . time() . "] Creating local copy of file: " . $ivideo_path . "\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    $ivideo_path = $local_ivideo_path;
    print STDERR "[B2:" . time() . "] Complete\n";
  }

  # 1. Use MediaInfo to extract important metadata
  print STDERR "[C1:" . time() . "] Extracting metadata\n";
  print " - Extracting metadata using MediaInfo\n";
  my $mi_metadata = $self->getMetadata($ivideo_path);
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  $doc_obj->set_metadata_element($topsection,"FileSize",$mi_metadata->{'General'}->{'File_size'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "[C2:" . time() . "] Complete\n";

  # 2. Convert into MP4 (H.264 video) and associate
  # - generate a path for our temporary converted video file
  print STDERR "[D1:" . time() . "] Converting video to streamble format\n";
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  if (&FileUtils::fileExists($ovideo_path))
  {
    print " - Found existing converted video in cache\n";
  }
  else
  {
    # - first conversion pass
    print " - Convert using Handbrake\n";
    my $streaming_HQ_size = $self->{'streamingHQsize'};
    my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
    my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
    my $deinterlace = $self->{'videoDeinterlacingFilter'};
    my $video_processing_parameters;
    if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
    {
      $video_processing_parameters = "--strict-anamorphic";
    }
    else
    {
      $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
    }
    if ($deinterlace eq "true")
    {
      $video_processing_parameters .= " --decomb";
    }
    # Default encoder options for x264 (handed to HandBrakeCLI via -x)
    my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
    my $is_cluster = $self->{'isCluster'};
    my $is_parallel = $self->{'isParallel'};
    # If we are parallel processing on a single (presumably) multicore computer
    # then we need to limit the number of threads (and hence CPUs) HandBrake
    # will utilize in order to emulate true parallel processing (otherwise the
    # first thread to get to HandBrake conversion will take up most of the CPUs
    # causing all other threads to wait anyway). It will be interesting to test
    # whether parallel processing or serial processing (with HandBrake parallel
    # processing) is faster. *update* threads=1 *only* controls the encoding and
    # several other parts of Handbrake can run parallel (demuxing etc). I've
    # had to include a 'taskset' command to truly make Handbrake serial
    if ($is_parallel eq 'true' && $is_cluster eq 'false')
    {
      $mencoder_options .= ':threads=1';
    }
    # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
    my $cmd = '';
    if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
    {
      $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
    }
    $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" '
          . $video_processing_parameters
          . ' -e x264 -b ' . $streaming_HQ_VideoBitrate
          . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate
          . ' -D 0.0 -x ' . $mencoder_options
          . ' > "' . $convert_log_path . '" 2>&1';
    # Retry the conversion (at most five attempts in total) until the output
    # file appears
    my $attempt_count = 0;
    do
    {
      $attempt_count++;
      print "[DEBUG: Video conversion attempt #" . $attempt_count . ": |" . $cmd . "|]\n";
      `$cmd`;
    }
    while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
  }
  if (!&FileUtils::fileExists($ovideo_path))
  {
    die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
  }
  print STDERR "[D2:" . time() . "] Complete\n";

  # 3. Extract keyframes using hive
  print STDERR "[E1:" . time() . "] Extract keyframes\n";
  my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
  if (&FileUtils::fileExists($oshots_path))
  {
    print " - Found existing keyframe images in cache\n";
  }
  else
  {
    print " - Generating keyframe images using Hive2\n";
    my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
    ###print "[cmd: " . $cmd . "]\n";
    `$cmd`;
  }
  if (!&FileUtils::fileExists($oshots_path))
  {
    die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
  }
  print STDERR "[E2:" . time() . "] Complete\n";


  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR "[F1:" . time() . "] Associate derived files to doc_obj\n";
  # - associate streamable video
  $doc_obj->associate_file($ovideo_path,'gsv.mp4','video/mp4',$topsection);
  # - associate all of the JPGs found in the temp directory
  my @shots = ();
  if (opendir(my $dh, $tmp_dir))
  {
    @shots = readdir($dh);
    closedir($dh);
  }
  else
  {
    # Carry on without keyframes rather than reading from an unopened handle
    print STDERR "Warning! Failed to read keyframe directory: " . $tmp_dir . " (" . $! . ")\n";
  }
  my $thumbnail = 0;
  foreach my $shot (sort @shots)
  {
    my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
    # Literal ".jpg" extension only (the dot is escaped)
    if ($shot =~ /\.jpg$/)
    {
      # The first JPG (in sorted order) doubles as the thumbnail
      if (!$thumbnail)
      {
        $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
        $thumbnail = 1;
      }
      $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      $doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
    }
  }
  print STDERR "[F2:" . time() . "] Complete\n";

  # 5. Done! Cleanup.
  print STDERR "[G:" . time() . "] SimpleVideoPlugin: Complete!\n";
  return 1;
}
346
# Runs the external 'mediainfo' tool on a file and parses its XML output.
#
# Parameters:
#   $self        - the plugin object (not consulted)
#   $ivideo_path - path of the file to inspect
#
# Returns a hash reference keyed by track type (the value of the type
# attribute of each <track> element, e.g. 'General', 'Video', 'Audio'). Each
# value is a hash reference mapping element names to their text. Fields seen
# before any <track> element are filed under 'Unknown'. When several tracks
# share a type, later values overwrite earlier ones.
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  # NOTE(review): the path is interpolated into a double-quoted shell
  # argument, so a name containing ", $ or ` would be mangled by the shell.
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  ###print "Cmd: " . $cmd . "\n";
  my $metadata_xml = `$cmd`;
  # Backticks yield undef if the command could not be started at all
  $metadata_xml = '' unless defined $metadata_xml;

  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (split(/\r?\n/, $metadata_xml))
  {
    # Capture only the type value itself; any further attributes on the
    # element (e.g. streamid="1" or typeorder="2") are ignored instead of
    # being swallowed into the track type.
    if ($line =~ /<track\s+type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    # A single-line <Field>value</Field> element
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
375
3761;
377
378
379
380
381
382
383
384
385
386
387
Note: See TracBrowser for help on using the repository browser.