source: gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm@ 27647

Last change on this file since 27647 was 27647, checked in by jmt12, 11 years ago

Add some more testing to ensure any local copy of a media file is the same size as the remote one (this was an issue in HDThriftFS when an incorrect CLASSPATH meant base91 encoding was only happening on one end of the connection)

File size: 15.1 KB
Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4# processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35use FileUtils;
36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Establish the inheritance chain at compile time: this plugin is a
# MetadataRead first and a BasePlugin second.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Builds a fresh true/false choice list for each enum-typed argument, so no
# two arguments share the same underlying array.
my $true_false_choices = sub
{
  return [
    { 'name' => "true",  'desc' => "{common.true}" },
    { 'name' => "false", 'desc' => "{common.false}" },
  ];
};

# Command line / collection configuration arguments understood by this
# plugin. The list is appended to the shared "ArgList" by new().
my $arguments = [
  # - which files this plugin claims
  {
    'name' => "process_exp",
    'desc' => "{BasePlugin.process_exp}",
    'type' => "regexp",
    'reqd' => "no",
    'deft' => get_default_process_exp(),
  },
  # - width handed to HandBrake (-w) unless empty or "fullsize"
  {
    'name' => "streamingHQsize",
    'desc' => "{VideoPlugin.streamingsize}",
    'type' => "int",
    'deft' => "720",
    'reqd' => "no",
  },
  # - video bitrate handed to HandBrake (-b)
  {
    'name' => "streamingHQVideoBitrate",
    'desc' => "{VideoPlugin.streamingbitrate}",
    'type' => "int",
    'deft' => "496",
    'reqd' => "no",
  },
  # - audio bitrate handed to HandBrake (-B)
  {
    'name' => "streamingHQAudioBitrate",
    'desc' => "{VideoPlugin.streamingbitrate}",
    'type' => "int",
    'deft' => "80",
    'reqd' => "no",
  },
  # - adds --decomb to the HandBrake command when "true"
  {
    'name' => "videoDeinterlacingFilter",
    'desc' => "Activate a deinterlacing filter to increase the quality of TV footage",
    'type' => "enum",
    'list' => $true_false_choices->(),
    'deft' => "false",
    'reqd' => "no",
  },
  # - parallel on a single machine limits the x264 encoder to one thread
  {
    'name' => "isParallel",
    'desc' => "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
    'type' => "enum",
    'list' => $true_false_choices->(),
    'deft' => "true",
    'reqd' => "no",
  },
  {
    'name' => "isCluster",
    'desc' => "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
    'type' => "enum",
    'list' => $true_false_choices->(),
    'deft' => "false",
    'reqd' => "no",
  },
  # - work on a copy of the video under /tmp rather than in place
  {
    'name' => "separateIO",
    'desc' => "copy and process the file locally (good for segregating IO cost)",
    'type' => "enum",
    'list' => $true_false_choices->(),
    'deft' => "false",
    'reqd' => "no",
  },
  # - pins HandBrake to one core using taskset when greater than zero
  {
    'name' => "fixedCore",
    'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
    'type' => "int",
    'deft' => "0",
    'reqd' => "no",
  },
];
100
# Plugin description record appended to the shared "OptList" by new().
# The name now matches this package (it previously read "BasicVideoPlugin",
# apparently left over from an earlier incarnation of the plugin).
# NOTE(review): presumably this is the name Greenstone tools report for the
# plugin - confirm nothing relies on the old value.
my $options = { 'name'     => "SimpleVideoPlugin",
                'desc'     => "",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };
106
# Constructor.
#
# Registers this class in the plugin list, appends this plugin's argument
# and option descriptions to the shared lists, delegates the actual
# construction to BasePlugin and then reblesses the result into $class.
#
# @param $class            the class being instantiated
# @param $pluginlist       array ref; $class is pushed onto it
# @param $inputargs        passed through untouched to BasePlugin
# @param $hashArgOptLists  hash ref holding the "ArgList" and "OptList"
#                          array refs that are extended here
# @return the new plugin object, blessed into $class
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
  push(@{$pluginlist}, $class);

  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  # Explicit class-method call rather than the indirect object form
  # ("new BasePlugin(...)"), whose parsing depends on which names happen to
  # be known to the compiler at this point.
  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($self, $class);
}
118
# Default value for the process_exp argument: a case-insensitive match on
# file names ending in ".ts".
sub get_default_process_exp
{
  my $default_exp = '(?i)\.ts$';
  return $default_exp;
}
123
# Names the OID hashing strategy for documents produced by this plugin.
# Always answers "hash_on_ga_xml", whatever the state of the object.
sub get_oid_hash_type
{
  my ($self) = @_;
  my $hash_type = "hash_on_ga_xml";
  return $hash_type;
}
129
# Process a single video file: record MediaInfo metadata, convert the video
# to MP4 with HandBrakeCLI, extract keyframe images with hive2_ffmpegsvn and
# associate all derived files with the document object.
#
# @param $pluginfo  unused here
# @param $base_dir  directory prepended to $file when $file is not absolute
# @param $file      the video file to process
# @param $metadata  unused here
# @param $doc_obj   document object receiving text, metadata and files
# @param $gli       unused here
# @return 1 on success
# Dies when a working directory cannot be created, when the local copy
# (separateIO / HDFS) fails or differs in size, when the conversion or the
# keyframe extraction produces no output, or when the cache directory cannot
# be read.
#
# NOTE(review): file paths are interpolated into shell commands between
# double quotes only; names containing ", $ or ` will break these commands.
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR " * SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Derive the base name used for all working files. The match is case
  # insensitive so that it agrees with the default process_exp, and is
  # checked so that a stale or undefined capture is never used.
  my $filename;
  if ($file =~ /([^\/]+)\.ts$/i)
  {
    $filename = $1;
  }
  else
  {
    # Not a .ts file (custom process_exp?): fall back to the last path
    # component with any extension removed
    $filename = $file;
    $filename =~ s/^.*\///;
    $filename =~ s/\.[^.]*$//;
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are separating IO, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    &_ensureDirectoryExists($process_dir);
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  &_ensureDirectoryExists($logs_dir);
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  &_ensureDirectoryExists($tmp_dir);
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  &_ensureDirectoryExists($tmp_dir);

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well.
  # - $file has already been combined with $base_dir above; doing so a
  #   second time would prefix a relative base directory twice
  my $ivideo_path = $file;
  if ($separate_io eq 'true')
  {
    print " - creating local copy of file: " . $ivideo_path . " [IOS:" . time() . "]\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    my $remote_size = &FileUtils::fileSize($ivideo_path);
    my $local_size = &FileUtils::fileSize($local_ivideo_path);
    if ($remote_size != $local_size)
    {
      die('Error! File copied is not of same size as original: ' . $remote_size . 'b != ' . $local_size . "b\n");
    }
    elsif (&FileUtils::fileExists($local_ivideo_path))
    {
      $ivideo_path = $local_ivideo_path;
      print ' - copied! [IOE:' . time() . "]\n";
    }
    else
    {
      die('Error! Failed to copy file: ' . $ivideo_path . ' => ' . $local_ivideo_path . ' [IOE:' . time() . "]\n");
    }
  }

  # 1. Use MediaInfo to extract important metadata
  print ' - Extracting metadata using MediaInfo...';
  my $mi_metadata = $self->getMetadata($ivideo_path);
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "Done!\n";

  # 2. Convert into MP4 and associate
  # - generate a path for our temporary converted video file
  print STDERR " - Converting video to streamble format...\n";
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  # - child status ($?) of the last conversion attempt, for error reporting
  my $convert_status;
  if (&FileUtils::fileExists($ovideo_path))
  {
    print " - Found existing converted video in cache!\n";
  }
  else
  {
    # - first conversion pass
    print " - Convert using Handbrake\n";
    my $streaming_HQ_size = $self->{'streamingHQsize'};
    my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
    my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
    my $deinterlace = $self->{'videoDeinterlacingFilter'};
    my $video_processing_parameters;
    if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
    {
      $video_processing_parameters = "--strict-anamorphic";
    }
    else
    {
      $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
    }
    if ($deinterlace eq "true")
    {
      $video_processing_parameters .= " --decomb";
    }
    # Default MenCoder options for x264
    my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
    my $is_cluster = $self->{'isCluster'};
    my $is_parallel = $self->{'isParallel'};
    # If we are parallel processing on a single (presumably) multicore computer
    # then we need to limit the number of threads (and hence CPUs) HandBrake
    # will utilize in order to emulate true parallel processing (otherwise the
    # first thread to get to HandBrake conversion will take up most the CPUs
    # causing all other threads to wait anyway). It will be interesting to test
    # whether parallel processing or serial processing (with HandBrake parallel
    # processing) is faster. *update* threads=1 *only* controls the encoding and
    # several other parts of Handbrake can run parallel (demuxing etc). I've
    # had to include a 'taskset' command to truly make Handbrake serial
    if ($is_parallel eq 'true' && $is_cluster eq 'false')
    {
      $mencoder_options .= ':threads=1';
    }
    # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
    my $cmd = '';
    if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
    {
      $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
    }
    $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
    # - try up to five times, stopping as soon as the output file appears
    my $attempt_count = 0;
    do
    {
      $attempt_count++;
      `$cmd`;
      $convert_status = $?;
    }
    while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
  }
  if (!&FileUtils::fileExists($ovideo_path))
  {
    # - $! says nothing about why the child failed; report its status instead
    die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason: " . &_describeCommandStatus($convert_status) . " (log: " . $convert_log_path . ")\n");
  }
  print STDERR " - conversion done!\n";

  # 3. Extract keyframes using hive
  print STDERR " - extract keyframes...\n";
  my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
  my $hive_status;
  if (&FileUtils::fileExists($oshots_path))
  {
    print " - found existing keyframe images in cache\n";
  }
  else
  {
    print " - generating keyframe images using Hive2\n";
    my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
    `$cmd`;
    $hive_status = $?;
  }
  if (!&FileUtils::fileExists($oshots_path))
  {
    die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason: " . &_describeCommandStatus($hive_status) . " (log: " . $convert_log_path . ")\n");
  }
  print STDERR " - keyframes extracted!\n";

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR " - Associate derived files to doc_obj... ";
  # - associate streamable video
  $doc_obj->associate_file($ovideo_path,'gsv.mp4','video/mp4',$topsection);
  # - associate all of the JPGs found in the temp directory
  opendir(my $dh, $tmp_dir) or die("Fatal Error! Failed to read directory: " . $tmp_dir . "\nReason: " . $! . "\n");
  my @shots = readdir($dh);
  closedir($dh);
  my $thumbnail = 0;
  foreach my $shot (sort @shots)
  {
    # - the dot is escaped so that only a real ".jpg" extension matches
    if ($shot =~ /\.jpg$/)
    {
      my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
      # - the first image (in sorted order) doubles as the thumbnail
      if (!$thumbnail)
      {
        $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
        $thumbnail = 1;
      }
      $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      $doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
    }
  }
  print STDERR "Done!\n";

  # 5. Done! Cleanup.
  print STDERR "SimpleVideoPlugin: Complete! [IOS:" . time() . "]\n";
  return 1;
}

# Private helper: make sure the given directory exists, creating it (mode
# 0775) when it is missing. Dies when it cannot be created.
sub _ensureDirectoryExists
{
  my ($dir_path) = @_;
  if (!&FileUtils::directoryExists($dir_path))
  {
    if (!mkdir($dir_path, 0775))
    {
      my $mkdir_error = $!;
      # Another import process may have created the directory between the
      # check and the mkdir; that is not an error
      if (!&FileUtils::directoryExists($dir_path))
      {
        die('Error! Failed to create directory: ' . $dir_path . "\nReason: " . $mkdir_error . "\n");
      }
    }
  }
  return $dir_path;
}

# Private helper: turn a child status (the value of $? after backticks)
# into a short human readable description.
sub _describeCommandStatus
{
  my ($status) = @_;
  if (!defined $status)
  {
    return 'command was not run';
  }
  if ($status == -1)
  {
    return 'command could not be executed';
  }
  if ($status & 127)
  {
    return 'command killed by signal ' . ($status & 127);
  }
  return 'command exited with status ' . ($status >> 8);
}
372
# Run the 'mediainfo' command line tool over a video file and collect the
# values it reports.
#
# The XML output is scanned line by line: a <track type="..."> line selects
# the current section, and every following single-line <Field>value</Field>
# element is stored under that section. Fields seen before any track line go
# under 'Unknown'. When several tracks share a type, later values overwrite
# earlier ones.
#
# @param $self         unused here
# @param $ivideo_path  path of the video file handed to mediainfo
# @return hash ref of the form { track type => { field => value } }; the
#         'Unknown' section is always present
#
# NOTE(review): the path is placed between double quotes in a shell command;
# names containing ", $ or ` will break the command.
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  my $metadata_xml = `$cmd`;
  # - backticks yield undef when the command could not be started at all
  if (!defined $metadata_xml)
  {
    $metadata_xml = '';
  }
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (split(/\r?\n/, $metadata_xml))
  {
    # - capture only the type itself so that track elements carrying further
    #   attributes (e.g. streamid="1") still end up under 'Audio', 'Video', ...
    if ($line =~ /<track\s+type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
402
4031;
404
405
406
407
408
409
410
411
412
413
414
Note: See TracBrowser for help on using the repository browser.