root/gs2-extensions/video-and-audio/trunk/src/perllib/plugins/SimpleVideoPlugin.pm @ 27529

Revision 27529, 14.9 KB (checked in by jmt12, 6 years ago)

Fixing a bug (HDFS drivers not being recognized due to sometimes being relative) and some tidy up stuff

Line 
1###########################################################################
2#
3# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
4#                         processing
5#
6# A component of the Greenstone digital library software from the New
7# Zealand Digital Library Project at the University of Waikato, New
8# Zealand.
9#
10# Copyright (C) 2012 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package SimpleVideoPlugin;
29
30use File::Temp qw/ tempdir /;
31
32use BasePlugin;
33use MetadataRead;
34use util;
35use FileUtils;
36
37use strict;
38use warnings;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
# Establish the inheritance chain at compile time. MetadataRead is listed
# first, so it is consulted ahead of BasePlugin during method lookup.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BasePlugin);
}
45
# Plugin-specific arguments. new() appends this list to the "ArgList" it
# hands to the BasePlugin constructor, and process() later reads the parsed
# values back out of $self under the same names.
my $arguments = do
{
  # Builds a fresh true/false enumeration for every argument that needs one,
  # so no two arguments ever share the same underlying list.
  my $true_or_false = sub
  {
    return [ { 'name' => "true",  'desc' => "{common.true}" },
             { 'name' => "false", 'desc' => "{common.false}" } ];
  };

  [
    # Which files this plugin claims
    { 'name' => "process_exp",
      'desc' => "{BasePlugin.process_exp}",
      'type' => "regexp",
      'deft' => get_default_process_exp(),
      'reqd' => "no" },

    # Size and bitrates of the converted (streamable) video
    { 'name' => "streamingHQsize",
      'desc' => "{VideoPlugin.streamingsize}",
      'type' => "int",
      'deft' => "720",
      'reqd' => "no" },
    { 'name' => "streamingHQVideoBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "496",
      'reqd' => "no" },
    { 'name' => "streamingHQAudioBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "80",
      'reqd' => "no" },
    { 'name' => "videoDeinterlacingFilter",
      'desc' => "Activate a deinterlacing filter to increase the quality of TV footage",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },

    # Execution environment
    { 'name' => "isParallel",
      'desc' => "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "true",
      'reqd' => "no" },
    { 'name' => "isCluster",
      'desc' => "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "separateIO",
      'desc' => "copy and process the file locally (good for segregating IO cost)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "fixedCore",
      'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
      'type' => "int",
      'deft' => "0",
      'reqd' => "no" },
  ];
};
100
# Plugin descriptor; new() pushes it onto the "OptList" handed to the
# BasePlugin constructor. The 'name' now matches this package
# (SimpleVideoPlugin) -- it previously read "BasicVideoPlugin", which does
# not correspond to any package in this file.
# NOTE(review): presumably Greenstone's plugin-information tooling reports
# this name to identify the plugin -- confirm nothing depends on the old value.
my $options = { 'name'     => "SimpleVideoPlugin",
                'desc'     => "",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };
106
# Constructor.
#
# Parameters:
#   $class           - the class being instantiated
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to BasePlugin's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's arguments and descriptor
#
# Returns the object built by BasePlugin's constructor, re-blessed into $class.
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
  push(@{$pluginlist}, $class);

  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  # Explicit class-method call rather than the ambiguous indirect-object
  # form ("new BasePlugin(...)").
  my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($self, $class);
}
118
# Returns the default regular expression (as a string) selecting the files
# this plugin processes: any name ending in ".ts", matched case-insensitively.
sub get_default_process_exp
{
  my $ts_file_pattern = q{(?i)\.ts$};
  return $ts_file_pattern;
}
123
# Returns the name of the OID hashing strategy for this plugin, which is
# always "hash_on_ga_xml". The invocant is accepted but not consulted.
sub get_oid_hash_type
{
  my ($self) = @_;
  my $hash_type = "hash_on_ga_xml";
  return $hash_type;
}
129
# Processes one video file: copies it to local scratch space, extracts
# metadata with MediaInfo, converts it to a streamable MP4 with HandBrakeCLI,
# extracts keyframes with hive2_ffmpegsvn, and associates the derived files
# with the document object.
#
# Parameters:
#   $pluginfo - not used by this method
#   $base_dir - directory prepended to $file when $file is not absolute
#   $file     - path of the video to process
#   $metadata - not used by this method
#   $doc_obj  - document object receiving text, metadata and associated files
#   $gli      - not used by this method
#
# Returns 1 on success.
# Dies when no file name can be derived from $file, or when the converted
# video or the keyframe index is missing after its external tool has run.
# NOTE(review): when IO is not separated (separateIO is not 'true' and the
# file is not on HDFS) this method prints "Not gonna work!" and calls exit,
# terminating the whole process. That is the original behaviour and has been
# preserved, but it looks like a development guard -- confirm it is intended.
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  # Resolve a relative path against the base directory exactly once. (A
  # second, identical resolution further down used to prefix $base_dir twice
  # whenever the concatenated path was still not absolute.)
  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR "[A:" . time() . "] SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Derive the working name: the last path component minus a trailing ".ts".
  # The suffix test is case-insensitive, in line with the default process
  # expression, and the result is taken from this match directly instead of
  # from $1, which would be stale or undefined after a failed match.
  my ($filename) = $file =~ /([^\/]+?)(?:\.ts)?$/i;
  if (!defined $filename)
  {
    die("Fatal Error! Failed to determine a file name from: " . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    # Dated names are also normalised to alphanumerics and underscores
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it).
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    _ensure_directory($process_dir);
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  _ensure_directory($logs_dir);
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  # Derived files are cached under <process_dir>/cached/<filename>/
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  _ensure_directory($tmp_dir);
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  _ensure_directory($tmp_dir);

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  my $ivideo_path = $file;
  if ($separate_io eq 'true')
  {
    print STDERR "[B1:" . time() . "] Creating local copy of file: " . $ivideo_path . "\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    $ivideo_path = $local_ivideo_path;
    print STDERR "[B2:" . time() . "] Complete\n";
  }
  else
  {
    # NOTE(review): terminates the entire process, not just this document.
    print "Not gonna work!\n";
    exit;
  }

  # 1. Use MediaInfo to extract important metadata
  print STDERR "[C1:" . time() . "] Extracting metadata\n";
  print " - Extracting metadata using MediaInfo\n";
  my $mi_metadata = $self->getMetadata($ivideo_path);
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  # Prefer the size MediaInfo reports; otherwise ask the file system
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "[C2:" . time() . "] Complete\n";

  # 2. Convert into a streamable MP4 and associate
  # - generate a path for our temporary converted video file
  print STDERR "[D1:" . time() . "] Converting video to streamble format\n";
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  if (&FileUtils::fileExists($ovideo_path))
  {
    print " - Found existing converted video in cache\n";
  }
  else
  {
    # - first conversion pass
    print " - Convert using Handbrake\n";
    my $streaming_HQ_size = $self->{'streamingHQsize'};
    my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
    my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
    my $deinterlace = $self->{'videoDeinterlacingFilter'};
    my $video_processing_parameters;
    if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
    {
      $video_processing_parameters = "--strict-anamorphic";
    }
    else
    {
      $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
    }
    if ($deinterlace eq "true")
    {
      $video_processing_parameters .= " --decomb";
    }
    # Default encoder options for x264 (passed to HandBrakeCLI via -x)
    my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
    my $is_cluster = $self->{'isCluster'};
    my $is_parallel = $self->{'isParallel'};
    # If we are parallel processing on a single (presumably) multicore computer
    # then we need to limit the number of threads (and hence CPUs) HandBrake
    # will utilize in order to emulate true parallel processing (otherwise the
    # first thread to get to HandBrake conversion will take up most of the CPUs
    # causing all other threads to wait anyway). *update* threads=1 *only*
    # controls the encoding and several other parts of Handbrake can run in
    # parallel (demuxing etc), hence the optional 'taskset' prefix below to
    # truly make Handbrake serial.
    if ($is_parallel eq 'true' && $is_cluster eq 'false')
    {
      $mencoder_options .= ':threads=1';
    }
    # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
    my $cmd = '';
    if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
    {
      $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
    }
    $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
    # Retry the conversion, up to five attempts in total, until the output
    # file appears
    my $attempt_count = 0;
    do
    {
      $attempt_count++;
      `$cmd`;
    }
    while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
  }
  if (!&FileUtils::fileExists($ovideo_path))
  {
    die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
  }
  print STDERR "[D2:" . time() . "] Complete\n";

  # 3. Extract keyframes using hive
  print STDERR "[E1:" . time() . "] Extract keyframes\n";
  my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
  if (&FileUtils::fileExists($oshots_path))
  {
    print " - Found existing keyframe images in cache\n";
  }
  else
  {
    print " - Generating keyframe images using Hive2\n";
    # Output is appended to the same log the conversion step wrote to
    my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
    `$cmd`;
  }
  if (!&FileUtils::fileExists($oshots_path))
  {
    die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
  }
  print STDERR "[E2:" . time() . "] Complete\n";

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR "[F1:" . time() . "] Associate derived files to doc_obj\n";
  # - associate streamable video
  $doc_obj->associate_file($ovideo_path,'gsv.mp4','video/mp4',$topsection);
  # - associate all of the JPGs found in the temp directory
  my @shots = ();
  if (opendir(my $dh, $tmp_dir))
  {
    @shots = readdir($dh);
    closedir($dh);
  }
  else
  {
    # Carry on without keyframes rather than reading from an unopened handle
    print STDERR "Warning! Failed to read keyframe directory: " . $tmp_dir . " (" . $! . ")\n";
  }
  my $thumbnail = 0;
  foreach my $shot (sort @shots)
  {
    # The dot is escaped so only a genuine ".jpg" suffix qualifies
    if ($shot =~ /\.jpg$/)
    {
      my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
      # The first JPG (in sorted order) doubles as the document thumbnail
      if (!$thumbnail)
      {
        $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
        $thumbnail = 1;
      }
      $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
      $doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
    }
  }
  print STDERR "[F2:" . time() . "] Complete\n";

  # 5. Done!
  print STDERR "[G:" . time() . "] SimpleVideoPlugin: Complete!\n";
  return 1;
}

# Private helper for process(): creates $dir with mode 0775 unless
# FileUtils already reports it as existing. A failed mkdir is reported on
# STDERR (unless the directory exists by then, e.g. because another process
# created it first) but is not fatal.
sub _ensure_directory
{
  my ($dir) = @_;
  if (!&FileUtils::directoryExists($dir))
  {
    if (!mkdir($dir, 0775) && !-d $dir)
    {
      print STDERR "Warning! Failed to create directory: " . $dir . " (" . $! . ")\n";
    }
  }
}
364
# Runs the external 'mediainfo' tool on a file and collects its XML output
# into a two-level hash.
#
# Parameters:
#   $self        - the plugin object (not consulted)
#   $ivideo_path - path of the media file to inspect
#
# Returns a hash ref mapping track type (the value of the type attribute of
# each <track> element) to a hash ref of element name => element text.
# Elements seen before any <track> element are filed under 'Unknown', which
# is always present. When the same element name occurs more than once for a
# track type -- including across several tracks of that type -- the last
# occurrence wins.
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  my $metadata_xml = `$cmd`;
  # Backticks can yield undef when the command could not be started
  if (!defined $metadata_xml)
  {
    $metadata_xml = '';
  }
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (split(/\r?\n/, $metadata_xml))
  {
    # Only the type attribute's own value is captured, so a track element
    # carrying further attributes (e.g. <track type="Audio" streamid="1">)
    # is still filed under its plain type name.
    if ($line =~ /<track type="([^"]+)"/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    # Any other single-line <Name>value</Name> element
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my ($field, $value) = ($1, $2);
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
394
3951;
396
397
398
399
400
401
402
403
404
405
406
Note: See TracBrowser for help on using the browser.