###########################################################################
#
# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
#                         processing
#
# A component of the Greenstone digital library software from the New
# Zealand Digital Library Project at the University of Waikato, New
# Zealand.
#
# Copyright (C) 2012 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package SimpleVideoPlugin;

use File::Temp qw/ tempdir /;

use BasePlugin;
use MetadataRead;
use util;
use FileUtils;

use strict;
use warnings;
no strict 'refs'; # allow filehandles to be variables and viceversa

sub BEGIN {
    @SimpleVideoPlugin::ISA = ('MetadataRead', 'BasePlugin');
}

# Plugin arguments. Each entry describes one option that ends up as
# $self->{<name>} and is read by process() below.
my $arguments = [
    { 'name' => "process_exp",
      'desc' => "{BasePlugin.process_exp}",
      'type' => "regexp",
      'reqd' => "no",
      'deft' => &get_default_process_exp() },
    { 'name' => "streamingHQsize",
      'desc' => "{VideoPlugin.streamingsize}",
      'type' => "int",
      'deft' => "720",
      'reqd' => "no" },
    { 'name' => "streamingHQVideoBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "496",
      'reqd' => "no" },
    { 'name' => "streamingHQAudioBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "80",
      'reqd' => "no" },
    { 'name' => "videoDeinterlacingFilter",
      'desc' => "Activate a deinterlacing filter to increase the quality of TV footage",
      'type' => "enum",
      'list' => [{'name' => "true", 'desc' => "{common.true}"},
                 {'name' => "false", 'desc' => "{common.false}"}],
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "isParallel",
      'desc' => "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
      'type' => "enum",
      'list' => [{'name' => "true", 'desc' => "{common.true}"},
                 {'name' => "false", 'desc' => "{common.false}"}],
      'deft' => "true",
      'reqd' => "no" },
    { 'name' => "isCluster",
      'desc' => "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
      'type' => "enum",
      'list' => [{'name' => "true", 'desc' => "{common.true}"},
                 {'name' => "false", 'desc' => "{common.false}"}],
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "separateIO",
      'desc' => "copy and process the file locally (good for segregating IO cost)",
      'type' => "enum",
      'list' => [{'name' => "true", 'desc' => "{common.true}"},
                 {'name' => "false", 'desc' => "{common.false}"}],
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "fixedCore",
      'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
      'type' => "int",
      'deft' => "0",
      'reqd' => "no" },
];

# NOTE(review): the option-set name is "BasicVideoPlugin" although the
# package is SimpleVideoPlugin - left untouched, confirm which is intended.
my $options = {
    'name'     => "BasicVideoPlugin",
    'desc'     => "",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments
};


# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to BasePlugin
#   $hashArgOptLists - hash ref; our argument list and option set are
#                      appended to its "ArgList" and "OptList" entries
# Returns the BasePlugin object re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}


# Default filename filter: anything ending in ".ts", case-insensitively.
sub get_default_process_exp {
    return '(?i)\.ts$';
}


# OID hashing mode used for documents produced by this plugin.
sub get_oid_hash_type {
    my $self = shift (@_);
    return "hash_on_ga_xml";
}


# Create $dir (mode 0775) unless FileUtils already reports it as existing.
# A failing mkdir is deliberately not fatal here, matching the inline
# checks this helper replaces.
sub _ensure_directory {
    my ($dir) = @_;
    if (!&FileUtils::directoryExists($dir)) {
        mkdir($dir, 0775);
    }
    return;
}


# Process a single .ts video file:
#   1. extract metadata with MediaInfo and attach it to the document,
#   2. convert the video to MP4 with HandBrakeCLI (result cached),
#   3. extract keyframe images with hive2_ffmpegsvn (result cached),
#   4. associate the converted video and every keyframe JPG with $doc_obj.
#
#   $base_dir - prepended to $file when $file is not absolute
#   $file     - the video to process
#   $doc_obj  - document object receiving text, metadata and files
#   $pluginfo, $metadata, $gli - accepted but not used here
#
# Returns 1 on success. Dies when the base filename cannot be derived,
# when conversion or keyframe extraction produce no output, or when the
# cache directory cannot be read.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '') {
        $file = &FileUtils::filenameConcatenate($base_dir, $file);
    }
    print STDERR "[A:" . time() . "] SimpleVideoPlugin processing: " . $file . "\n";

    # - I have to add some text (yay, back to needing dummy text) otherwise the
    #   DocumentText formatting is ignored (?!?)
    my $topsection = $doc_obj->get_top_section();
    $doc_obj->add_utf8_text($topsection, "This is dummy text");

    # Base name of the file without directory or ".ts" extension. The match
    # is case-insensitive so that it agrees with the default process_exp,
    # and it is checked so a stale $1 can never be picked up.
    my $filename;
    if ($file =~ /([^\/]+)\.ts$/i) {
        $filename = $1;
    }
    elsif ($file =~ /([^\/]+)$/) {
        # Custom process_exp with another extension: use the last component
        $filename = $1;
    }
    else {
        die("Fatal Error! Cannot derive a base filename from: " . $file . "\n");
    }

    # Optional date metadata (available on raw ReplayMe recordings)
    if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/) {
        my $date = $1 . $2 . $3;
        # Note: the filename is only sanitised when a date was found
        $filename =~ s/[^a-z0-9]+/_/ig;
        $filename =~ s/^_+|_+$//g;
        $doc_obj->add_utf8_metadata($topsection, "Date", $date);
    }

    # Special Case: HDFS *only* supported by separateIO flag (you need to move
    # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
    # to be run on it).
    my $separate_io = $self->{'separateIO'};
    if (&FileUtils::isHDFS($file)) {
        $separate_io = 'true';
    }
    ###rint STDERR "[DEBUG] separate_io:" . $separate_io . "\n";

    my $process_dir = $ENV{'GSDLCOLLECTDIR'};
    # If we are in a cluster, then we don't want to be writing all the logs
    # etc to the shared file system. Instead, we write to the tmp drive
    if ($separate_io eq 'true') {
        $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
        _ensure_directory($process_dir);
    }

    my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
    _ensure_directory($logs_dir);
    my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');

    # Per-video cache directory: <process_dir>/cached/<filename>
    my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
    _ensure_directory($tmp_dir);
    $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
    _ensure_directory($tmp_dir);

    # $file has already been combined with $base_dir above; doing it a second
    # time would duplicate a relative $base_dir in the path.
    my $ivideo_path = $file;

    # If we are separating IO, then we also start by copying the file to
    # the process directory (local tmp) as well
    if ($separate_io eq 'true') {
        print STDERR "[B1:" . time() . "] Creating local copy of file: " . $ivideo_path . "\n";
        my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
        &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
        $ivideo_path = $local_ivideo_path;
        print STDERR "[B2:" . time() . "] Complete\n";
    }
    else {
        # NOTE(review): this terminates the whole import whenever separateIO
        # is not in effect, although 'separateIO' defaults to "false".
        # Behaviour kept as found - confirm whether the guard is still wanted.
        print "Not gonna work!\n";
        exit;
    }

    # 1. Use MediaInfo to extract important metadata
    print STDERR "[C1:" . time() . "] Extracting metadata\n";
    print " - Extracting metadata using MediaInfo\n";
    my $mi_metadata = $self->getMetadata($ivideo_path);
    $doc_obj->add_utf8_metadata($topsection, "Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
    if (defined $mi_metadata->{'General'}->{'File_size'}) {
        $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
    }
    else {
        # MediaInfo gave no size - ask FileUtils instead
        $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
    }
    $doc_obj->add_utf8_metadata($topsection, "Duration", $mi_metadata->{'General'}->{'Duration'});
    if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'}) {
        $doc_obj->add_utf8_metadata($topsection, "VideoFormat", $mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
    }
    if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'}) {
        $doc_obj->add_utf8_metadata($topsection, "AudioFormat", $mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
    }
    $doc_obj->add_utf8_metadata($topsection, "Width", $mi_metadata->{'Video'}->{'Width'});
    $doc_obj->add_utf8_metadata($topsection, "Height", $mi_metadata->{'Video'}->{'Height'});
    print STDERR "[C2:" . time() . "] Complete\n";

    # 2. Convert into MP4 and associate
    # - generate a path for our temporary converted video file
    print STDERR "[D1:" . time() . "] Converting video to streamble format\n";
    my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
    if (&FileUtils::fileExists($ovideo_path)) {
        print " - Found existing converted video in cache\n";
    }
    else {
        # - first conversion pass
        print " - Convert using Handbrake\n";
        my $streaming_HQ_size         = $self->{'streamingHQsize'};
        my $streaming_HQ_VideoBitrate = $self->{'streamingHQVideoBitrate'};
        my $streaming_HQ_AudioBitrate = $self->{'streamingHQAudioBitrate'};
        my $deinterlace               = $self->{'videoDeinterlacingFilter'};

        my $video_processing_parameters;
        if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize") {
            $video_processing_parameters = "--strict-anamorphic";
        }
        else {
            $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
        }
        if ($deinterlace eq "true") {
            $video_processing_parameters .= " --decomb";
        }

        # Default x264 encoder options (handed to HandBrakeCLI through -x)
        my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
        my $is_cluster  = $self->{'isCluster'};
        my $is_parallel = $self->{'isParallel'};
        # If we are parallel processing on a single (presumably) multicore computer
        # then we need to limit the number of threads (and hence CPUs) HandBrake
        # will utilize in order to emulate true parallel processing (otherwise the
        # first thread to get to HandBrake conversion will take up most the CPUs
        # causing all other threads to wait anyway). It will be interesting to test
        # whether parallel processing or serial processing (with HandBrake parallel
        # processing) is faster. *update* threads=1 *only* controls the encoding and
        # several other parts of Handbrake can run parallel (demuxing etc). I've
        # had to include a 'taskset' command to truly make Handbrake serial
        if ($is_parallel eq 'true' && $is_cluster eq 'false') {
            $mencoder_options .= ':threads=1';
        }

        # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
        my $cmd = '';
        if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0) {
            $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
        }
        $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';

        # Retry the conversion up to five times until the output file appears
        my $attempt_count = 0;
        do {
            $attempt_count++;
            ###rint "[DEBUG: Video conversion attempt #" . $attempt_count . ": |" . $cmd . "|]\n";
            `$cmd`;
        } while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
    }
    if (!&FileUtils::fileExists($ovideo_path)) {
        die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
    }
    print STDERR "[D2:" . time() . "] Complete\n";

    # 3. Extract keyframes using hive
    print STDERR "[E1:" . time() . "] Extract keyframes\n";
    my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
    if (&FileUtils::fileExists($oshots_path)) {
        print " - Found existing keyframe images in cache\n";
    }
    else {
        print " - Generating keyframe images using Hive2\n";
        my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $ovideo_path . '" >> "' . $convert_log_path . '" 2>&1';
        ###print "[cmd: " . $cmd . "]\n";
        `$cmd`;
    }
    if (!&FileUtils::fileExists($oshots_path)) {
        die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
    }
    print STDERR "[E2:" . time() . "] Complete\n";

    # 4. Associate files (copies back to shared space if IO separated)
    print STDERR "[F1:" . time() . "] Associate derived files to doc_obj\n";
    # - associate streamable video
    $doc_obj->associate_file($ovideo_path, 'gsv.mp4', 'video/mp4', $topsection);
    # - associate all of the JPGs found in the temp directory
    opendir(my $dh, $tmp_dir)
        or die("Fatal Error! Failed to read keyframe directory: " . $tmp_dir . "\nReason:" . $! . "\n");
    my @shots = readdir($dh);
    closedir($dh);
    my $thumbnail = 0;
    foreach my $shot (sort @shots) {
        # Only real ".jpg" files (the dot is literal)
        next unless $shot =~ /\.jpg$/;
        my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
        # The first keyframe in sort order doubles as the thumbnail
        if (!$thumbnail) {
            $doc_obj->add_utf8_metadata($topsection, "Thumbnail", $shot);
            $thumbnail = 1;
        }
        $doc_obj->add_utf8_metadata($topsection, "Keyframe", $shot);
        $doc_obj->associate_file($shot_path, $shot, "image/jpeg", $topsection);
    }
    print STDERR "[F2:" . time() . "] Complete\n";

    # 5. Done! Cleanup.
    print STDERR "[G:" . time() . "] SimpleVideoPlugin: Complete!\n";
    return 1;
}


# Run "mediainfo --Output=XML" on $ivideo_path and parse its output line
# by line.
#
# Returns a hash ref of hash refs: { <section> => { <field> => <value> } }.
# Fields seen before any section header are filed under 'Unknown'. When a
# section name or field repeats, the later value overwrites the earlier one.
sub getMetadata {
    my ($self, $ivideo_path) = @_;
    my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
    ###rint "[DEBUG] command: " . $cmd . "\n";
    my $metadata_xml = `$cmd`;
    ###rint "[DEBUG] result: " . $metadata_xml . "\n\n";
    my @lines = split(/\r?\n/, $metadata_xml);
    my $metadata = {'Unknown' => {}};
    my $metadata_type = 'Unknown';
    foreach my $line (@lines) {
        # Section header. An empty pattern must not be used here: Perl would
        # silently re-apply the last successful regex and $1 would be garbage.
        # Assumes MediaInfo opens each section with <track type="..."> -
        # TODO confirm against the installed MediaInfo version.
        if ($line =~ /<track\s+type="([^"]+)"/) {
            $metadata_type = $1;
            if (!defined $metadata->{$metadata_type}) {
                $metadata->{$metadata_type} = {};
            }
        }
        # Simple one-line element: <Field>value</Field>
        elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/) {
            my $field = $1;
            my $value = $2;
            $metadata->{$metadata_type}->{$field} = $value;
        }
    }
    return $metadata;
}

1;