########################################################################### # # TimedHTMLPlug.pm -- # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### package TimedHTMLPlugin; use HTMLPlugin; use VideoConverter; sub BEGIN { @TimedHTMLPlugin::ISA = ('HTMLPlugin', 'VideoConverter'); } use strict; # every perl program should have this! no strict 'refs'; # make an exception so we can use variables as filehandles my $arguments = [ { 'name' => "video_excerpt_duration", 'desc' => "{VideoPlug.video_excerpt_duration}", 'type' => "string", 'reqd' => "no" } ]; my $options = { 'name' => "TimedHTMLPlugin", 'desc' => "{TimedHTMLPlugin.desc}", 'abstract' => "no", 'inherits' => "yes", 'args' => $arguments }; sub new { my ($class) = shift (@_); my ($pluginlist,$inputargs,$hashArgOptLists) = @_; push(@$pluginlist, $class); if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; new VideoConverter($pluginlist, $inputargs, $hashArgOptLists); my $self = (defined $hashArgOptLists) ? new HTMLPlugin($pluginlist,$inputargs,$hashArgOptLists) : new HTMLPlugin($pluginlist,$inputargs); my $collect_dir = $ENV{'GSDLCOLLECTDIR'}; my $collect_bin = &util::filename_cat($collect_dir,"bin"); my $collect_script = &util::filename_cat($collect_bin,"script"); $ENV{'PATH'} .= ":" if (defined $ENV{'PATH'}); $ENV{'PATH'} .= "$collect_bin:$collect_script"; return bless $self, $class; } sub begin { my $self = shift (@_); my ($pluginfo, $base_dir, $processor, $maxdocs) = @_; $self->SUPER::begin(@_); $self->VideoConverter::begin(@_); } sub init { my $self = shift (@_); my ($verbosity, $outhandle, $failhandle) = @_; $self->SUPER::init(@_); $self->VideoConverter::init(@_); } sub html_to_text { my ($self, $text) = @_; $text =~ s/<\/?[^>]*>//g; $text =~ s/ / /g; $text =~ s/\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; return $text; } sub extract_metadata_table { my ($self, $textref) = @_; my $metatable_store = {}; my $metatable_re = "
]*>(.*?)<\/td>/gsi);
my $td_list_num = scalar(@td_list);
if ($td_list_num == 2) {
my $meta_name = $self->html_to_text($td_list[0]);
my $meta_value = $self->html_to_text($td_list[1]);
$meta_name =~ s/\s*//g;
if ($meta_value =~ m/^(\s*|\?+)$/) {
$in_filtered_tr = 0;
}
else {
my $meta_name_label = $meta_name;
$meta_name_label =~ s/\s+//g;
$metatable_store->{$meta_name_label} = $meta_value;
if ($meta_name_label =~ m/^GS\./) {
# even though we want to store this as metadata, we
# don't want this going into table presented at top of document
$in_filtered_tr = 0;
}
}
}
else {
print STDERR "Warning: Metadata table does not consist of two cells (metadata name : metadata value)\n";
print STDERR " Skipping\n";
}
if ($in_filtered_tr) {
push(@filtered_tr_list,$tr);
}
}
my $filtered_tr_block = join("\n",@filtered_tr_list);
my $filtered_table = "
";
my $tape_re = "T\\d+:";
my $tt_re = "\\d\\d";
my $back_re = ".*?<\\/span>";
my $time_match_re = "$front_re$tt_re:$tt_re:$tt_re$back_re(?:\:$tt_re)?";
# my $time_sub_re = "($front_re)($tape_re)? ?($tt_re):($tt_re):($tt_re)(?:\:$tt_re)?($back_re)";
my $tape_match_re = "$front_re($tape_re)? ?$tt_re:$tt_re:$tt_re(?:\:$tt_re)?$back_re";
my $tape_sub_re = "($front_re)($tt_re):($tt_re):($tt_re)(?:\:$tt_re)?($back_re)";
my @time_seq = ($$textref =~ m/$time_match_re/sgi);
# artifically add tape information into all time indexes
my $extrapolate_tape = "";
my $num_seq = scalar(@time_seq);
for (my $i=0; $i<$num_seq; $i++) {
my ($curr_tape) = ($time_seq[$i] =~ m/$tape_match_re/sgi);
if (!defined $curr_tape) {
$time_seq[$i] =~ s/$tape_sub_re/$1$extrapolate_tape$2:$3:$4$5/sgi;
}
else {
if ($curr_tape ne $extrapolate_tape) {
$extrapolate_tape = $curr_tape;
}
}
}
$self->{'time_seq'} = \@time_seq;
$self->{'time_pos'} = 0;
$$textref =~ s/( )(T\d+:)? ?(\d\d):(\d\d):(\d\d)(?:\:\d\d)?.*?(<\/span>)/$self->timed_event($1,$2,$3,$4,$5,$6)/sgie;
# knock out any div tags as they interfere with document broken up into sections
$$textref =~ s/<\/?div.*?>//gi;
# Embedded word style infomation is lost by HTMLPlug => make Question
# style use HTML italics
$$textref =~ s/ (.*?)<\/p>/ $1<\/i><\/p>/gsi;
}
sub store_block_files
{
my $self =shift (@_);
my ($filename_full_path, $block_hash) = @_;
# Would be better if this were tied to particular HTML docs that
# this plugin has processed.
# For now block all files native to DVD format
if (($filename_full_path =~ m/\.(IFO|BUP)$/) # block info and backup-info data
|| ($filename_full_path =~ m/VIDEO_TS\..*?$/) # block top-level files
|| ($filename_full_path =~ m/VTS_\d\d_\d\.VOB$/) #
) {
$block_hash->{'file_blocks'}->{$filename_full_path} = 1;
}
}
sub read_block {
my $self = shift (@_);
my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
print STDERR "***** TimedHTMLPlugin read_block\n";
my $filename = $file;
$filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
if (($filename =~ m/\.(IFO|BUP)$/) # block info and backup-info data
|| ($filename =~ m/VIDEO_TS\..*?$/) # block top-level files
|| ($filename =~ m/VTS_\d\d_\d\.VOB$/) #
) {
$self->{'num_blocked'} ++;
return (0,undef); # blocked
}
##### || ($filename =~ m/VTS_\d\d_0\.VOB$/) # block menu item for this track
if (defined $self->{'file_blocks'}->{$filename} && $self->{'file_blocks'}->{$filename} == 1){
$self->{'num_blocked'} ++;
return (0,undef); # blocked
}
if (defined $self->{'re_file_block'}) {
my $re_file_blocks = $self->{'re_file_block'};
foreach my $re_file_block (@$re_file_blocks) {
### print STDERR "**** re file block: $filename =~ m $re_file_block\n";
if ($filename =~ m/$re_file_block/) {
### print STDERR "*** blocking $filename\n";
$self->{'num_blocked'} ++;
return (0,undef); # blocked
}
}
}
return $self->SUPER::read_block(@_);
}
sub read_file {
my $self = shift @_;
my ($filename, $encoding, $language, $textref) = @_;
$only_num = 0;
$self->SUPER::read_file(@_);
my $metatable_store = $self->extract_metadata_table($textref);
$self->{'metatable_store'} = $metatable_store;
my ($root, $dirname, $suffix)
= &File::Basename::fileparse($filename, "\\.[^\\.]+\$");
# media defaults
my $media_type = undef;
my $media_dir = $root;
## my $media_root = "VTS_%02d_%02d";
my $media_root = "VTS";
if (defined $metatable_store->{'GS.Media'}) {
$media_type = $metatable_store->{'GS.Media'};
$media_type = lc($media_type);
}
if (defined $metatable_store->{'GS.MediaDirectory'}) {
$media_dir = $metatable_store->{'GS.MediaDirectory'};
}
if (defined $metatable_store->{'GS.MediaRoot'}) {
$media_root = $metatable_store->{'GS.MediaRoot'};
}
$self->{'media_type'} = $media_type;
$self->{'media_dir'} = $media_dir;
$self->{'media_root'} = $media_root;
my $media_base_dir = &util::filename_cat($dirname,$media_dir);
$self->{'media_base_dir'} = $media_base_dir;
$self->{'vob_duration_info'} = &VideoConverter::vob_durations($media_base_dir,"1",$self->{'outhandle'});
## $self->hyperlink_timing_info($textref);
# Convert entities to their UTF8 equivalents
$$textref =~ s/&(lt|gt|amp|quot|nbsp);/&z$1;/go;
$$textref =~ s/&([^;]+);/&ghtml::getcharequiv($1,1)/gseo;
$$textref =~ s/&z(lt|gt|amp|quot|nbsp);/&$1;/go;
}
sub streamable_video
{
my $self = shift (@_);
my ($base_dir,$filename,$doc_obj,$section) = @_;
my $outhandle = $self->{'outhandle'};
my $verbosity = $self->{'verbosity'};
my $media_base_dir = $self->{'media_base_dir'};
#---
# Determine size of input video
my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
my $media_type = $self->{'media_type'};
my $media_dir = $self->{'media_dir'};
my $media_root = $self->{'media_root'};
my $tape_num = "1";
my $src_file;
my $src_filename;
if ($media_type eq "video") {
$src_file = "$media_root\_01_$tape_num.VOB";
$src_filename
= &util::filename_cat($media_base_dir, $src_file);
}
elsif ($media_type =~ m/^audio$/) {
$src_file = "$media_root\_Tape$tape_num.mp3";
$src_filename
= &util::filename_cat($media_base_dir, $src_file);
}
else {
print STDERR "Warning: Unrecognised media type: $media_type\n";
}
if (-e $src_filename) {
$self->{'file_blocks'}->{$src_filename} = 1;
my $re_file_block = $src_filename;
$re_file_block =~ s/_01_$tape_num.VOB$/_\\d+_\\d+.VOB\$/;
push(@{$self->{'re_file_block'}},$re_file_block);
}
my ($video_type, $video_width, $video_height, $video_duration, $video_size,
$vcodec,$vfps,$atype,$afreq,$achan,$arate)
= &VideoConverter::identify($src_filename, $outhandle, $verbosity);
if ($vfps eq "unknown") {
print $outhandle "Unknown framerate, defaulting to 25 frames per second.\n";
$vfps = 25;
}
### print STDERR "*** video duration = $video_duration\n";
my ($dur_hour,$dur_min,$dur_sec)
= ($video_duration =~ m/(\d+):(\d+):(\d+\.\d+)/);
my $total_dur_secs = $dur_hour*3600 + $dur_min*60 + $dur_sec;
$self->{'video-fps'} = $vfps;
$self->{'num-total-frames'} = $total_dur_secs * $vfps;
# shorten duration prcessed for experimentation purposes
my $exp_duration = undef;
my $video_excerpt_duration = $self->{'video_excerpt_duration'};
print STDERR "**** video duration (secs) = $total_dur_secs\n";
if ((defined $video_excerpt_duration) && ($video_excerpt_duration ne "")) {
$exp_duration = $video_excerpt_duration;
my ($hh,$mm,$ss,$ms) = ($exp_duration =~ m/^(\d\d):(\d\d):(\d\d)\.?(\d\d)?/);
my $excerpt_dur_in_secs = $hh * 3600 + $mm * 60 + $ss;
if ($excerpt_dur_in_secs < $total_dur_secs) {
$self->{'num-total-frames'} = $excerpt_dur_in_secs * $vfps; # override calculation for full length duration
}
else {
# clip is already shorter than requested video excerpt duration
# set exp_duration back to undefined
$exp_duration = undef;
}
}
print $outhandle "TimedHTMLPlug: Preparing video files associated with $filename for HTMLPlug\n";
if (defined $exp_duration)
{
print $outhandle "Only encoding first $exp_duration of video.\n";
$self->{'exp_duration'} = $exp_duration;
}
### my $videoconvert
### = new videoconvert($base_dir,$src_filename,$verbosity,$outhandle,$exp_duration);
### $self->{'videoconvert'} = $videoconvert;
# ******
$self->init_cache_for_file($src_filename);
#---
# Generate the Flash FLV format for streaming purposes
my $streaming_bitrate = "256k"; # used to be 192k
my $streaming_size = "360"; # used to be 352 (CIF?)
my $streaming_achan = "1";
my $streaming_arate = "44100";
my $streaming_quality = "high";
#---
# Convert video to flash
#---
my ($stream_cmd,$oflash_filename,$oflash_file)
= $self->stream_cmd($src_filename,
$video_width,$video_height,
$streaming_quality, $streaming_bitrate, $streaming_size,
$streaming_achan, $streaming_arate);
my $streamable_options = { @{$self->{'ffmpeg_monitor'}},
'message_prefix' => "Stream",
'message' => "Generating streamable video: $oflash_file" };
my ($streamable_regenerated,$streamable_result,$streamable_had_error)
= $self->run_cached_general_cmd($stream_cmd,$oflash_filename,$streamable_options);
$self->{'streamable_regenerated'} = $streamable_regenerated;
if (!$streamable_had_error) {
#---
# Make video seekable
#---
my ($streamseekable_cmd,$ostreamseekable_filename) = $self->streamseekable_cmd($oflash_filename);
my $streamseekable_options = { @{$self->{'flvtool2_monitor'}},
'message_prefix' => "Stream Seekable",
'message' => "Reprocessing video stream to be seekable by timeline: $oflash_file" };
if ($streamable_regenerated) {
$self->run_general_cmd($streamseekable_cmd,$streamseekable_options);
}
my $streamable_url = $oflash_file;
## $streamable_url =~ s/ /%20/g;
$doc_obj->add_metadata ($section, "streamablevideo", $streamable_url);
$doc_obj->associate_file($oflash_filename,$oflash_file,"video/flash",
$section);
}
#
# FlowPlayer.swf height+22 pixels
# FlowPlayerBlack.swf height+16 pixels
# FlowPlayerThermo.swf height+16 pixels
# FlowPlayerWhite.swf height+26 pixels
$doc_obj->add_metadata ($section, "flashwidth", $video_width);
$doc_obj->add_metadata ($section, "flashheight", $video_height + 22 + 100);
my $base_url = "_httpstreamserverprefix_/collect/[collection]/index/assoc/{If}{[assocfilepath],[assocfilepath],[parent(Top):assocfilepath]}/";
$doc_obj->add_metadata ($section, "baseurl",$base_url);
$self->{'oflash_file'} = $oflash_file;
$self->{'oflash_filename'} = $oflash_filename;
}
sub add_cuepoints
{
my ($self) = shift @_;
my ($doc_obj) = @_;
my $section = $doc_obj->get_top_section();
my $chapters = $doc_obj->get_children($section);
# open file
my $output_dir = $self->{'cached_dir'};
my $cue_filename = &util::filename_cat($output_dir,"on_cue.xml");
open(CUEOUT,">$cue_filename")
|| die "Unable to open $cue_filename: $!\n";
print CUEOUT " (?:T\d+:)? ?(\d\d:\d\d:\d\d)(?:\:\d\d)?/gsi);
my $count = 0;
foreach my $cuepoint (@cuepoints)
{
my ($hh,$mm,$ss) = ($cuepoint =~ m/(\d\d):(\d\d):(\d\d)/);
my $cuept_insecs = $self->hms_to_secs($hh,$mm,$ss);
if ($cuept_insecs < 2) {
$cuept_insecs += 2;
}
my $cuept_inmsecs = 1000 * $cuept_insecs;
$doc_obj->add_metadata($section,"cuepoint",$cuept_inmsecs);
my $chap_title = $doc_obj->get_metadata_element($chap,"Title");
# Navigation point (used in seeking)
# print CUEOUT " |