########################################################################### # # RealMediaPlug.pm -- Extract metadata from Real Media files # # Original code by Xin Gao # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 2005 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#
###########################################################################

package RealMediaPlug;

use UnknownPlug;
use rm::Header::PurePerl;

use strict;
no strict 'refs'; # make an exception so we can use variables as filehandles

# Set up inheritance at compile time: this plugin derives from UnknownPlug.
BEGIN {
    @RealMediaPlug::ISA = ('UnknownPlug');
}

# Argument descriptions contributed by this plugin.  The default for
# process_exp is computed by get_default_process_exp() below.
my $arguments =
    [ { 'name' => "process_exp",
        'desc' => "{BasPlug.process_exp}",
        'type' => "regexp",
        'deft' => get_default_process_exp(),
        'reqd' => "no" } ];

# Option block describing this plugin; pushed onto the caller's "OptList"
# in new().
my $options = { 'name'     => "RealMediaPlug",
                'desc'     => "{RealMediaPlug.desc}",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };


# Returns the default regular expression (as a string) selecting the files
# this plugin processes: names ending in ".rm" or ".rmvb", case-insensitive.
#
# Both alternatives carry their own leading dot.  The earlier pattern
# "(\.rm|rmvb)" only required the dot for "rm", so any name that merely
# ended in "rmvb" was accepted as well.
sub get_default_process_exp {
    return q^(?i)(\.rm|\.rmvb)$^;
}


# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the UnknownPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the object built by UnknownPlug, re-blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    my $self = UnknownPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}


# Builds a document object for one Real Media file, attaches the header
# fields reported by rm::Header::PurePerl as metadata (and as the document
# text), associates the file itself with the document, and hands the
# document to $processor.
#
# Returns:
#   - the status from read_block() when that status is undefined or 0
#   - undef when $self->process() returns undef
#   - 1 once the document has been passed to $processor
#
# read_block, extra_metadata, process and auto_extract_metadata are not
# defined in this file; presumably they are inherited through UnknownPlug.
sub read {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor,
        $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};

    # check process and block exps, smart block, etc
    my ($block_status, $filename) = $self->read_block(@_);
    return $block_status if ((!defined $block_status) || ($block_status == 0));

    # Report that we're processing the file
    # NOTE(review): only a bare newline is emitted for $gli here -- confirm
    # that no progress message is expected on STDERR.
    print STDERR "\n" if ($gli);
    print $outhandle "RealMediaPlug: processing $file\n"
        if ($self->{'verbosity'}) > 1;

    # create a new index document
    my $doc_obj = doc->new($filename, "indexed_doc");
    if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
        $doc_obj->set_OIDtype($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    }
    else {
        # this is done to avoid hashing content of file
        $doc_obj->set_OIDtype("incremental");
    }
    my $top_section = $doc_obj->get_top_section();

    # replace spaces in filename with %20 in url for metadata entry
    my $url = $file;
    $url =~ s/ /%20/g;

    # Source (filename) to be consistent with other plugins
    $doc_obj->add_metadata($top_section, "Source", $url);

    # Copy every header field into the document, both as metadata and as a
    # "key: value" line of text.
    my $text = "";
    my $real_media = rm::Header::PurePerl->new($filename);
    my $info = defined $real_media ? $real_media->info : undef;
    if (defined $info) {
        # Sorted so the generated text does not depend on hash ordering.
        foreach my $key (sort keys %{$info}) {
            my $value = $info->{$key};
            $doc_obj->add_metadata($top_section, $key, $value);
            $text .= "$key: $value\n";
        }
    }
    else {
        # Carry on with an empty header rather than dying on an undefined
        # object; the file is still added as an associated file below.
        print $outhandle "RealMediaPlug: could not read header information from $file\n";
    }

    # NOTE(review): the text is wrapped only in blank lines -- confirm that
    # no surrounding markup is intended.
    $doc_obj->add_utf8_text($top_section, "\n\n$text\n\n");

    # srclink
    # NOTE(review): "srclink", "/srclink" and "srcicon" are stored as empty
    # strings; the srcicon comment below mentions "irmvideo.gif", so values
    # may be missing here -- confirm against the intended display markup.
    $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");
    $doc_obj->add_metadata($top_section, "srclink", "");
    $doc_obj->add_metadata($top_section, "/srclink", "");

    # srcicon (need to include "irmvideo.gif" in the greenstone images directory
    $doc_obj->add_metadata($top_section, "srcicon", "");

    # Add the actual file as an associated file
    $doc_obj->associate_file($filename, $file, "RealMedia", $top_section);

    # include any metadata passed in from previous plugins
    $self->extra_metadata($doc_obj, $top_section, $metadata);

    # do plugin specific processing of doc_obj
    return undef
        unless defined($self->process(\$text, $pluginfo, $base_dir, $file,
                                      $metadata, $doc_obj));

    # do any automatic metadata extraction
    $self->auto_extract_metadata($doc_obj);

    # add an OID
    $doc_obj->set_OID();

    # process the document
    $processor->process($doc_obj);

    $self->{'num_processed'}++;

    return 1;
}

1;