Ignore:
Timestamp:
2008-06-05T09:29:32+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/UnknownPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
     3# UnknownPlugin.pm -- Plugin for files you know about but Greenstone doesn't
    44#
    55# A component of the Greenstone digital library software from the New
     
    2626###########################################################################
    2727
    28 # UnknownPlug - a plugin for unknown files
     28# UnknownPlugin - a plugin for unknown files
    2929
    3030# This is a simple Plugin for importing files in formats that
     
    3838# movies, I add this line to the collection configuration file:
    3939
    40 # plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
     40# plugin UnknownPlugin -process_exp "*.MOV" -assoc_field "movie"
    4141
    4242# A document is created for each movie, with the associated movie
     
    4949# You can also add extra metadata, such as the Title, Subject, and
    5050# Duration, with metadata.xml files and RecPlug.  (If you want to use
    51 # UnknownPlug with more than one type of file, you will have to add
     51# UnknownPlugin with more than one type of file, you will have to add
    5252# some sort of distinguishing metadata in this way.)
    5353
    5454
    5555
    56 package UnknownPlug;
     56package UnknownPlugin;
    5757
    58 use BasPlug;
     58use BasePlugin;
    5959
    6060use strict;
     
    6262
    6363sub BEGIN {
    64     @UnknownPlug::ISA = ('BasPlug');
     64    @UnknownPlugin::ISA = ('BasePlugin');
    6565}
    6666
    6767my $arguments =
    6868    [ { 'name' => "assoc_field",
    69     'desc' => "{UnknownPlug.assoc_field}",
     69    'desc' => "{UnknownPlugin.assoc_field}",
    7070    'type' => "string",
    7171    'deft' => "",
    7272    'reqd' => "no" },
    7373      { 'name' => "file_format",
    74     'desc' => "{UnknownPlug.file_format}",
     74    'desc' => "{UnknownPlugin.file_format}",
    7575    'type' => "string",
    7676    'deft' => "",
    7777    'reqd' => "no" },
    7878      { 'name' => "mime_type",
    79     'desc' => "{UnknownPlug.mime_type}",
     79    'desc' => "{UnknownPlugin.mime_type}",
    8080    'type' => "string",
    8181    'deft' => "",
    8282    'reqd' => "no" },
    8383      { 'name' => "srcicon",
    84     'desc' => "{UnknownPlug.srcicon}",
     84    'desc' => "{UnknownPlugin.srcicon}",
    8585    'type' => "string",
    8686    'deft' => "iconunknown",
    8787    'reqd' => "no" },
    8888      { 'name' => "process_extension",
    89     'desc' => "{UnknownPlug.process_extension}",
     89    'desc' => "{UnknownPlugin.process_extension}",
    9090    'type' => "string",
    9191    'deft' => "",
    9292    'reqd' => "no" } ];
    9393
    94 my $options = { 'name'     => "UnknownPlug",
    95         'desc'     => "{UnknownPlug.desc}",
     94my $options = { 'name'     => "UnknownPlugin",
     95        'desc'     => "{UnknownPlugin.desc}",
    9696        'abstract' => "no",
    9797        'inherits' => "yes",
     
    107107    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    108108
    109     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     109    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    110110
    111111    # "-process_extension" is a simpler alternative to -process_exp for non-regexp people
     
    117117}
    118118
    119 sub get_default_process_exp {
    120     return '';
    121 }
    122119
     120sub process {
     121    my $self = shift (@_);
     122    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    123123
    124 # Associate the unknown file with the new document
     124    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     125    my $outhandle = $self->{'outhandle'};
     126    my $verbosity = $self->{'verbosity'};
    125127
    126 sub associate_unknown_file {
    127     my $self = shift (@_);
    128     my $filename = shift (@_);   # filename with full path
    129     my $file = shift (@_);       # filename without path
    130     my $doc_obj = shift (@_);
    131    
    132     my $verbosity = $self->{'verbosity'};
    133     my $outhandle = $self->{'outhandle'};
    134 
    135     # check the filename is okay
    136     return 0 if ($file eq "" || $filename eq "");
    137 
    138    
    139     my $url = $file;
    140     ##$url =~ s/ /%20/g;
     128    # check the filename is okay - do we need this??
     129    if ($filename_full_path eq "" || $filename_no_path eq "") {
     130    print $outhandle "UnknownPlugin: couldn't process \"$filename_no_path\"\n";
     131    return undef;
     132    }
    141133
    142134    # Add the file as an associated file ...
     
    146138    my $assoc_field = $self->{'assoc_field'} || "unknown_file";
    147139
    148     $doc_obj->associate_file($filename, $file, $mime_type, $section);
     140    $doc_obj->associate_file($filename_full_path, $filename_no_path, $mime_type, $section);
    149141    $doc_obj->add_metadata ($section, "FileFormat", $file_format);
    150142    $doc_obj->add_metadata ($section, "MimeType", $mime_type);
    151     $doc_obj->add_metadata ($section, $assoc_field, $file);
     143    $doc_obj->add_metadata ($section, $assoc_field, $filename_full_path);
    152144   
    153145    $doc_obj->add_metadata ($section, "srclink",
    154146                "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">");
    155     #$doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");
    156147    $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_");
    157148    $doc_obj->add_metadata ($section, "/srclink", "</a>");
    158149   
    159     # add NoText metadata which can be used to suppress the dummy text
    160     $doc_obj->add_metadata ($section, "NoText", "1");
     150    # we have no text - add dummy text and NoText metadata
     151    $self->add_dummy_text($doc_obj, $section);
    161152
    162     return 1;
    163 }
    164 
    165 
    166 
    167 # The UnknownPlug read() function. This function does all the right
    168 # things to make general options work for a given plugin.  UnknownPlug
    169 # overrides read() because there is no need to read the actual text of
    170 # the file in, because the contents of the file is not text...
    171 #
    172 #
    173 # Return number of files processed, undef if can't process
    174 #
    175 # Note that $base_dir might be "" and that $file might include directories
    176 
    177 sub read {
    178     my $self = shift (@_);
    179     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    180 
    181     my $outhandle = $self->{'outhandle'};
    182 
    183     # Make sure we're processing the correct file
    184     my ($block_status,$filename) = $self->read_block(@_);   
    185     return $block_status if ((!defined $block_status) || ($block_status==0));
    186 
    187     print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    188     print $outhandle "UnknownPlug processing \"$filename\"\n"
    189         if $self->{'verbosity'} > 1;
    190 
    191     #if there's a leading directory name, eat it...
    192     $file =~ s/^.*[\/\\]//;
    193    
    194     # create a new document
    195     my $doc_obj = new doc ($filename, "indexed_doc");
    196     my $top_section = $doc_obj->get_top_section();
    197 
    198     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});   
    199     $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    200     $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins
    201     $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));
    202 
    203     # URL metadata (even invalid ones) are used to support internal
    204     # links, so even if 'file_is_url' is off, still need to store info
    205 
    206     my $web_url = "http://$file";
    207     $doc_obj->add_metadata($top_section, "URL", $web_url);
    208 
    209 
    210     # associate the file with the document
    211     if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1)
    212     {
    213     if ($gli) {
    214         print STDERR "<ProcessingError n='$file'>\n";
    215     }
    216     print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
    217     return -1; # error during processing
    218     }
    219 
    220     #create an empty text string so we don't break downstream plugins
    221     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    222 
    223     # include any metadata passed in from previous plugins
    224     my $section = $doc_obj->get_top_section();
    225     $self->extra_metadata ($doc_obj, $section, $metadata);
    226 
    227     $self->title_fallback($doc_obj,$section,$file);
    228 
    229     # do plugin specific processing of doc_obj
    230     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
    231     print STDERR "<ProcessingError n='$file'>\n" if ($gli);
    232     return -1;
    233     }
    234 
    235     # do any automatic metadata extraction
    236     $self->auto_extract_metadata ($doc_obj);
    237 
    238     # add an OID
    239     $doc_obj->set_OID();
    240     $doc_obj->add_utf8_text($section, $text);
    241 
    242     # process the document
    243     $processor->process($doc_obj);
    244 
    245     $self->{'num_processed'} ++;
    246     return 1;
    247 }
    248 
    249 
    250 # UnknownPlug processing of doc_obj.  In practice we don't need to do
    251 # anything here because the read function takes care of everything.
    252 
    253 sub process {
    254     my $self = shift (@_);
    255     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    256     my $outhandle = $self->{'outhandle'};
    257    
    258153    return 1;
    259154}
Note: See TracChangeset for help on using the changeset viewer.