Changeset 15872 for gsdl/trunk/perllib/plugins/UnknownPlugin.pm
- Timestamp:
- 2008-06-05T09:29:32+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/UnknownPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # UnknownPlug .pm -- Plugin for files you know about but Greenstone doesn't3 # UnknownPlugin.pm -- Plugin for files you know about but Greenstone doesn't 4 4 # 5 5 # A component of the Greenstone digital library software from the New … … 26 26 ########################################################################### 27 27 28 # UnknownPlug - a plugin for unknown files28 # UnknownPlugin - a plugin for unknown files 29 29 30 30 # This is a simple Plugin for importing files in formats that … … 38 38 # movies, I add this line to the collection configuration file: 39 39 40 # plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"40 # plugin UnknownPlugin -process_exp "*.MOV" -assoc_field "movie" 41 41 42 42 # A document is created for each movie, with the associated movie … … 49 49 # You can also add extra metadata, such as the Title, Subject, and 50 50 # Duration, with metadata.xml files and RecPlug. (If you want to use 51 # UnknownPlug with more than one type of file, you will have to add51 # UnknownPlugin with more than one type of file, you will have to add 52 52 # some sort of distinguishing metadata in this way.) 53 53 54 54 55 55 56 package UnknownPlug ;56 package UnknownPlugin; 57 57 58 use Bas Plug;58 use BasePlugin; 59 59 60 60 use strict; … … 62 62 63 63 sub BEGIN { 64 @UnknownPlug ::ISA = ('BasPlug');64 @UnknownPlugin::ISA = ('BasePlugin'); 65 65 } 66 66 67 67 my $arguments = 68 68 [ { 'name' => "assoc_field", 69 'desc' => "{UnknownPlug .assoc_field}",69 'desc' => "{UnknownPlugin.assoc_field}", 70 70 'type' => "string", 71 71 'deft' => "", 72 72 'reqd' => "no" }, 73 73 { 'name' => "file_format", 74 'desc' => "{UnknownPlug .file_format}",74 'desc' => "{UnknownPlugin.file_format}", 75 75 'type' => "string", 76 76 'deft' => "", 77 77 'reqd' => "no" }, 78 78 { 'name' => "mime_type", 79 'desc' => "{UnknownPlug .mime_type}",79 'desc' => "{UnknownPlugin.mime_type}", 80 80 'type' => "string", 81 81 'deft' => "", 82 82 'reqd' => "no" }, 83 83 { 'name' => "srcicon", 84 'desc' => "{UnknownPlug .srcicon}",84 'desc' => "{UnknownPlugin.srcicon}", 85 85 'type' => "string", 86 86 'deft' => "iconunknown", 87 87 'reqd' => "no" }, 88 88 { 'name' => "process_extension", 89 'desc' => "{UnknownPlug .process_extension}",89 'desc' => "{UnknownPlugin.process_extension}", 90 90 'type' => "string", 91 91 'deft' => "", 92 92 'reqd' => "no" } ]; 93 93 94 my $options = { 'name' => "UnknownPlug ",95 'desc' => "{UnknownPlug .desc}",94 my $options = { 'name' => "UnknownPlugin", 95 'desc' => "{UnknownPlugin.desc}", 96 96 'abstract' => "no", 97 97 'inherits' => "yes", … … 107 107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 108 108 109 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);109 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 110 110 111 111 # "-process_extension" is a simpler alternative to -process_exp for non-regexp people … … 117 117 } 118 118 119 sub get_default_process_exp {120 return '';121 }122 119 120 sub process { 121 my $self = shift (@_); 122 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 123 123 124 # Associate the unknown file with the new document 124 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 125 my $outhandle = $self->{'outhandle'}; 126 my $verbosity = $self->{'verbosity'}; 125 127 126 sub associate_unknown_file { 127 my $self = shift (@_); 128 my $filename = shift (@_); # filename with full path 129 my $file = shift (@_); # filename without path 130 my $doc_obj = shift (@_); 131 132 my $verbosity = $self->{'verbosity'}; 133 my $outhandle = $self->{'outhandle'}; 134 135 # check the filename is okay 136 return 0 if ($file eq "" || $filename eq ""); 137 138 139 my $url = $file; 140 ##$url =~ s/ /%20/g; 128 # check the filename is okay - do we need this?? 129 if ($filename_full_path eq "" || $filename_no_path eq "") { 130 print $outhandle "UnknownPlugin: couldn't process \"$filename_no_path\"\n"; 131 return undef; 132 } 141 133 142 134 # Add the file as an associated file ... … … 146 138 my $assoc_field = $self->{'assoc_field'} || "unknown_file"; 147 139 148 $doc_obj->associate_file($filename , $file, $mime_type, $section);140 $doc_obj->associate_file($filename_full_path, $filename_no_path, $mime_type, $section); 149 141 $doc_obj->add_metadata ($section, "FileFormat", $file_format); 150 142 $doc_obj->add_metadata ($section, "MimeType", $mime_type); 151 $doc_obj->add_metadata ($section, $assoc_field, $file );143 $doc_obj->add_metadata ($section, $assoc_field, $filename_full_path); 152 144 153 145 $doc_obj->add_metadata ($section, "srclink", 154 146 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">"); 155 #$doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");156 147 $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_"); 157 148 $doc_obj->add_metadata ($section, "/srclink", "</a>"); 158 149 159 # add NoText metadata which can be used to suppress the dummy text160 $ doc_obj->add_metadata ($section, "NoText", "1");150 # we have no text - add dummy text and NoText metadata 151 $self->add_dummy_text($doc_obj, $section); 161 152 162 return 1;163 }164 165 166 167 # The UnknownPlug read() function. This function does all the right168 # things to make general options work for a given plugin. UnknownPlug169 # overrides read() because there is no need to read the actual text of170 # the file in, because the contents of the file is not text...171 #172 #173 # Return number of files processed, undef if can't process174 #175 # Note that $base_dir might be "" and that $file might include directories176 177 sub read {178 my $self = shift (@_);179 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;180 181 my $outhandle = $self->{'outhandle'};182 183 # Make sure we're processing the correct file184 my ($block_status,$filename) = $self->read_block(@_);185 return $block_status if ((!defined $block_status) || ($block_status==0));186 187 print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);188 print $outhandle "UnknownPlug processing \"$filename\"\n"189 if $self->{'verbosity'} > 1;190 191 #if there's a leading directory name, eat it...192 $file =~ s/^.*[\/\\]//;193 194 # create a new document195 my $doc_obj = new doc ($filename, "indexed_doc");196 my $top_section = $doc_obj->get_top_section();197 198 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});199 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");200 $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins201 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));202 203 # URL metadata (even invalid ones) are used to support internal204 # links, so even if 'file_is_url' is off, still need to store info205 206 my $web_url = "http://$file";207 $doc_obj->add_metadata($top_section, "URL", $web_url);208 209 210 # associate the file with the document211 if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1)212 {213 if ($gli) {214 print STDERR "<ProcessingError n='$file'>\n";215 }216 print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";217 return -1; # error during processing218 }219 220 #create an empty text string so we don't break downstream plugins221 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);222 223 # include any metadata passed in from previous plugins224 my $section = $doc_obj->get_top_section();225 $self->extra_metadata ($doc_obj, $section, $metadata);226 227 $self->title_fallback($doc_obj,$section,$file);228 229 # do plugin specific processing of doc_obj230 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {231 print STDERR "<ProcessingError n='$file'>\n" if ($gli);232 return -1;233 }234 235 # do any automatic metadata extraction236 $self->auto_extract_metadata ($doc_obj);237 238 # add an OID239 $doc_obj->set_OID();240 $doc_obj->add_utf8_text($section, $text);241 242 # process the document243 $processor->process($doc_obj);244 245 $self->{'num_processed'} ++;246 return 1;247 }248 249 250 # UnknownPlug processing of doc_obj. In practice we don't need to do251 # anything here because the read function takes care of everything.252 253 sub process {254 my $self = shift (@_);255 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;256 my $outhandle = $self->{'outhandle'};257 258 153 return 1; 259 154 }
Note:
See TracChangeset
for help on using the changeset viewer.