Changeset 15871 for gsdl/trunk/perllib/plugins/ReadXMLFile.pm
- Timestamp: 2008-06-05T09:26:56+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/ReadXMLFile.pm
r15865 r15871 1 1 ########################################################################### 2 2 # 3 # XMLPlug.pm -- base class for XML plugins3 # ReadXMLFile.pm -- base class for XML plugins 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 package XMLPlug;27 28 use Bas Plug;26 package ReadXMLFile; 27 28 use BasePlugin; 29 29 use doc; 30 30 use strict; … … 32 32 33 33 sub BEGIN { 34 @ XMLPlug::ISA = ('BasPlug');34 @ReadXMLFile::ISA = ('BasePlugin'); 35 35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 36 36 } … … 40 40 my $arguments = 41 41 [ { 'name' => "process_exp", 42 'desc' => "{Bas Plug.process_exp}",42 'desc' => "{BasePlugin.process_exp}", 43 43 'type' => "regexp", 44 44 'deft' => &get_default_process_exp(), 45 45 'reqd' => "no" }, 46 46 { 'name' => "xslt", 47 'desc' => "{ XMLPlug.xslt}",47 'desc' => "{ReadXMLFile.xslt}", 48 48 'type' => "string", 49 49 'deft' => "", 50 50 'reqd' => "no" } ]; 51 51 52 my $options = { 'name' => " XMLPlug",53 'desc' => "{ XMLPlug.desc}",52 my $options = { 'name' => "ReadXMLFile", 53 'desc' => "{ReadXMLFile.desc}", 54 54 'abstract' => "yes", 55 55 'inherits' => "yes", … … 61 61 push(@$pluginlist, $class); 62 62 63 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 64 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 65 66 # $self is global for use within subroutines called by XML::Parser 67 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 63 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 64 push(@{$hashArgOptLists->{"OptList"}},$options); 65 66 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 68 67 69 68 if ($self->{'info_only'}) { 70 # don't worry about any options etc 69 # don't worry about creating the XML parser as all we want is the 70 # list of plugin options 71 
71 return bless $self, $class; 72 72 } 73 73 74 74 my $parser = new XML::Parser('Style' => 'Stream', 75 'Pkg' => ' XMLPlug',75 'Pkg' => 'ReadXMLFile', 76 76 'PluginObj' => $self, 77 77 'Handlers' => {'Char' => \&Char, … … 198 198 if (defined $result) { 199 199 # we think we are processing this, but check that we actually are 200 my $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;200 my $filename = $self->get_full_filename($base_dir, $file); 201 201 202 202 if ($self->check_doctype($filename)) { … … 207 207 } 208 208 209 # we need to implement read cos we are not just using process_exp to determine 210 # whether to process this or not. 209 211 sub read { 210 212 my $self = shift (@_); … … 213 215 214 216 # Make sure we're processing the correct file, do blocking etc 215 my ($block_status,$filename ) = $self->read_block(@_);217 my ($block_status,$filename_full_path) = $self->read_block(@_); 216 218 return $block_status if ((!defined $block_status) || ($block_status==0)); 217 219 218 220 ## check the doctype to see whether we really want to process the file 219 if (!$self->check_doctype($filename )) {221 if (!$self->check_doctype($filename_full_path)) { 220 222 # this file is not for us 221 223 return undef; … … 225 227 $self->{'base_dir'} = $base_dir; 226 228 $self->{'file'} = $file; 227 $self->{'filename'} = $filename ;229 $self->{'filename'} = $filename_full_path; 228 230 $self->{'processor'} = $processor; 229 231 $self->{'metadata'} = $metadata; … … 233 235 if (defined $xslt && ($xslt ne "")) { 234 236 # perform xslt 235 my $transformed_xml = $self->apply_xslt($xslt,$filename );237 my $transformed_xml = $self->apply_xslt($xslt,$filename_full_path); 236 238 237 239 # feed transformed file (now in memory as string) into XML parser … … 239 241 } 240 242 else { 241 $self->{'parser'}->parsefile($filename );243 $self->{'parser'}->parsefile($filename_full_path); 242 244 } 243 245 }; … … 246 248 247 249 # parsefile may either croak somewhere in 
XML::Parser (e.g. because 248 # the document is not well formed) or die somewhere in XMLPlugor a250 # the document is not well formed) or die somewhere in ReadXMLFile or a 249 251 # derived plugin (e.g. because we're attempting to process a 250 252 # document whose DOCTYPE is not meant for this plugin). For the … … 271 273 } 272 274 273 # the following two methods are for if you want to do the parsing from a274 # plugin that inherits from this. it seems that you can't call the parse275 # methods directly. WHY???276 #277 # [Stefan 27/5/07] These two methods may not be necessary any more as I've278 # fixed XMLPlug so $self is no longer required to be a global variable279 # (that was why inheritance wasn't working quite right with XMLPlug I280 # think). I don't really know what other plugins rely on these methods281 # though so have left them here for now.282 sub parse_file {283 my $self = shift (@_);284 my ($filename) = @_;285 $self->{'parser'}->parsefile($filename);286 }287 288 sub parse_string {289 my $self = shift (@_);290 my ($xml_string) = @_;291 $self->{'parser'}->parse($xml_string);292 }293 275 294 276 sub get_default_process_exp { … … 344 326 345 327 my ($expat, $name, $sysid, $pubid, $internal) = @_; 346 die " XMLPlugCannot process XML document with DOCTYPE of $name";328 die "ReadXMLFile Cannot process XML document with DOCTYPE of $name"; 347 329 } 348 330 … … 395 377 $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc"); 396 378 $self->{'doc_obj'}->set_OIDtype ($self->{'processor'}->{'OIDtype'}, $self->{'processor'}->{'OIDmetadata'}); 379 $self->{'doc_obj'}->add_utf8_metadata($self->{'doc_obj'}->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 380 381 # do we want other auto metadata here (see BasePlugin.read_into_doc_obj) 397 382 } 398 383 … … 400 385 my $self = shift(@_); 401 386 my $doc_obj = $self->{'doc_obj'}; 387 388 # do we want other auto stuff here, see BasePlugin.read_into_doc_obj 389 402 390 # include any metadata passed in 
from previous plugins 403 391 # note that this metadata is associated with the top level section … … 410 398 411 399 # add an OID 412 $ doc_obj->set_OID();400 $self->add_OID(); 413 401 414 402 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); … … 419 407 420 408 $self->{'num_processed'} ++; 409 undef $self->{'doc_obj'}; 410 undef $doc_obj; # is this the same as above?? 421 411 } 422 412
Note: See TracChangeset for help on using the changeset viewer.