Changeset 16392
- Timestamp:
- 2008-07-14T14:57:38+12:00 (15 years ago)
- Location:
- gsdl/trunk/perllib/plugins
- Files:
-
- 25 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm
r16257 r16392 109 109 sub read { 110 110 my $self = shift (@_); 111 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs,$total_count, $gli) = @_;111 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_; 112 112 my $outhandle = $self->{'outhandle'}; 113 113 … … 137 137 my $tmp = &util::filename_cat ($file, $subfile->[0]); 138 138 next if $tmp eq $file; 139 139 140 140 # We always process the file... 141 141 my $process_file = 1; … … 156 156 if ($process_file) { 157 157 # note: metadata is not carried on to the next level 158 $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count+$count), $gli);158 $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli); 159 159 } 160 160 -
gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm
r15925 r16392 557 557 558 558 # Override ConvertBinaryFile read 559 # Needed so multiple .item files generate are sent down secondary plugin559 # Needed so multiple .item files generated are sent down secondary plugin 560 560 561 561 sub read { 562 562 my $self = shift (@_); 563 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;563 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 564 564 565 565 $self->{'gli'} = $gli; … … 570 570 my $outhandle = $self->{'outhandle'}; 571 571 572 my ($block_status,$filename) = $self->read_block(@_); 573 return $block_status if ((!defined $block_status) || ($block_status==0)); 572 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 573 return undef unless $self->can_process_this_file($filename_full_path); 574 574 575 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 575 576 … … 580 581 my $conv_filename_list = []; 581 582 582 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename );583 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path); 583 584 584 585 if (scalar(@$conv_filename_list)==0) { … … 611 612 my ($rv,$doc_obj) 612 613 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, 613 $ metadata, $processor, $maxdocs, $total_count,614 $block_hash, $metadata, $processor, $maxdocs, $total_count, 614 615 $gli); 615 616 … … 619 620 620 621 # Override previous gsdlsourcefilename set by secondary plugin 621 my $collect_file = &util::filename_within_collection($filename );622 my $collect_file = &util::filename_within_collection($filename_full_path); 622 623 my $collect_conv_file = &util::filename_within_collection($conv_filename); 623 624 $doc_obj->set_source_filename ($collect_file); … … 627 628 $self->set_Source_metadata($doc_obj, $filemeta); 628 629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), 
"Plugin", "$self->{'plugin_type'}"); 629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename ));630 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 630 631 631 632 if ($self->{'cover_image'}) { 632 $self->associate_cover_image($doc_obj, $filename );633 $self->associate_cover_image($doc_obj, $filename_full_path); 633 634 } 634 635 -
gsdl/trunk/perllib/plugins/ConvertBinaryFile.pm
r16013 r16392 366 366 sub read_into_doc_obj { 367 367 my $self = shift (@_); 368 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;368 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 369 369 370 370 my $outhandle = $self->{'outhandle'}; 371 371 372 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);372 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 373 373 374 374 my $output_ext = $self->{'convert_to_ext'}; … … 411 411 412 412 # note: metadata is not carried on to the next level 413 ## **** I just replaced $metadata with {} in following 413 414 my ($rv,$doc_obj) 414 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $ metadata, $processor, $maxdocs, $total_count, $gli);415 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $block_hash, {}, $processor, $maxdocs, $total_count, $gli); 415 416 416 417 if ((!defined $rv) || ($rv<1)) { … … 438 439 my $topsection = $doc_obj->get_top_section(); 439 440 $self->add_associated_files($doc_obj, $filename_full_path); 441 442 # extra_metadata is already called by sec plugin in process?? 440 443 $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here?? 441 444 # do any automatic metadata extraction -
gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm
r15872 r16392 311 311 sub read { 312 312 my $self = shift (@_); 313 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;313 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 314 314 315 315 my $outhandle = $self->{'outhandle'}; 316 316 317 # check process_exp, block_exp, associate_ext etc 318 my ($block_status,$filename) = $self->read_block(@_); 319 return $block_status if ((!defined $block_status) || ($block_status==0)); 317 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 318 return undef unless $self->can_process_this_file($filename_full_path); 320 319 321 320 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up … … 325 324 326 325 my $output_ext = $self->{'convert_to_ext'}; 327 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename );326 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename_full_path); 328 327 329 328 if ("$conv_filename" eq "") {return 0;} # allows continue on errors … … 334 333 #my $doc_obj = new doc ($conv_filename, "indexed_doc"); 335 334 # the original filename is used now 336 my $doc_obj = new doc ($filename , "indexed_doc");335 my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 337 336 # the converted filename is set separately 338 337 $doc_obj->set_converted_filename($conv_filename); … … 349 348 350 349 if ($self->{'cover_image'}) { 351 $self->associate_cover_image($doc_obj, $filename );350 $self->associate_cover_image($doc_obj, $filename_full_path); 352 351 } 353 352 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 354 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename ));353 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 355 354 356 355 my $track_no = "1"; -
gsdl/trunk/perllib/plugins/DBPlugin.pm
r16104 r16392 80 80 sub read { 81 81 my $self = shift (@_); 82 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs,$total_count,$gli) = @_;82 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count,$gli) = @_; 83 83 84 84 #see if we can handle the passed file... 85 my ($block_status,$filename) = $self->read_block(@_);86 return $block_status if ((!defined $block_status) || ($block_status==0));87 85 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 86 return undef unless $self->can_process_this_file($filename_full_path); 87 88 88 my $outhandle = $self->{'outhandle'}; 89 89 my $verbosity = $self->{'verbosity'}; … … 120 120 121 121 # read in config file. 122 if (!open (CONF, $filename )) {123 print $outhandle "DBPlugin: can't read $filename : $!\n";122 if (!open (CONF, $filename_full_path)) { 123 print $outhandle "DBPlugin: can't read $filename_full_path: $!\n"; 124 124 return 0; 125 125 } … … 172 172 $err =~ s/\.$//; # remove a trailing . 
173 173 print $outhandle "DBPlugin: error evaluating `$statement'\n"; 174 print $outhandle " $err (in $filename )\n";174 print $outhandle " $err (in $filename_full_path)\n"; 175 175 return 0; # there was an error reading the config file 176 176 } … … 189 189 190 190 if (!defined($db)) { 191 print $outhandle "DBPlugin: error: $filename does not specify a db!\n";191 print $outhandle "DBPlugin: error: $filename_full_path does not specify a db!\n"; 192 192 return 0; 193 193 } … … 254 254 255 255 # create a new document 256 my $doc_obj = new doc ($filename , "indexed_doc");256 my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 257 257 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 258 258 my $cursection = $doc_obj->get_top_section(); … … 272 272 273 273 if ($self->{'cover_image'}) { 274 $self->associate_cover_image($doc_obj, $filename );274 $self->associate_cover_image($doc_obj, $filename_full_path); 275 275 } 276 276 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); -
gsdl/trunk/perllib/plugins/DSpacePlugin.pm
r15872 r16392 240 240 sub metadata_read { 241 241 my $self = shift (@_); 242 my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 242 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 243 243 244 244 my $only_first_doc = $self->{'only_first_doc'}; … … 322 322 sub read { 323 323 my $self = shift (@_); 324 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 324 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 325 325 my $outhandle = $self->{'outhandle'}; 326 326 -
gsdl/trunk/perllib/plugins/FOXPlugin.pm
r15872 r16392 93 93 sub read { 94 94 my $self = shift (@_); 95 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;95 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 96 96 97 # check for associate_ext, blocking etc, are we processing this file?98 my ($ block_status,$fullname) = $self->read_block(@_);99 return $block_status if ((!defined $block_status) || ($block_status==0));97 # can we process this file?? 98 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 99 return undef unless $self->can_process_this_file($filename_full_path); 100 100 101 101 print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli); 102 102 print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1; 103 103 104 my ($parent_dir) = $f ullname=~ /^(.*)\/[^\/]+\.dbf$/i;104 my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i; 105 105 106 106 # open the file 107 if (!open (FOXBASEIN, $f ullname)) {107 if (!open (FOXBASEIN, $filename_full_path)) { 108 108 if ($gli) { 109 print STDERR "<ProcessingError n='$file' r='Could not read $f ullname'>\n";110 } 111 print STDERR "FOXPlugin::read - couldn't read $f ullname\n";109 print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n"; 110 } 111 print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n"; 112 112 return -1; # error in processing 113 113 } … … 141 141 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n"; 142 142 } 143 print STDERR "FOXPlugin:read - $f ullnamedoesn't seem to be a Foxbase file\n";143 print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n"; 144 144 return -1; 145 145 } … … 163 163 164 164 # open the dbt file if we need to 165 my $dbtfullname = $f ullname;166 if ($f ullname=~ /f$/) {165 my $dbtfullname = $filename_full_path; 166 if ($filename_full_path =~ /f$/) { 167 
167 $dbtfullname =~ s/f$/t/; 168 168 } else { -
gsdl/trunk/perllib/plugins/GMLPlugin.pm
r15872 r16392 79 79 sub read { 80 80 my $self = shift (@_); 81 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;81 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 82 82 my $outhandle = $self->{'outhandle'}; 83 83 84 # check process and block exps, smart block, etc85 my ($ block_status,$filename) = $self->read_block(@_);86 return $block_status if ((!defined $block_status) || ($block_status==0));84 # can we process this file?? 85 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 86 return undef unless $self->can_process_this_file($filename_full_path); 87 87 88 88 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up … … 95 95 $parent_dir = &util::filename_cat ($base_dir, $parent_dir); 96 96 97 if (!open (INFILE, $filename )) {97 if (!open (INFILE, $filename_full_path)) { 98 98 if ($gli) { 99 print STDERR "<ProcessingError n='$file' r='Could not read $filename '>\n";99 print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n"; 100 100 } 101 print $outhandle "GMLPlugin::read - couldn't read $filename \n";101 print $outhandle "GMLPlugin::read - couldn't read $filename_full_path\n"; 102 102 return -1; 103 103 } … … 133 133 134 134 } else { 135 print $outhandle "GMLPlugin::read - error in file $filename \n";135 print $outhandle "GMLPlugin::read - error in file $filename_full_path\n"; 136 136 print $outhandle "text: \"$gml\"\n"; 137 137 last; -
gsdl/trunk/perllib/plugins/HBPlugin.pm
r16019 r16392 219 219 sub read { 220 220 my $self = shift (@_); 221 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 221 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 222 222 my $outhandle = $self->{'outhandle'}; 223 223 -
gsdl/trunk/perllib/plugins/HTMLPlugin.pm
r16247 r16392 491 491 { 492 492 my $self = shift (@_); 493 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 494 495 # check the process_exp and block_exp thing 496 my ($block_status,$filename) = $self->read_block(@_); 497 return $block_status if ((!defined $block_status) || ($block_status==0)); 498 493 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 494 499 495 # get the input file 500 496 my $input_filename = $file; … … 523 519 524 520 # call the parent read_into_doc_obj 525 my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli);521 my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); 526 522 527 523 return ($process_status,$doc_obj); … … 554 550 my $self = shift (@_); 555 551 556 return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^; 552 #return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^; 553 return ""; 557 554 } 558 555 … … 567 564 { 568 565 my $self =shift (@_); 569 my ($filename) = @_; 570 my $html_fname = $filename; 566 my ($filename_full_path, $block_hash) = @_; 567 568 my $html_fname = $filename_full_path; 571 569 my @file_blocks; 572 570 573 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename );571 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); 574 572 575 573 # read in file ($text will be in utf8) 576 574 my $text = ""; 577 $self->read_file ($filename , $encoding, $language, \$text);575 $self->read_file ($filename_full_path, $encoding, $language, \$text); 578 576 my $textref = \$text; 579 577 my $opencom = '(?:<!--|<!(?:—|—|--))'; … … 600 598 if ($link !~ m@^/@ && $link !~ m/^([A-Z]:?)\\/) { 601 599 # Turn relative file path into full path 602 my $dirname = &File::Basename::dirname($filename );600 my $dirname = 
&File::Basename::dirname($filename_full_path); 603 601 $link = &util::filename_cat($dirname, $link); 604 602 } 605 603 $link = $self->eval_dir_dots($link); 606 604 607 $ self->{'file_blocks'}->{$link} = 1;605 $block_hash->{'file_blocks'}->{$link} = 1; 608 606 } 609 607 } -
gsdl/trunk/perllib/plugins/ISISPlugin.pm
r16104 r16392 88 88 # This plugin blocks files with the suffix ".fdt" and ".xrf" 89 89 sub get_default_block_exp { 90 return q^(?i)(\.fdt|\.xrf)$^; 90 #return q^(?i)(\.fdt|\.xrf)$^; 91 return ""; 91 92 } 92 93 … … 122 123 } 123 124 124 125 sub read_file 126 { 125 # we block the corresponding fdt and xrf 126 sub store_block_files { 127 128 my $self =shift (@_); 129 my ($filename_full_path, $block_hash) = @_; 130 131 $self->check_auxiliary_files($filename_full_path); 132 if (-e $self->{'fdt_file_path'}) { 133 my $fdt_file = $self->{'fdt_file_path'}; 134 $block_hash->{'file_blocks'}->{$fdt_file} = 1; 135 } 136 if (-e $self->{'xrf_file_path'}) { 137 my $xrf_file = $self->{'xrf_file_path'}; 138 $block_hash->{'file_blocks'}->{$xrf_file} = 1; 139 } 140 141 142 } 143 144 sub check_auxiliary_files { 127 145 my $self = shift (@_); 128 my ($filename, $encoding, $language, $textref) = @_; 129 my $outhandle = $self->{'outhandle'}; 146 my ($filename) = @_; 130 147 131 148 my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i); 132 my $mst_file_path_relative = $filename;133 $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/;134 135 149 # Check the associated .fdt and .xrf files exist 136 150 $self->{'fdt_file_path'} = $database_file_path_root . ".FDT"; … … 138 152 $self->{'fdt_file_path'} = $database_file_path_root . ".fdt"; 139 153 } 154 $self->{'xrf_file_path'} = $database_file_path_root . ".XRF"; 155 if (!-e $self->{'xrf_file_path'}) { 156 $self->{'xrf_file_path'} = $database_file_path_root . 
".xrf"; 157 } 158 } 159 160 161 sub read_file 162 { 163 my $self = shift (@_); 164 my ($filename, $encoding, $language, $textref) = @_; 165 my $outhandle = $self->{'outhandle'}; 166 167 my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i); 168 my $mst_file_path_relative = $filename; 169 $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/; 170 171 # Check the associated .fdt and .xrf files exist 172 $self->check_auxiliary_files($filename); 173 140 174 if (!-e $self->{'fdt_file_path'}) { 141 175 print STDERR "<ProcessingError n='$mst_file_path_relative' r='Could not find ISIS FDT file $self->{'fdt_file_path'}'>\n" if ($self->{'gli'}); 142 176 print $outhandle "Error: Could not find ISIS FDT file " . $self->{'fdt_file_path'} . ".\n"; 143 177 return; 144 }145 $self->{'xrf_file_path'} = $database_file_path_root . ".XRF";146 if (!-e $self->{'xrf_file_path'}) {147 $self->{'xrf_file_path'} = $database_file_path_root . ".xrf";148 178 } 149 179 if (!-e $self->{'xrf_file_path'}) { -
gsdl/trunk/perllib/plugins/IndexPlugin.pm
r15872 r16392 98 98 sub read { 99 99 my $self = shift (@_); 100 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 100 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 101 101 my $outhandle = $self->{'outhandle'}; 102 102 … … 148 148 } 149 149 } 150 $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs, ($total_count +$count), $gli); 150 $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $block_hash, $metadata, $processor, $maxdocs, ($total_count +$count), $gli); 151 151 } 152 152 } -
gsdl/trunk/perllib/plugins/LOMPlugin.pm
r16019 r16392 106 106 107 107 108 108 sub can_process_this_file { 109 my $self = shift(@_); 110 my ($filename) = @_; 111 112 if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) { 113 return 1; # its a file for us 114 } 115 return 0; 116 } 109 117 110 118 sub metadata_read { 111 119 my $self = shift (@_); 112 my ($pluginfo, $base_dir, $file, $ metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;120 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 113 121 114 122 my $outhandle = $self->{'outhandle'}; 115 123 116 my $filename = $file; 117 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 118 119 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 120 return undef; # can't recognise 121 } 122 if (!$self->check_doctype($filename)) { 123 # this file is not for us 124 return undef; 125 } 124 # can we process this file?? 125 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 126 return undef unless $self->can_process_this_file($filename_full_path); 126 127 127 128 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up … … 130 131 if $self->{'verbosity'} > 1; 131 132 132 my ($dir,$tail) = $filename =~ /^(.*?)([^\/\\]*)$/;133 my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/; 133 134 $self->{'output_dir'} = $dir; 134 135 135 136 eval { 136 $self->{'parser'}->parsefile($filename );137 $self->{'parser'}->parsefile($filename_full_path); 137 138 }; 138 139 139 140 if ($@) { 140 print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);141 print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 141 142 print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2); 142 143 return 0; … … 212 213 sub read { 213 214 my $self = 
shift (@_); 214 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;215 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 215 216 216 217 my $outhandle = $self->{'outhandle'}; … … 218 219 return 0 if (defined $self->{'extra_blocks'}->{$file}); 219 220 220 # need to check whether this file is for us 221 my ($block_status,$filename) = $self->read_block(@_); 222 return $block_status if ((!defined $block_status) || ($block_status==0)); 223 if (!$self->check_doctype($filename)) { 224 # this file is not for us 225 return undef; 226 } 221 # can we process this file?? 222 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 223 return undef unless $self->can_process_this_file($filename_full_path); 227 224 228 225 $self->{'metadata_table'} = $metadata; -
gsdl/trunk/perllib/plugins/MP3Plugin.pm
r15911 r16392 89 89 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 90 90 91 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 91 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 92 92 # do something about OIDtype so no hashing 93 93 -
gsdl/trunk/perllib/plugins/OAIPlugin.pm
r16013 r16392 162 162 my $self = shift (@_); 163 163 164 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;164 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 165 165 166 166 my $outhandle = $self->{'outhandle'}; … … 208 208 209 209 return &plugin::read ($pluginfo, $filename_dir, $url_array->[0], 210 $metadata, $processor, $maxdocs, $total_count, $gli); 210 $block_hash, $metadata, $processor, $maxdocs, 211 $total_count, $gli); 211 212 } 212 213 else -
gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm
r15911 r16392 86 86 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 87 87 88 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 88 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 89 89 # do something about OIDtype so no hashing 90 90 -
gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm
r16193 r16392 161 161 my $self = shift (@_); 162 162 163 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;164 165 # c heck process and block exps, smart block, associate_ext etc166 my ($ block_status,$filename) = $self->read_block(@_);167 return $block_status if ((!defined $block_status) || ($block_status==0));163 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 164 165 # can we process this file?? 166 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 167 return undef unless $self->can_process_this_file($filename_full_path); 168 168 169 169 my $outhandle = $self->{'outhandle'}; … … 175 175 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 176 176 $self->{'file'} = $file; 177 $self->{'filename'} = $filename ;177 $self->{'filename'} = $filename_full_path; 178 178 $self->{'processor'} = $processor; 179 179 $self->{'metadata'} = $metadata; … … 189 189 my $cwd = getcwd(); 190 190 chdir ($tmpdir) || die "Unable to change to $tmpdir"; 191 &util::cp ($filename , $tmpdir);191 &util::cp ($filename_full_path, $tmpdir); 192 192 193 193 $self->unzip ("\"$file_only\""); … … 197 197 } 198 198 } 199 $self->close_document($filename ,$file_only);199 $self->close_document($filename_full_path,$file_only); 200 200 201 201 chdir ($cwd) || die "Unable to change back to $cwd"; -
gsdl/trunk/perllib/plugins/ReadTextFile.pm
r16308 r16392 122 122 sub read_into_doc_obj { 123 123 my $self = shift (@_); 124 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;124 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 125 125 126 126 my $outhandle = $self->{'outhandle'}; 127 128 127 # should we move this to read? What about secondary plugins? 129 128 print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli); … … 131 130 if $self->{'verbosity'} > 1; 132 131 133 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 132 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 133 134 134 # Do encoding stuff 135 135 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); -
gsdl/trunk/perllib/plugins/ReadXMLFile.pm
r15971 r16392 157 157 return $transformed_xml; 158 158 159 } 160 161 sub can_process_this_file { 162 my $self = shift(@_); 163 my ($filename) = @_; 164 165 if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) { 166 return 1; # its a file for us 167 } 168 return 0; 159 169 } 160 170 … … 188 198 } 189 199 190 # because we are not just using process_exp to determine whether to process or not, we need to implement this too, so that a file can be passed down if we are not actually processing it191 sub metadata_read {192 my $self = shift (@_);193 194 my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;195 196 my $result = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli);197 198 if (defined $result) {199 # we think we are processing this, but check that we actually are200 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);201 202 if ($self->check_doctype($filename_full_path)) {203 return $result;204 }205 }206 return undef;207 }208 200 209 201 # we need to implement read cos we are not just using process_exp to determine … … 212 204 my $self = shift (@_); 213 205 214 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 215 216 # Make sure we're processing the correct file, do blocking etc 217 my ($block_status,$filename_full_path) = $self->read_block(@_); 218 return $block_status if ((!defined $block_status) || ($block_status==0)); 219 220 ## check the doctype to see whether we really want to process the file 221 if (!$self->check_doctype($filename_full_path)) { 222 # this file is not for us 223 return undef; 224 } 225 206 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 207 208 # can we process this file?? 
209 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 210 return undef unless $self->can_process_this_file($filename_full_path); 211 226 212 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 227 213 $self->{'base_dir'} = $base_dir; -
gsdl/trunk/perllib/plugins/RealMediaPlugin.pm
r15872 r16392 82 82 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 83 83 84 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 84 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 85 85 my $top_section = $doc_obj->get_top_section(); 86 86 # prevent hashing: old code was in effect the following. -
gsdl/trunk/perllib/plugins/RogPlugin.pm
r15872 r16392 222 222 sub read { 223 223 my $self = shift (@_); 224 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 224 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 225 225 226 226 my $filename = &util::filename_cat($base_dir, $file); -
gsdl/trunk/perllib/plugins/SplitTextFile.pm
r16104 r16392 118 118 sub metadata_read { 119 119 my $self = shift (@_); 120 my ($pluginfo, $base_dir, $file, $ metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;120 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 121 121 122 122 # returns 1 if matches process_exp, and has done blocking in the meantime 123 my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, 123 my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, 124 $block_hash, 124 125 $metadata, $extrametakeys, 125 126 $extrametadata, $processor, … … 192 193 sub read { 193 194 my $self = shift (@_); 194 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;195 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 195 196 my $outhandle = $self->{'outhandle'}; 196 197 my $verbosity = $self->{'verbosity'}; 197 198 198 # check process and block exps, smart block, etc199 my ($ block_status,$filename) = $self->read_block(@_);200 return $block_status if ((!defined $block_status) || ($block_status==0));199 # can we process this file?? 
200 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 201 return undef unless $self->can_process_this_file($filename_full_path); 201 202 202 203 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up … … 232 233 233 234 # create a new document 234 my $doc_obj = new doc ($filename , "indexed_doc");235 my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 235 236 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 236 237 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); … … 240 241 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment"); 241 242 if ($self->{'cover_image'}) { 242 $self->associate_cover_image($doc_obj, $filename );243 $self->associate_cover_image($doc_obj, $filename_full_path); 243 244 } 244 245 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); -
gsdl/trunk/perllib/plugins/UnknownPlugin.pm
r15918 r16392 122 122 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 123 123 124 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 124 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 125 125 my $outhandle = $self->{'outhandle'}; 126 126 my $verbosity = $self->{'verbosity'}; -
gsdl/trunk/perllib/plugins/W3ImagePlugin.pm
r15872 r16392 395 395 # include directories 396 396 sub read { 397 my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_); 397 my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_); 398 398 my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs); 399 399 # forward normal read (runs HTMLPlugin if index_pages T) 400 my $ok = $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 400 my $ok = $self->SUPER::read($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); 401 401 if ( ! $ok ) { return $ok } # what is this returning?? 402 402 -
gsdl/trunk/perllib/plugins/ZIPPlugin.pm
r15880 r16392 101 101 sub read { 102 102 my $self = shift (@_); 103 my ($pluginfo, $base_dir, $file, $ metadata, $processor, $maxdocs, $total_count, $gli) = @_;103 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 104 104 my $outhandle = $self->{'outhandle'}; 105 105 106 # c heck process_exp, block_exp, associate_ext etc107 my ($ block_status,$filename) = $self->read_block(@_);108 return $block_status if ((!defined $block_status) || ($block_status==0));106 # can we process this file?? 107 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 108 return undef unless $self->can_process_this_file($filename_full_path); 109 109 110 my ($file_only) = $file =~ /([^\\\/]*)$/;111 110 my $tmpdir = &util::get_tmp_filename (); 112 111 &util::mk_all_dir ($tmpdir); 113 112 114 print $outhandle "ZIPPlugin: extracting $file _onlyto $tmpdir\n"113 print $outhandle "ZIPPlugin: extracting $filename_no_path to $tmpdir\n" 115 114 if $self->{'verbosity'} > 1; 116 115 … … 118 117 my $cwd = cwd(); 119 118 chdir ($tmpdir) || die "Unable to change to $tmpdir"; 120 &util::cp ($filename , $tmpdir);119 &util::cp ($filename_full_path, $tmpdir); 121 120 122 121 if ($file =~ /\.bz$/i) { 123 $self->bunzip ($file _only);122 $self->bunzip ($filename_no_path); 124 123 } elsif ($file =~ /\.bz2$/i) { 125 $self->bunzip2 ($file _only);124 $self->bunzip2 ($filename_no_path); 126 125 } elsif ($file =~ /\.(zip|jar)$/i) { 127 $self->unzip ($file _only);126 $self->unzip ($filename_no_path); 128 127 } elsif ($file =~ /\.tar$/i) { 129 $self->untar ($file _only);128 $self->untar ($filename_no_path); 130 129 } else { 131 $self->gunzip ($file _only);130 $self->gunzip ($filename_no_path); 132 131 } 133 132 134 133 chdir ($cwd) || die "Unable to change back to $cwd"; 135 134 136 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $ metadata, $processor, $maxdocs, $total_count, $gli);135 my $numdocs = &plugin::read ($pluginfo, 
"", $tmpdir, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); 137 136 &util::rm_r ($tmpdir); 138 137
Note:
See TracChangeset
for help on using the changeset viewer.