Changeset 16392


Ignore:
Timestamp:
2008-07-14T14:57:38+12:00 (16 years ago)
Author:
kjdon
Message:

Global block pass: read_block has been removed; plugins now call can_process_this_file to decide whether a file is for them or not. The read, read_into_doc_obj, metadata_read, etc. methods take an extra argument (block_hash).

Location:
gsdl/trunk/perllib/plugins
Files:
25 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r16257 r16392  
    109109sub read {
    110110    my $self = shift (@_);
    111     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
     111    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
    112112    my $outhandle = $self->{'outhandle'};
    113113
     
    137137        my $tmp = &util::filename_cat ($file, $subfile->[0]);
    138138        next if $tmp eq $file;
    139 
     139       
    140140        # We always process the file...
    141141        my $process_file = 1;
     
    156156        if ($process_file) {
    157157        # note: metadata is not carried on to the next level
    158         $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count+$count), $gli);
     158        $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli);
    159159        }
    160160
  • gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

    r15925 r16392  
    557557
    558558# Override ConvertBinaryFile read
    559 # Needed so multiple .item files generate are sent down secondary plugin
     559# Needed so multiple .item files generated are sent down secondary plugin
    560560
    561561sub read {
    562562    my $self = shift (@_);
    563     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     563    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    564564
    565565    $self->{'gli'} = $gli;
     
    570570    my $outhandle = $self->{'outhandle'};
    571571   
    572     my ($block_status,$filename) = $self->read_block(@_);
    573     return $block_status if ((!defined $block_status) || ($block_status==0));
     572    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     573    return undef unless $self->can_process_this_file($filename_full_path);
     574
    574575    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    575576       
     
    580581    my $conv_filename_list = [];
    581582
    582     $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename);
     583    $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path);
    583584   
    584585    if (scalar(@$conv_filename_list)==0) {
     
    611612    my ($rv,$doc_obj)
    612613        = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
    613                             $metadata, $processor, $maxdocs, $total_count,
     614                            $block_hash, $metadata, $processor, $maxdocs, $total_count,
    614615                            $gli);
    615616
     
    619620
    620621    # Override previous gsdlsourcefilename set by secondary plugin
    621     my $collect_file = &util::filename_within_collection($filename);
     622    my $collect_file = &util::filename_within_collection($filename_full_path);
    622623    my $collect_conv_file = &util::filename_within_collection($conv_filename);
    623624    $doc_obj->set_source_filename ($collect_file);
     
    627628    $self->set_Source_metadata($doc_obj, $filemeta);
    628629    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    629     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
     630    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
    630631   
    631632    if ($self->{'cover_image'}) {
    632         $self->associate_cover_image($doc_obj, $filename);
     633        $self->associate_cover_image($doc_obj, $filename_full_path);
    633634    }
    634635   
  • gsdl/trunk/perllib/plugins/ConvertBinaryFile.pm

    r16013 r16392  
    366366sub read_into_doc_obj {
    367367    my $self = shift (@_);
    368     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     368    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    369369
    370370    my $outhandle = $self->{'outhandle'};
    371371
    372     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     372    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    373373
    374374    my $output_ext = $self->{'convert_to_ext'};
     
    411411
    412412    # note: metadata is not carried on to the next level
     413## **** I just replaced $metadata with {} in following
    413414    my ($rv,$doc_obj)
    414     = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $metadata, $processor, $maxdocs, $total_count, $gli);
     415    = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $block_hash, {}, $processor, $maxdocs, $total_count, $gli);
    415416
    416417    if ((!defined $rv) || ($rv<1)) {
     
    438439    my $topsection = $doc_obj->get_top_section();
    439440    $self->add_associated_files($doc_obj, $filename_full_path);
     441
     442    # extra_metadata is already called by sec plugin in process??
    440443    $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here??
    441444    # do any automatic metadata extraction
  • gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm

    r15872 r16392  
    311311sub read {
    312312    my $self = shift (@_);
    313     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     313    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    314314
    315315    my $outhandle = $self->{'outhandle'};
    316316
    317     # check process_exp, block_exp, associate_ext etc
    318     my ($block_status,$filename) = $self->read_block(@_);   
    319     return $block_status if ((!defined $block_status) || ($block_status==0));
     317    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     318    return undef unless $self->can_process_this_file($filename_full_path);
    320319 
    321320    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     
    325324
    326325    my $output_ext = $self->{'convert_to_ext'};
    327     my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
     326    my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename_full_path);
    328327
    329328    if ("$conv_filename" eq "") {return 0;} # allows continue on errors
     
    334333    #my $doc_obj = new doc ($conv_filename, "indexed_doc");
    335334    # the original filename is used now
    336     my $doc_obj = new doc ($filename, "indexed_doc");
     335    my $doc_obj = new doc ($filename_full_path, "indexed_doc");
    337336    # the converted filename is set separately
    338337    $doc_obj->set_converted_filename($conv_filename);
     
    349348   
    350349    if ($self->{'cover_image'}) {
    351     $self->associate_cover_image($doc_obj, $filename);
     350    $self->associate_cover_image($doc_obj, $filename_full_path);
    352351    }
    353352    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    354     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));
     353    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
    355354
    356355    my $track_no = "1";
  • gsdl/trunk/perllib/plugins/DBPlugin.pm

    r16104 r16392  
    8080sub read {
    8181    my $self = shift (@_);
    82     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs,$total_count,$gli) = @_;
     82    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count,$gli) = @_;
    8383       
    8484     #see if we can handle the passed file...
    85      my ($block_status,$filename) = $self->read_block(@_);   
    86      return $block_status if ((!defined $block_status) || ($block_status==0));
    87 
     85    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     86    return undef unless $self->can_process_this_file($filename_full_path);
     87   
    8888    my $outhandle = $self->{'outhandle'};
    8989    my $verbosity = $self->{'verbosity'};
     
    120120
    121121    # read in config file.
    122     if (!open (CONF, $filename)) {
    123         print $outhandle "DBPlugin: can't read $filename: $!\n";
     122    if (!open (CONF, $filename_full_path)) {
     123        print $outhandle "DBPlugin: can't read $filename_full_path: $!\n";
    124124        return 0;
    125125    }
     
    172172            $err =~ s/\.$//; # remove a trailing .
    173173            print $outhandle "DBPlugin: error evaluating `$statement'\n";
    174             print $outhandle " $err (in $filename)\n";
     174            print $outhandle " $err (in $filename_full_path)\n";
    175175            return 0; # there was an error reading the config file
    176176        }
     
    189189   
    190190    if (!defined($db)) {
    191     print $outhandle "DBPlugin: error: $filename does not specify a db!\n";
     191    print $outhandle "DBPlugin: error: $filename_full_path does not specify a db!\n";
    192192    return 0;
    193193    }
     
    254254
    255255    # create a new document
    256     my $doc_obj = new doc ($filename, "indexed_doc");
     256    my $doc_obj = new doc ($filename_full_path, "indexed_doc");
    257257    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    258258    my $cursection = $doc_obj->get_top_section();
     
    272272
    273273    if ($self->{'cover_image'}) {
    274         $self->associate_cover_image($doc_obj, $filename);
     274        $self->associate_cover_image($doc_obj, $filename_full_path);
    275275    }
    276276    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
  • gsdl/trunk/perllib/plugins/DSpacePlugin.pm

    r15872 r16392  
    240240sub metadata_read {
    241241    my $self = shift (@_);
    242     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     242    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    243243
    244244    my $only_first_doc = $self->{'only_first_doc'};
     
    322322sub read {
    323323    my $self = shift (@_);
    324     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     324    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    325325    my $outhandle = $self->{'outhandle'};
    326326   
  • gsdl/trunk/perllib/plugins/FOXPlugin.pm

    r15872 r16392  
    9393sub read {
    9494    my $self = shift (@_);
    95     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     95    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    9696 
    97     #check for associate_ext, blocking etc, are we processing this file?
    98     my ($block_status,$fullname) = $self->read_block(@_);   
    99     return $block_status if ((!defined $block_status) || ($block_status==0));
     97    # can we process this file??
     98    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     99    return undef unless $self->can_process_this_file($filename_full_path);
    100100
    101101    print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
    102102    print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
    103103
    104     my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
     104    my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i;
    105105
    106106    # open the file
    107     if (!open (FOXBASEIN, $fullname)) {
     107    if (!open (FOXBASEIN, $filename_full_path)) {
    108108    if ($gli) {
    109         print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
    110     }
    111     print STDERR "FOXPlugin::read - couldn't read $fullname\n";
     109        print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n";
     110    }
     111    print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n";
    112112    return -1; # error in processing
    113113    }
     
    141141        print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
    142142    }
    143     print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n";
     143    print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n";
    144144    return -1;
    145145    }
     
    163163
    164164    # open the dbt file if we need to
    165     my $dbtfullname = $fullname;
    166     if ($fullname =~ /f$/) {
     165    my $dbtfullname = $filename_full_path;
     166    if ($filename_full_path =~ /f$/) {
    167167    $dbtfullname =~ s/f$/t/;
    168168    } else {
  • gsdl/trunk/perllib/plugins/GMLPlugin.pm

    r15872 r16392  
    7979sub read {
    8080    my $self = shift (@_);
    81     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     81    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    8282    my $outhandle = $self->{'outhandle'};
    8383
    84     #check process and block exps, smart block, etc
    85     my ($block_status,$filename) = $self->read_block(@_);   
    86     return $block_status if ((!defined $block_status) || ($block_status==0));
     84    # can we process this file??
     85    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     86    return undef unless $self->can_process_this_file($filename_full_path);
    8787
    8888    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     
    9595    $parent_dir = &util::filename_cat ($base_dir, $parent_dir);
    9696
    97     if (!open (INFILE, $filename)) {
     97    if (!open (INFILE, $filename_full_path)) {
    9898    if ($gli) {
    99         print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n";
     99        print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n";
    100100    }
    101     print $outhandle "GMLPlugin::read - couldn't read $filename\n";
     101    print $outhandle "GMLPlugin::read - couldn't read $filename_full_path\n";
    102102    return -1;
    103103    }
     
    133133
    134134        } else {
    135             print $outhandle "GMLPlugin::read - error in file $filename\n";
     135            print $outhandle "GMLPlugin::read - error in file $filename_full_path\n";
    136136            print $outhandle "text: \"$gml\"\n";
    137137            last;
  • gsdl/trunk/perllib/plugins/HBPlugin.pm

    r16019 r16392  
    219219sub read {
    220220    my $self = shift (@_);
    221     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     221    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    222222    my $outhandle = $self->{'outhandle'};
    223223
  • gsdl/trunk/perllib/plugins/HTMLPlugin.pm

    r16247 r16392  
    491491{
    492492    my $self = shift (@_); 
    493     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    494    
    495     # check the process_exp and block_exp thing
    496     my ($block_status,$filename) = $self->read_block(@_);   
    497     return $block_status if ((!defined $block_status) || ($block_status==0));
    498    
     493    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     494       
    499495    # get the input file
    500496    my $input_filename = $file;
     
    523519   
    524520    # call the parent read_into_doc_obj
    525     my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     521    my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli);
    526522   
    527523    return ($process_status,$doc_obj);
     
    554550    my $self = shift (@_);
    555551   
    556     return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^;
     552    #return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^;
     553    return "";
    557554}
    558555
     
    567564{
    568565    my $self =shift (@_);
    569     my ($filename) = @_;
    570     my $html_fname = $filename;
     566    my ($filename_full_path, $block_hash) = @_;
     567
     568    my $html_fname = $filename_full_path;
    571569    my @file_blocks;
    572570   
    573     my ($language, $encoding) = $self->textcat_get_language_encoding ($filename);
     571    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path);
    574572
    575573    # read in file ($text will be in utf8)
    576574    my $text = "";
    577     $self->read_file ($filename, $encoding, $language, \$text);
     575    $self->read_file ($filename_full_path, $encoding, $language, \$text);
    578576    my $textref = \$text;
    579577    my $opencom = '(?:<!--|&lt;!(?:&mdash;|&#151;|--))';
     
    600598    if ($link !~ m@^/@ && $link !~ m/^([A-Z]:?)\\/) {
    601599        # Turn relative file path into full path
    602         my $dirname = &File::Basename::dirname($filename);
     600        my $dirname = &File::Basename::dirname($filename_full_path);
    603601        $link = &util::filename_cat($dirname, $link);
    604602    }
    605603    $link = $self->eval_dir_dots($link);
    606604   
    607     $self->{'file_blocks'}->{$link} = 1;
     605    $block_hash->{'file_blocks'}->{$link} = 1;
    608606    }
    609607}
  • gsdl/trunk/perllib/plugins/ISISPlugin.pm

    r16104 r16392  
    8888# This plugin blocks files with the suffix ".fdt" and ".xrf"
    8989sub get_default_block_exp {
    90     return q^(?i)(\.fdt|\.xrf)$^;
     90    #return q^(?i)(\.fdt|\.xrf)$^;
     91    return "";
    9192}
    9293
     
    122123}
    123124
    124 
    125 sub read_file
    126 {
     125# we block the corresponding fdt and xrf
     126sub store_block_files {
     127   
     128    my $self =shift (@_);
     129    my ($filename_full_path, $block_hash) = @_;
     130   
     131    $self->check_auxiliary_files($filename_full_path);
     132    if (-e $self->{'fdt_file_path'}) {
     133    my $fdt_file = $self->{'fdt_file_path'};
     134    $block_hash->{'file_blocks'}->{$fdt_file} = 1;
     135    }
     136    if (-e $self->{'xrf_file_path'}) {
     137    my $xrf_file = $self->{'xrf_file_path'};
     138    $block_hash->{'file_blocks'}->{$xrf_file} = 1;
     139    }
     140   
     141
     142}
     143
     144sub check_auxiliary_files {
    127145    my $self = shift (@_);
    128     my ($filename, $encoding, $language, $textref) = @_;
    129     my $outhandle = $self->{'outhandle'};
     146    my ($filename) = @_;
    130147
    131148    my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i);
    132     my $mst_file_path_relative = $filename;
    133     $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/;
    134 
    135149    # Check the associated .fdt and .xrf files exist
    136150    $self->{'fdt_file_path'} = $database_file_path_root . ".FDT";
     
    138152    $self->{'fdt_file_path'} = $database_file_path_root . ".fdt";
    139153    }
     154    $self->{'xrf_file_path'} = $database_file_path_root . ".XRF";
     155    if (!-e $self->{'xrf_file_path'}) {
     156    $self->{'xrf_file_path'} = $database_file_path_root . ".xrf";
     157    }
     158}
     159   
     160
     161sub read_file
     162{
     163    my $self = shift (@_);
     164    my ($filename, $encoding, $language, $textref) = @_;
     165    my $outhandle = $self->{'outhandle'};
     166
     167    my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i);
     168    my $mst_file_path_relative = $filename;
     169    $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/;
     170
     171    # Check the associated .fdt and .xrf files exist
     172    $self->check_auxiliary_files($filename);
     173   
    140174    if (!-e $self->{'fdt_file_path'}) {
    141175    print STDERR "<ProcessingError n='$mst_file_path_relative' r='Could not find ISIS FDT file $self->{'fdt_file_path'}'>\n" if ($self->{'gli'});
    142176    print $outhandle "Error: Could not find ISIS FDT file " . $self->{'fdt_file_path'} . ".\n";
    143177    return;
    144     }
    145     $self->{'xrf_file_path'} = $database_file_path_root . ".XRF";
    146     if (!-e $self->{'xrf_file_path'}) {
    147     $self->{'xrf_file_path'} = $database_file_path_root . ".xrf";
    148178    }
    149179    if (!-e $self->{'xrf_file_path'}) {
  • gsdl/trunk/perllib/plugins/IndexPlugin.pm

    r15872 r16392  
    9898sub read {
    9999    my $self = shift (@_);
    100     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     100    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    101101    my $outhandle = $self->{'outhandle'};
    102102
     
    148148        }
    149149        }
    150         $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs, ($total_count +$count), $gli);
     150        $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $block_hash, $metadata, $processor, $maxdocs, ($total_count +$count), $gli);
    151151    }
    152152    }
  • gsdl/trunk/perllib/plugins/LOMPlugin.pm

    r16019 r16392  
    106106
    107107
    108 
     108sub can_process_this_file {
     109    my $self = shift(@_);
     110    my ($filename) = @_;
     111
     112    if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) {
     113    return 1; # its a file for us
     114    }
     115    return 0;
     116}
    109117
    110118sub metadata_read {
    111119    my $self = shift (@_);
    112     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     120    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    113121
    114122    my $outhandle = $self->{'outhandle'};
    115123
    116     my $filename = $file;
    117     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    118    
    119     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    120     return undef; # can't recognise
    121     }
    122     if (!$self->check_doctype($filename)) {
    123     # this file is not for us
    124     return undef;
    125     }
     124    # can we process this file??
     125    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     126    return undef unless $self->can_process_this_file($filename_full_path);
    126127
    127128    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     
    130131    if $self->{'verbosity'} > 1;
    131132
    132     my ($dir,$tail) = $filename =~ /^(.*?)([^\/\\]*)$/;
     133    my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;
    133134    $self->{'output_dir'} = $dir;
    134135
    135136    eval {
    136     $self->{'parser'}->parsefile($filename);
     137    $self->{'parser'}->parsefile($filename_full_path);
    137138    };
    138139   
    139140    if ($@) {
    140     print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
     141    print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
    141142    print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2);
    142143    return 0;
     
    212213sub read {
    213214    my $self = shift (@_);
    214     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     215    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    215216
    216217    my $outhandle = $self->{'outhandle'};
     
    218219    return 0 if (defined $self->{'extra_blocks'}->{$file});
    219220
    220     # need to check whether this file is for us
    221     my ($block_status,$filename) = $self->read_block(@_);   
    222     return $block_status if ((!defined $block_status) || ($block_status==0));
    223     if (!$self->check_doctype($filename)) {
    224     # this file is not for us
    225     return undef;
    226     }
     221    # can we process this file??
     222    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     223    return undef unless $self->can_process_this_file($filename_full_path);
    227224
    228225    $self->{'metadata_table'} = $metadata;
  • gsdl/trunk/perllib/plugins/MP3Plugin.pm

    r15911 r16392  
    8989    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    9090
    91     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     91    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    9292    # do something about OIDtype so no hashing
    9393     
  • gsdl/trunk/perllib/plugins/OAIPlugin.pm

    r16013 r16392  
    162162    my $self = shift (@_); 
    163163 
    164     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     164    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    165165
    166166    my $outhandle = $self->{'outhandle'};
     
    208208       
    209209        return &plugin::read ($pluginfo, $filename_dir, $url_array->[0],
    210                   $metadata, $processor, $maxdocs, $total_count, $gli);
     210                  $block_hash, $metadata, $processor, $maxdocs,
     211                  $total_count, $gli);
    211212    }
    212213    else
  • gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm

    r15911 r16392  
    8686    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    8787
    88     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     88    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    8989    # do something about OIDtype so no hashing
    9090     
  • gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm

    r16193 r16392  
    161161    my $self = shift (@_); 
    162162   
    163     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    164 
    165     # check process and block exps, smart block, associate_ext etc
    166     my ($block_status,$filename) = $self->read_block(@_);   
    167     return $block_status if ((!defined $block_status) || ($block_status==0));
     163    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     164
     165    # can we process this file??
     166    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     167    return undef unless $self->can_process_this_file($filename_full_path);
    168168
    169169    my $outhandle = $self->{'outhandle'};
     
    175175    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    176176    $self->{'file'} = $file;
    177     $self->{'filename'} = $filename;
     177    $self->{'filename'} = $filename_full_path;
    178178    $self->{'processor'} = $processor;
    179179    $self->{'metadata'} = $metadata;
     
    189189    my $cwd = getcwd();
    190190    chdir ($tmpdir) || die "Unable to change to $tmpdir";
    191     &util::cp ($filename, $tmpdir);
     191    &util::cp ($filename_full_path, $tmpdir);
    192192   
    193193    $self->unzip ("\"$file_only\"");
     
    197197        }
    198198    }
    199     $self->close_document($filename,$file_only);
     199    $self->close_document($filename_full_path,$file_only);
    200200   
    201201    chdir ($cwd) || die "Unable to change back to $cwd";
  • gsdl/trunk/perllib/plugins/ReadTextFile.pm

    r16308 r16392  
    122122sub read_into_doc_obj {
    123123    my $self = shift (@_); 
    124     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     124    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    125125
    126126    my $outhandle = $self->{'outhandle'};
    127 
    128127    # should we move this to read? What about secondary plugins?
    129128    print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli);
     
    131130        if $self->{'verbosity'} > 1;
    132131
    133     my ($filename_full_path, $filename_no_path) =  $self->get_full_filenames($base_dir, $file);
     132    my ($filename_full_path, $filename_no_path) =  &util::get_full_filenames($base_dir, $file);
     133
    134134    # Do encoding stuff
    135135    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path);
  • gsdl/trunk/perllib/plugins/ReadXMLFile.pm

    r15971 r16392  
    157157    return $transformed_xml;
    158158
     159}
     160
     161sub can_process_this_file {
     162    my $self = shift(@_);
     163    my ($filename) = @_;
     164
     165    if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) {
     166    return 1; # its a file for us
     167    }
     168    return 0;
    159169}
    160170
     
    188198}
    189199
    190 # because we are not just using process_exp to determine whether to process or not, we need to implement this too, so that a file can be passed down if we are not actually processing it
    191 sub metadata_read {
    192     my $self = shift (@_);
    193    
    194     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    195  
    196     my $result = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli);
    197 
    198     if (defined $result) {
    199     # we think we are processing this, but check that we actually are
    200     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
    201 
    202     if ($self->check_doctype($filename_full_path)) {
    203         return $result;
    204     }
    205     }
    206     return undef;
    207 }
    208200
    209201# we need to implement read cos we are not just using process_exp to determine
     
    212204    my $self = shift (@_); 
    213205 
    214     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    215 
    216     # Make sure we're processing the correct file, do blocking etc
    217     my ($block_status,$filename_full_path) = $self->read_block(@_);   
    218     return $block_status if ((!defined $block_status) || ($block_status==0));
    219 
    220     ## check the doctype to see whether we really want to process the file
    221     if (!$self->check_doctype($filename_full_path)) {
    222     # this file is not for us
    223     return undef;
    224     }
    225 
     206    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     207
     208    # can we process this file??
     209    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     210    return undef unless $self->can_process_this_file($filename_full_path);
     211   
    226212    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    227213    $self->{'base_dir'} = $base_dir;
  • gsdl/trunk/perllib/plugins/RealMediaPlugin.pm

    r15872 r16392  
    8282    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    8383
    84     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     84    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    8585    my $top_section = $doc_obj->get_top_section();
    8686    # prevent hashing: old code was in effect the following.
  • gsdl/trunk/perllib/plugins/RogPlugin.pm

    r15872 r16392  
    222222sub read {
    223223    my $self = shift (@_);
    224     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     224    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    225225
    226226    my $filename = &util::filename_cat($base_dir, $file);
  • gsdl/trunk/perllib/plugins/SplitTextFile.pm

    r16104 r16392  
    118118sub metadata_read {
    119119    my $self = shift (@_); 
    120     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     120    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    121121
    122122    # returns 1 if matches process_exp, and has done blocking in the meantime
    123     my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file,
     123    my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file,
     124                          $block_hash,
    124125                          $metadata, $extrametakeys,
    125126                          $extrametadata, $processor,
     
    192193sub read {
    193194    my $self = shift (@_);
    194     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     195    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    195196    my $outhandle = $self->{'outhandle'};
    196197    my $verbosity = $self->{'verbosity'};
    197198
    198     #check process and block exps, smart block, etc
    199     my ($block_status,$filename) = $self->read_block(@_);   
    200     return $block_status if ((!defined $block_status) || ($block_status==0));
     199    # can we process this file??
     200    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     201    return undef unless $self->can_process_this_file($filename_full_path);
    201202
    202203    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     
    232233
    233234    # create a new document
    234     my $doc_obj = new doc ($filename, "indexed_doc");
     235    my $doc_obj = new doc ($filename_full_path, "indexed_doc");
    235236    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    236237    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
     
    240241    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment");
    241242    if ($self->{'cover_image'}) {
    242         $self->associate_cover_image($doc_obj, $filename);
     243        $self->associate_cover_image($doc_obj, $filename_full_path);
    243244    }
    244245    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
  • gsdl/trunk/perllib/plugins/UnknownPlugin.pm

    r15918 r16392  
    122122    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    123123
    124     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     124    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    125125    my $outhandle = $self->{'outhandle'};
    126126    my $verbosity = $self->{'verbosity'};
  • gsdl/trunk/perllib/plugins/W3ImagePlugin.pm

    r15872 r16392  
    395395# include directories
    396396sub read {
    397     my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_);
     397    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_);
    398398    my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs);
    399399    # forward normal read (runs HTMLPlugin if index_pages T)
    400     my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     400    my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli);
    401401    if ( ! $ok ) { return $ok } # what is this returning??
    402402
  • gsdl/trunk/perllib/plugins/ZIPPlugin.pm

    r15880 r16392  
    101101sub read {
    102102    my $self = shift (@_);
    103     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     103    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    104104    my $outhandle = $self->{'outhandle'};
    105105
    106     # check process_exp, block_exp, associate_ext etc
    107     my ($block_status,$filename) = $self->read_block(@_);   
    108     return $block_status if ((!defined $block_status) || ($block_status==0));
     106    # can we process this file??
     107    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     108    return undef unless $self->can_process_this_file($filename_full_path);
    109109   
    110     my ($file_only) = $file =~ /([^\\\/]*)$/;
    111110    my $tmpdir = &util::get_tmp_filename ();
    112111    &util::mk_all_dir ($tmpdir);
    113112   
    114     print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n"
     113    print $outhandle "ZIPPlugin: extracting $filename_no_path to $tmpdir\n"
    115114    if $self->{'verbosity'} > 1;
    116115   
     
    118117    my $cwd = cwd();
    119118    chdir ($tmpdir) || die "Unable to change to $tmpdir";
    120     &util::cp ($filename, $tmpdir);
     119    &util::cp ($filename_full_path, $tmpdir);
    121120   
    122121    if ($file =~ /\.bz$/i) {
    123     $self->bunzip ($file_only);
     122    $self->bunzip ($filename_no_path);
    124123    } elsif ($file =~ /\.bz2$/i) {
    125     $self->bunzip2 ($file_only);
     124    $self->bunzip2 ($filename_no_path);
    126125    } elsif ($file =~ /\.(zip|jar)$/i) {
    127     $self->unzip ($file_only);
     126    $self->unzip ($filename_no_path);
    128127    } elsif ($file =~ /\.tar$/i) {
    129     $self->untar ($file_only);
     128    $self->untar ($filename_no_path);
    130129    } else {
    131     $self->gunzip ($file_only);
     130    $self->gunzip ($filename_no_path);
    132131    }
    133132   
    134133    chdir ($cwd) || die "Unable to change back to $cwd";
    135134   
    136     my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
     135    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli);
    137136    &util::rm_r ($tmpdir);
    138137   
Note: See TracChangeset for help on using the changeset viewer.