Changeset 16392

Show
Ignore:
Timestamp:
14.07.2008 14:57:38 (11 years ago)
Author:
kjdon
Message:

Global block pass: read_block has been removed; plugins now call can_process_this_file to decide whether a file is for them or not. An extra argument (block_hash) is now passed to read, read_into_doc_obj, metadata_read, etc.

Location:
gsdl/trunk/perllib/plugins
Files:
25 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm

    r16257 r16392  
    109109sub read { 
    110110    my $self = shift (@_); 
    111     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs,$total_count, $gli) = @_; 
     111    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count, $gli) = @_; 
    112112    my $outhandle = $self->{'outhandle'}; 
    113113 
     
    137137        my $tmp = &util::filename_cat ($file, $subfile->[0]); 
    138138        next if $tmp eq $file; 
    139  
     139         
    140140        # We always process the file... 
    141141        my $process_file = 1; 
     
    156156        if ($process_file) { 
    157157        # note: metadata is not carried on to the next level 
    158         $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count+$count), $gli); 
     158        $count += &plugin::read ($pluginfo, $base_dir, $tmp, $block_hash, {}, $processor, $maxdocs, ($total_count+$count), $gli); 
    159159        } 
    160160 
  • gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

    r15925 r16392  
    557557 
    558558# Override ConvertBinaryFile read 
    559 # Needed so multiple .item files generate are sent down secondary plugin 
     559# Needed so multiple .item files generated are sent down secondary plugin 
    560560 
    561561sub read { 
    562562    my $self = shift (@_); 
    563     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     563    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    564564 
    565565    $self->{'gli'} = $gli; 
     
    570570    my $outhandle = $self->{'outhandle'}; 
    571571     
    572     my ($block_status,$filename) = $self->read_block(@_); 
    573     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     572    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     573    return undef unless $self->can_process_this_file($filename_full_path); 
     574 
    574575    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    575576        
     
    580581    my $conv_filename_list = []; 
    581582 
    582     $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename); 
     583    $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path); 
    583584     
    584585    if (scalar(@$conv_filename_list)==0) { 
     
    611612    my ($rv,$doc_obj)  
    612613        = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,  
    613                             $metadata, $processor, $maxdocs, $total_count, 
     614                            $block_hash, $metadata, $processor, $maxdocs, $total_count, 
    614615                            $gli); 
    615616 
     
    619620 
    620621    # Override previous gsdlsourcefilename set by secondary plugin 
    621     my $collect_file = &util::filename_within_collection($filename); 
     622    my $collect_file = &util::filename_within_collection($filename_full_path); 
    622623    my $collect_conv_file = &util::filename_within_collection($conv_filename); 
    623624    $doc_obj->set_source_filename ($collect_file);  
     
    627628    $self->set_Source_metadata($doc_obj, $filemeta); 
    628629    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    629     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename)); 
     630    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 
    630631     
    631632    if ($self->{'cover_image'}) { 
    632         $self->associate_cover_image($doc_obj, $filename); 
     633        $self->associate_cover_image($doc_obj, $filename_full_path); 
    633634    } 
    634635     
  • gsdl/trunk/perllib/plugins/ConvertBinaryFile.pm

    r16013 r16392  
    366366sub read_into_doc_obj { 
    367367    my $self = shift (@_); 
    368     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     368    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    369369 
    370370    my $outhandle = $self->{'outhandle'}; 
    371371 
    372     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     372    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    373373 
    374374    my $output_ext = $self->{'convert_to_ext'}; 
     
    411411 
    412412    # note: metadata is not carried on to the next level 
     413## **** I just replaced $metadata with {} in following 
    413414    my ($rv,$doc_obj)  
    414     = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $metadata, $processor, $maxdocs, $total_count, $gli); 
     415    = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename, $block_hash, {}, $processor, $maxdocs, $total_count, $gli); 
    415416 
    416417    if ((!defined $rv) || ($rv<1)) { 
     
    438439    my $topsection = $doc_obj->get_top_section(); 
    439440    $self->add_associated_files($doc_obj, $filename_full_path); 
     441 
     442    # extra_metadata is already called by sec plugin in process?? 
    440443    $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here?? 
    441444    # do any automatic metadata extraction 
  • gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm

    r15872 r16392  
    311311sub read { 
    312312    my $self = shift (@_); 
    313     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     313    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    314314 
    315315    my $outhandle = $self->{'outhandle'}; 
    316316 
    317     # check process_exp, block_exp, associate_ext etc 
    318     my ($block_status,$filename) = $self->read_block(@_);     
    319     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     317    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     318    return undef unless $self->can_process_this_file($filename_full_path); 
    320319  
    321320    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
     
    325324 
    326325    my $output_ext = $self->{'convert_to_ext'}; 
    327     my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename); 
     326    my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename_full_path); 
    328327 
    329328    if ("$conv_filename" eq "") {return 0;} # allows continue on errors 
     
    334333    #my $doc_obj = new doc ($conv_filename, "indexed_doc"); 
    335334    # the original filename is used now 
    336     my $doc_obj = new doc ($filename, "indexed_doc"); 
     335    my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 
    337336    # the converted filename is set separately 
    338337    $doc_obj->set_converted_filename($conv_filename); 
     
    349348     
    350349    if ($self->{'cover_image'}) { 
    351     $self->associate_cover_image($doc_obj, $filename); 
     350    $self->associate_cover_image($doc_obj, $filename_full_path); 
    352351    } 
    353352    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    354     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename)); 
     353    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 
    355354 
    356355    my $track_no = "1"; 
  • gsdl/trunk/perllib/plugins/DBPlugin.pm

    r16104 r16392  
    8080sub read { 
    8181    my $self = shift (@_); 
    82     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs,$total_count,$gli) = @_; 
     82    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs,$total_count,$gli) = @_; 
    8383         
    8484     #see if we can handle the passed file... 
    85      my ($block_status,$filename) = $self->read_block(@_);     
    86      return $block_status if ((!defined $block_status) || ($block_status==0)); 
    87  
     85    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     86    return undef unless $self->can_process_this_file($filename_full_path); 
     87     
    8888    my $outhandle = $self->{'outhandle'}; 
    8989    my $verbosity = $self->{'verbosity'}; 
     
    120120 
    121121    # read in config file. 
    122     if (!open (CONF, $filename)) { 
    123         print $outhandle "DBPlugin: can't read $filename: $!\n"; 
     122    if (!open (CONF, $filename_full_path)) { 
     123        print $outhandle "DBPlugin: can't read $filename_full_path: $!\n"; 
    124124        return 0; 
    125125    }  
     
    172172            $err =~ s/\.$//; # remove a trailing . 
    173173            print $outhandle "DBPlugin: error evaluating `$statement'\n"; 
    174             print $outhandle " $err (in $filename)\n"; 
     174            print $outhandle " $err (in $filename_full_path)\n"; 
    175175            return 0; # there was an error reading the config file 
    176176        } 
     
    189189     
    190190    if (!defined($db)) { 
    191     print $outhandle "DBPlugin: error: $filename does not specify a db!\n"; 
     191    print $outhandle "DBPlugin: error: $filename_full_path does not specify a db!\n"; 
    192192    return 0; 
    193193    } 
     
    254254 
    255255    # create a new document 
    256     my $doc_obj = new doc ($filename, "indexed_doc"); 
     256    my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 
    257257    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 
    258258    my $cursection = $doc_obj->get_top_section(); 
     
    272272 
    273273    if ($self->{'cover_image'}) { 
    274         $self->associate_cover_image($doc_obj, $filename); 
     274        $self->associate_cover_image($doc_obj, $filename_full_path); 
    275275    } 
    276276    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
  • gsdl/trunk/perllib/plugins/DSpacePlugin.pm

    r15872 r16392  
    240240sub metadata_read { 
    241241    my $self = shift (@_); 
    242     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
     242    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
    243243 
    244244    my $only_first_doc = $self->{'only_first_doc'}; 
     
    322322sub read { 
    323323    my $self = shift (@_); 
    324     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     324    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    325325    my $outhandle = $self->{'outhandle'}; 
    326326     
  • gsdl/trunk/perllib/plugins/FOXPlugin.pm

    r15872 r16392  
    9393sub read { 
    9494    my $self = shift (@_); 
    95     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     95    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    9696  
    97     #check for associate_ext, blocking etc, are we processing this file? 
    98     my ($block_status,$fullname) = $self->read_block(@_);     
    99     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     97    # can we process this file?? 
     98    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     99    return undef unless $self->can_process_this_file($filename_full_path); 
    100100 
    101101    print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli); 
    102102    print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1; 
    103103 
    104     my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i; 
     104    my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i; 
    105105 
    106106    # open the file 
    107     if (!open (FOXBASEIN, $fullname)) { 
     107    if (!open (FOXBASEIN, $filename_full_path)) { 
    108108    if ($gli) { 
    109         print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n"; 
    110     } 
    111     print STDERR "FOXPlugin::read - couldn't read $fullname\n"; 
     109        print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n"; 
     110    } 
     111    print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n"; 
    112112    return -1; # error in processing 
    113113    } 
     
    141141        print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n"; 
    142142    } 
    143     print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n"; 
     143    print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n"; 
    144144    return -1; 
    145145    } 
     
    163163 
    164164    # open the dbt file if we need to 
    165     my $dbtfullname = $fullname; 
    166     if ($fullname =~ /f$/) { 
     165    my $dbtfullname = $filename_full_path; 
     166    if ($filename_full_path =~ /f$/) { 
    167167    $dbtfullname =~ s/f$/t/; 
    168168    } else { 
  • gsdl/trunk/perllib/plugins/GMLPlugin.pm

    r15872 r16392  
    7979sub read { 
    8080    my $self = shift (@_); 
    81     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     81    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    8282    my $outhandle = $self->{'outhandle'}; 
    8383 
    84     #check process and block exps, smart block, etc 
    85     my ($block_status,$filename) = $self->read_block(@_);     
    86     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     84    # can we process this file?? 
     85    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     86    return undef unless $self->can_process_this_file($filename_full_path); 
    8787 
    8888    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
     
    9595    $parent_dir = &util::filename_cat ($base_dir, $parent_dir); 
    9696 
    97     if (!open (INFILE, $filename)) { 
     97    if (!open (INFILE, $filename_full_path)) { 
    9898    if ($gli) { 
    99         print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n"; 
     99        print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n"; 
    100100    } 
    101     print $outhandle "GMLPlugin::read - couldn't read $filename\n"; 
     101    print $outhandle "GMLPlugin::read - couldn't read $filename_full_path\n"; 
    102102    return -1; 
    103103    } 
     
    133133 
    134134        } else { 
    135             print $outhandle "GMLPlugin::read - error in file $filename\n"; 
     135            print $outhandle "GMLPlugin::read - error in file $filename_full_path\n"; 
    136136            print $outhandle "text: \"$gml\"\n"; 
    137137            last; 
  • gsdl/trunk/perllib/plugins/HBPlugin.pm

    r16019 r16392  
    219219sub read { 
    220220    my $self = shift (@_); 
    221     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     221    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    222222    my $outhandle = $self->{'outhandle'}; 
    223223 
  • gsdl/trunk/perllib/plugins/HTMLPlugin.pm

    r16247 r16392  
    491491{ 
    492492    my $self = shift (@_);   
    493     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    494      
    495     # check the process_exp and block_exp thing 
    496     my ($block_status,$filename) = $self->read_block(@_);     
    497     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    498      
     493    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     494         
    499495    # get the input file 
    500496    my $input_filename = $file; 
     
    523519     
    524520    # call the parent read_into_doc_obj 
    525     my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 
     521    my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); 
    526522     
    527523    return ($process_status,$doc_obj); 
     
    554550    my $self = shift (@_); 
    555551     
    556     return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^; 
     552    #return q^(?i)\.(gif|jpe?g|jpe|jpg|png|css)$^; 
     553    return ""; 
    557554} 
    558555 
     
    567564{ 
    568565    my $self =shift (@_); 
    569     my ($filename) = @_; 
    570     my $html_fname = $filename; 
     566    my ($filename_full_path, $block_hash) = @_; 
     567 
     568    my $html_fname = $filename_full_path; 
    571569    my @file_blocks; 
    572570     
    573     my ($language, $encoding) = $self->textcat_get_language_encoding ($filename); 
     571    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); 
    574572 
    575573    # read in file ($text will be in utf8) 
    576574    my $text = ""; 
    577     $self->read_file ($filename, $encoding, $language, \$text); 
     575    $self->read_file ($filename_full_path, $encoding, $language, \$text); 
    578576    my $textref = \$text; 
    579577    my $opencom = '(?:<!--|&lt;!(?:&mdash;|&#151;|--))'; 
     
    600598    if ($link !~ m@^/@ && $link !~ m/^([A-Z]:?)\\/) { 
    601599        # Turn relative file path into full path 
    602         my $dirname = &File::Basename::dirname($filename); 
     600        my $dirname = &File::Basename::dirname($filename_full_path); 
    603601        $link = &util::filename_cat($dirname, $link); 
    604602    } 
    605603    $link = $self->eval_dir_dots($link); 
    606604     
    607     $self->{'file_blocks'}->{$link} = 1; 
     605    $block_hash->{'file_blocks'}->{$link} = 1; 
    608606    } 
    609607} 
  • gsdl/trunk/perllib/plugins/ISISPlugin.pm

    r16104 r16392  
    8888# This plugin blocks files with the suffix ".fdt" and ".xrf" 
    8989sub get_default_block_exp { 
    90     return q^(?i)(\.fdt|\.xrf)$^; 
     90    #return q^(?i)(\.fdt|\.xrf)$^; 
     91    return ""; 
    9192} 
    9293 
     
    122123} 
    123124 
    124  
    125 sub read_file 
    126 { 
     125# we block the corresponding fdt and xrf 
     126sub store_block_files { 
     127     
     128    my $self =shift (@_); 
     129    my ($filename_full_path, $block_hash) = @_; 
     130     
     131    $self->check_auxiliary_files($filename_full_path); 
     132    if (-e $self->{'fdt_file_path'}) { 
     133    my $fdt_file = $self->{'fdt_file_path'}; 
     134    $block_hash->{'file_blocks'}->{$fdt_file} = 1; 
     135    } 
     136    if (-e $self->{'xrf_file_path'}) { 
     137    my $xrf_file = $self->{'xrf_file_path'}; 
     138    $block_hash->{'file_blocks'}->{$xrf_file} = 1; 
     139    } 
     140     
     141 
     142} 
     143 
     144sub check_auxiliary_files { 
    127145    my $self = shift (@_); 
    128     my ($filename, $encoding, $language, $textref) = @_; 
    129     my $outhandle = $self->{'outhandle'}; 
     146    my ($filename) = @_; 
    130147 
    131148    my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i); 
    132     my $mst_file_path_relative = $filename; 
    133     $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/; 
    134  
    135149    # Check the associated .fdt and .xrf files exist 
    136150    $self->{'fdt_file_path'} = $database_file_path_root . ".FDT"; 
     
    138152    $self->{'fdt_file_path'} = $database_file_path_root . ".fdt"; 
    139153    } 
     154    $self->{'xrf_file_path'} = $database_file_path_root . ".XRF"; 
     155    if (!-e $self->{'xrf_file_path'}) { 
     156    $self->{'xrf_file_path'} = $database_file_path_root . ".xrf"; 
     157    } 
     158} 
     159     
     160 
     161sub read_file 
     162{ 
     163    my $self = shift (@_); 
     164    my ($filename, $encoding, $language, $textref) = @_; 
     165    my $outhandle = $self->{'outhandle'}; 
     166 
     167    my ($database_file_path_root) = ($filename =~ /(.*)\.mst$/i); 
     168    my $mst_file_path_relative = $filename; 
     169    $mst_file_path_relative =~ s/^.+import.(.*?)$/$1/; 
     170 
     171    # Check the associated .fdt and .xrf files exist 
     172    $self->check_auxiliary_files($filename); 
     173     
    140174    if (!-e $self->{'fdt_file_path'}) { 
    141175    print STDERR "<ProcessingError n='$mst_file_path_relative' r='Could not find ISIS FDT file $self->{'fdt_file_path'}'>\n" if ($self->{'gli'}); 
    142176    print $outhandle "Error: Could not find ISIS FDT file " . $self->{'fdt_file_path'} . ".\n"; 
    143177    return; 
    144     } 
    145     $self->{'xrf_file_path'} = $database_file_path_root . ".XRF"; 
    146     if (!-e $self->{'xrf_file_path'}) { 
    147     $self->{'xrf_file_path'} = $database_file_path_root . ".xrf"; 
    148178    } 
    149179    if (!-e $self->{'xrf_file_path'}) { 
  • gsdl/trunk/perllib/plugins/IndexPlugin.pm

    r15872 r16392  
    9898sub read { 
    9999    my $self = shift (@_); 
    100     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     100    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    101101    my $outhandle = $self->{'outhandle'}; 
    102102 
     
    148148        } 
    149149        } 
    150         $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs, ($total_count +$count), $gli); 
     150        $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $block_hash, $metadata, $processor, $maxdocs, ($total_count +$count), $gli); 
    151151    } 
    152152    } 
  • gsdl/trunk/perllib/plugins/LOMPlugin.pm

    r16019 r16392  
    106106 
    107107 
    108  
     108sub can_process_this_file { 
     109    my $self = shift(@_); 
     110    my ($filename) = @_; 
     111 
     112    if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) { 
     113    return 1; # its a file for us 
     114    } 
     115    return 0; 
     116} 
    109117 
    110118sub metadata_read { 
    111119    my $self = shift (@_); 
    112     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
     120    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
    113121 
    114122    my $outhandle = $self->{'outhandle'}; 
    115123 
    116     my $filename = $file; 
    117     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; 
    118      
    119     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    120     return undef; # can't recognise 
    121     } 
    122     if (!$self->check_doctype($filename)) { 
    123     # this file is not for us 
    124     return undef; 
    125     } 
     124    # can we process this file?? 
     125    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     126    return undef unless $self->can_process_this_file($filename_full_path); 
    126127 
    127128    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
     
    130131    if $self->{'verbosity'} > 1; 
    131132 
    132     my ($dir,$tail) = $filename =~ /^(.*?)([^\/\\]*)$/; 
     133    my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/; 
    133134    $self->{'output_dir'} = $dir; 
    134135 
    135136    eval { 
    136     $self->{'parser'}->parsefile($filename); 
     137    $self->{'parser'}->parsefile($filename_full_path); 
    137138    }; 
    138139     
    139140    if ($@) { 
    140     print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 
     141    print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 
    141142    print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2); 
    142143    return 0; 
     
    212213sub read { 
    213214    my $self = shift (@_); 
    214     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     215    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    215216 
    216217    my $outhandle = $self->{'outhandle'}; 
     
    218219    return 0 if (defined $self->{'extra_blocks'}->{$file}); 
    219220 
    220     # need to check whether this file is for us 
    221     my ($block_status,$filename) = $self->read_block(@_);     
    222     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    223     if (!$self->check_doctype($filename)) { 
    224     # this file is not for us 
    225     return undef; 
    226     } 
     221    # can we process this file?? 
     222    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     223    return undef unless $self->can_process_this_file($filename_full_path); 
    227224 
    228225    $self->{'metadata_table'} = $metadata; 
  • gsdl/trunk/perllib/plugins/MP3Plugin.pm

    r15911 r16392  
    8989    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    9090 
    91     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     91    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    9292    # do something about OIDtype so no hashing 
    9393      
  • gsdl/trunk/perllib/plugins/OAIPlugin.pm

    r16013 r16392  
    162162    my $self = shift (@_);   
    163163   
    164     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     164    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    165165 
    166166    my $outhandle = $self->{'outhandle'}; 
     
    208208         
    209209        return &plugin::read ($pluginfo, $filename_dir, $url_array->[0], 
    210                   $metadata, $processor, $maxdocs, $total_count, $gli); 
     210                  $block_hash, $metadata, $processor, $maxdocs, 
     211                  $total_count, $gli); 
    211212    } 
    212213    else 
  • gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm

    r15911 r16392  
    8686    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    8787 
    88     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     88    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    8989    # do something about OIDtype so no hashing 
    9090      
  • gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm

    r16193 r16392  
    161161    my $self = shift (@_);   
    162162    
    163     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    164  
    165     # check process and block exps, smart block, associate_ext etc 
    166     my ($block_status,$filename) = $self->read_block(@_);     
    167     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     163    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     164 
     165    # can we process this file?? 
     166    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     167    return undef unless $self->can_process_this_file($filename_full_path); 
    168168 
    169169    my $outhandle = $self->{'outhandle'}; 
     
    175175    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    176176    $self->{'file'} = $file; 
    177     $self->{'filename'} = $filename; 
     177    $self->{'filename'} = $filename_full_path; 
    178178    $self->{'processor'} = $processor; 
    179179    $self->{'metadata'} = $metadata; 
     
    189189    my $cwd = getcwd(); 
    190190    chdir ($tmpdir) || die "Unable to change to $tmpdir"; 
    191     &util::cp ($filename, $tmpdir); 
     191    &util::cp ($filename_full_path, $tmpdir); 
    192192     
    193193    $self->unzip ("\"$file_only\""); 
     
    197197        } 
    198198    } 
    199     $self->close_document($filename,$file_only); 
     199    $self->close_document($filename_full_path,$file_only); 
    200200     
    201201    chdir ($cwd) || die "Unable to change back to $cwd"; 
  • gsdl/trunk/perllib/plugins/ReadTextFile.pm

    r16308 r16392  
    122122sub read_into_doc_obj { 
    123123    my $self = shift (@_);   
    124     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     124    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    125125 
    126126    my $outhandle = $self->{'outhandle'}; 
    127  
    128127    # should we move this to read? What about secondary plugins? 
    129128    print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli); 
     
    131130        if $self->{'verbosity'} > 1; 
    132131 
    133     my ($filename_full_path, $filename_no_path) =  $self->get_full_filenames($base_dir, $file); 
     132    my ($filename_full_path, $filename_no_path) =  &util::get_full_filenames($base_dir, $file); 
     133 
    134134    # Do encoding stuff 
    135135    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); 
  • gsdl/trunk/perllib/plugins/ReadXMLFile.pm

    r15971 r16392  
    157157    return $transformed_xml; 
    158158 
     159} 
     160 
     161sub can_process_this_file { 
     162    my $self = shift(@_); 
     163    my ($filename) = @_; 
     164 
     165    if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) { 
     166    return 1; # its a file for us 
     167    } 
     168    return 0; 
    159169} 
    160170 
     
    188198} 
    189199 
    190 # because we are not just using process_exp to determine whether to process or not, we need to implement this too, so that a file can be passed down if we are not actually processing it 
    191 sub metadata_read { 
    192     my $self = shift (@_); 
    193      
    194     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
    195   
    196     my $result = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli); 
    197  
    198     if (defined $result) { 
    199     # we think we are processing this, but check that we actually are 
    200     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
    201  
    202     if ($self->check_doctype($filename_full_path)) { 
    203         return $result; 
    204     } 
    205     } 
    206     return undef; 
    207 } 
    208200 
    209201# we need to implement read because we are not just using process_exp to determine 
     
    212204    my $self = shift (@_);   
    213205   
    214     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    215  
    216     # Make sure we're processing the correct file, do blocking etc 
    217     my ($block_status,$filename_full_path) = $self->read_block(@_);     
    218     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    219  
    220     ## check the doctype to see whether we really want to process the file 
    221     if (!$self->check_doctype($filename_full_path)) { 
    222     # this file is not for us 
    223     return undef; 
    224     } 
    225  
     206    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     207 
     208    # can we process this file?? 
     209    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     210    return undef unless $self->can_process_this_file($filename_full_path); 
     211     
    226212    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    227213    $self->{'base_dir'} = $base_dir; 
  • gsdl/trunk/perllib/plugins/RealMediaPlugin.pm

    r15872 r16392  
    8282    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    8383 
    84     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     84    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    8585    my $top_section = $doc_obj->get_top_section(); 
    8686    # prevent hashing: old code was in effect the following.  
  • gsdl/trunk/perllib/plugins/RogPlugin.pm

    r15872 r16392  
    222222sub read { 
    223223    my $self = shift (@_); 
    224     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     224    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    225225 
    226226    my $filename = &util::filename_cat($base_dir, $file); 
  • gsdl/trunk/perllib/plugins/SplitTextFile.pm

    r16104 r16392  
    118118sub metadata_read { 
    119119    my $self = shift (@_);   
    120     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
     120    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
    121121 
    122122    # returns 1 if matches process_exp, and has done blocking in the meantime 
    123     my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file,  
     123    my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, 
     124                          $block_hash,  
    124125                          $metadata, $extrametakeys,  
    125126                          $extrametadata, $processor,  
     
    192193sub read { 
    193194    my $self = shift (@_); 
    194     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     195    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    195196    my $outhandle = $self->{'outhandle'}; 
    196197    my $verbosity = $self->{'verbosity'}; 
    197198 
    198     #check process and block exps, smart block, etc 
    199     my ($block_status,$filename) = $self->read_block(@_);     
    200     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     199    # can we process this file?? 
     200    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     201    return undef unless $self->can_process_this_file($filename_full_path); 
    201202 
    202203    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
     
    232233 
    233234    # create a new document 
    234     my $doc_obj = new doc ($filename, "indexed_doc"); 
     235    my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 
    235236    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 
    236237    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); 
     
    240241    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment"); 
    241242    if ($self->{'cover_image'}) { 
    242         $self->associate_cover_image($doc_obj, $filename); 
     243        $self->associate_cover_image($doc_obj, $filename_full_path); 
    243244    } 
    244245    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
  • gsdl/trunk/perllib/plugins/UnknownPlugin.pm

    r15918 r16392  
    122122    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    123123 
    124     my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     124    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    125125    my $outhandle = $self->{'outhandle'}; 
    126126    my $verbosity = $self->{'verbosity'}; 
  • gsdl/trunk/perllib/plugins/W3ImagePlugin.pm

    r15872 r16392  
    395395# include directories 
    396396sub read { 
    397     my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_); 
     397    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_); 
    398398    my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs); 
    399399    # forward normal read (runs HTMLPlugin if index_pages T) 
    400     my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);  
     400    my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli);  
    401401    if ( ! $ok ) { return $ok } # what is this returning?? 
    402402 
  • gsdl/trunk/perllib/plugins/ZIPPlugin.pm

    r15880 r16392  
    101101sub read { 
    102102    my $self = shift (@_); 
    103     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     103    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    104104    my $outhandle = $self->{'outhandle'}; 
    105105 
    106     # check process_exp, block_exp, associate_ext etc 
    107     my ($block_status,$filename) = $self->read_block(@_);     
    108     return $block_status if ((!defined $block_status) || ($block_status==0)); 
     106    # can we process this file?? 
     107    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
     108    return undef unless $self->can_process_this_file($filename_full_path); 
    109109     
    110     my ($file_only) = $file =~ /([^\\\/]*)$/; 
    111110    my $tmpdir = &util::get_tmp_filename (); 
    112111    &util::mk_all_dir ($tmpdir); 
    113112     
    114     print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n" 
     113    print $outhandle "ZIPPlugin: extracting $filename_no_path to $tmpdir\n" 
    115114    if $self->{'verbosity'} > 1; 
    116115     
     
    118117    my $cwd = cwd(); 
    119118    chdir ($tmpdir) || die "Unable to change to $tmpdir"; 
    120     &util::cp ($filename, $tmpdir); 
     119    &util::cp ($filename_full_path, $tmpdir); 
    121120     
    122121    if ($file =~ /\.bz$/i) { 
    123     $self->bunzip ($file_only); 
     122    $self->bunzip ($filename_no_path); 
    124123    } elsif ($file =~ /\.bz2$/i) { 
    125     $self->bunzip2 ($file_only); 
     124    $self->bunzip2 ($filename_no_path); 
    126125    } elsif ($file =~ /\.(zip|jar)$/i) { 
    127     $self->unzip ($file_only); 
     126    $self->unzip ($filename_no_path); 
    128127    } elsif ($file =~ /\.tar$/i) { 
    129     $self->untar ($file_only); 
     128    $self->untar ($filename_no_path); 
    130129    } else { 
    131     $self->gunzip ($file_only); 
     130    $self->gunzip ($filename_no_path); 
    132131    } 
    133132     
    134133    chdir ($cwd) || die "Unable to change back to $cwd"; 
    135134     
    136     my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli); 
     135    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); 
    137136    &util::rm_r ($tmpdir); 
    138137