Changeset 38749 for main/trunk


Ignore:
Timestamp:
2024-02-15T15:15:55+13:00 (3 months ago)
Author:
davidb
Message:

Code introduced to set SourceDirectory as a piece of metadata for all plugins. Done in read_into_doc_obj(), and so with the inheritance we have across plugins, this needed to be added into 4-5 of our existing plugins. In doing so, it was noticed that not all of them called the post_process_doc_obj() and/or the newer apply_metadata_mapping_file() subroutine. These were fixed up as part of this coding change, along with improved consistency of declaring the top_section local variable.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BaseImporter.pm

    r38748 r38749  
    760760
    761761    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    762    
     762   
    763763    # create a new document
    764764    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
     
    767767    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    768768    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
    769    
     769
     770    my $file_dirname = &File::Basename::dirname($file);
     771    $doc_obj->add_utf8_metadata($top_section, "SourceDirectory", $file_dirname);
    770772
    771773    my $plugin_filename_encoding = $self->{'filename_encoding'};
     
    798800    }
    799801
     802    # force a new OID - this will use OIDtype option set for this plugin.
    800803    $self->add_OID($doc_obj);
    801804
  • main/trunk/greenstone2/perllib/plugins/ConvertBinaryFile.pm

    r38748 r38749  
    450450    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
    451451    $self->set_Source_metadata($doc_obj, $filename_full_path, $filename_encoding);
    452        
    453     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    454     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
    455 
     452
     453    my $top_section = $doc_obj->get_top_section();
     454   
     455    $doc_obj->set_utf8_metadata_element($top_section, "Plugin", "$self->{'plugin_type'}");
     456    $doc_obj->set_utf8_metadata_element($top_section, "FileSize", (-s $filename_full_path));
     457
     458    my $file_dirname = &File::Basename::dirname($file);
     459    $doc_obj->add_utf8_metadata($top_section, "SourceDirectory", $file_dirname);
     460   
    456461    # ****
    457462    my ($tailname, $dirname, $suffix)
    458463    = &File::Basename::fileparse($filename_full_path, "\\.[^\\.]+\$");
    459     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FilenameRoot", $tailname);
     464    $doc_obj->set_utf8_metadata_element($top_section, "FilenameRoot", $tailname);
    460465
    461466    # do plugin specific processing of doc_obj
     
    465470    }
    466471
    467     my $topsection = $doc_obj->get_top_section();
    468472    $self->add_associated_files($doc_obj, $filename_full_path);
    469473
    470474    # extra_metadata is already called by sec plugin in process??
    471     $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here??
     475    $self->extra_metadata($doc_obj, $top_section, $metadata); # do we need this here??
    472476    # do any automatic metadata extraction
    473477    $self->auto_extract_metadata ($doc_obj);
    474478
    475479    # have we found a Title??
    476     $self->title_fallback($doc_obj,$topsection,$filename_no_path);
     480    $self->title_fallback($doc_obj,$top_section,$filename_no_path);
    477481
    478482    if ($self->{'metadata_mapping_rules'}) {
  • main/trunk/greenstone2/perllib/plugins/HathiTrustMETSPlugin.pm

    r32783 r38749  
    245245    $doc_obj->add_metadata($section, "FileFormat", "HathiTrustMETS");
    246246
     247    my $file_dirname = &File::Basename::dirname($file);
     248    $doc_obj->add_utf8_metadata($top_section, "SourceDirectory", $file_dirname);
     249
    247250    # include any metadata passed in from previous plugins
    248251    # note that this metadata is associated with the top level section
     
    254257    $self->title_fallback($doc_obj,$section,$filename_no_path);
    255258
     259    if ($self->{'metadata_mapping_rules'}) {
     260    $self->apply_metadata_mapping_file($doc_obj);
     261    }
     262   
    256263    $self->add_OID($doc_obj);
     264
     265    $self->post_process_doc_obj($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli);
    257266    return (1,$doc_obj);
    258267}
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r38748 r38749  
    355355    }
    356356
    357     my $section = $doc_obj->get_top_section();
     357    my $top_section = $doc_obj->get_top_section();
    358358       
    359     $doc_obj->add_utf8_metadata($section, "Plugin", "$self->{'plugin_type'}");
    360     $doc_obj->add_metadata($section, "FileFormat", "PagedImage");
     359    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
     360    $doc_obj->add_metadata($top_section, "FileFormat", "PagedImage");
     361
     362    my $dirname = &File::Basename::dirname($file);
     363    $doc_obj->add_utf8_metadata($top_section, "SourceDirectory", $dirname);
    361364
    362365    # include any metadata passed in from previous plugins
    363366    # note that this metadata is associated with the top level section
    364367    $self->add_associated_files($doc_obj, $filename_full_path);
    365     $self->extra_metadata ($doc_obj, $section, $metadata);
     368    $self->extra_metadata ($doc_obj, $top_section, $metadata);
    366369    $self->auto_extract_metadata ($doc_obj);
    367370    $self->plugin_specific_process($base_dir, $file, $doc_obj, $gli);
    368371    # if we haven't found any Title so far, assign one
    369     $self->title_fallback($doc_obj,$section,$filename_no_path);
     372    $self->title_fallback($doc_obj,$top_section,$filename_no_path);
    370373
    371374    if ($self->{'metadata_mapping_rules'}) {
  • main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm

    r32500 r38749  
    142142    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
    143143
     144    my $dirname = &File::Basename::dirname($file);
     145    $doc_obj->add_utf8_metadata($top_section, "SourceDirectory", $dirname);
     146   
    144147    my $plugin_filename_encoding = $self->{'filename_encoding'};
    145148    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     
    189192    $self->title_fallback($doc_obj,$top_section,$filename_no_path);
    190193
     194    if ($self->{'metadata_mapping_rules'}) {
     195    $self->apply_metadata_mapping_file($doc_obj);
     196    }
     197
    191198    $self->add_OID($doc_obj);
    192    
     199
     200    $self->post_process_doc_obj($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli);
    193201    return (1,$doc_obj);
    194202}
Note: See TracChangeset for help on using the changeset viewer.