Changeset 23349


Ignore:
Timestamp:
2010-11-26T12:09:53+13:00 (13 years ago)
Author:
davidb
Message:

More careful use of the encoding parameter passed to $self->set_Source_metadata, so that it reflects the *filename* encoding that Greenstone has worked out rather than the encoding of the file's *content*, which of course could be completely different!

Location:
main/trunk/greenstone2/perllib/plugins
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/CONTENTdmPlugin.pm

    r22862 r23349  
    659659   
    660660    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    661     $self->set_Source_metadata($doc_obj, $filemeta);
     661    my $plugin_filename_encoding = $self->{'filename_encoding'};
     662    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     663    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding);
    662664    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    663665    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
  • main/trunk/greenstone2/perllib/plugins/ConvertBinaryFile.pm

    r22887 r23349  
    397397    $doc_obj->set_converted_filename($collect_conv_file);
    398398
    399     $self->set_Source_metadata($doc_obj, $filename_no_path);
     399    my $plugin_filename_encoding = $self->{'filename_encoding'};
     400    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     401    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding);
    400402       
    401403    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
  • main/trunk/greenstone2/perllib/plugins/ConvertToRogPlugin.pm

    r22655 r23349  
    345345
    346346    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    347     $self->set_Source_metadata($doc_obj, $filemeta);
     347    my $plugin_filename_encoding = $self->{'filename_encoding'};
     348    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     349    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding);
    348350   
    349351    if ($self->{'cover_image'}) {
  • main/trunk/greenstone2/perllib/plugins/DatabasePlugin.pm

    r18327 r23349  
    270270        $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding);
    271271    }
    272     $self->set_Source_metadata($doc_obj, $db, $encoding);
     272
     273    my $plugin_filename_encoding = $self->{'filename_encoding'};
     274    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     275    $self->set_Source_metadata($doc_obj, $db, $filename_encoding);
    273276
    274277    if ($self->{'cover_image'}) {
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23347 r23349  
    316316    my $collect_conv_file = &util::filename_within_collection($tidy_filename);
    317317    $doc_obj->set_converted_filename($collect_conv_file);
    318     $self->set_Source_metadata($doc_obj, $filename_no_path);
     318
     319    my $plugin_filename_encoding = $self->{'filename_encoding'};
     320    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     321    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding);
    319322    }
    320323
  • main/trunk/greenstone2/perllib/plugins/ImageConverter.pm

    r23335 r23349  
    220220
    221221    $self->set_Source_metadata($doc_obj,&unicode::url_decode($filename_no_path),
    222                    $filename_encoding);
     222                               $filename_encoding);
    223223
    224224
  • main/trunk/greenstone2/perllib/plugins/MARCXMLPlugin.pm

    r20609 r23349  
    212212
    213213    my $processor = $self->{'processor'};
    214  
     214    my $metadata  = $self->{'metadata'};
     215
    215216    ##create a new document for each record
    216217    if ($element eq "record") {
     
    222223    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
    223224    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
     225
    224226    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    225     $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
     227    my $plugin_filename_encoding = $self->{'filename_encoding'};
     228    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     229    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding);
     230
    226231    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}");
    227232        if ($self->{'cover_image'}) {
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r23212 r23349  
    297297   
    298298    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    299     $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
     299    my $plugin_filename_encoding = $self->{'filename_encoding'};
     300    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     301    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding);
     302
    300303    $doc_obj->add_utf8_metadata($top_section, "Language", $language);
    301304    $doc_obj->add_utf8_metadata($top_section, "Encoding", $encoding);
  • main/trunk/greenstone2/perllib/plugins/OpenDocumentPlugin.pm

    r23248 r23349  
    264264    $doc_obj->add_metadata ("", "srclink_file", $doc_obj->get_sourcefile());
    265265    $doc_obj->add_utf8_metadata ("", "srcicon",  "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">");
    266     $self->set_Source_metadata($doc_obj, $file_only);
     266
     267    my $plugin_filename_encoding = $self->{'filename_encoding'};
     268    my $filename_encoding = $self->deduce_filename_encoding($file_only,$metadata,$plugin_filename_encoding);
     269
     270    $self->set_Source_metadata($doc_obj, $file_only, $filename_encoding);
    267271     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));
    268272     
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r22814 r23349  
    316316
    317317    #process the .item file
    318     $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor);
     318    $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor, $metadata);
    319319   
    320320    }
     
    513513    $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'});
    514514    # TODO is file filenmae_no_path??
    515     $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'});
     515    $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'}, $self->{'metadata'});
    516516
    517517    my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/;
     
    540540sub set_initial_doc_fields {
    541541    my $self = shift(@_);
    542     my ($doc_obj, $filename_no_path, $processor) = @_;
     542    my ($doc_obj, $filename_no_path, $processor, $metadata) = @_;
    543543
    544544    my $topsection = $doc_obj->get_top_section();
     
    552552    }
    553553
    554     $self->set_Source_metadata($doc_obj, $filename_no_path);
     554    my $plugin_filename_encoding = $self->{'filename_encoding'};
     555    my $filename_encoding = $self->deduce_filename_encoding($filename_no_path,$metadata,$plugin_filename_encoding);
     556    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding);
    555557   
    556558    # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible
     
    615617sub process_item {
    616618    my $self = shift (@_);
    617     my ($filename_full_path, $dir, $filename_no_path, $processor) = @_;
     619    my ($filename_full_path, $dir, $filename_no_path, $processor, $metadata) = @_;
    618620
    619621    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
    620     $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor);
     622    $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor, $metadata);
    621623    my $topsection = $doc_obj->get_top_section();
    622624    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n";
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22882 r23349  
    344344    $doc_obj->set_converted_filename(&util::filename_cat($dirname_within_collection, $file));
    345345   
    346     $self->set_Source_metadata($doc_obj, $filename_no_path);
     346    my $plugin_filename_encoding = $self->{'filename_encoding'};
     347    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     348    $self->set_Source_metadata($doc_obj, $filename_no_path,$filename_encoding);
    347349       
    348350    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
  • main/trunk/greenstone2/perllib/plugins/ReadXMLFile.pm

    r20830 r23349  
    213213    $self->{'filename_no_path'} = $filename_no_path;
    214214    $self->{'processor'} = $processor;
     215
    215216    # this contains metadata passed in from running metadata_read with other plugins (eg from MetadataXMLPlugin)
    216217    # we are also using it to store up any metadata found during parsing the XML, so that it can be added to the doc obj.
     
    368369    my $self = shift(@_);
    369370
     371    my $metadata = $self->{'metadata'};
     372
    370373    # create a new document
    371     $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'});
    372     $self->{'doc_obj'}->add_utf8_metadata($self->{'doc_obj'}->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    373     $self->set_Source_metadata($self->{'doc_obj'}, $self->{'filename_no_path'});
    374    
    375 
     374    my $doc_obj = $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'});
     375
     376    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     377
     378    my $filename_no_path = $self->{'filename_no_path'};
     379    my $plugin_filename_encoding = $self->{'filename_encoding'};
     380    my $filename_encoding = $self->deduce_filename_encoding($filename_no_path,$metadata,$plugin_filename_encoding);
     381
     382    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding);
     383   
    376384    # do we want other auto metadata here (see BasePlugin.read_into_doc_obj)
    377385}
  • main/trunk/greenstone2/perllib/plugins/SplitTextFile.pm

    r23212 r23349  
    239239    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
    240240    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
     241
    241242    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    242     $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
     243    my $plugin_filename_encoding = $self->{'filename_encoding'};
     244    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     245    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding);
     246
    243247    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment");
    244248    if ($self->{'cover_image'}) {
Note: See TracChangeset for help on using the changeset viewer.