Changeset 23349

Show
Ignore:
Timestamp:
26.11.2010 12:09:53 (9 years ago)
Author:
davidb
Message:

More careful use of encoding parameter to $self->set_Source_metadata so it reflects the *filename* encoding that Greenstone has worked out, not the encoding for the *content* of the file, which of course could be completely different!

Location:
main/trunk/greenstone2/perllib/plugins
Files:
13 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/CONTENTdmPlugin.pm

    r22862 r23349  
    659659     
    660660    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    661     $self->set_Source_metadata($doc_obj, $filemeta); 
     661    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     662    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     663    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding); 
    662664    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    663665    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 
  • main/trunk/greenstone2/perllib/plugins/ConvertBinaryFile.pm

    r22887 r23349  
    397397    $doc_obj->set_converted_filename($collect_conv_file); 
    398398 
    399     $self->set_Source_metadata($doc_obj, $filename_no_path); 
     399    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     400    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     401    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding); 
    400402         
    401403    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
  • main/trunk/greenstone2/perllib/plugins/ConvertToRogPlugin.pm

    r22655 r23349  
    345345 
    346346    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    347     $self->set_Source_metadata($doc_obj, $filemeta); 
     347    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     348    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     349    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding); 
    348350     
    349351    if ($self->{'cover_image'}) { 
  • main/trunk/greenstone2/perllib/plugins/DatabasePlugin.pm

    r18327 r23349  
    270270        $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding); 
    271271    } 
    272     $self->set_Source_metadata($doc_obj, $db, $encoding); 
     272 
     273    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     274    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     275    $self->set_Source_metadata($doc_obj, $db, $filename_encoding); 
    273276 
    274277    if ($self->{'cover_image'}) { 
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23347 r23349  
    316316    my $collect_conv_file = &util::filename_within_collection($tidy_filename); 
    317317    $doc_obj->set_converted_filename($collect_conv_file); 
    318     $self->set_Source_metadata($doc_obj, $filename_no_path); 
     318 
     319    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     320    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     321    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding); 
    319322    } 
    320323 
  • main/trunk/greenstone2/perllib/plugins/ImageConverter.pm

    r23335 r23349  
    220220 
    221221    $self->set_Source_metadata($doc_obj,&unicode::url_decode($filename_no_path), 
    222                    $filename_encoding); 
     222                               $filename_encoding); 
    223223 
    224224 
  • main/trunk/greenstone2/perllib/plugins/MARCXMLPlugin.pm

    r20609 r23349  
    212212 
    213213    my $processor = $self->{'processor'}; 
    214    
     214    my $metadata  = $self->{'metadata'}; 
     215 
    215216    ##create a new document for each record  
    216217    if ($element eq "record") { 
     
    222223    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); 
    223224    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 
     225 
    224226    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    225     $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 
     227    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     228    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     229    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding); 
     230 
    226231    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}"); 
    227232        if ($self->{'cover_image'}) { 
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r23212 r23349  
    297297     
    298298    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    299     $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 
     299    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     300    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     301    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding); 
     302 
    300303    $doc_obj->add_utf8_metadata($top_section, "Language", $language); 
    301304    $doc_obj->add_utf8_metadata($top_section, "Encoding", $encoding); 
  • main/trunk/greenstone2/perllib/plugins/OpenDocumentPlugin.pm

    r23248 r23349  
    264264    $doc_obj->add_metadata ("", "srclink_file", $doc_obj->get_sourcefile()); 
    265265    $doc_obj->add_utf8_metadata ("", "srcicon",  "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">");  
    266     $self->set_Source_metadata($doc_obj, $file_only); 
     266 
     267    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     268    my $filename_encoding = $self->deduce_filename_encoding($file_only,$metadata,$plugin_filename_encoding); 
     269 
     270    $self->set_Source_metadata($doc_obj, $file_only, $filename_encoding); 
    267271     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename)); 
    268272      
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r22814 r23349  
    316316 
    317317    #process the .item file 
    318     $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor); 
     318    $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor, $metadata); 
    319319     
    320320    } 
     
    513513    $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'}); 
    514514    # TODO is file filename_no_path?? 
    515     $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'}); 
     515    $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'}, $self->{'metadata'}); 
    516516 
    517517    my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/; 
     
    540540sub set_initial_doc_fields { 
    541541    my $self = shift(@_); 
    542     my ($doc_obj, $filename_no_path, $processor) = @_; 
     542    my ($doc_obj, $filename_no_path, $processor, $metadata) = @_; 
    543543 
    544544    my $topsection = $doc_obj->get_top_section(); 
     
    552552    } 
    553553 
    554     $self->set_Source_metadata($doc_obj, $filename_no_path); 
     554    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     555    my $filename_encoding = $self->deduce_filename_encoding($filename_no_path,$metadata,$plugin_filename_encoding); 
     556    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding); 
    555557    
    556558    # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible 
     
    615617sub process_item { 
    616618    my $self = shift (@_); 
    617     my ($filename_full_path, $dir, $filename_no_path, $processor) = @_; 
     619    my ($filename_full_path, $dir, $filename_no_path, $processor, $metadata) = @_; 
    618620 
    619621    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'}); 
    620     $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor); 
     622    $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor, $metadata); 
    621623    my $topsection = $doc_obj->get_top_section(); 
    622624    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n"; 
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22882 r23349  
    344344    $doc_obj->set_converted_filename(&util::filename_cat($dirname_within_collection, $file)); 
    345345     
    346     $self->set_Source_metadata($doc_obj, $filename_no_path); 
     346    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     347    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     348    $self->set_Source_metadata($doc_obj, $filename_no_path,$filename_encoding); 
    347349         
    348350    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
  • main/trunk/greenstone2/perllib/plugins/ReadXMLFile.pm

    r20830 r23349  
    213213    $self->{'filename_no_path'} = $filename_no_path; 
    214214    $self->{'processor'} = $processor; 
     215 
    215216    # this contains metadata passed in from running metadata_read with other plugins (eg from MetadataXMLPlugin) 
    216217    # we are also using it to store up any metadata found during parsing the XML, so that it can be added to the doc obj. 
     
    368369    my $self = shift(@_); 
    369370 
     371    my $metadata = $self->{'metadata'}; 
     372 
    370373    # create a new document 
    371     $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'}); 
    372     $self->{'doc_obj'}->add_utf8_metadata($self->{'doc_obj'}->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    373     $self->set_Source_metadata($self->{'doc_obj'}, $self->{'filename_no_path'}); 
    374      
    375  
     374    my $doc_obj = $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc", $self->{'file_rename_method'}); 
     375 
     376    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
     377 
     378    my $filename_no_path = $self->{'filename_no_path'}; 
     379    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     380    my $filename_encoding = $self->deduce_filename_encoding($filename_no_path,$metadata,$plugin_filename_encoding); 
     381 
     382    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding); 
     383     
    376384    # do we want other auto metadata here (see BasePlugin.read_into_doc_obj) 
    377385} 
  • main/trunk/greenstone2/perllib/plugins/SplitTextFile.pm

    r23212 r23349  
    239239    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language); 
    240240    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 
     241 
    241242    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    242     $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 
     243    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     244    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     245    $self->set_Source_metadata($doc_obj, $filemeta, $filename_encoding); 
     246 
    243247    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment"); 
    244248    if ($self->{'cover_image'}) {