Ignore:
Timestamp:
2010-11-26T09:43:59+13:00 (13 years ago)
Author:
davidb
Message:

Tidy up of debugging statements for handling filename encodings, plus finishing off the 'deduce_filename_encoding' routine

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23335 r23347  
    530530    # check if the filename is already in UTF8. If it is, then we're done.
    531531    if($filename_encoding =~ m/auto/) {
    532     if(&unicode::check_is_utf8($filemeta))
    533     {
    534         $filename_encoding = "utf8";
    535         return $filemeta;
    536     }
     532        if(&unicode::check_is_utf8($filemeta))
     533        {
     534            $filename_encoding = "utf8";
     535            return $filemeta;
     536        }
    537537    }
    538538   
     
    540540    if ($filename_encoding eq "auto")
    541541    {
    542     # try textcat
    543     $filename_encoding = $self->textcat_encoding($filemeta);
     542        # try textcat
     543        $filename_encoding = $self->textcat_encoding($filemeta);
    544544   
    545     # check the locale next
    546     $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
     545        # check the locale next
     546        $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
    547547   
    548 
    549     # now try the encoding of the document, if available
    550     if ($filename_encoding eq "undefined" && defined $file_encoding) {
    551         $filename_encoding = $file_encoding;
    552     }
     548       
     549        # now try the encoding of the document, if available
     550        if ($filename_encoding eq "undefined" && defined $file_encoding) {
     551            $filename_encoding = $file_encoding;
     552        }
    553553
    554554    }
     
    633633
    634634    my $outhandle = $self->{'outhandle'};
     635
     636    print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n";
     637    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
     638    print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n";
     639
    635640
    636641    my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end)
     
    791796{
    792797    my $self = shift (@_); 
    793     my ($file,$metadata) = @_;
     798    my ($file,$metadata,$plugin_filename_encoding) = @_;
    794799
    795800    my $gs_filename_encoding = $metadata->{"gs.filename_encoding"};
     
    798803    # Start by looking for manually assigned metadata
    799804    if (defined $gs_filename_encoding) {
    800     if (ref ($gs_filename_encoding) eq "ARRAY") {
    801         my $outhandle = $self->{'outhandle'};
    802        
    803         $deduced_filename_encoding = $gs_filename_encoding->[0];
    804        
    805         my $num_vals = scalar(@$gs_filename_encoding);
    806         if ($num_vals>1) {
    807         print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
    808         print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
    809         }
    810     }
    811     else {
    812         $deduced_filename_encoding = $gs_filename_encoding;
    813     }
    814     }
    815    
    816 #   binmode(STDERR,":utf8");
    817    
    818 #   print STDERR "**** file = $file\n";
    819 #   print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";;
    820    
    821 #   print STDERR "******* dfe = $deduced_filename_encoding\n";
    822    
     805        if (ref ($gs_filename_encoding) eq "ARRAY") {
     806            my $outhandle = $self->{'outhandle'};
     807           
     808            $deduced_filename_encoding = $gs_filename_encoding->[0];
     809           
     810            my $num_vals = scalar(@$gs_filename_encoding);
     811            if ($num_vals>1) {
     812                print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
     813                print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
     814            }
     815        }
     816        else {
     817            $deduced_filename_encoding = $gs_filename_encoding;
     818        }
     819    }
     820       
    823821    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    824     # Look to see if plugin specifies this value
     822        # Look to see if plugin specifies this value
     823
     824        if (defined $plugin_filename_encoding) {
     825            # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options)
     826            if ($plugin_filename_encoding =~ m/^auto-.*$/) {
     827                my $outhandle = $self->{'outhandle'};
     828                print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n";
     829                print $outhandle "         default to 'auto'\n";
     830                $self->{'filename_encoding'} = $plugin_filename_encoding = "auto";
     831            }
     832           
     833            if ($plugin_filename_encoding ne "auto") {
     834                # We've been given a specific filename encoding
     835                # => so use it!
     836                $deduced_filename_encoding = $plugin_filename_encoding;
     837            }
     838        }
    825839    }
    826840   
    827841    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    828     # See if we can determine the file system encoding through locale
    829     # Unix only ?
     842        # See if we can determine the file system encoding through locale
     843        $deduced_filename_encoding = $self->locale_encoding();
     844
     845        # if locale shows us filesystem is utf8, check to see filename is consistent
     846        # => if not, then we have an "alien" filename on our hands
     847
     848        if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
     849            if (!&unicode::check_is_utf8($file)) {
     850                # "alien" filename, so revert
     851                $deduced_filename_encoding = undef;
     852            }
     853        }
     854    }
     855   
     856   
     857#    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
     858#       # Last chance, apply textcat to deduce filename encoding
     859#       $deduced_filename_encoding = $self->textcat_encoding($file);
     860#    }
     861
     862    if ($self->{'verbosity'}>3) {
     863        my $outhandle = $self->{'outhandle'};
     864
     865        if (defined $deduced_filename_encoding) {
     866            print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n";
     867        }
     868        else {
     869            print $outhandle "  No filename encoding deduced\n";
     870        }
     871    }
    830872   
    831     # if locale shows us filesystem is utf8, check to see filename is consistent
    832     # => if not, then we have an "alien" filename on our hands
    833     }
    834    
    835    
    836     if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    837     # Last chance, apply textcat to deduce filename encoding
    838     }
    839    
    840873    return $deduced_filename_encoding;
    841874}
     
    861894   
    862895    # UTF-8 version of filename
    863     print STDERR "**** setting Source Metadata given: $raw_file\n";
    864 
    865 ##    my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding);
     896    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     897        print STDERR "****** Setting Source Metadata given: $raw_file\n";
     898    }
    866899
    867900    my $url_encoded_filename;
    868901    if (defined $filename_encoding) {
    869     # => Generate a pretty print version of filename that is mapped to Unicode
    870 
    871     # Use filename_encoding to map raw filename to a Perl unicode-aware string
    872     $url_encoded_filename = decode($filename_encoding,$raw_file);
    873 
    874     print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n";
     902        # => Generate a pretty print version of filename that is mapped to Unicode
     903       
     904        # Use filename_encoding to map raw filename to a Perl unicode-aware string
     905        $url_encoded_filename = decode($filename_encoding,$raw_file);       
    875906    }
    876907    else {
    877     # otherwise generate %xx encoded version of filename for char > 127
    878     $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
    879     }
    880 
    881     print STDERR "***** saving Source as:             $url_encoded_filename\n";
     908        # otherwise generate %xx encoded version of filename for char > 127
     909        $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
     910    }
     911   
     912    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     913        print STDERR "***** saving Source as:             $url_encoded_filename\n";
     914    }
    882915
    883916   
     
    893926                    $renamed_raw_url);
    894927
    895     print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
     928    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     929        print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
     930    }
    896931}
    897932   
     
    953988 
    954989
    955     my $filename_encoding = $self->deduce_filename_encoding($file,$metadata);
     990    my $plugin_filename_encoding = $self->{'filename_encoding'};
     991    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
    956992    $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding);
    957993
Note: See TracChangeset for help on using the changeset viewer.