Changeset 23347


Ignore:
Timestamp:
2010-11-26T09:43:59+13:00 (13 years ago)
Author:
davidb
Message:

Tidy up of debugging statements for handling filename encodings, plus finishing off the 'deduce_filename_encoding' routine

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23335 r23347  
    530530    # check if the filename is already in UTF8. If it is, then we're done.
    531531    if($filename_encoding =~ m/auto/) {
    532     if(&unicode::check_is_utf8($filemeta))
    533     {
    534         $filename_encoding = "utf8";
    535         return $filemeta;
    536     }
     532        if(&unicode::check_is_utf8($filemeta))
     533        {
     534            $filename_encoding = "utf8";
     535            return $filemeta;
     536        }
    537537    }
    538538   
     
    540540    if ($filename_encoding eq "auto")
    541541    {
    542     # try textcat
    543     $filename_encoding = $self->textcat_encoding($filemeta);
     542        # try textcat
     543        $filename_encoding = $self->textcat_encoding($filemeta);
    544544   
    545     # check the locale next
    546     $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
     545        # check the locale next
     546        $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
    547547   
    548 
    549     # now try the encoding of the document, if available
    550     if ($filename_encoding eq "undefined" && defined $file_encoding) {
    551         $filename_encoding = $file_encoding;
    552     }
     548       
     549        # now try the encoding of the document, if available
     550        if ($filename_encoding eq "undefined" && defined $file_encoding) {
     551            $filename_encoding = $file_encoding;
     552        }
    553553
    554554    }
     
    633633
    634634    my $outhandle = $self->{'outhandle'};
     635
     636    print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n";
     637    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
     638    print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n";
     639
    635640
    636641    my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end)
     
    791796{
    792797    my $self = shift (@_); 
    793     my ($file,$metadata) = @_;
     798    my ($file,$metadata,$plugin_filename_encoding) = @_;
    794799
    795800    my $gs_filename_encoding = $metadata->{"gs.filename_encoding"};
     
    798803    # Start by looking for manually assigned metadata
    799804    if (defined $gs_filename_encoding) {
    800     if (ref ($gs_filename_encoding) eq "ARRAY") {
    801         my $outhandle = $self->{'outhandle'};
    802        
    803         $deduced_filename_encoding = $gs_filename_encoding->[0];
    804        
    805         my $num_vals = scalar(@$gs_filename_encoding);
    806         if ($num_vals>1) {
    807         print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
    808         print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
    809         }
    810     }
    811     else {
    812         $deduced_filename_encoding = $gs_filename_encoding;
    813     }
    814     }
    815    
    816 #   binmode(STDERR,":utf8");
    817    
    818 #   print STDERR "**** file = $file\n";
    819 #   print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";;
    820    
    821 #   print STDERR "******* dfe = $deduced_filename_encoding\n";
    822    
     805        if (ref ($gs_filename_encoding) eq "ARRAY") {
     806            my $outhandle = $self->{'outhandle'};
     807           
     808            $deduced_filename_encoding = $gs_filename_encoding->[0];
     809           
     810            my $num_vals = scalar(@$gs_filename_encoding);
     811            if ($num_vals>1) {
     812                print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
     813                print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
     814            }
     815        }
     816        else {
     817            $deduced_filename_encoding = $gs_filename_encoding;
     818        }
     819    }
     820       
    823821    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    824     # Look to see if plugin specifies this value
     822        # Look to see if plugin specifies this value
     823
     824        if (defined $plugin_filename_encoding) {
     825            # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options)
     826            if ($plugin_filename_encoding =~ m/^auto-.*$/) {
     827                my $outhandle = $self->{'outhandle'};
     828                print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n";
     829                print $outhandle "         default to 'auto'\n";
     830                $self->{'filename_encoding'} = $plugin_filename_encoding = "auto";
     831            }
     832           
     833            if ($plugin_filename_encoding ne "auto") {
     834                # We've been given a specific filenamne encoding
     835                # => so use it!
     836                $deduced_filename_encoding = $plugin_filename_encoding;
     837            }
     838        }
    825839    }
    826840   
    827841    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    828     # See if we can determine the file system encoding through locale
    829     # Unix only ?
     842        # See if we can determine the file system encoding through locale
     843        $deduced_filename_encoding = $self->locale_encoding();
     844
     845        # if locale shows us filesystem is utf8, check to see filename is consistent
     846        # => if not, then we have an "alien" filename on our hands
     847
     848        if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
     849            if (!&unicode::check_is_utf8($file)) {
     850                # "alien" filename, so revert
     851                $deduced_filename_encoding = undef;
     852            }
     853        }
     854    }
     855   
     856   
     857#    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
     858#       # Last chance, apply textcat to deduce filename encoding
     859#       $deduced_filename_encoding = $self->textcat_encoding($file);
     860#    }
     861
     862    if ($self->{'verbosity'}>3) {
     863        my $outhandle = $self->{'outhandle'};
     864
     865        if (defined $deduced_filename_encoding) {
     866            print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n";
     867        }
     868        else {
     869            print $outhandle "  No filename encoding deduced\n";
     870        }
     871    }
    830872   
    831     # if locale shows us filesystem is utf8, check to see filename is consistent
    832     # => if not, then we have an "alien" filename on our hands
    833     }
    834    
    835    
    836     if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    837     # Last chance, apply textcat to deduce filename encoding
    838     }
    839    
    840873    return $deduced_filename_encoding;
    841874}
     
    861894   
    862895    # UTF-8 version of filename
    863     print STDERR "**** setting Source Metadata given: $raw_file\n";
    864 
    865 ##    my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding);
     896    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     897        print STDERR "****** Setting Source Metadata given: $raw_file\n";
     898    }
    866899
    867900    my $url_encoded_filename;
    868901    if (defined $filename_encoding) {
    869     # => Generate a pretty print version of filename that is mapped to Unicode
    870 
    871     # Use filename_encoding to map raw filename to a Perl unicode-aware string
    872     $url_encoded_filename = decode($filename_encoding,$raw_file);
    873 
    874     print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n";
     902        # => Generate a pretty print version of filename that is mapped to Unicode
     903       
     904        # Use filename_encoding to map raw filename to a Perl unicode-aware string
     905        $url_encoded_filename = decode($filename_encoding,$raw_file);       
    875906    }
    876907    else {
    877     # otherwise generate %xx encoded version of filename for char > 127
    878     $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
    879     }
    880 
    881     print STDERR "***** saving Source as:             $url_encoded_filename\n";
     908        # otherwise generate %xx encoded version of filename for char > 127
     909        $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
     910    }
     911   
     912    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     913        print STDERR "***** saving Source as:             $url_encoded_filename\n";
     914    }
    882915
    883916   
     
    893926                    $renamed_raw_url);
    894927
    895     print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
     928    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     929        print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
     930    }
    896931}
    897932   
     
    953988 
    954989
    955     my $filename_encoding = $self->deduce_filename_encoding($file,$metadata);
     990    my $plugin_filename_encoding = $self->{'filename_encoding'};
     991    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
    956992    $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding);
    957993
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23335 r23347  
    379379
    380380    my ($tailname,$dirname) = &File::Basename::fileparse($file);
    381     print STDERR "***!! file = $file\n";
     381
    382382#    my $utf8_file = $self->filename_to_utf8_metadata($file);
    383383#    $utf8_file =~ s/&\#095;/_/g;
    384384    my $utf8_file = &unicode::raw_filename_to_url_encoded($tailname);
    385     print STDERR "***!! utf8_file = $utf8_file\n";
     385
     386    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     387        print STDERR "***!! file = $file\n";
     388        print STDERR "***!! utf8_file = $utf8_file\n";
     389    }
     390
    386391
    387392    my $web_url = "http://";
     
    758763
    759764    # If web page didn't give encoding, then default to utf8
    760     print "*************** looking up $file\n";
     765    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     766        print STDERR "*** Web page didn't give encoding, defaulting to UTF8!\n";
     767        print STDERR "*****  looking up $file\n";
     768    }
    761769
    762770    my $content_encoding= $self->{'content_encoding'} || "utf8";
     
    767775
    768776    &ghtml::urlsafe ($href);
    769     print STDERR "***!!! href=$href\n";   
     777    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     778        print STDERR "***!!! href=$href\n";   
     779    }
     780
    770781
    771782    return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back;
     
    818829    if (!-e $filename) {
    819830    # try the original filename stored in map
    820     print STDERR "***###!! orig filename did not exist: $filename\n";
     831    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     832        print STDERR "***###!! orig filename did not exist: $filename\n";
     833    }
    821834
    822835    my $original_filename = $self->{'utf8_to_original_filename'}->{$utf8_filename};
    823836
    824     print STDERR "**** Trying for $original_filename\n";
     837    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     838        print STDERR "**** Trying for $original_filename\n";
     839    }
    825840
    826841    if (defined $original_filename && -e $original_filename) {
    827         print STDERR "*** found match\n";
     842        if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     843            print STDERR "*** found match\n";
     844        }
    828845        $filename = $original_filename;
    829846    }
     
    11641181    $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'});
    11651182    $title =~ s/^\s+//s; # in case title_sub introduced any...
    1166     print STDERR "**** adding Title: ", Encode::encode("utf8",$title), "\n";
    11671183    $doc_obj->add_utf8_metadata ($section, "Title", $title);
    11681184    print $outhandle " extracted Title metadata \"$title\" from $from\n"
  • main/trunk/greenstone2/perllib/plugins/ImagePlugin.pm

    r23335 r23347  
    112112    my $outhandle = $self->{'outhandle'};
    113113    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    114 
     114   
    115115    if ($self->{'image_conversion_available'} == 1)
    116116    {
    117     my $filename_encoding = $self->deduce_filename_encoding($file,$metadata);
    118 
    119 #   my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path);
    120 #   my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'});
    121 
    122 #   $self->generate_images($filename_full_path, $url_encoded_filename,
    123 #                  $doc_obj, $doc_obj->get_top_section()); # should we check the return value?
    124 
    125     $filename_no_path = &unicode::raw_filename_to_url_encoded($filename_no_path);
    126 
    127     # should we check the return value?
    128     $self->generate_images($filename_full_path, $filename_no_path,
    129                    $doc_obj, $doc_obj->get_top_section(),$filename_encoding);
    130 
     117        my $plugin_filename_encoding = $self->{'filename_encoding'};
     118        my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
     119       
     120        my $url_encoded_filename = &unicode::raw_filename_to_url_encoded($filename_no_path);
     121       
     122        # should we check the return value?
     123        $self->generate_images($filename_full_path, $url_encoded_filename,
     124                               $doc_obj, $doc_obj->get_top_section(),$filename_encoding);
     125       
    131126    }
    132127    else
    133128    {
    134     if ($gli) {
    135         &gsprintf(STDERR, "<Warning p='ImagePlugin' r='{ImageConverter.noconversionavailable}: {ImageConverter.".$self->{'no_image_conversion_reason'}."}'>");
    136     }
    137     # all we do is add the original image as an associated file, and set up srclink etc
    138     my $assoc_file = $doc_obj->get_assocfile_from_sourcefile();
    139     my $section = $doc_obj->get_top_section();
    140 
    141     $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section);
    142 
    143     $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile());
    144     # We don't know the size of the image, but the browser should display it at full size
    145     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">");
    146 
    147     # Add a fake thumbnail icon with the full-sized image scaled down by the browser
    148     $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . "\">");
     129        if ($gli) {
     130            &gsprintf(STDERR, "<Warning p='ImagePlugin' r='{ImageConverter.noconversionavailable}: {ImageConverter.".$self->{'no_image_conversion_reason'}."}'>");
     131        }
     132        # all we do is add the original image as an associated file, and set up srclink etc
     133        my $assoc_file = $doc_obj->get_assocfile_from_sourcefile();
     134        my $section = $doc_obj->get_top_section();
     135       
     136        $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section);
     137       
     138        $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile());
     139        # We don't know the size of the image, but the browser should display it at full size
     140        $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">");
     141       
     142        # Add a fake thumbnail icon with the full-sized image scaled down by the browser
     143        $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . "\">");
    149144    }
    150145    #we have no text - adds dummy text and NoText metadata
    151146    $self->add_dummy_text($doc_obj, $doc_obj->get_top_section());
    152 
     147   
    153148    return 1;
    154 
     149   
    155150}
    156151
Note: See TracChangeset for help on using the changeset viewer.