Changeset 23347

Show
Ignore:
Timestamp:
26.11.2010 09:43:59 (8 years ago)
Author:
davidb
Message:

Tidy up of debugging statements for handling filename encodings, plus finishing off the 'deduce_filename_encoding' routine

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23335 r23347  
    530530    # check if the filename is already in UTF8. If it is, then we're done. 
    531531    if($filename_encoding =~ m/auto/) { 
    532     if(&unicode::check_is_utf8($filemeta))  
    533     { 
    534         $filename_encoding = "utf8"; 
    535         return $filemeta; 
    536     }  
     532        if(&unicode::check_is_utf8($filemeta))  
     533        { 
     534            $filename_encoding = "utf8"; 
     535            return $filemeta; 
     536        }  
    537537    } 
    538538     
     
    540540    if ($filename_encoding eq "auto")  
    541541    { 
    542     # try textcat 
    543     $filename_encoding = $self->textcat_encoding($filemeta); 
     542        # try textcat 
     543        $filename_encoding = $self->textcat_encoding($filemeta); 
    544544     
    545     # check the locale next 
    546     $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 
     545        # check the locale next 
     546        $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 
    547547     
    548  
    549     # now try the encoding of the document, if available 
    550     if ($filename_encoding eq "undefined" && defined $file_encoding) { 
    551         $filename_encoding = $file_encoding; 
    552     } 
     548         
     549        # now try the encoding of the document, if available 
     550        if ($filename_encoding eq "undefined" && defined $file_encoding) { 
     551            $filename_encoding = $file_encoding; 
     552        } 
    553553 
    554554    } 
     
    633633 
    634634    my $outhandle = $self->{'outhandle'}; 
     635 
     636    print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n"; 
     637    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0); 
     638    print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 
     639 
    635640 
    636641    my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end) 
     
    791796{ 
    792797    my $self = shift (@_);   
    793     my ($file,$metadata) = @_; 
     798    my ($file,$metadata,$plugin_filename_encoding) = @_; 
    794799 
    795800    my $gs_filename_encoding = $metadata->{"gs.filename_encoding"}; 
     
    798803    # Start by looking for manually assigned metadata 
    799804    if (defined $gs_filename_encoding) { 
    800     if (ref ($gs_filename_encoding) eq "ARRAY") { 
    801         my $outhandle = $self->{'outhandle'}; 
    802          
    803         $deduced_filename_encoding = $gs_filename_encoding->[0]; 
    804          
    805         my $num_vals = scalar(@$gs_filename_encoding); 
    806         if ($num_vals>1) { 
    807         print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 
    808         print $outhandle "         Selecting first value: $deduced_filename_encoding\n"; 
    809         } 
    810     }  
    811     else { 
    812         $deduced_filename_encoding = $gs_filename_encoding; 
    813     } 
    814     } 
    815      
    816 #   binmode(STDERR,":utf8"); 
    817      
    818 #   print STDERR "**** file = $file\n"; 
    819 #   print STDERR "**** debug file = ", &unicode::debug_unicode_string($file),"\n";; 
    820      
    821 #   print STDERR "******* dfe = $deduced_filename_encoding\n"; 
    822      
     805        if (ref ($gs_filename_encoding) eq "ARRAY") { 
     806            my $outhandle = $self->{'outhandle'}; 
     807             
     808            $deduced_filename_encoding = $gs_filename_encoding->[0]; 
     809             
     810            my $num_vals = scalar(@$gs_filename_encoding); 
     811            if ($num_vals>1) { 
     812                print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 
     813                print $outhandle "         Selecting first value: $deduced_filename_encoding\n"; 
     814            } 
     815        }  
     816        else { 
     817            $deduced_filename_encoding = $gs_filename_encoding; 
     818        } 
     819    } 
     820         
    823821    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 
    824     # Look to see if plugin specifies this value 
     822        # Look to see if plugin specifies this value 
     823 
     824        if (defined $plugin_filename_encoding) { 
     825            # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options) 
     826            if ($plugin_filename_encoding =~ m/^auto-.*$/) { 
     827                my $outhandle = $self->{'outhandle'}; 
     828                print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n"; 
     829                print $outhandle "         default to 'auto'\n"; 
     830                $self->{'filename_encoding'} = $plugin_filename_encoding = "auto"; 
     831            } 
     832             
     833            if ($plugin_filename_encoding ne "auto") { 
     834                # We've been given a specific filenamne encoding 
     835                # => so use it! 
     836                $deduced_filename_encoding = $plugin_filename_encoding; 
     837            } 
     838        } 
    825839    } 
    826840     
    827841    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 
    828     # See if we can determine the file system encoding through locale 
    829     # Unix only ? 
     842        # See if we can determine the file system encoding through locale 
     843        $deduced_filename_encoding = $self->locale_encoding(); 
     844 
     845        # if locale shows us filesystem is utf8, check to see filename is consistent 
     846        # => if not, then we have an "alien" filename on our hands 
     847 
     848        if ($deduced_filename_encoding =~ m/^utf-?8$/i) { 
     849            if (!&unicode::check_is_utf8($file)) { 
     850                # "alien" filename, so revert 
     851                $deduced_filename_encoding = undef; 
     852            } 
     853        } 
     854    } 
     855     
     856     
     857#    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 
     858#       # Last chance, apply textcat to deduce filename encoding 
     859#       $deduced_filename_encoding = $self->textcat_encoding($file); 
     860#    } 
     861 
     862    if ($self->{'verbosity'}>3) { 
     863        my $outhandle = $self->{'outhandle'}; 
     864 
     865        if (defined $deduced_filename_encoding) { 
     866            print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n"; 
     867        } 
     868        else { 
     869            print $outhandle "  No filename encoding deduced\n"; 
     870        } 
     871    } 
    830872     
    831     # if locale shows us filesystem is utf8, check to see filename is consistent 
    832     # => if not, then we have an "alien" filename on our hands 
    833     } 
    834      
    835      
    836     if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 
    837     # Last chance, apply textcat to deduce filename encoding 
    838     } 
    839      
    840873    return $deduced_filename_encoding; 
    841874} 
     
    861894     
    862895    # UTF-8 version of filename 
    863     print STDERR "**** setting Source Metadata given: $raw_file\n"; 
    864  
    865 ##    my $filemeta = $self->filename_to_utf8_metadata($raw_file, $filename_encoding); 
     896    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     897        print STDERR "****** Setting Source Metadata given: $raw_file\n"; 
     898    } 
    866899 
    867900    my $url_encoded_filename; 
    868901    if (defined $filename_encoding) { 
    869     # => Generate a pretty print version of filename that is mapped to Unicode 
    870  
    871     # Use filename_encoding to map raw filename to a Perl unicode-aware string  
    872     $url_encoded_filename = decode($filename_encoding,$raw_file); 
    873  
    874     print STDERR "@@@@ pretty print using $filename_encoding: ", encode("utf8",$url_encoded_filename),"\n"; 
     902        # => Generate a pretty print version of filename that is mapped to Unicode 
     903         
     904        # Use filename_encoding to map raw filename to a Perl unicode-aware string  
     905        $url_encoded_filename = decode($filename_encoding,$raw_file);        
    875906    } 
    876907    else { 
    877     # otherwise generate %xx encoded version of filename for char > 127 
    878     $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 
    879     } 
    880  
    881     print STDERR "***** saving Source as:             $url_encoded_filename\n"; 
     908        # otherwise generate %xx encoded version of filename for char > 127 
     909        $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file); 
     910    } 
     911     
     912    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     913        print STDERR "***** saving Source as:             $url_encoded_filename\n"; 
     914    } 
    882915 
    883916     
     
    893926                    $renamed_raw_url); 
    894927 
    895     print STDERR "***** saving SourceFile as:         $renamed_raw_url\n"; 
     928    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     929        print STDERR "***** saving SourceFile as:         $renamed_raw_url\n"; 
     930    } 
    896931} 
    897932    
     
    953988  
    954989 
    955     my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 
     990    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     991    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
    956992    $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding); 
    957993 
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23335 r23347  
    379379 
    380380    my ($tailname,$dirname) = &File::Basename::fileparse($file); 
    381     print STDERR "***!! file = $file\n"; 
     381 
    382382#    my $utf8_file = $self->filename_to_utf8_metadata($file); 
    383383#    $utf8_file =~ s/&\#095;/_/g; 
    384384    my $utf8_file = &unicode::raw_filename_to_url_encoded($tailname); 
    385     print STDERR "***!! utf8_file = $utf8_file\n"; 
     385 
     386    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     387        print STDERR "***!! file = $file\n"; 
     388        print STDERR "***!! utf8_file = $utf8_file\n"; 
     389    } 
     390 
    386391 
    387392    my $web_url = "http://"; 
     
    758763 
    759764    # If web page didn't give encoding, then default to utf8 
    760     print "*************** looking up $file\n"; 
     765    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     766        print STDERR "*** Web page didn't give encoding, defaulting to UTF8!\n"; 
     767        print STDERR "*****  looking up $file\n"; 
     768    } 
    761769 
    762770    my $content_encoding= $self->{'content_encoding'} || "utf8"; 
     
    767775 
    768776    &ghtml::urlsafe ($href); 
    769     print STDERR "***!!! href=$href\n";     
     777    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     778        print STDERR "***!!! href=$href\n";     
     779    } 
     780 
    770781 
    771782    return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back; 
     
    818829    if (!-e $filename) { 
    819830    # try the original filename stored in map 
    820     print STDERR "***###!! orig filename did not exist: $filename\n"; 
     831    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     832        print STDERR "***###!! orig filename did not exist: $filename\n"; 
     833    } 
    821834 
    822835    my $original_filename = $self->{'utf8_to_original_filename'}->{$utf8_filename}; 
    823836 
    824     print STDERR "**** Trying for $original_filename\n"; 
     837    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     838        print STDERR "**** Trying for $original_filename\n"; 
     839    } 
    825840 
    826841    if (defined $original_filename && -e $original_filename) { 
    827         print STDERR "*** found match\n"; 
     842        if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
     843            print STDERR "*** found match\n"; 
     844        } 
    828845        $filename = $original_filename; 
    829846    } 
     
    11641181    $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'}); 
    11651182    $title =~ s/^\s+//s; # in case title_sub introduced any... 
    1166     print STDERR "**** adding Title: ", Encode::encode("utf8",$title), "\n"; 
    11671183    $doc_obj->add_utf8_metadata ($section, "Title", $title); 
    11681184    print $outhandle " extracted Title metadata \"$title\" from $from\n"  
  • main/trunk/greenstone2/perllib/plugins/ImagePlugin.pm

    r23335 r23347  
    112112    my $outhandle = $self->{'outhandle'}; 
    113113    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    114  
     114     
    115115    if ($self->{'image_conversion_available'} == 1) 
    116116    { 
    117     my $filename_encoding = $self->deduce_filename_encoding($file,$metadata); 
    118  
    119 #   my $utf8_filename_no_path = $self->filepath_to_utf8($filename_no_path); 
    120 #   my $url_encoded_filename = &util::rename_file($utf8_filename_no_path, $self->{'file_rename_method'}); 
    121  
    122 #   $self->generate_images($filename_full_path, $url_encoded_filename,  
    123 #                  $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 
    124  
    125     $filename_no_path = &unicode::raw_filename_to_url_encoded($filename_no_path); 
    126  
    127     # should we check the return value? 
    128     $self->generate_images($filename_full_path, $filename_no_path,  
    129                    $doc_obj, $doc_obj->get_top_section(),$filename_encoding);  
    130  
     117        my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     118        my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding); 
     119         
     120        my $url_encoded_filename = &unicode::raw_filename_to_url_encoded($filename_no_path); 
     121         
     122        # should we check the return value? 
     123        $self->generate_images($filename_full_path, $url_encoded_filename,  
     124                               $doc_obj, $doc_obj->get_top_section(),$filename_encoding);  
     125         
    131126    } 
    132127    else 
    133128    { 
    134     if ($gli) { 
    135         &gsprintf(STDERR, "<Warning p='ImagePlugin' r='{ImageConverter.noconversionavailable}: {ImageConverter.".$self->{'no_image_conversion_reason'}."}'>"); 
    136     } 
    137     # all we do is add the original image as an associated file, and set up srclink etc 
    138     my $assoc_file = $doc_obj->get_assocfile_from_sourcefile(); 
    139     my $section = $doc_obj->get_top_section(); 
    140  
    141     $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section); 
    142  
    143     $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile());  
    144     # We don't know the size of the image, but the browser should display it at full size 
    145     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">"); 
    146  
    147     # Add a fake thumbnail icon with the full-sized image scaled down by the browser 
    148     $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . "\">"); 
     129        if ($gli) { 
     130            &gsprintf(STDERR, "<Warning p='ImagePlugin' r='{ImageConverter.noconversionavailable}: {ImageConverter.".$self->{'no_image_conversion_reason'}."}'>"); 
     131        } 
     132        # all we do is add the original image as an associated file, and set up srclink etc 
     133        my $assoc_file = $doc_obj->get_assocfile_from_sourcefile(); 
     134        my $section = $doc_obj->get_top_section(); 
     135         
     136        $doc_obj->associate_file($filename_full_path, $assoc_file, "", $section); 
     137         
     138        $doc_obj->add_metadata ($section, "srclink_file", $doc_obj->get_sourcefile());  
     139        # We don't know the size of the image, but the browser should display it at full size 
     140        $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\">"); 
     141         
     142        # Add a fake thumbnail icon with the full-sized image scaled down by the browser 
     143        $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[srclink_file]\" width=\"" . $self->{'thumbnailsize'} . "\">"); 
    149144    } 
    150145    #we have no text - adds dummy text and NoText metadata 
    151146    $self->add_dummy_text($doc_obj, $doc_obj->get_top_section()); 
    152  
     147     
    153148    return 1; 
    154  
     149     
    155150} 
    156151