Changeset 23457


Ignore:
Timestamp:
2010-12-13T14:22:45+13:00 (13 years ago)
Author:
kjdon
Message:

Reindented the file in Emacs.

File:
1 edited

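Legend:

Unmodified (line shows both the old and new line numbers run together, e.g. "6363" = old line 63 / new line 63)
Added (line shows only the new, r23457, line number)
Removed (line shows only the old, r23419, line number)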
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23419 r23457  
    6363      { 'name' => "unicode",
    6464    'desc' => "{BasePlugin.encoding.unicode}" } ];
    65      
     65
    6666
    6767my $e = $encodings::encodings;
     
    7878    [ { 'name' => "auto",
    7979    'desc' => "{BasePlugin.filename_encoding.auto}" },
    80        { 'name' => "auto-language-analysis",
     80      { 'name' => "auto-language-analysis",
    8181    'desc' => "{BasePlugin.filename_encoding.auto_language_analysis}" }, # textcat
    8282      { 'name' => "auto-filesystem-encoding",
     
    166166    'list' => $file_rename_method_list,
    167167    'reqd' => "no"
    168       }
     168    }
    169169     
    170170      ];
     
    384384sub block_filename
    385385{
    386     my $self = shift(@_);
    387     my ($block_hash,$filename) = @_;
     386    my $self = shift(@_);
     387    my ($block_hash,$filename) = @_;
     388   
     389    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
    388390   
    389     if ($ENV{'GSDLOS'} =~ m/^windows$/) {
    390        
    391         my $lower_drive = $filename;
    392         $lower_drive =~ s/^([A-Z]):/\l$1:/i;
    393        
    394         my $upper_drive = $filename;
    395         $upper_drive =~ s/^([A-Z]):/\u$1:/i;
    396        
    397         $block_hash->{'file_blocks'}->{$lower_drive} = 1;
    398         $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
    399     }
    400     else {
    401         $block_hash->{'file_blocks'}->{$filename} = 1;
    402     }
     391    my $lower_drive = $filename;
     392    $lower_drive =~ s/^([A-Z]):/\l$1:/i;
     393   
     394    my $upper_drive = $filename;
     395    $upper_drive =~ s/^([A-Z]):/\u$1:/i;
     396   
     397    $block_hash->{'file_blocks'}->{$lower_drive} = 1;
     398    $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
     399    }
     400    else {
     401    $block_hash->{'file_blocks'}->{$filename} = 1;
     402    }
    403403}
    404404
     
    516516    return undef; # can't recognise
    517517    }
    518    
     518    
    519519    # if we have a block_exp, then this overrides the normal 'smart' blocking
    520520    $self->store_block_files($filename_full_path, $block_hash) unless ($self->{'no_blocking'} || $self->{'block_exp'} ne "");
     
    524524    $self->block_cover_image($filename_full_path, $block_hash);
    525525    }
    526        
     526   
    527527    return 1;
    528528}
     
    555555    # check if the filename is already in UTF8. If it is, then we're done.
    556556    if($filename_encoding =~ m/auto/) {
    557         if(&unicode::check_is_utf8($filemeta))
    558         {
    559             $filename_encoding = "utf8";
    560             return $filemeta;
    561         }
     557    if(&unicode::check_is_utf8($filemeta))
     558    {
     559        $filename_encoding = "utf8";
     560        return $filemeta;
     561    }
    562562    }
    563563   
     
    565565    if ($filename_encoding eq "auto")
    566566    {
    567         # try textcat
    568         $filename_encoding = $self->textcat_encoding($filemeta);
     567    # try textcat
     568    $filename_encoding = $self->textcat_encoding($filemeta);
    569569   
    570         # check the locale next
    571         $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
     570    # check the locale next
     571    $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
    572572   
    573        
    574         # now try the encoding of the document, if available
    575         if ($filename_encoding eq "undefined" && defined $file_encoding) {
    576             $filename_encoding = $file_encoding;
    577         }
     573   
     574    # now try the encoding of the document, if available
     575    if ($filename_encoding eq "undefined" && defined $file_encoding) {
     576        $filename_encoding = $file_encoding;
     577    }
    578578
    579579    }
     
    602602    # try textcat
    603603    $filename_encoding = $self->textcat_encoding($filemeta) if $filename_encoding eq "undefined";
    604        
     604   
    605605    # else assume filename encoding is encoding of file content, if that's available
    606606    if ($filename_encoding eq "undefined" && defined $file_encoding) {
     
    608608    }
    609609    }
    610    
     610   
    611611    elsif ($filename_encoding eq "auto-lf")
    612612    {
     
    622622    $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
    623623    }
    624        
     624   
    625625    # if still undefined, use utf8 as fallback
    626626    if ($filename_encoding eq "undefined") {
     
    642642    if ($filename_encoding !~ m/(?:ascii|utf8|unicode)/) {
    643643    $filemeta = &unicode::unicode2utf8(
    644       &unicode::convert2unicode($filename_encoding, \$filemeta)
    645     );
     644                       &unicode::convert2unicode($filename_encoding, \$filemeta)
     645                       );
    646646    }
    647647
     
    659659    my $outhandle = $self->{'outhandle'};
    660660
    661     print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n";
    662     my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
    663     print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n";
     661    print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n";
     662    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
     663    print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n";
    664664
    665665
     
    709709    my $outhandle = $self->{'outhandle'};
    710710    my $best_encoding = undef;
    711    
     711    
    712712    # get the language/encoding of the textstring using textcat
    713713    require textcat;  # Only load the textcat module if it is required
     
    727727    return undef;
    728728    }
    729        
     729   
    730730    if (defined $best_encoding && $best_encoding =~ m/^iso_8859/ && &unicode::check_is_utf8($text)) {
    731731    # the text is valid utf8, so assume that's the real encoding (since textcat is based on probabilities)
     
    840840    # Start by looking for manually assigned metadata
    841841    if (defined $gs_filename_encoding) {
    842         if (ref ($gs_filename_encoding) eq "ARRAY") {
    843             my $outhandle = $self->{'outhandle'};
    844            
    845             $deduced_filename_encoding = $gs_filename_encoding->[0];
    846            
    847             my $num_vals = scalar(@$gs_filename_encoding);
    848             if ($num_vals>1) {
    849                 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
    850                 print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
    851             }
    852         }
    853         else {
    854             $deduced_filename_encoding = $gs_filename_encoding;
    855         }
    856     }
    857        
     842    if (ref ($gs_filename_encoding) eq "ARRAY") {
     843        my $outhandle = $self->{'outhandle'};
     844       
     845        $deduced_filename_encoding = $gs_filename_encoding->[0];
     846       
     847        my $num_vals = scalar(@$gs_filename_encoding);
     848        if ($num_vals>1) {
     849        print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
     850        print $outhandle "         Selecting first value: $deduced_filename_encoding\n";
     851        }
     852    }
     853    else {
     854        $deduced_filename_encoding = $gs_filename_encoding;
     855    }
     856    }
     857   
    858858    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    859         # Look to see if plugin specifies this value
    860 
    861         if (defined $plugin_filename_encoding) {
    862             # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options)
    863             if ($plugin_filename_encoding =~ m/^auto-.*$/) {
    864                 my $outhandle = $self->{'outhandle'};
    865                 print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n";
    866                 print $outhandle "         default to 'auto'\n";
    867                 $self->{'filename_encoding'} = $plugin_filename_encoding = "auto";
    868             }
    869            
    870             if ($plugin_filename_encoding ne "auto") {
    871                 # We've been given a specific filenamne encoding
    872                 # => so use it!
    873                 $deduced_filename_encoding = $plugin_filename_encoding;
    874             }
    875         }
     859    # Look to see if plugin specifies this value
     860
     861    if (defined $plugin_filename_encoding) {
     862        # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options)
     863        if ($plugin_filename_encoding =~ m/^auto-.*$/) {
     864        my $outhandle = $self->{'outhandle'};
     865        print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n";
     866        print $outhandle "         default to 'auto'\n";
     867        $self->{'filename_encoding'} = $plugin_filename_encoding = "auto";
     868        }
     869       
     870        if ($plugin_filename_encoding ne "auto") {
     871        # We've been given a specific filenamne encoding
     872        # => so use it!
     873        $deduced_filename_encoding = $plugin_filename_encoding;
     874        }
     875    }
    876876    }
    877877   
     
    892892        # See if we can determine the file system encoding through locale
    893893        $deduced_filename_encoding = $self->locale_encoding();
    894    
     894       
    895895        # if locale shows us filesystem is utf8, check to see filename is consistent
    896896        # => if not, then we have an "alien" filename on our hands
    897        
     897
    898898        if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
    899899        if (!&unicode::check_is_utf8($file)) {
     
    910910#    }
    911911
    912     if ($self->{'verbosity'}>3) {
    913         my $outhandle = $self->{'outhandle'};
    914 
    915         if (defined $deduced_filename_encoding) {
    916             print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n";
    917         }
    918         else {
    919             print $outhandle "  No filename encoding deduced\n";
    920         }
    921     }
    922    
     912    if ($self->{'verbosity'}>3) {
     913    my $outhandle = $self->{'outhandle'};
     914
     915    if (defined $deduced_filename_encoding) {
     916        print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n";
     917    }
     918    else {
     919        print $outhandle "  No filename encoding deduced\n";
     920    }
     921    }
     922   
    923923    return $deduced_filename_encoding;
    924924}
     
    998998#    }
    999999}
    1000    
     1000
    10011001# this should be called by all plugins to set the oid of the doc obj, rather
    10021002# than calling doc_obj->set_OID directly
     
    10171017
    10181018}
    1019  
     1019
    10201020# The BasePlugin read_into_doc_obj() function. This function does all the
    10211021# right things to make general options work for a given plugin.  It doesn't do anything with the file other than setting reads in
     
    10451045    my $pp_file = &util::prettyprint_file($base_dir,$file);
    10461046    print $outhandle "$self->{'plugin_type'} processing $pp_file\n"
    1047         if $self->{'verbosity'} > 1;
     1047    if $self->{'verbosity'} > 1;
    10481048
    10491049    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    1050    
     1050   
    10511051    # create a new document
    10521052    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
     
    10551055    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    10561056    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
    1057  
     1057    
    10581058
    10591059    my $plugin_filename_encoding = $self->{'filename_encoding'};
     
    11231123    $self->associate_source_file($doc_obj, $filename);
    11241124    }
    1125    
     1125    
    11261126
    11271127}
     
    11921192    gsprintf(STDERR, "BasePlugin::process {common.must_be_implemented}\n");
    11931193
    1194     my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
     1194    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
    11951195    print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
    11961196
    1197     die "\n";
     1197    die "\n";
    11981198
    11991199    return undef; # never gets here
     
    12151215    if (!open (FILE, ">:utf8", $filename)) {
    12161216    gsprintf(STDERR, "ConvertToPlug::write_file {ConvertToPlug.could_not_open_for_writing} ($!)\n", $filename);
    1217      die "\n";
    1218      }
     1217    die "\n";
     1218    }
    12191219    print FILE $$textref;
    12201220   
     
    12651265    }
    12661266    }
    1267        
    1268 }
    1269  
     1267   
     1268}
     1269
    12701270# add any extra metadata that's been passed around from one
    12711271# plugin to another.
Note: See TracChangeset for help on using the changeset viewer.