Changeset 23457

Show
Ignore:
Timestamp:
13.12.2010 14:22:45 (9 years ago)
Author:
kjdon
Message:

reindented the file in emacs

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23419 r23457  
    6363      { 'name' => "unicode", 
    6464    'desc' => "{BasePlugin.encoding.unicode}" } ]; 
    65        
     65 
    6666 
    6767my $e = $encodings::encodings; 
     
    7878    [ { 'name' => "auto", 
    7979    'desc' => "{BasePlugin.filename_encoding.auto}" }, 
    80        { 'name' => "auto-language-analysis", 
     80      { 'name' => "auto-language-analysis", 
    8181    'desc' => "{BasePlugin.filename_encoding.auto_language_analysis}" }, # textcat 
    8282      { 'name' => "auto-filesystem-encoding", 
     
    166166    'list' => $file_rename_method_list, 
    167167    'reqd' => "no" 
    168       } 
     168    } 
    169169       
    170170      ]; 
     
    384384sub block_filename 
    385385{ 
    386     my $self = shift(@_); 
    387     my ($block_hash,$filename) = @_; 
     386    my $self = shift(@_); 
     387    my ($block_hash,$filename) = @_; 
     388     
     389    if ($ENV{'GSDLOS'} =~ m/^windows$/) { 
    388390     
    389     if ($ENV{'GSDLOS'} =~ m/^windows$/) { 
    390          
    391         my $lower_drive = $filename; 
    392         $lower_drive =~ s/^([A-Z]):/\l$1:/i; 
    393          
    394         my $upper_drive = $filename; 
    395         $upper_drive =~ s/^([A-Z]):/\u$1:/i; 
    396          
    397         $block_hash->{'file_blocks'}->{$lower_drive} = 1; 
    398         $block_hash->{'file_blocks'}->{$upper_drive} = 1;        
    399     } 
    400     else { 
    401         $block_hash->{'file_blocks'}->{$filename} = 1; 
    402     } 
     391    my $lower_drive = $filename; 
     392    $lower_drive =~ s/^([A-Z]):/\l$1:/i; 
     393     
     394    my $upper_drive = $filename; 
     395    $upper_drive =~ s/^([A-Z]):/\u$1:/i; 
     396     
     397    $block_hash->{'file_blocks'}->{$lower_drive} = 1; 
     398    $block_hash->{'file_blocks'}->{$upper_drive} = 1;        
     399    } 
     400    else { 
     401    $block_hash->{'file_blocks'}->{$filename} = 1; 
     402    } 
    403403} 
    404404 
     
    516516    return undef; # can't recognise 
    517517    } 
    518     
     518     
    519519    # if we have a block_exp, then this overrides the normal 'smart' blocking 
    520520    $self->store_block_files($filename_full_path, $block_hash) unless ($self->{'no_blocking'} || $self->{'block_exp'} ne ""); 
     
    524524    $self->block_cover_image($filename_full_path, $block_hash);  
    525525    } 
    526         
     526     
    527527    return 1; 
    528528} 
     
    555555    # check if the filename is already in UTF8. If it is, then we're done. 
    556556    if($filename_encoding =~ m/auto/) { 
    557         if(&unicode::check_is_utf8($filemeta))  
    558         { 
    559             $filename_encoding = "utf8"; 
    560             return $filemeta; 
    561         }  
     557    if(&unicode::check_is_utf8($filemeta))  
     558    { 
     559        $filename_encoding = "utf8"; 
     560        return $filemeta; 
     561    }  
    562562    } 
    563563     
     
    565565    if ($filename_encoding eq "auto")  
    566566    { 
    567         # try textcat 
    568         $filename_encoding = $self->textcat_encoding($filemeta); 
     567    # try textcat 
     568    $filename_encoding = $self->textcat_encoding($filemeta); 
    569569     
    570         # check the locale next 
    571         $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 
     570    # check the locale next 
     571    $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 
    572572     
    573          
    574         # now try the encoding of the document, if available 
    575         if ($filename_encoding eq "undefined" && defined $file_encoding) { 
    576             $filename_encoding = $file_encoding; 
    577         } 
     573     
     574    # now try the encoding of the document, if available 
     575    if ($filename_encoding eq "undefined" && defined $file_encoding) { 
     576        $filename_encoding = $file_encoding; 
     577    } 
    578578 
    579579    } 
     
    602602    # try textcat 
    603603    $filename_encoding = $self->textcat_encoding($filemeta) if $filename_encoding eq "undefined"; 
    604          
     604     
    605605    # else assume filename encoding is encoding of file content, if that's available 
    606606    if ($filename_encoding eq "undefined" && defined $file_encoding) { 
     
    608608    } 
    609609    } 
    610      
     610     
    611611    elsif ($filename_encoding eq "auto-lf")  
    612612    { 
     
    622622    $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined"; 
    623623    } 
    624          
     624     
    625625    # if still undefined, use utf8 as fallback 
    626626    if ($filename_encoding eq "undefined") { 
     
    642642    if ($filename_encoding !~ m/(?:ascii|utf8|unicode)/) { 
    643643    $filemeta = &unicode::unicode2utf8( 
    644       &unicode::convert2unicode($filename_encoding, \$filemeta) 
    645     ); 
     644                       &unicode::convert2unicode($filename_encoding, \$filemeta) 
     645                       ); 
    646646    } 
    647647 
     
    659659    my $outhandle = $self->{'outhandle'}; 
    660660 
    661     print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n"; 
    662     my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0); 
    663     print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 
     661    print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n"; 
     662    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0); 
     663    print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 
    664664 
    665665 
     
    709709    my $outhandle = $self->{'outhandle'}; 
    710710    my $best_encoding = undef; 
    711     
     711     
    712712    # get the language/encoding of the textstring using textcat 
    713713    require textcat;  # Only load the textcat module if it is required 
     
    727727    return undef; 
    728728    }  
    729          
     729     
    730730    if (defined $best_encoding && $best_encoding =~ m/^iso_8859/ && &unicode::check_is_utf8($text)) { 
    731731    # the text is valid utf8, so assume that's the real encoding (since textcat is based on probabilities) 
     
    840840    # Start by looking for manually assigned metadata 
    841841    if (defined $gs_filename_encoding) { 
    842         if (ref ($gs_filename_encoding) eq "ARRAY") { 
    843             my $outhandle = $self->{'outhandle'}; 
    844              
    845             $deduced_filename_encoding = $gs_filename_encoding->[0]; 
    846              
    847             my $num_vals = scalar(@$gs_filename_encoding); 
    848             if ($num_vals>1) { 
    849                 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 
    850                 print $outhandle "         Selecting first value: $deduced_filename_encoding\n"; 
    851             } 
    852         }  
    853         else { 
    854             $deduced_filename_encoding = $gs_filename_encoding; 
    855         } 
    856     } 
    857          
     842    if (ref ($gs_filename_encoding) eq "ARRAY") { 
     843        my $outhandle = $self->{'outhandle'}; 
     844         
     845        $deduced_filename_encoding = $gs_filename_encoding->[0]; 
     846         
     847        my $num_vals = scalar(@$gs_filename_encoding); 
     848        if ($num_vals>1) { 
     849        print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n"; 
     850        print $outhandle "         Selecting first value: $deduced_filename_encoding\n"; 
     851        } 
     852    }  
     853    else { 
     854        $deduced_filename_encoding = $gs_filename_encoding; 
     855    } 
     856    } 
     857     
    858858    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) { 
    859         # Look to see if plugin specifies this value 
    860  
    861         if (defined $plugin_filename_encoding) { 
    862             # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options) 
    863             if ($plugin_filename_encoding =~ m/^auto-.*$/) { 
    864                 my $outhandle = $self->{'outhandle'}; 
    865                 print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n"; 
    866                 print $outhandle "         default to 'auto'\n"; 
    867                 $self->{'filename_encoding'} = $plugin_filename_encoding = "auto"; 
    868             } 
    869              
    870             if ($plugin_filename_encoding ne "auto") { 
    871                 # We've been given a specific filenamne encoding 
    872                 # => so use it! 
    873                 $deduced_filename_encoding = $plugin_filename_encoding; 
    874             } 
    875         } 
     859    # Look to see if plugin specifies this value 
     860 
     861    if (defined $plugin_filename_encoding) { 
     862        # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options) 
     863        if ($plugin_filename_encoding =~ m/^auto-.*$/) { 
     864        my $outhandle = $self->{'outhandle'}; 
     865        print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n"; 
     866        print $outhandle "         default to 'auto'\n"; 
     867        $self->{'filename_encoding'} = $plugin_filename_encoding = "auto"; 
     868        } 
     869         
     870        if ($plugin_filename_encoding ne "auto") { 
     871        # We've been given a specific filenamne encoding 
     872        # => so use it! 
     873        $deduced_filename_encoding = $plugin_filename_encoding; 
     874        } 
     875    } 
    876876    } 
    877877     
     
    892892        # See if we can determine the file system encoding through locale 
    893893        $deduced_filename_encoding = $self->locale_encoding(); 
    894      
     894         
    895895        # if locale shows us filesystem is utf8, check to see filename is consistent 
    896896        # => if not, then we have an "alien" filename on our hands 
    897          
     897 
    898898        if ($deduced_filename_encoding =~ m/^utf-?8$/i) { 
    899899        if (!&unicode::check_is_utf8($file)) { 
     
    910910#    } 
    911911 
    912     if ($self->{'verbosity'}>3) { 
    913         my $outhandle = $self->{'outhandle'}; 
    914  
    915         if (defined $deduced_filename_encoding) { 
    916             print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n"; 
    917         } 
    918         else { 
    919             print $outhandle "  No filename encoding deduced\n"; 
    920         } 
    921     } 
    922      
     912    if ($self->{'verbosity'}>3) { 
     913    my $outhandle = $self->{'outhandle'}; 
     914 
     915    if (defined $deduced_filename_encoding) { 
     916        print $outhandle "  Deduced filename encoding as: $deduced_filename_encoding\n"; 
     917    } 
     918    else { 
     919        print $outhandle "  No filename encoding deduced\n"; 
     920    } 
     921    } 
     922     
    923923    return $deduced_filename_encoding; 
    924924} 
     
    998998#    } 
    999999} 
    1000     
     1000 
    10011001# this should be called by all plugins to set the oid of the doc obj, rather 
    10021002# than calling doc_obj->set_OID directly 
     
    10171017 
    10181018} 
    1019    
     1019 
    10201020# The BasePlugin read_into_doc_obj() function. This function does all the 
    10211021# right things to make general options work for a given plugin.  It doesn't do anything with the file other than setting reads in 
     
    10451045    my $pp_file = &util::prettyprint_file($base_dir,$file); 
    10461046    print $outhandle "$self->{'plugin_type'} processing $pp_file\n" 
    1047         if $self->{'verbosity'} > 1; 
     1047    if $self->{'verbosity'} > 1; 
    10481048 
    10491049    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 
    1050      
     1050     
    10511051    # create a new document 
    10521052    my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'}); 
     
    10551055    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}"); 
    10561056    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path)); 
    1057   
     1057     
    10581058 
    10591059    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     
    11231123    $self->associate_source_file($doc_obj, $filename); 
    11241124    } 
    1125     
     1125     
    11261126 
    11271127} 
     
    11921192    gsprintf(STDERR, "BasePlugin::process {common.must_be_implemented}\n"); 
    11931193 
    1194     my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1); 
     1194    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1); 
    11951195    print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n"; 
    11961196 
    1197     die "\n"; 
     1197    die "\n"; 
    11981198 
    11991199    return undef; # never gets here 
     
    12151215    if (!open (FILE, ">:utf8", $filename)) { 
    12161216    gsprintf(STDERR, "ConvertToPlug::write_file {ConvertToPlug.could_not_open_for_writing} ($!)\n", $filename); 
    1217      die "\n"; 
    1218      } 
     1217    die "\n"; 
     1218    } 
    12191219    print FILE $$textref; 
    12201220     
     
    12651265    }  
    12661266    } 
    1267          
    1268 } 
    1269    
     1267     
     1268} 
     1269 
    12701270# add any extra metadata that's been passed around from one 
    12711271# plugin to another.