Changeset 3856 for trunk/gsdl/perllib


Ignore:
Timestamp:
2003-03-12T11:53:28+13:00 (21 years ago)
Author:
davidb
Message:

General improvement to the translator facility.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/MACROPlug.pm

    r3724 r3856  
    5555}
    5656
     57
     58sub load_language_table
     59{
     60    my $lang_table = {};
     61
     62    my $lang_fname = util::filename_cat($ENV{'GSDLHOME'},"tmp","lang",
     63                    "package_forms","languages.log");
     64    open (LANGFILE, "<$lang_fname")
     65    || die ("Unable to open $lang_fname: $!\n");
     66
     67    my $full_name;
     68    my $abbr_name;
     69
     70    while (defined ($full_name=<LANGFILE>)) {
     71    chomp($full_name);
     72   
     73    $abbr_name = <LANGFILE>;
     74    chomp($abbr_name);
     75
     76    $lang_table->{$full_name} = $abbr_name;
     77
     78    my $fourchar_name = substr($full_name,0,4);
     79    if (!defined $lang_table->{$fourchar_name}) {
     80        $lang_table->{$fourchar_name} = $abbr_name;
     81    }
     82    else {
     83        print STDERR "Warning: Clash on four character abbreviation for language $fourchar_name\n";
     84    }
     85    }
     86
     87    close LANGFILE;
     88
     89    return $lang_table;
     90}
     91
     92
     93
    5794sub new {
    5895    my ($class) = @_;
     
    63100    push( @{$option_list}, $options );
    64101
     102    $self->{'lang_abbr'} = load_language_table();
     103
    65104    return bless $self, $class;
    66105}
     
    71110    return q^(?i)\.dm$^;
    72111}
     112
     113
     114sub read {
     115    my $self = shift (@_); 
     116    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     117
     118    my $outhandle = $self->{'outhandle'};
     119
     120    my $lang_table = $self->{'lang_abbr'};
     121    my $fn = $file;
     122    $fn =~ s/.*\/(.*)\..*/$1/;
     123    $fn =~ s/\d+$//; # remove any digits from end of filename
     124
     125    my $filename = $file;
     126    $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
     127
     128    if ((!-d $filename) && ($file !~ m/doc.xml$/) && (!defined $lang_table->{$fn})) {
     129    print $outhandle "MACROPlug: blocking $file\n"
     130        if $self->{'verbosity'} > 2;
     131    $self->{'num_blocked'} ++;
     132    return 0;
     133    }
     134
     135    return $self->SUPER::read(@_);
     136}
     137
    73138
    74139# do plugin specific processing of doc_obj
     
    94159}
    95160
    96 1;
    97161
    98162sub extract_macronames {
     
    103167    my $outhandle = $self->{'outhandle'};
    104168
    105     print $outhandle " extracting macronames ...\n";
     169    print $outhandle " extracting macronames ...\n"
     170    if ($self->{'verbosity'}>3);
    106171   
    107     my @textarray = split ("\n", $$$textref);
     172    my @textarray = split ("\n", $$$textref)
     173    if ($self->{'verbosity'}>3);
    108174
    109175    my $macro_text = "";
     
    276342    }
    277343   
    278     print $outhandle "done extracting macros\n";
    279 }
    280 
    281 
    282 sub get_language_encoding {
     344    print $outhandle "done extracting macros\n"
     345    if ($self->{'verbosity'}>3);
     346
     347}
     348
     349
     350sub get_language_encoding_old {
    283351    my $self = shift (@_);
    284352    my ($filename) = @_;
     
    300368    my $results = [];
    301369
    302     if ($filename =~ m/spanish\.dm/) {
    303     $results->[0] = "es-utf8";
    304     }
    305     else {
    306    
    307     # get the language/encoding
    308     $results = $self->{'textcat'}->classify(\$text);
    309     }
     370    # get the language/encoding
     371    $results = $self->{'textcat'}->classify(\$text);
    310372
    311373    foreach $r (@$results) {
    312     print $outhandle "MY1 $r\n";
     374    print $outhandle "Results: $r\n";
    313375   
    314376    }
     
    317379    # first one in the list - otherwise use the defaults
    318380    if (scalar @$results > 3) {
    319                                        
    320     open (LANGFILE, "</research/kde2/gsdl/tmp/lang/package_forms/languages.log") or die ("MURGH LANGFILE\n");
     381                               
     382        my $lang_fname = util::filename_cat($ENV{'GSDLHOME'},"tmp","lang","package_forms",
     383                        "languages.log");
     384    open (LANGFILE, "<$lang_fname") or die ("Unable to open $lang_fname: $!\n");
    321385
    322386    while (<LANGFILE>) {
     
    397461    }
    398462
     463    print STDERR "**** forcing encoding to be utf8\n";
     464    $encoding = "utf8";
     465
     466    print STDERR "**** forcing language to be first two letters\n";
     467    my $lfname = $filename;
     468    $lfname =~ s/^.*\///;
     469    $language = substr($lfname,0,2);
     470
    399471    print $outhandle "RETURNING VALUES $language $encoding\n";
    400472
     
    403475
    404476
    405 
    406 
    407 
    408 
     477sub find_language {
     478    my ($self,$fn) = @_;
     479
     480    my $lang_table = $self->{'lang_abbr'};
     481
     482    if (!defined $lang_table->{$fn}) {
     483
     484    # try and find it with shorter string name
     485   
     486    my $try_len = length($fn);
     487
     488    while ($try_len>=4) {
     489        $try_fn = substr($fn,0,$try_len);
     490
     491        if (defined $lang_table->{$try_fn}) {
     492        $fn = $try_fn;
     493        last;
     494        }
     495        $try_len--;
     496    }
     497    }
     498
     499    return $fn;
     500}
     501
     502
     503sub get_language_encoding {
     504    my $self = shift (@_);
     505    my ($filename) = @_;
     506    my $outhandle = $self->{'outhandle'};
     507
     508    my $fn = $filename;
     509    $fn =~ s/.*\/(.*)\..*/$1/;
     510    $fn =~ s/\d+$//; # remove any digits from end of filename
     511
     512    my $languge;
     513    my $encoding = "utf8";
     514
     515    ## my $lang_lookup = $self->find_language($fn);
     516
     517    my $lang_table = $self->{'lang_abbr'};
     518
     519    if (!defined $lang_table->{$fn}) {
     520
     521    print $outhandle "Warning: Macro file name $filename not in list of languages.\n";
     522    print $outhandle "         Using default language.\n";
     523    $language = $self->{'default_language'};
     524    }
     525    else {
     526    $language = $lang_table->{$fn};
     527    }
     528
     529    ## print $outhandle "Storing $filename as $language $encoding\n";
     530
     531    return ($language, $encoding);
     532}
     533
     534
     5351;
Note: See TracChangeset for help on using the changeset viewer.