Changeset 3856 for trunk/gsdl/perllib
- Timestamp: 2003-03-12T11:53:28+13:00 (21 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/MACROPlug.pm
r3724 r3856
   55    55    }
   56    56
         57  +
         58  + sub load_language_table
         59  + {
         60  +     my $lang_table = {};
         61  +
         62  +     my $lang_fname = util::filename_cat($ENV{'GSDLHOME'},"tmp","lang",
         63  +         "package_forms","languages.log");
         64  +     open (LANGFILE, "<$lang_fname")
         65  +         || die ("Unable to open $lang_fname: $!\n");
         66  +
         67  +     my $full_name;
         68  +     my $abbr_name;
         69  +
         70  +     while (defined ($full_name=<LANGFILE>)) {
         71  +         chomp($full_name);
         72  +
         73  +         $abbr_name = <LANGFILE>;
         74  +         chomp($abbr_name);
         75  +
         76  +         $lang_table->{$full_name} = $abbr_name;
         77  +
         78  +         my $fourchar_name = substr($full_name,0,4);
         79  +         if (!defined $lang_table->{$fourchar_name}) {
         80  +             $lang_table->{$fourchar_name} = $abbr_name;
         81  +         }
         82  +         else {
         83  +             print STDERR "Warning: Clash on four character abbreviation for language $fourchar_name\n";
         84  +         }
         85  +     }
         86  +
         87  +     close LANGFILE;
         88  +
         89  +     return $lang_table;
         90  + }
         91  +
         92  +
         93  +
   57    94    sub new {
   58    95        my ($class) = @_;
    …     …
   63   100        push( @{$option_list}, $options );
   64   101
        102  +     $self->{'lang_abbr'} = load_language_table();
        103  +
   65   104        return bless $self, $class;
   66   105    }
    …     …
   71   110        return q^(?i)\.dm$^;
   72   111    }
        112  +
        113  +
        114  + sub read {
        115  +     my $self = shift (@_);
        116  +     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
        117  +
        118  +     my $outhandle = $self->{'outhandle'};
        119  +
        120  +     my $lang_table = $self->{'lang_abbr'};
        121  +     my $fn = $file;
        122  +     $fn =~ s/.*\/(.*)\..*/$1/;
        123  +     $fn =~ s/\d+$//; # remove any digits from end of filename
        124  +
        125  +     my $filename = $file;
        126  +     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
        127  +
        128  +     if ((!-d $filename) && ($file !~ m/doc.xml$/) && (!defined $lang_table->{$fn})) {
        129  +         print $outhandle "MACROPlug: blocking $file\n"
        130  +             if $self->{'verbosity'} > 2;
        131  +         $self->{'num_blocked'} ++;
        132  +         return 0;
        133  +     }
        134  +
        135  +     return $self->SUPER::read(@_);
        136  + }
        137  +
   73   138
   74   139    # do plugin specific processing of doc_obj
    …     …
   94   159    }
   95   160
   96        - 1;
   97   161
   98   162    sub extract_macronames {
    …     …
  103   167        my $outhandle = $self->{'outhandle'};
  104   168
  105        -     print $outhandle " extracting macronames ...\n";
        169  +     print $outhandle " extracting macronames ...\n"
        170  +         if ($self->{'verbosity'}>3);
  106   171
  107        -     my @textarray = split ("\n", $$$textref);
        172  +     my @textarray = split ("\n", $$$textref)
        173  +         if ($self->{'verbosity'}>3);
  108   174
  109   175        my $macro_text = "";
    …     …
  276   342        }
  277   343
  278        -     print $outhandle "done extracting macros\n";
  279        - }
  280        -
  281        -
  282        - sub get_language_encoding {
        344  +     print $outhandle "done extracting macros\n"
        345  +         if ($self->{'verbosity'}>3);
        346  +
        347  + }
        348  +
        349  +
        350  + sub get_language_encoding_old {
  283   351        my $self = shift (@_);
  284   352        my ($filename) = @_;
    …     …
  300   368        my $results = [];
  301   369
  302        -     if ($filename =~ m/spanish\.dm/) {
  303        -         $results->[0] = "es-utf8";
  304        -     }
  305        -     else {
  306        -
  307        -         # get the language/encoding
  308        -         $results = $self->{'textcat'}->classify(\$text);
  309        -     }
        370  +     # get the language/encoding
        371  +     $results = $self->{'textcat'}->classify(\$text);
  310   372
  311   373        foreach $r (@$results) {
  312        -         print $outhandle " MY1$r\n";
        374  +         print $outhandle "Results: $r\n";
  313   375
  314   376        }
    …     …
  317   379        # first one in the list - otherwise use the defaults
  318   380        if (scalar @$results > 3) {
  319        -
  320        -         open (LANGFILE, "</research/kde2/gsdl/tmp/lang/package_forms/languages.log") or die ("MURGH LANGFILE\n");
        381  +
        382  +         my $lang_fname = util::filename_cat($ENV{'GSDLHOME'},"tmp","lang","package_forms",
        383  +             "languages.log");
        384  +         open (LANGFILE, "<$lang_fname") or die ("Unable to open $lang_fname: $!\n");
  321   385
  322   386            while (<LANGFILE>) {
    …     …
  397   461        }
  398   462
        463  +     print STDERR "**** forcing encoding to be utf8\n";
        464  +     $encoding = "utf8";
        465  +
        466  +     print STDERR "**** forcing language to be first two letters\n";
        467  +     my $lfname = $filename;
        468  +     $lfname =~ s/^.*\///;
        469  +     $language = substr($lfname,0,2);
        470  +
  399   471        print $outhandle "RETURNING VALUES $language $encoding\n";
  400   472
    …     …
  403   475
  404   476
  405        -
  406        -
  407        -
  408        -
        477  + sub find_language {
        478  +     my ($self,$fn) = @_;
        479  +
        480  +     my $lang_table = $self->{'lang_abbr'};
        481  +
        482  +     if (!defined $lang_table->{$fn}) {
        483  +
        484  +         # try and find it with shorter string name
        485  +
        486  +         my $try_len = length($fn);
        487  +
        488  +         while ($try_len>=4) {
        489  +             $try_fn = substr($fn,0,$try_len);
        490  +
        491  +             if (defined $lang_table->{$try_fn}) {
        492  +                 $fn = $try_fn;
        493  +                 last;
        494  +             }
        495  +             $try_len--;
        496  +         }
        497  +     }
        498  +
        499  +     return $fn;
        500  + }
        501  +
        502  +
        503  + sub get_language_encoding {
        504  +     my $self = shift (@_);
        505  +     my ($filename) = @_;
        506  +     my $outhandle = $self->{'outhandle'};
        507  +
        508  +     my $fn = $filename;
        509  +     $fn =~ s/.*\/(.*)\..*/$1/;
        510  +     $fn =~ s/\d+$//; # remove any digits from end of filename
        511  +
        512  +     my $languge;
        513  +     my $encoding = "utf8";
        514  +
        515  +     ## my $lang_lookup = $self->find_language($fn);
        516  +
        517  +     my $lang_table = $self->{'lang_abbr'};
        518  +
        519  +     if (!defined $lang_table->{$fn}) {
        520  +
        521  +         print $outhandle "Warning: Macro file name $filename not in list of languages.\n";
        522  +         print $outhandle " Using default language.\n";
        523  +         $language = $self->{'default_language'};
        524  +     }
        525  +     else {
        526  +         $language = $lang_table->{$fn};
        527  +     }
        528  +
        529  +     ## print $outhandle "Storing $filename as $language $encoding\n";
        530  +
        531  +     return ($language, $encoding);
        532  + }
        533  +
        534  +
        535  + 1;
Note: See TracChangeset for help on using the changeset viewer.