Changeset 25155 for gs2-extensions


Ignore:
Timestamp:
2012-02-28T16:40:01+13:00 (12 years ago)
Author:
papitha
Message:

Tidied up the code so that diagnostic output is controlled by verbosity (printed to the output handle only when verbosity >= 2)

Location:
gs2-extensions/ngramj/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/ngramj/perllib/ngramj.pm

    r25141 r25155  
    3333
    3434use strict;
    35 
     35no strict 'refs'; # allow filehandles to be variables and viceversa
    3636
    3737sub new {
    3838    my $class = shift (@_);
    39 
    40     my $self = {};
     39    my ($verbosity,$outhandle) = @_;
     40   
     41    my $self = { 'verbosity' => $verbosity, 'outhandle' => $outhandle };
    4142
    4243    my $ngram_jar = &util::filename_cat($ENV{'GSDLHOME'},"ext","ngramj","jars","cngram.jar");
     
    4546   
    4647    $self->{'java_cmd'} = $java_cmd;
    47    
    4848 
    4949    return bless $self, $class;
     
    8787                my @lang_array = split(/\s+/,$lang_group);
    8888               
     89                my @lang_summary = ( "++ Ngram language probabilities:\n++  ");
     90               
    8991                foreach my $l (@lang_array) {
    90                     print STDERR "l = $l\n";
     92                    push(@lang_summary,$l);
    9193                    my ($lang,$score) = ($l =~ m/^(.+):(.+)$/);
    9294                   
     
    9597                   
    9698                    push(@$lang_encode_pairs,$lang_pair);
     99                }
     100                push(@lang_summary,"\n");
     101               
     102                if ($self->{'verbosity'}>=2) {
     103                    my $outhandle = $self->{'outhandle'};
     104                    my $lang_summary_str = join(" ",@lang_summary);
     105                    print $outhandle $lang_summary_str;
    97106                }
    98107            }
     
    105114        return undef;
    106115    }
    107    
    108    
    109116   
    110117    &util::rm($tmp_txt_filename);
     
    119126
    120127    return $self->classify_contents($contents_ref,$filename,$filter_by_encoding);
    121    
    122128}
    123129   
  • gs2-extensions/ngramj/perllib/plugins/ReadTextFile.pm

    r25141 r25155  
    120120    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    121121
    122     print STDERR "**** In Pei Jones Local version of ReadTextFile\n";
     122    my $verbosity = $self->{'verbosity'};
     123    if ($verbosity>=2) {
     124        my $outhandle = $self->{'outhandle'};
     125        print $outhandle "++ Using Ngram-Java version of ReadTextFile.pm ++\n";
     126    }
    123127   
    124128    my $outhandle = $self->{'outhandle'};
     
    329333    my ($filename) = @_;
    330334
    331     $self->{'textcat'} = new ngramj() if (!defined($self->{'textcat'}));
     335    $self->{'textcat'} = new ngramj($self->{'verbosity'},$self->{'outhandle'}) if (!defined($self->{'textcat'}));
    332336   
    333337    my ($language, $encoding, $extracted_encoding);
     
    629633
    630634    else { # need to use textcat to get either the language, or get both language and encoding
    631     $self->{'ngramj'} = new ngramj() if (!defined($self->{'ngramj'}));
     635    $self->{'ngramj'} = new ngramj($self->{'verbosity'},$self->{'outhandle'}) if (!defined($self->{'ngramj'}));
    632636   
    633637    if($found_html_encoding) { # know encoding, find language by limiting search to known encoding
Note: See TracChangeset for help on using the changeset viewer.