Changeset 9853


Ignore:
Timestamp:
2005-05-10T16:55:00+12:00 (19 years ago)
Author:
kjdon
Message:

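Fixed up maxdocs handling: the plugin read functions now take an extra parameter, $total_count (inserted before $gli), giving the number of documents already read, so that recursive plugins check $total_count plus their own count against maxdocs.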

Location:
trunk/gsdl/perllib
Files:
29 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/lucenebuilder.pm

    r9669 r9853  
    131131           $self->{'buildproc'}, $self->{'maxdocs'});   
    132132    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    133            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     133           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    134134    &plugin::end($self->{'pluginfo'});
    135135    close ($handle) unless $self->{'debug'};
     
    313313    $self->{'buildproc'}->reset();
    314314    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    315            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     315           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    316316    close ($handle) unless $self->{'debug'};
    317317
  • trunk/gsdl/perllib/mgbuilder.pm

    r9669 r9853  
    278278           $self->{'buildproc'}, $self->{'maxdocs'});
    279279    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    280            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     280           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    281281    &plugin::end($self->{'pluginfo'});
    282282   
     
    316316
    317317    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    318            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     318           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    319319
    320320    close ($handle) unless $self->{'debug'};
     
    605605    $self->{'buildproc'}->reset();
    606606    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    607            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     607           "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
    608608    close ($handle) unless $self->{'debug'};
    609609
     
    639639    $self->{'buildproc'}->reset();
    640640    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    641            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     641           "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
    642642
    643643   
     
    810810   
    811811    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    812            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     812           "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
    813813   
    814814    # output classification information
  • trunk/gsdl/perllib/mgppbuilder.pm

    r9669 r9853  
    329329    }
    330330    }
    331    
     331    $mgpp_passes_sections .= "-K SENT ";
    332332    print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
    333333    print STDERR "<Stage name='CompressText'>\n" if $self->{'gli'};
     
    365365           $self->{'buildproc'}, $self->{'maxdocs'});
    366366    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    367            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     367           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    368368    &plugin::end($self->{'pluginfo'});
    369369    close (PIPEOUT);
     
    407407
    408408    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    409            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     409           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    410410    close ($handle) unless $self->{'debug'};
    411411
     
    705705    $self->{'buildproc'}->reset();
    706706    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    707            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     707           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    708708    close ($handle) unless $self->{'debug'};
    709709
     
    740740    $self->{'buildproc'}->reset();
    741741    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    742            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     742           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    743743
    744744    $self->print_stats ();
     
    939939   
    940940    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    941            "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     941           "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    942942
    943943    # output classification information
  • trunk/gsdl/perllib/plugin.pm

    r9584 r9853  
    155155
    156156sub read {
    157     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli, $aux) = @_;
     157    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;
    158158
    159159    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
     160    $total_count = 0 unless defined $total_count && $total_count =~ /\d/;
    160161    $gli = 0 unless defined $gli;
    161162
     
    187188
    188189        $rv = $plugobj->read($pluginfo, $base_dir, $file,
    189                  $metadata, $processor, $maxdocs, $gli, $aux);
     190                 $metadata, $processor, $maxdocs, $total_count, $gli, $aux);
    190191
    191192    if (defined $rv) {
  • trunk/gsdl/perllib/plugins/ArcPlug.pm

    r8716 r9853  
    7575sub read {
    7676    my $self = shift (@_);
    77     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     77    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs,$total_count, $gli) = @_;
    7878    my $outhandle = $self->{'outhandle'};
    7979
     
    9696    # process each file
    9797    foreach my $subfile (@$file_list) {
    98         last if ($maxdocs != -1 && $count >= $maxdocs);
     98        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
    9999
    100100        my $tmp = &util::filename_cat ($file, $subfile->[0]);
    101101        next if $tmp eq $file;
    102102        # note: metadata is not carried on to the next level
    103         $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs);
     103        $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count+$count), $gli);
    104104    }
    105105
  • trunk/gsdl/perllib/plugins/BNContentePlug.pm

    r9462 r9853  
    148148sub metadata_read {
    149149    my $self = shift (@_);
    150     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs) = @_;
     150    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    151151
    152152    my $outhandle = $self->{'outhandle'};
     
    203203sub readxxx {
    204204    my $self = shift (@_);
    205     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     205    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    206206    my $outhandle = $self->{'outhandle'};
    207207   
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r9703 r9853  
    662662    my $self = shift (@_); 
    663663 
    664     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     664    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    665665
    666666    if ($self->is_recursive()) {
  • trunk/gsdl/perllib/plugins/ConvertToPlug.pm

    r9584 r9853  
    267267sub read {
    268268    my $self = shift (@_);
    269     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     269    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    270270#    if ($self->is_recursive()) {
    271271#        die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
  • trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm

    r9584 r9853  
    334334sub read {
    335335    my $self = shift (@_);
    336     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     336    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    337337#    if ($self->is_recursive()) {
    338338#        die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
  • trunk/gsdl/perllib/plugins/DBPlug.pm

    r8121 r9853  
    9090sub read {
    9191    my $self = shift (@_);
    92     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     92    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    9393
    9494    # see if we can handle the passed file...
  • trunk/gsdl/perllib/plugins/DSpacePlug.pm

    r9694 r9853  
    245245sub metadata_read {
    246246    my $self = shift (@_);
    247     my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs) = @_;
     247    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    248248
    249249    my $only_first_doc = $self->{'only_first_doc'};
     
    336336sub read {
    337337    my $self = shift (@_);
    338     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     338    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    339339    my $outhandle = $self->{'outhandle'};
    340340   
  • trunk/gsdl/perllib/plugins/FOXPlug.pm

    r9703 r9853  
    7373sub read {
    7474    my $self = shift (@_);
    75     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     75    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    7676    my $fullname = &util::filename_cat ($base_dir, $file);
    7777
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r9703 r9853  
    7373sub read {
    7474    my $self = shift (@_);
    75     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     75    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    7676    my $outhandle = $self->{'outhandle'};
    7777
     
    227227   
    228228    $no_docs++;
    229     last if ($maxdocs > -1 && $no_docs >= $maxdocs);
     229    last if ($maxdocs > -1 && ($total_count+$no_docs) >= $maxdocs);
    230230    last unless defined $gml && $gml =~ /\w/;
    231231    } # while(1) document level
  • trunk/gsdl/perllib/plugins/HBPlug.pm

    r8166 r9853  
    237237sub read {
    238238    my $self = shift (@_);
    239     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     239    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    240240    my $outhandle = $self->{'outhandle'};
    241241
  • trunk/gsdl/perllib/plugins/ImagePlug.pm

    r9703 r9853  
    362362sub read {
    363363    my $self = shift (@_);
    364     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     364    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    365365
    366366    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/IndexPlug.pm

    r6408 r9853  
    9191sub read {
    9292    my $self = shift (@_);
    93     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     93    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    9494    my $outhandle = $self->{'outhandle'};
    9595
     
    117117    my $count = 0;
    118118    foreach my $docfile (keys (%$list)) {
    119     last if ($maxdocs != -1 && $count >= $maxdocs);
     119    last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);
    120120    $metadata = {}; # at present we can do this as metadata
    121121                    # will always be empty when it arrives
     
    141141        }
    142142        }
    143         $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs);
     143        $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs, ($total_count +$count), $gli);
    144144    }
    145145    }
  • trunk/gsdl/perllib/plugins/MACROPlug.pm

    r8166 r9853  
    106106sub read {
    107107    my $self = shift (@_); 
    108     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     108    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    109109
    110110    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/MP3Plug.pm

    r9167 r9853  
    308308sub read {
    309309    my $self = shift (@_);
    310     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     310    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    311311
    312312    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/NULPlug.pm

    r9584 r9853  
    9191sub read {
    9292    my $self = shift (@_);
    93     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     93    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    9494
    9595    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/OAIPlug.pm

    r9738 r9853  
    7979    my $self = shift (@_); 
    8080 
    81     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     81    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    8282
    8383    my $outhandle = $self->{'outhandle'};
     
    133133
    134134    return &plugin::read ($pluginfo, $url_base_dir, $url_array->[0],
    135                   $metadata, $processor, $maxdocs, $gli);
     135                  $metadata, $processor, $maxdocs, $total_count, $gli);
    136136    }
    137137    else
  • trunk/gsdl/perllib/plugins/OggVorbisPlug.pm

    r8365 r9853  
    8787{
    8888    my $self = shift (@_);
    89     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     89    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    9090
    9191    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/PagedImgPlug.pm

    r9584 r9853  
    501501sub read {
    502502    my $self = shift (@_);
    503     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     503    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    504504
    505505    my $outhandle   = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/RecPlug.pm

    r9703 r9853  
    190190sub read {
    191191    my $self = shift (@_);
    192     my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $gli) = @_;
     192    my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    193193   
    194194    my $outhandle = $self->{'outhandle'};
     
    360360    my $subfile = $dir[$i];
    361361    my $this_file_base_dir = $base_dir;
    362     last if ($maxdocs != -1 && $count >= $maxdocs);
     362    last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
    363363    next if ($subfile =~ /^\.\.?$/);
    364364    next if ($read_metadata_files && $subfile =~ /metadata\.xml$/);
     
    419419    $count += &plugin::read ($pluginfo, $this_file_base_dir,
    420420                 &util::filename_cat($file, $subfile),
    421                  $out_metadata, $processor, $maxdocs, $gli);
     421                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    422422    }
    423423
  • trunk/gsdl/perllib/plugins/RogPlug.pm

    r8166 r9853  
    223223sub read {
    224224    my $self = shift (@_);
    225     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     225    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    226226
    227227    my $filename = &util::filename_cat($base_dir, $file);
     
    251251    $doc_count++;
    252252
    253         if (defined $maxdocs && $maxdocs =~ /\d/)
    254     {
    255         last if ($maxdocs >=0 && $doc_count >= $maxdocs);
    256     }
    257 
     253    last if ($maxdocs !=-1 && ($total_count+$doc_count) >= $maxdocs);
     254   
    258255    if (($doc_count % 10) == 0)
    259256    {
  • trunk/gsdl/perllib/plugins/SplitPlug.pm

    r9493 r9853  
    185185sub read {
    186186    my $self = shift (@_);
    187     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     187    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    188188    my $outhandle = $self->{'outhandle'};
    189189    my $verbosity = $self->{'verbosity'};
  • trunk/gsdl/perllib/plugins/UnknownPlug.pm

    r9706 r9853  
    175175sub read {
    176176    my $self = shift (@_);
    177     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     177    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    178178
    179179    my $outhandle = $self->{'outhandle'};
  • trunk/gsdl/perllib/plugins/W3ImgPlug.pm

    r8121 r9853  
    416416# include directories
    417417sub read {
    418     my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = (@_);
     418    my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_);
    419419    my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs);
    420420    # forward normal read (runs HTMLPlug if index_pages T)
    421     my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs);
     421    my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
    422422    if ( ! $ok ) { return $ok } # what is this returning??
    423423
  • trunk/gsdl/perllib/plugins/XMLPlug.pm

    r9584 r9853  
    8181    $self = shift (@_); 
    8282 
    83     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
     83    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    8484
    8585    my $filename = $file;
  • trunk/gsdl/perllib/plugins/ZIPPlug.pm

    r9229 r9853  
    113113sub read {
    114114    my $self = shift (@_);
    115     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     115    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    116116    my $outhandle = $self->{'outhandle'};
    117117
     
    150150    chdir ($cwd) || die "Unable to change back to $cwd";
    151151
    152     my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
     152    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
    153153    &util::rm_r ($tmpdir);
    154154
Note: See TracChangeset for help on using the changeset viewer.