Index: /trunk/gsdl/perllib/mgppbuilder.pm
===================================================================
--- /trunk/gsdl/perllib/mgppbuilder.pm (revision 1916)
+++ /trunk/gsdl/perllib/mgppbuilder.pm (revision 1917)
@@ -135,4 +135,16 @@
}
+ # make sure that the same index isn't specified more than once
+ my %tmphash = ();
+ my @tmparray = @{$self->{'collect_cfg'}->{'indexes'}};
+ $self->{'collect_cfg'}->{'indexes'} = [];
+ foreach my $i (@tmparray) {
+ if (!defined ($tmphash{$i})) {
+ push (@{$self->{'collect_cfg'}->{'indexes'}}, $i);
+ $tmphash{$i} = 1;
+ }
+ }
+
+
# get the levels (Section, Paragraph) for indexing and compression
$self->{'levels'} = {};
@@ -150,5 +162,5 @@
# load all the plugins
- $self->{'pluginfo'} = &plugin::load_plugins ($plugins);
+ $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle);
if (scalar(@{$self->{'pluginfo'}}) == 0) {
print $outhandle "No plugins were loaded.\n";
@@ -163,5 +175,5 @@
# load all the classifiers
- $self->{'classifiers'} = &classify::load_classifiers ($classifiers, $outhandle);
+ $self->{'classifiers'} = &classify::load_classifiers ($classifiers, $build_dir, $outhandle);
# load up any dontgdbm fields
Index: /trunk/gsdl/perllib/mgppbuildproc.pm
===================================================================
--- /trunk/gsdl/perllib/mgppbuildproc.pm (revision 1916)
+++ /trunk/gsdl/perllib/mgppbuildproc.pm (revision 1917)
@@ -324,5 +324,5 @@
# output all the section metadata
my $metadata = $doc_obj->get_all_metadata ($section);
- foreach $pair (@$metadata) {
+ foreach my $pair (@$metadata) {
my ($field, $value) = (@$pair);
@@ -482,5 +482,5 @@
# see if this document belongs to this subcollection
- foreach $indexexp (@{$self->{'indexexparr'}}) {
+ foreach my $indexexp (@{$self->{'indexexparr'}}) {
$indexed_doc = 0;
my ($field, $exp, $options) = split /\//, $indexexp;
@@ -525,13 +525,6 @@
}
my $doc_section = 0; # just for this document
- my $text = "";
- if ($self->{'num_docs'} == 1) {
- $text = "\n";
- }
- else {
- $text = "\n";
- }
- my $text_extra = "";
-
+ my $text = "\n";
+
# get the text for this document
my $section = $doc_obj->get_top_section();
@@ -544,5 +537,5 @@
if ($indexed_doc) {
$self->{'num_bytes'} += $doc_obj->get_text_length ($section);
- foreach $field (split (/,/, $fields)) {
+ foreach my $field (split (/,/, $fields)) {
# only deal with this field if it doesn't start with top or
# this is the first section
@@ -618,8 +611,4 @@
$self->filter_text ($field, $new_text);
- #????????????????????
- if ($self->{'indexing_text'} &&
- $new_text =~ /[\(\)\{\}]/) {
- }
$self->{'num_processed_bytes'} += length ($new_text);
$text .= "$new_text";
@@ -630,5 +619,5 @@
$section = $doc_obj->get_next_section($section);
} #while defined section
- print $handle "$text";
+ print $handle "$text\n\n";
}