Changeset 6332
- Timestamp: 2003-12-19T15:16:29+13:00 (20 years ago)
- Location: trunk/gsdl/perllib
- Files: 25 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/classify.pm
r5682 r6332 115 115 # to the gdbm 116 116 sub output_classify_info { 117 my ($classifiers, $handle, $allclassifications ) = @_;117 my ($classifiers, $handle, $allclassifications, $gli) = @_; 118 118 # $handle = "main::STDOUT"; 119 120 $gli = 0 unless defined $gli; 119 121 120 122 # create a classification containing all the info … … 124 126 # get each of the classifications 125 127 foreach $classobj (@$classifiers) { 126 my $tempinfo = $classobj->get_classify_info( );128 my $tempinfo = $classobj->get_classify_info($gli); 127 129 $tempinfo->{'classifyOID'} = "CL$next_classify_num"; 128 130 $next_classify_num++; -
trunk/gsdl/perllib/mgbuilder.pm
r5768 r6332 62 62 my ($class, $collection, $source_dir, $build_dir, $verbosity, 63 63 $maxdocs, $debug, $keepold, $allclassifications, 64 $outhandle, $no_text, $failhandle ) = @_;64 $outhandle, $no_text, $failhandle, $gli) = @_; 65 65 66 66 $outhandle = STDERR unless defined $outhandle; … … 80 80 'no_text'=>$no_text, 81 81 'failhandle'=>$failhandle, 82 'notbuilt'=>{} # indexes not built 82 'notbuilt'=>{}, # indexes not built 83 'gli'=>$gli 83 84 }, $class; 84 85 86 $self->{'gli'} = 0 unless defined $self->{'gli'}; 85 87 86 88 # read in the collection configuration file … … 233 235 234 236 print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1); 237 print STDERR "<Stage name='CompressText'>\n" if $self->{'gli'}; 235 238 236 239 # collect the statistics for the text 237 240 # -b $maxdocsize sets the maximum document size to be 12 meg 238 241 print $outhandle "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1); 242 print STDERR "<Phase name='CollectTextStats'>\n" if $self->{'gli'}; 239 243 240 244 my ($handle); … … 276 280 if (!$self->{'debug'}) { 277 281 print $outhandle "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1); 282 print STDERR "<Phase name='CreatingCompress'>\n" if $self->{'gli'}; 278 283 if (!-e "$mg_compression_dict_exe") { 279 284 die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n"; … … 287 292 } 288 293 } 294 else { 295 print STDERR "<Phase name='SkipCreatingComp'>\n" if $self->{'gli'}; 296 } 289 297 290 298 $self->{'buildproc'}->reset(); 291 299 # compress the text 292 300 print $outhandle "\n compressing the text\n" if ($self->{'verbosity'} >= 1); 301 print STDERR "<Phase name='CompressingText'>\n" if $self->{'gli'}; 302 293 303 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, 294 304 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); … … 336 346 print $outhandle "\n*** building index $index in subdirectory " . 
337 347 "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1); 348 print STDERR "<Phase name='Index' source='$index'>\n" if $self->{'gli'}; 338 349 $self->build_index($index); 339 350 } else { … … 558 569 # Build index dictionary. Uses verbatim stem method 559 570 print $outhandle "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1); 571 print STDERR "<Stage name='CreatingIndexDic'>\n" if $self->{'gli'}; 560 572 my ($handle); 561 573 if ($self->{'debug'}) { … … 608 620 # invert the text 609 621 print $outhandle "\n inverting the text\n" if ($self->{'verbosity'} >= 1); 610 622 print STDERR "<Stage name='InvertingText'>\n" if $self->{'gli'}; 611 623 $self->{'buildproc'}->reset(); 612 624 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, … … 621 633 # create the weights file 622 634 print $outhandle "\n create the weights file\n" if ($self->{'verbosity'} >= 1); 635 print STDERR "<Stage name='CreateTheWeights'>\n" if $self->{'gli'}; 623 636 if (!-e "$mg_weights_build_exe") { 624 637 die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n"; … … 628 641 # create 'on-disk' stemmed dictionary 629 642 print $outhandle "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1); 643 print STDERR "<Stage name='CreateStemmedDic'>\n" if $self->{'gli'}; 630 644 if (!-e "$mg_invf_dict_exe") { 631 645 die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n"; … … 636 650 # creates stem index files for the various stemming methods 637 651 print $outhandle "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1); 652 print STDERR "<Stage name='CreatingStemIndx'>\n" if $self->{'gli'}; 638 653 if (!-e "$mg_stem_idx_exe") { 639 654 die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n"; … … 681 696 print $outhandle "\n*** creating the info database and processing associated files\n" 682 697 if ($self->{'verbosity'} >= 1); 698 print STDERR "<Phase name='CreateInfoData'>\n" if $self->{'gli'}; 683 699 684 700 
# init all the classifiers … … 774 790 # output classification information 775 791 &classify::output_classify_info ($self->{'classifiers'}, $handle, 776 $self->{'allclassifications'} );777 792 $self->{'allclassifications'}, 793 $self->{'gli'}); 778 794 779 795 … … 803 819 804 820 print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1); 821 print STDERR "<Phase name='CreatingAuxilary'>\n" if $self->{'gli'}; 805 822 806 823 # get the text directory -
trunk/gsdl/perllib/plugin.pm
r5682 r6332 92 92 93 93 sub read { 94 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $ aux) = @_;94 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli, $aux) = @_; 95 95 96 96 $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/; 97 $gli = 0 unless defined $gli; 98 97 99 my $rv = 0; 100 101 # Announce to GLI that we are handling a file 102 print STDERR "<File n='$file'>\n" if $gli; 98 103 99 104 # the .kill file is a handy (if not very elegant) way of aborting … … 109 114 foreach $plugobj (@$pluginfo) { 110 115 $rv = $plugobj->read($pluginfo, $base_dir, $file, 111 $metadata, $processor, $maxdocs, $ aux);116 $metadata, $processor, $maxdocs, $gli, $aux); 112 117 return $rv if defined $rv; 113 118 } … … 130 135 # are only output after import.pl - 131 136 sub write_stats { 132 my ($pluginfo, $statshandle, $faillog) = @_; 137 my ($pluginfo, $statshandle, $faillog, $gli) = @_; 138 139 $gli = 0 unless defined $gli; 133 140 134 141 foreach $plugobj (@$pluginfo) { … … 138 145 my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} + 139 146 $stats->{'num_not_processed'}; 147 148 print STDERR "<ImportComplete c='$stats->{'num_processed'}' p='$stats->{'num_processed'}'>\n" if $gli; 140 149 141 150 if ($total == 1) { -
trunk/gsdl/perllib/plugins/ArcPlug.pm
r5680 r6332 85 85 86 86 # found an archives.inf file 87 &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") ;87 &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1; 88 88 89 89 # read in the archives information file -
trunk/gsdl/perllib/plugins/BasPlug.pm
r5924 r6332 77 77 'desc' => "{BasPlug.default_encoding}", 78 78 'type' => "enum", 79 'list' => $unicode_list, 79 80 'reqd' => "no", 80 81 'deft' => "utf8" }, … … 490 491 my $self = shift (@_); 491 492 492 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;493 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 493 494 494 495 if ($self->is_recursive()) { … … 547 548 548 549 # do plugin specific processing of doc_obj 549 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj ));550 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli)); 550 551 551 552 # do any automatic metadata extraction -
trunk/gsdl/perllib/plugins/BibTexPlug.pm
r5924 r6332 89 89 sub process { 90 90 my $self = shift (@_); 91 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;91 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 92 92 my $outhandle = $self->{'outhandle'}; 93 93 … … 101 101 102 102 # Report that we're processing the file 103 print STDERR "<Processing n='$file' p='BibTexPlug'>\n" if ($gli); 103 104 print $outhandle "BibTexPlug: processing $file\n" 104 105 if ($self->{'verbosity'}) > 1; -
trunk/gsdl/perllib/plugins/BookPlug.pm
r5924 r6332 103 103 sub process { 104 104 my $self = shift (@_); 105 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;105 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 106 106 my $outhandle = $self->{'outhandle'}; 107 107 108 print STDERR "<Processing n='$file' p='BookPlug'>\n" if ($gli); 108 109 print $outhandle "BookPlug: processing $file\n" 109 110 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r6079 r6332 159 159 160 160 my $self = shift (@_); 161 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;161 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 162 162 my $outhandle = $self->{'outhandle'}; 163 163 … … 172 172 } 173 173 174 175 print STDERR "<Processing n='$file' p='EMAILPlug'>\n" if ($gli); 174 176 175 177 print $outhandle "EMAILPlug: processing $file\n" -
trunk/gsdl/perllib/plugins/FOXPlug.pm
r5924 r6332 72 72 sub read { 73 73 my $self = shift (@_); 74 my ($pluginfo, $base_dir, $file, $metadata, $processor ) = @_;74 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 75 75 my $fullname = &util::filename_cat ($base_dir, $file); 76 76 … … 88 88 return undef; 89 89 } 90 91 print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli); 90 92 91 93 print STDERR "FOXPlug: processing $file\n"; -
trunk/gsdl/perllib/plugins/GAPlug.pm
r5680 r6332 75 75 76 76 my $outhandle = $self->{'outhandle'}; 77 print $outhandle "GAPLug: processing $self->{'file'}\n" ;77 print $outhandle "GAPLug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 78 78 } 79 79 -
trunk/gsdl/perllib/plugins/GMLPlug.pm
r5680 r6332 64 64 sub read { 65 65 my $self = shift (@_); 66 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;66 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 67 67 my $outhandle = $self->{'outhandle'}; 68 68 … … 76 76 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 77 77 78 print STDERR "<Processing n='$file' p='GMLPlug'>\n" if ($gli); 78 79 print $outhandle "GMLPlug: processing $file\n"; 79 80 -
trunk/gsdl/perllib/plugins/HBPlug.pm
r5924 r6332 227 227 sub read { 228 228 my $self = shift (@_); 229 my ($pluginfo, $base_dir, $file, $metadata, $processor ) = @_;229 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 230 230 my $outhandle = $self->{'outhandle'}; 231 231 … … 239 239 return undef unless -e $htmlfile; 240 240 241 print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli); 241 242 print $outhandle "HBPlug: processing $file\n"; 242 243 -
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r5924 r6332 194 194 sub process { 195 195 my $self = shift (@_); 196 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;196 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 197 197 my $outhandle = $self->{'outhandle'}; 198 199 print STDERR "<Processing n='$file' p='HTMLPlug'>\n" if ($gli); 200 198 201 print $outhandle "HTMLPlug: processing $file\n" 199 202 if $self->{'verbosity'} > 1; … … 648 651 if (defined $1) { 649 652 my $title = $1; 653 # Arg. This allows only ascii value characters in titles 650 654 if ($title =~ /\w/) { 651 655 $title =~ s/<[^>]*>/ /g; -
trunk/gsdl/perllib/plugins/ISISPlug.pm
r6138 r6332 135 135 { 136 136 my $self = shift (@_); 137 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;137 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 138 138 my $outhandle = $self->{'outhandle'}; 139 139 … … 142 142 143 143 # Report that we're processing the file 144 print STDERR "<Processing n='$file' p='ISISPlug'>\n" if ($gli); 144 145 print $outhandle "IsisPlug: processing $file\n" 145 146 if ($self->{'verbosity'}) > 1; -
trunk/gsdl/perllib/plugins/ImagePlug.pm
r5924 r6332 364 364 sub read { 365 365 my $self = shift (@_); 366 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;366 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 367 367 368 368 my $outhandle = $self->{'outhandle'}; … … 373 373 return undef; 374 374 } 375 376 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli); 375 377 print $outhandle "ImagePlug processing \"$filename\"\n" 376 378 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/IndexPlug.pm
r5919 r6332 90 90 sub read { 91 91 my $self = shift (@_); 92 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;92 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 93 93 my $outhandle = $self->{'outhandle'}; 94 94 … … 100 100 101 101 # found an index.txt file 102 print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli); 102 103 print $outhandle "IndexPlug: processing $indexfile\n"; 103 104 -
trunk/gsdl/perllib/plugins/MACROPlug.pm
r5680 r6332 141 141 sub process { 142 142 my $self = shift (@_); 143 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;143 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 144 144 my $outhandle = $self->{'outhandle'}; 145 145 146 print STDERR "<Processing n='$file' p='MACROPlug'>\n" if ($gli); 146 147 print $outhandle "MACROPlug: processing $file\n" 147 148 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/MARCPlug.pm
r5924 r6332 52 52 require MARC::Record; 53 53 require MARC::Batch; 54 #use MARC::Record; 55 #use MARC::Batch; 54 56 55 57 # sub print_usage { … … 211 213 sub process { 212 214 my $self = shift (@_); 213 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;215 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 214 216 my $outhandle = $self->{'outhandle'}; 215 217 … … 219 221 return undef; 220 222 } 223 224 print STDERR "<Processing n='$file' p='MARCPlug'>\n" if ($gli); 221 225 print $outhandle "MARCPlug: processing $file\n" 222 226 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/OAIPlug.pm
r6132 r6332 162 162 sub process { 163 163 my $self = shift (@_); 164 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;164 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 165 165 my $outhandle = $self->{'outhandle'}; 166 166 167 print STDERR "<Processing n='$file' p='OAIPlug'>\n" if ($gli); 167 168 print $outhandle "OAIPlug: processing $file\n" 168 169 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/RecPlug.pm
r5680 r6332 146 146 if (!parsargv::parse(\@_, 147 147 q^use_metadata_files^, \$self->{'use_metadata_files'}, 148 q^show_progress^, \$self->{'show_progress'}, # Undocumented (for GLI)149 148 "allow_extra_options")) { 150 149 print STDERR "\nRecPlug uses an incorrect option.\n"; … … 192 191 sub read { 193 192 my $self = shift (@_); 194 my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs ) = @_;193 my ($pluginfo, $base_dir, $file, $in_metadata, $processor, $maxdocs, $gli) = @_; 195 194 196 195 my $outhandle = $self->{'outhandle'}; … … 232 231 my (@dir, $subfile); 233 232 my $count = 0; 233 234 234 print $outhandle "RecPlug: getting directory $dirname\n" if ($verbosity); 235 235 … … 308 308 # Recursively read each $subfile 309 309 print $outhandle "RecPlug recurring: $subfile\n" if ($verbosity > 2); 310 print $outhandle "RecPlug - $subfile\n" if ($self->{'show_progress'});311 310 312 311 $count += &plugin::read ($pluginfo, $base_dir, 313 312 &util::filename_cat($file, $subfile), 314 $out_metadata, $processor, $maxdocs );313 $out_metadata, $processor, $maxdocs, $gli); 315 314 } 316 315 return $count; -
trunk/gsdl/perllib/plugins/ReferPlug.pm
r5924 r6332 110 110 sub process { 111 111 my $self = shift (@_); 112 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;112 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 113 113 my $outhandle = $self->{'outhandle'}; 114 114 … … 117 117 118 118 # Report that we're processing the file 119 print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli); 119 120 print $outhandle "ReferPlug: processing $file\n" 120 121 if ($self->{'verbosity'}) > 1; -
trunk/gsdl/perllib/plugins/RogPlug.pm
r5924 r6332 211 211 sub read { 212 212 my $self = shift (@_); 213 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;213 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 214 214 215 215 my $filename = &util::filename_cat($base_dir, $file); … … 219 219 my $gz = (defined $3) ? 1: 0; 220 220 221 print STDERR "<Processing n='$file' p='RogPlug'>\n" if ($gli); 221 222 print STDERR "RogPlug: processing $filename\n" if $processor->{'verbosity'}; 222 223 -
trunk/gsdl/perllib/plugins/SRCPlug.pm
r5924 r6332 118 118 sub process { 119 119 my $self = shift (@_); 120 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;120 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 121 121 my $outhandle = $self->{'outhandle'}; 122 122 123 print STDERR "<Processing n='$file' p='SRCPlug'>\n" if ($gli); 123 124 print $outhandle "SRCPlug: processing $file\n" 124 125 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/TEXTPlug.pm
r5924 r6332 95 95 sub process { 96 96 my $self = shift (@_); 97 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj ) = @_;97 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 98 98 my $outhandle = $self->{'outhandle'}; 99 99 100 print STDERR "<Processing n='$file' p='TEXTPlug'>\n" if ($gli); 100 101 print $outhandle "TEXTPlug: processing $file\n" 101 102 if $self->{'verbosity'} > 1; -
trunk/gsdl/perllib/plugins/UnknownPlug.pm
r6214 r6332 170 170 sub read { 171 171 my $self = shift (@_); 172 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs ) = @_;172 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_; 173 173 174 174 my $outhandle = $self->{'outhandle'}; … … 180 180 return undef; 181 181 } 182 print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli); 182 183 print $outhandle "UnknownPlug processing \"$filename\"\n" 183 184 if $self->{'verbosity'} > 1;
Note: See TracChangeset for help on using the changeset viewer.