- Timestamp:
- 2000-08-18T17:37:15+12:00 (24 years ago)
- Location:
- trunk/gsdl
- Files:
-
- 19 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/build
r1277 r1424 15 15 use parsargv; 16 16 use util; 17 17 use FileHandle; 18 18 19 19 if (!parsargv::parse(\@ARGV, 20 'remove_archives', \$remove_archives, 21 'remove_import', \$remove_import, 20 22 'buildtype/^(build|import)$/import', \$buildtype, 21 'maxdocs/^\-?\d+/-1', \$maxdocs)) { 23 'maxdocs/^\-?\d+/-1', \$maxdocs, 24 'download/.+', \@download, 25 'out/.*/STDERR', \$out)) { 26 22 27 &print_usage(); 23 28 die "\n"; … … 37 42 $maxdocs = "-maxdocs $maxdocs"; 38 43 } 39 40 44 41 45 my $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", $collection); … … 46 50 my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin"); 47 51 52 my $close_out = 0; 53 my $outfile = $out; 54 if ($out !~ /^(STDERR|STDOUT)$/i) { 55 open (OUT, ">$out") || die "Couldn't open output file $out\n"; 56 $out = "OUT"; 57 $close_out = 1; 58 } 59 $out->autoflush(1); 60 48 61 &main(); 62 63 close OUT if $close_out; 49 64 50 65 sub print_usage { 51 66 print STDERR "\n usage: $0 [options] collection-name\n\n"; 52 67 print STDERR " options:\n"; 68 print STDERR " -remove_archives Remove archives directory after successfully\n"; 69 print STDERR " building the collection.\n"; 70 print STDERR " -remove_import Remove import directory after successfully\n"; 71 print STDERR " importing the collection.\n"; 53 72 print STDERR " -buildtype build|import If 'build' attempt to build directly\n"; 54 73 print STDERR " from archives directory (bypassing import\n"; 55 74 print STDERR " stage). Defaults to 'import'\n"; 56 print STDERR " -maxdocs number Maximum number of documents to build\n\n"; 75 print STDERR " -maxdocs number Maximum number of documents to build\n"; 76 print STDERR " -download directory Directory (or file) to get import documents from.\n"; 77 print STDERR " There may be multiple download directories and they\n"; 78 print STDERR " may be of type http://, ftp://, or file://\n."; 79 print STDERR " Note that any existing import directory will be\n"; 80 print STDERR " deleted to make way for the downloaded data if\n"; 81 print STDERR " a -download option is supplied\n"; 82 print STDERR " -out Filename or handle to print output status to.\n"; 83 print STDERR " The default is STDERR\n\n"; 57 84 } 58 85 59 86 sub main { 87 88 # do the download thing if we have any -download options 89 if (scalar (@download)) { 90 # remove any existing import data 91 &util::rm_r ($importdir) if -d $importdir; 92 93 foreach $download_dir (@download) { 94 95 if ($download_dir =~ /^http:\/\//) { 96 # http download 97 98 } elsif ($download_dir =~ /^ftp:\/\//) { 99 # ftp download 100 101 } else { 102 # we assume anything not beginning with http:// or ftp:// 103 # is a file or directory on the local file system. 104 $download_dir =~ s/^file:\/\///; 105 106 if (-e $download_dir) { 107 # copy download_dir and all it contains to the import directory 108 my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl"); 109 $download_cmd .= " -out \"$outfile\" \"" . $download_dir . "\" " . $collection; 110 system ($download_cmd); 111 } else { 112 print $out "WARNING: $download_dir does not exist\n"; 113 } 114 } 115 } 116 } 60 117 61 118 if (-e &util::filename_cat ($archivedir, "archives.inf")) { … … 69 126 } else { 70 127 # there are archives but no import, build directly from archives 71 print STDERR"build: no import material was found, building directly\n";72 print STDERR" from archives\n";128 print $out "build: no import material was found, building directly\n"; 129 print $out " from archives\n"; 73 130 &gsdl_build(); 74 131 } … … 76 133 if (&has_content ($importdir)) { 77 134 if ($buildtype eq "build") { 78 print STDERR"build: can't build directly from archives as no\n";79 print STDERR" imported archives exist (did you forget to\n";80 print STDERR" move the contents of $collection/import to\n";81 print STDERR" collection/archives?)\n";135 print $out "build: can't build directly from archives as no\n"; 136 print $out " imported archives exist (did you forget to\n"; 137 print $out " move the contents of $collection/import to\n"; 138 print $out " collection/archives?)\n"; 82 139 } 83 140 &gsdl_import(); … … 85 142 } else { 86 143 # no import or archives 87 print STDERR"build: ERROR: The $collection collection has no import\n";88 print STDERR" or archives data. Try downloading an unbuilt version\n";89 print STDERR" of the collection from www.nzdl.org\n";144 print $out "build: ERROR: The $collection collection has no import\n"; 145 print $out " or archives data. Try downloading an unbuilt version\n"; 146 print $out " of the collection from www.nzdl.org\n"; 90 147 die "\n"; 91 148 } … … 95 152 sub gsdl_import { 96 153 97 print STDERR"importing the $collection collection\n\n";154 print $out "importing the $collection collection\n\n"; 98 155 99 156 my $import = &util::filename_cat ($bindir, "script", "import.pl"); 100 system ("perl $import -removeold $maxdocs $collection");157 system ("perl $import -removeold $maxdocs -out \"$outfile\" $collection"); 101 158 if (-e &util::filename_cat ($archivedir, "archives.inf")) { 102 print STDERR "$collection collection imported successfully\n\n"; 159 print $out "$collection collection imported successfully\n\n"; 160 if ($remove_import) { 161 print $out "removing import directory ($importdir)\n"; 162 &util::rm_r ($importdir); 163 } 103 164 } else { 104 165 die "\nimport.pl failed\n"; … … 108 169 sub gsdl_build { 109 170 110 print STDERR"building the $collection collection\n\n";171 print $out "building the $collection collection\n\n"; 111 172 112 173 my $buildcol = &util::filename_cat ($bindir, "script", "buildcol.pl"); 113 system ("perl $buildcol $maxdocs $collection");174 system ("perl $buildcol $maxdocs -out \"$outfile\" $collection"); 114 175 if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") || 115 176 -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) { 116 print STDERR "$collection collection built successfully\n\n"; 177 print $out "$collection collection built successfully\n\n"; 178 if ($remove_archives) { 179 print $out "removing archives directory ($archivedir)\n"; 180 &util::rm_r ($archivedir); 181 } 117 182 } else { 118 183 die "\nbuildcol.pl failed\n"; … … 121 186 # replace old indexes with new ones 122 187 if (&has_content ($indexdir)) { 123 print STDERR"removing old indexes\n";188 print $out "removing old indexes\n"; 124 189 &util::rm_r ($indexdir); 125 190 } -
trunk/gsdl/bin/script/buildcol.pl
r1383 r1424 25 25 # 26 26 ########################################################################### 27 28 package buildcol; 27 29 28 30 BEGIN { … … 37 39 use parsargv; 38 40 use util; 41 use FileHandle; 39 42 40 43 &main(); … … 58 61 print STDERR " collection. This relies on the Gimp being\n"; 59 62 print STDERR " installed along with relevant perl modules\n"; 60 print STDERR " to allow scripting from perl\n\n"; 63 print STDERR " to allow scripting from perl\n"; 64 print STDERR " -out Filename or handle to print output status to.\n"; 65 print STDERR " The default is STDERR\n\n"; 61 66 } 62 67 … … 66 71 my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs, 67 72 $debug, $mode, $indexname, $keepold, $allclassifications, 68 $create_images );73 $create_images, $out); 69 74 if (!parsargv::parse(\@ARGV, 70 75 'verbosity/\d+/2', \$verbosity, … … 78 83 'keepold', \$keepold, 79 84 'allclassifications', \$allclassifications, 80 'create_images', \$create_images)) { 85 'create_images', \$create_images, 86 'out/.*/STDERR', \$out)) { 81 87 &print_usage(); 82 88 die "\n"; 83 89 } 90 91 my $close_out = 0; 92 if ($out !~ /^(STDERR|STDOUT)$/i) { 93 open (OUT, ">>$out") || die "Couldn't open output file $out\n"; 94 $out = "buildcol::OUT"; 95 $close_out = 1; 96 } 97 $out->autoflush(1); 84 98 85 99 # get and check the collection … … 127 141 # update the archive cache if needed 128 142 if ($cachedir) { 129 print STDERR"Updating archive cache\n" if ($verbosity >= 1);143 print $out "Updating archive cache\n" if ($verbosity >= 1); 130 144 131 145 $cachedir =~ s/[\\\/]+$//; … … 162 176 eval("\$builder = new $buildertype(\$collection, " . 163 177 "\$realarchivedir, \$realbuilddir, \$verbosity, " . 164 "\$maxdocs, \$debug, \$keepold, \$allclassifications )");178 "\$maxdocs, \$debug, \$keepold, \$allclassifications, \$out)"); 165 179 die "$@" if $@; 166 180 … … 186 200 187 201 if (($realbuilddir ne $builddir) && !$debug) { 188 print STDERR"Copying back the cached build\n" if ($verbosity >= 1);202 print $out "Copying back the cached build\n" if ($verbosity >= 1); 189 203 &util::rm_r ($builddir); 190 204 &util::cp_r ($realbuilddir, $builddir); 191 205 } 206 207 close OUT if $close_out; 192 208 } 193 209 … … 197 213 my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl"); 198 214 if (!-e $image_script) { 199 print STDERR"WARNING: Image making script ($image_script) could not be found\n";200 print STDERR" Default images will not be generated\n\n";215 print $out "WARNING: Image making script ($image_script) could not be found\n"; 216 print $out " Default images will not be generated\n\n"; 201 217 return; 202 218 } … … 213 229 # to be changed when the config file format changes) 214 230 if (!open (CFGFILE, $configfilename)) { 215 print STDERR"WARNING: Couldn't open config file ($configfilename)\n";216 print STDERR" for updating so collection images may not be linked correctly\n";231 print $out "WARNING: Couldn't open config file ($configfilename)\n"; 232 print $out " for updating so collection images may not be linked correctly\n"; 217 233 return; 218 234 } … … 236 252 237 253 if (!open (CFGFILE, ">$configfilename")) { 238 print STDERR"WARNING: Couldn't open config file ($configfilename)\n";239 print STDERR" for updating so collection images may not be linked correctly\n";254 print $out "WARNING: Couldn't open config file ($configfilename)\n"; 255 print $out " for updating so collection images may not be linked correctly\n"; 240 256 return; 241 257 } -
trunk/gsdl/bin/script/filecopy.pl
r1179 r1424 35 35 36 36 use util; 37 use parsargv; 37 38 use File::stat; 39 use FileHandle; 38 40 39 sub print_usage 40 { 41 print STDERR "\n usage: $0 [filenames] [directories] collection-name\n\n"; 41 sub print_usage { 42 print STDERR "\n usage: $0 [options] [directories] collection-name\n\n"; 43 44 print STDERR " options:\n"; 45 print STDERR " -out Filename or handle to print output status to.\n"; 46 print STDERR " The default is STDERR\n\n"; 42 47 } 43 44 48 45 49 sub download_files … … 70 74 else 71 75 { 72 print STDERR"Error: filename '$a' does not exist\n";76 print $out "Error: filename '$a' does not exist\n"; 73 77 } 74 78 } … … 101 105 if ($do_copy eq "yes") 102 106 { 103 print STDOUT"Copying $src_file-->$dst_file\n";107 print $out "Copying $src_file-->$dst_file\n"; 104 108 &util::cp($src_file,$dst_file); 105 109 } … … 124 128 if (!opendir (INDIR, $d)) 125 129 { 126 print STDERR"Error: Could not open directory $d\n";130 print $out "Error: Could not open directory $d\n"; 127 131 } 128 132 else … … 140 144 sub main 141 145 { 142 if (scalar(@ARGV)<2) 143 { 144 print_usage(); 145 exit(1); 146 if (!parsargv::parse(\@ARGV, 'out/.*/STDERR', \$out)) { 147 &print_usage(); 148 die "\n"; 146 149 } 147 150 151 my $close_out = 0; 152 if ($out !~ /^(STDERR|STDOUT)$/i) { 153 open (OUT, ">>$out") || die "Couldn't open output file $out\n"; 154 $out = "MAIN::OUT"; 155 $close_out = 1; 156 } 157 $out->autoflush(1); 158 148 159 download_files(@ARGV); 160 161 close OUT if $close_out; 149 162 return 0; 150 163 } -
trunk/gsdl/bin/script/import.pl
r1287 r1424 28 28 29 29 # This program will import a number of files into a particular collection 30 31 package import; 30 32 31 33 BEGIN { … … 37 39 } 38 40 39 use strict;40 41 use arcinfo; 41 42 use colcfg; … … 44 45 use util; 45 46 use parsargv; 47 use FileHandle; 46 48 47 49 sub print_usage { … … 62 64 print STDERR " -sortmeta metadata Sort documents alphabetically by metadata for\n"; 63 65 print STDERR " building. This will be disabled if groupsize > 1\n"; 64 print STDERR " -debug Print imported text to STDOUT\n\n"; 66 print STDERR " -debug Print imported text to STDOUT\n"; 67 print STDERR " -out Filename or handle to print output status to.\n"; 68 print STDERR " The default is STDERR\n\n"; 65 69 } 66 70 67 68 &main (); 71 &main(); 69 72 70 73 sub main { … … 72 75 $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection, 73 76 $configfilename, $collectcfg, $pluginfo, $sortmeta, 74 $archive_info_filename, $archive_info, $processor );77 $archive_info_filename, $archive_info, $processor, $out); 75 78 if (!parsargv::parse(\@ARGV, 76 79 'verbosity/\d+/2', \$verbosity, … … 83 86 'sortmeta/.*/', \$sortmeta, 84 87 'debug', \$debug, 85 'maxdocs/^\-?\d+/-1', \$maxdocs)) { 88 'maxdocs/^\-?\d+/-1', \$maxdocs, 89 'out/.*/STDERR', \$out)) { 86 90 &print_usage(); 87 91 die "\n"; 88 92 } 93 94 my $close_out = 0; 95 if ($out !~ /^(STDERR|STDOUT)$/i) { 96 open (OUT, ">>$out") || die "Couldn't open output file $out\n"; 97 $out = 'import::OUT'; 98 $close_out = 1; 99 } 100 $out->autoflush(1); 89 101 90 102 # set removeold to false if it has been defined … … 100 112 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 101 113 if (defined $sortmeta && $groupsize > 1) { 102 print STDERR"WARNING: import.pl cannot sort documents when groupsize > 1\n";103 print STDERR" sortmeta option will be ignored\n\n";114 print $out "WARNING: import.pl cannot sort documents when groupsize > 1\n"; 115 print $out " sortmeta option will be ignored\n\n"; 104 116 $sortmeta = undef; 105 117 } … … 148 160 149 161 # load all the plugins 150 $pluginfo = &plugin::load_plugins ($plugins, $verbosity );162 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out); 151 163 if (scalar(@$pluginfo) == 0) { 152 print STDERR"No plugins were loaded.\n";164 print $out "No plugins were loaded.\n"; 153 165 die "\n"; 154 166 } … … 156 168 # remove the old contents of the archives directory if needed 157 169 if ($removeold && -e $archivedir) { 158 print STDERR"Warning - removing current contents of the archives directory\n";159 print STDERR" in preparation for the import\n";170 print $out "Warning - removing current contents of the archives directory\n"; 171 print $out " in preparation for the import\n"; 160 172 sleep(5); # just in case... 161 173 &util::rm_r ($archivedir); … … 169 181 170 182 # create a docsave object to process the documents 171 $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize );183 $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize, $out); 172 184 $processor->setarchivedir ($archivedir); 173 185 $processor->set_sortmeta ($sortmeta) if defined $sortmeta; … … 188 200 $archive_info->save_info($archive_info_filename); 189 201 } 202 close OUT if $close_out; 190 203 } 191 192 193 194 195 -
trunk/gsdl/perllib/docsave.pm
r1287 r1424 40 40 41 41 sub new { 42 my ($class, $collection,$archive_info,$verbosity,$gzip,$groupsize) = @_; 42 my ($class, $collection, $archive_info, $verbosity, 43 $gzip, $groupsize, $outhandle) = @_; 43 44 my $self = new docproc (); 44 45 45 46 46 47 $groupsize=1 unless defined $groupsize; 47 48 $self->{'collection'} = $collection; … … 52 53 $self->{'groupsize'} = $groupsize; 53 54 $self->{'gs_count'} = 0; 54 55 56 $self->{'outhandle'} = STDERR; 57 $self->{'outhandle'} = $outhandle if defined $outhandle; 58 55 59 # set a default for the archive directory 56 60 $self->{'archive_dir'} = "$ENV{'GSDLHOME'}/collect/$self->{'collection'}/archives"; 57 61 58 62 $self->{'sortmeta'} = undef; 59 63 60 64 return bless $self, $class; 61 65 } … … 64 68 my $self = shift (@_); 65 69 my ($archive_dir) = @_; 66 70 67 71 $self->{'archive_dir'} = $archive_dir; 68 72 } … … 71 75 my $self = shift (@_); 72 76 my ($sortmeta) = @_; 73 77 74 78 $self->{'sortmeta'} = $sortmeta; 75 79 } … … 78 82 my $self = shift (@_); 79 83 my ($doc_obj) = @_; 80 84 85 my $outhandle = $self->{'outhandle'}; 86 81 87 if ($self->{'groupsize'} > 1) { 82 88 $self->group_process ($doc_obj); 83 89 84 90 } else { 85 91 # groupsize is 1 (i.e. one document per GML file) so sortmeta 86 92 # may be used 87 93 88 94 my $OID = $doc_obj->get_OID(); 89 95 $OID = "NULL" unless defined $OID; … … 91 97 # get document's directory 92 98 my $doc_dir = $self->get_doc_dir ($OID); 93 99 94 100 # copy all the associated files, add this information as metadata 95 101 # to the document 96 102 $self->process_assoc_files ($doc_obj, $doc_dir); 97 103 98 104 my $doc_file 99 105 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml"); 100 106 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml"); 101 107 102 108 if (!open (OUTDOC, ">$doc_file")) { 103 print STDERR"docsave::process could not write to file $doc_file\n";109 print $outhandle "docsave::process could not write to file $doc_file\n"; 104 110 return; 105 111 } … … 115 121 $short_doc_file .= ".gz"; 116 122 if (!-e $doc_file) { 117 print STDERR"error while gzipping: $doc_file doesn't exist\n";123 print $outhandle "error while gzipping: $doc_file doesn't exist\n"; 118 124 return 0; 119 125 } … … 134 140 my $self = shift (@_); 135 141 my ($doc_obj) = @_; 142 143 my $outhandle = $self->{'outhandle'}; 136 144 137 145 my $OID = $doc_obj->get_OID(); … … 165 173 166 174 if (!open (OUTDOC, ">$doc_file")) { 167 print STDERR"docsave::group_process could not write to file $doc_file\n";175 print $outhandle "docsave::group_process could not write to file $doc_file\n"; 168 176 return; 169 177 } … … 219 227 my ($doc_obj, $doc_dir) = @_; 220 228 229 my $outhandle = $self->{'outhandle'}; 230 221 231 my @assoc_files = (); 222 232 foreach $assoc_file (@{$doc_obj->get_assoc_files()}) { … … 230 240 "$afile:$assoc_file->[2]:$dir"); 231 241 } else { 232 print STDERR"docsave::process couldn't copy the associated file " .242 print $outhandle "docsave::process couldn't copy the associated file " . 233 243 "$assoc_file->[0] to $afile\n"; 234 244 } … … 240 250 { 241 251 my ($self) = @_; 242 252 243 253 close OUTDOC; 244 254 … … 252 262 $short_doc_file .= ".gz"; 253 263 if (!-e $doc_file) { 254 print STDERR "error while gzipping: $doc_file doesn't exist\n"; 264 my $outhandle = $self->{'outhandle'}; 265 print $outhandle "error while gzipping: $doc_file doesn't exist\n"; 255 266 return 0; 256 267 } -
trunk/gsdl/perllib/mgbuilder.pm
r1304 r1424 61 61 sub new { 62 62 my ($class, $collection, $source_dir, $build_dir, $verbosity, 63 $maxdocs, $debug, $keepold, $allclassifications) = @_; 63 $maxdocs, $debug, $keepold, $allclassifications, $outhandle) = @_; 64 65 $outhandle = STDERR unless defined $outhandle; 64 66 65 67 # create an mgbuilder object … … 72 74 'keepold'=>$keepold, 73 75 'allclassifications'=>$allclassifications, 76 'outhandle'=>$outhandle, 74 77 'notbuilt'=>[] # indexes not built 75 78 }, $class; … … 112 115 113 116 # load all the plugins 114 $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity );117 $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle); 115 118 if (scalar(@{$self->{'pluginfo'}}) == 0) { 116 print STDERR"No plugins were loaded.\n";119 print $outhandle "No plugins were loaded.\n"; 117 120 die "\n"; 118 121 } … … 149 152 150 153 eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " . 151 "\$source_dir, \$build_dir, \$verbosity )");154 "\$source_dir, \$build_dir, \$verbosity, \$outhandle)"); 152 155 die "$@" if $@; 153 156 … … 176 179 my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe"); 177 180 my $mg_compression_dict_exe = &util::filename_cat($exedir, "mg_compression_dict$exe"); 181 my $outhandle = $self->{'outhandle'}; 178 182 179 183 &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, "text")); … … 188 192 } 189 193 190 print STDERR"\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);194 print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1); 191 195 192 196 # collect the statistics for the text 193 197 # -b $maxdocsize sets the maximum document size to be 12 meg 194 print STDERR"\n collecting text statistics\n" if ($self->{'verbosity'} >= 1);198 print $outhandle "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1); 195 199 196 200 my ($handle); … … 226 230 # words being put into the dictionary first (-2 -k 5120) 227 231 if (!$self->{'debug'}) { 228 print STDERR"\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1);232 print $outhandle "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1); 229 233 if (!-e "$mg_compression_dict_exe") { 230 234 die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n"; … … 241 245 $self->{'buildproc'}->reset(); 242 246 # compress the text 243 print STDERR"\n compressing the text\n" if ($self->{'verbosity'} >= 1);247 print $outhandle "\n compressing the text\n" if ($self->{'verbosity'} >= 1); 244 248 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, 245 249 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); … … 268 272 my $self = shift (@_); 269 273 my ($indexname) = @_; 274 my $outhandle = $self->{'outhandle'}; 270 275 271 276 my $indexes = []; … … 283 288 foreach $index (@$indexes) { 284 289 if ($self->want_built($index)) { 285 print STDERR"\n*** building index $index in subdirectory " .290 print $outhandle "\n*** building index $index in subdirectory " . 286 291 "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1); 287 292 $self->build_index($index); 288 293 } else { 289 print STDERR"\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);294 print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1); 290 295 } 291 296 } … … 413 418 my $self = shift (@_); 414 419 my ($index) = @_; 420 my $outhandle = $self->{'outhandle'}; 415 421 416 422 # get the full index directory path and make sure it exists … … 472 478 473 479 # Build index dictionary. Uses verbatim stem method 474 print STDERR"\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);480 print $outhandle "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1); 475 481 my ($handle); 476 482 if ($self->{'debug'}) { … … 513 519 514 520 # invert the text 515 print STDERR"\n inverting the text\n" if ($self->{'verbosity'} >= 1);521 print $outhandle "\n inverting the text\n" if ($self->{'verbosity'} >= 1); 516 522 517 523 $self->{'buildproc'}->reset(); … … 526 532 527 533 # create the weights file 528 print STDERR"\n create the weights file\n" if ($self->{'verbosity'} >= 1);534 print $outhandle "\n create the weights file\n" if ($self->{'verbosity'} >= 1); 529 535 if (!-e "$mg_weights_build_exe") { 530 536 die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n"; … … 533 539 534 540 # create 'on-disk' stemmed dictionary 535 print STDERR"\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1);541 print $outhandle "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1); 536 542 if (!-e "$mg_invf_dict_exe") { 537 543 die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n"; … … 541 547 542 548 # creates stem index files for the various stemming methods 543 print STDERR"\n creating stem indexes\n" if ($self->{'verbosity'} >= 1);549 print $outhandle "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1); 544 550 if (!-e "$mg_stem_idx_exe") { 545 551 die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n"; … … 559 565 if (defined $suffix && !defined $wanted_index_files{$suffix}) { 560 566 # delete it! 561 print STDERR"deleting $file\n" if $self->{'verbosity'} > 2;567 print $outhandle "deleting $file\n" if $self->{'verbosity'} > 2; 562 568 &util::rm (&util::filename_cat ($tmpdir, $file)); 563 569 } … … 569 575 sub make_infodatabase { 570 576 my $self = shift (@_); 577 my $outhandle = $self->{'outhandle'}; 578 571 579 my $textdir = &util::filename_cat($self->{'build_dir'}, "text"); 572 580 my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc"); … … 584 592 my $txt2db_exe = &util::filename_cat($exedir, "txt2db$exe"); 585 593 586 print STDERR"\n*** creating the info database and processing associated files\n"594 print $outhandle "\n*** creating the info database and processing associated files\n" 587 595 if ($self->{'verbosity'} >= 1); 588 596 … … 624 632 $self->{'collect_cfg'}->{'collectionmeta'}->{".$cmeta"} . "\n"; 625 633 } else { 626 print STDERR"mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";634 print $outhandle "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n"; 627 635 } 628 636 } else { … … 652 660 my ($index); 653 661 my %build_cfg = (); 654 655 print STDERR "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1); 662 my $outhandle = $self->{'outhandle'}; 663 664 print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1); 656 665 657 666 # get the text directory … … 671 680 my $input_file = &util::filename_cat ("text", $self->{'collection'}); 672 681 if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) { 673 print STDERR"Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";682 print $outhandle "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n"; 674 683 } else { 675 684 my $line = ""; … … 721 730 my $self = shift (@_); 722 731 732 my $outhandle = $self->{'outhandle'}; 723 733 my $indexing_text = $self->{'buildproc'}->get_indexing_text(); 724 734 my $index = $self->{'buildproc'}->get_index(); … … 727 737 728 738 if ($indexing_text) { 729 print STDERR"Stats (Creating index $index)\n";730 } else { 731 print STDERR"Stats (Compressing text from $index)\n";732 } 733 print STDERR"Total bytes in collection: $num_bytes\n";734 print STDERR"Total bytes in $index: $num_processed_bytes\n";739 print $outhandle "Stats (Creating index $index)\n"; 740 } else { 741 print $outhandle "Stats (Compressing text from $index)\n"; 742 } 743 print $outhandle "Total bytes in collection: $num_bytes\n"; 744 print $outhandle "Total bytes in $index: $num_processed_bytes\n"; 735 745 736 746 if ($num_processed_bytes < 50) { 737 print STDERR"***************\n";738 print STDERR"WARNING: There is very little or no text to process for $index\n";747 print $outhandle "***************\n"; 748 print $outhandle "WARNING: There is very little or no text to process for $index\n"; 739 749 if ($indexing_text) { 740 print STDERR"This may cause an error while attempting to build the index\n";750 print $outhandle "This may cause an error while attempting to build the index\n"; 741 751 } else { 742 print STDERR"This may cause an error while attempting to compress the text\n";743 } 744 print STDERR"***************\n";752 print $outhandle "This may cause an error while attempting to compress the text\n"; 753 } 754 print $outhandle "***************\n"; 745 755 } 746 756 } -
trunk/gsdl/perllib/mgbuildproc.pm
r1251 r1424 41 41 42 42 sub new { 43 my ($class, $collection, $source_dir, $build_dir, $verbosity) = @_; 43 my ($class, $collection, $source_dir, $build_dir, 44 $verbosity, $outhandle) = @_; 44 45 my $self = new docproc (); 46 47 # outhandle is where all the debugging info goes 48 # output_handle is where the output of the plugins is piped 49 # to (i.e. mg, gdbm etc.) 50 $outhandle = STDERR unless defined $outhandle; 45 51 46 52 $self->{'collection'} = $collection; … … 59 65 $self->{'num_bytes'} = 0; 60 66 $self->{'num_processed_bytes'} = 0; 67 $self->{'outhandle'} = $outhandle; 61 68 62 69 $self->{'indexing_text'} = 0; -
trunk/gsdl/perllib/plugin.pm
r1244 r1424 29 29 30 30 sub load_plugins { 31 my ($plugin_list, $verbosity ) = @_;31 my ($plugin_list, $verbosity, $outhandle) = @_; 32 32 my @plugin_objects = (); 33 33 … … 56 56 57 57 # initialize plugin 58 $plugobj->init($verbosity );58 $plugobj->init($verbosity, $outhandle); 59 59 60 60 # add this object to the list -
trunk/gsdl/perllib/plugins/ArcPlug.pm
r1244 r1424 39 39 } 40 40 41 use strict;42 43 41 sub new { 44 42 my ($class) = @_; … … 61 59 my $self = shift (@_); 62 60 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 61 my $outhandle = $self->{'outhandle'}; 63 62 64 63 my $count = 0; … … 70 69 71 70 # found an archives.inf file 72 print STDERR"ArcPlug: processing $archive_info_filename\n";71 print $outhandle "ArcPlug: processing $archive_info_filename\n"; 73 72 74 73 # read in the archives information file -
trunk/gsdl/perllib/plugins/BasPlug.pm
r1411 r1424 31 31 use acronym; 32 32 use textcat; 33 use strict;34 33 use doc; 35 34 use diagnostics; … … 85 84 my $self = {}; 86 85 my $encodings = "^(iso_8859_1|Latin1|ascii|gb|iso_8859_6|windows_1256|Arabic|utf8|unicode)\$"; 86 $self->{'outhandle'} = STDERR; 87 87 my $year = (localtime)[5]+1900; 88 88 89 # general options available to all plugins 89 90 if (!parsargv::parse(\@_, … … 114 115 sub init { 115 116 my $self = shift (@_); 116 my ($verbosity ) = @_;117 my ($verbosity, $outhandle) = @_; 117 118 118 119 # verbosity is passed through from the processor 119 120 $self->{'verbosity'} = $verbosity; 121 122 # as is the outhandle ... 123 $self->{'outhandle'} = $outhandle if defined $outhandle; 120 124 121 125 # set process_exp and block_exp to defaults unless they were … … 209 213 210 214 if ($text !~ /\w/) { 211 print STDERR "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'}; 215 my $outhandle = $self->{'outhandle'}; 216 print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'}; 212 217 return 0; 213 218 } … … 387 392 388 393 # extract acronyms from a section in a document. progress is 389 # reported to STDERRbased on the verbosity. both the Acronym394 # reported to outhandle based on the verbosity. both the Acronym 390 395 # and the AcronymKWIC metadata items are created. 391 396 … … 393 398 my $self = shift (@_); 394 399 my ($textref, $doc_obj, $thissection) = @_; 395 396 print STDERR " extracting acronyms ...\n" 400 my $outhandle = $self->{'outhandle'}; 401 402 print $outhandle " extracting acronyms ...\n" 397 403 if ($self->{'verbosity'} >= 2); 398 404 … … 408 414 { 409 415 $seen_before = "true"; 410 print STDERR" already seen ". $acro->to_string() . "\n"416 print $outhandle " already seen ". $acro->to_string() . "\n" 411 417 if ($self->{'verbosity'} >= 2); 412 418 } … … 420 426 #do the normal acronym 421 427 $doc_obj->add_utf8_metadata($thissection, "Acronym", $acro->to_string()); 422 print STDERR" adding ". $acro->to_string() . "\n"428 print $outhandle " adding ". $acro->to_string() . "\n" 423 429 if ($self->{'verbosity'} >= 1); 424 430 … … 432 438 } 433 439 } 434 print STDERR" done extracting acronyms. \n"440 print $outhandle " done extracting acronyms. \n" 435 441 if ($self->{'verbosity'} >= 2); 436 442 } … … 439 445 my $self = shift (@_); 440 446 my ($text, $doc_obj, $thissection) = @_; 441 442 print STDERR " marking up acronyms ...\n" 447 my $outhandle = $self->{'outhandle'}; 448 449 print $outhandle " marking up acronyms ...\n" 443 450 if ($self->{'verbosity'} >= 2); 444 451 … … 446 453 $text = &acronym::markup_acronyms($text, $self); 447 454 448 print STDERR" done marking up acronyms. \n"455 print $outhandle " done marking up acronyms. \n" 449 456 if ($self->{'verbosity'} >= 2); 450 457 -
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r1244 r1424 70 70 } 71 71 72 use strict;73 74 72 # Create a new EMAILPlug object with which to parse a file. 75 73 # Accomplished by creating a new BasPlug and using bless to … … 93 91 my $self = shift (@_); 94 92 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 95 93 my $outhandle = $self->{'outhandle'}; 94 96 95 # Check that we're dealing with a valid mail file 97 96 return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/)); 98 97 99 print STDERR"EMAILPlug: processing $file\n"98 print $outhandle "EMAILPlug: processing $file\n" 100 99 if $self->{'verbosity'} > 1; 101 100 -
trunk/gsdl/perllib/plugins/GMLPlug.pm
r1401 r1424 37 37 } 38 38 39 use strict;40 41 39 sub new { 42 40 my ($class) = @_; … … 58 56 my $self = shift (@_); 59 57 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 58 my $outhandle = $self->{'outhandle'}; 60 59 61 60 my $filename = &util::filename_cat($base_dir, $file); … … 66 65 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 67 66 68 print STDERR"GMLPlug: processing $file\n";67 print $outhandle "GMLPlug: processing $file\n"; 69 68 70 69 my $parent_dir = $file; … … 73 72 74 73 if (!open (INFILE, $filename)) { 75 print STDERR"GMLPlug::read - couldn't read $filename\n";74 print $outhandle "GMLPlug::read - couldn't read $filename\n"; 76 75 return 0; 77 76 } … … 107 106 108 107 } else { 109 print STDERR"GMLPlug::read - error in file $filename\n";110 print STDERR"text: \"$gml\"\n";108 print $outhandle "GMLPlug::read - error in file $filename\n"; 109 print $outhandle "text: \"$gml\"\n"; 111 110 last; 112 111 } -
trunk/gsdl/perllib/plugins/HBPlug.pm
r1244 r1424 59 59 sub init { 60 60 my $self = shift (@_); 61 my ($verbosity ) = @_;62 63 $self->BasPlug::init( );61 my ($verbosity, $outhandle) = @_; 62 63 $self->BasPlug::init($verbosity, $outhandle); 64 64 65 65 # this plugin only handles ascii encodings … … 85 85 # load in the file 86 86 if (!open (FILE, $htmlfile)) { 87 print STDERR "ERROR - could not open $htmlfile\n"; 87 my $outhandle = $self->{'outhandle'}; 88 print $outhandle "ERROR - could not open $htmlfile\n"; 88 89 return; 89 90 } … … 105 106 my $self = shift (@_); 106 107 my ($foundbody, $text, $handle) = @_; 108 my $outhandle = $self->{'outhandle'}; 107 109 108 110 my $line = ""; … … 120 122 if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) { 121 123 my $font = $1; 122 print STDERR"HBPlug::HB_gettext - warning removed font $font\n"124 print $outhandle "HBPlug::HB_gettext - warning removed font $font\n" 123 125 if ($font !~ /^arial$/i); 124 126 } … … 206 208 my $self = shift (@_); 207 209 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 210 my $outhandle = $self->{'outhandle'}; 208 211 209 212 # get the html filename and see if this is an HTML Book... … … 216 219 return undef unless -e $htmlfile; 217 220 218 print STDERR"HBPlug: processing $file\n";221 print $outhandle "HBPlug: processing $file\n"; 219 222 220 223 # read in the file and do basic html cleaning (removing header etc) … … 270 273 } 271 274 if ($curtoclevel+1 < $toclevel) { 272 print STDERR"WARNING - jump in toc levels in $htmlfile " .275 print $outhandle "WARNING - jump in toc levels in $htmlfile " . 273 276 "from $curtoclevel to $toclevel\n"; 274 277 } … … 297 300 } 298 301 } else { 299 print STDERR"WARNING - leftover text\n" , $self->shorten($html),302 print $outhandle "WARNING - leftover text\n" , $self->shorten($html), 300 303 "\nin $htmlfile\n"; 301 304 last; -
trunk/gsdl/perllib/plugins/HBSPlug.pm
r1244 r1424 56 56 } 57 57 58 use strict;59 60 58 sub new { 61 59 my ($class) = @_; … … 81 79 my $self = shift (@_); 82 80 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 83 84 print STDERR "HBSPlug: processing $file\n" 81 my $outhandle = $self->{'outhandle'}; 82 83 print $outhandle "HBSPlug: processing $file\n" 85 84 if $self->{'verbosity'} > 1; 86 85 … … 122 121 } 123 122 if ($curtoclevel+1 < $toclevel) { 124 print STDERR"WARNING - jump in toc levels in $filename " .123 print $outhandle "WARNING - jump in toc levels in $filename " . 125 124 "from $curtoclevel to $toclevel\n"; 126 125 } … … 166 165 167 166 sub replace_image_links { 168 169 167 my ($dir, $doc_obj, $front, $link, $back) = @_; 168 my $outhandle = $self->{'outhandle'}; 170 169 171 170 my ($filename, $error); … … 177 176 if ($imagetype eq "jpg") {$imagetype = "jpeg";} 178 177 if ($imagetype !~ /^(jpeg|gif|png)$/) { 179 print STDERR"HBSPlug: Warning - unknown image type ($imagetype)\n";178 print $outhandle "HBSPlug: Warning - unknown image type ($imagetype)\n"; 180 179 } 181 180 my ($imagefile) = $link =~ /([^\/]*)$/; … … 199 198 $foundimage = 1; 200 199 } elsif (defined $error) { 201 print STDERR"$error $filename\n";200 print $outhandle "$error $filename\n"; 202 201 } else { 203 print STDERR"HBSPlug: Warning - couldn't find image file $imagefile in $filename\n";202 print $outhandle "HBSPlug: Warning - couldn't find image file $imagefile in $filename\n"; 204 203 } 205 204 } -
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r1410 r1424 46 46 @ISA = ('ConvertToBasPlug'); 47 47 } 48 49 use strict;50 48 51 49 sub print_usage { … … 121 119 my $self = shift (@_); 122 120 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 123 124 print STDERR "HTMLPlug: processing $file\n" 121 my $outhandle = $self->{'outhandle'} 122 123 print $outhandle "HTMLPlug: processing $file\n" 125 124 if $self->{'verbosity'} > 1; 126 125 … … 264 263 $hash_part = "" if !defined $hash_part; 265 264 if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) { 266 print STDERR "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n" 265 my $outhandle = $self->{'outhandle'}; 266 print $outhandle "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n" 267 267 if $self->{'verbosity'}; 268 268 return ($link, "", 0); -
trunk/gsdl/perllib/plugins/IndexPlug.pm
r1244 r1424 62 62 } 63 63 64 use strict;65 66 64 sub new { 67 65 my ($class) = @_; … … 84 82 my $self = shift (@_); 85 83 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 84 my $outhandle = $self->{'outhandle'}; 86 85 87 86 my $indexfile = &util::filename_cat($base_dir, $file, "index.txt"); … … 92 91 93 92 # found an index.txt file 94 print STDERR"IndexPlug: processing $indexfile\n";93 print $outhandle "IndexPlug: processing $indexfile\n"; 95 94 96 95 # read in the index.txt -
trunk/gsdl/perllib/plugins/RecPlug.pm
r1244 r1424 38 38 } 39 39 40 use strict;41 42 40 sub new { 43 41 my ($class) = @_; … … 63 61 my $self = shift (@_); 64 62 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 63 my $outhandle = $self->{'outhandle'}; 65 64 66 65 foreach my $etd ( @{$self->{'exclude_tail_dirs'}} ) … … 80 79 if (!opendir (DIR, $dirname)) 81 80 { 82 print STDERR"RecPlug: WARNING - couldn't read directory $dirname\n";81 print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n"; 83 82 return; 84 83 } … … 87 86 closedir (DIR); 88 87 89 print STDERR"RecPlug: getting directory $dirname\n";88 print $outhandle "RecPlug: getting directory $dirname\n"; 90 89 91 90 # process each file -
trunk/gsdl/perllib/plugins/TEXTPlug.pm
r1410 r1424 35 35 } 36 36 37 use strict;38 39 37 sub new { 40 38 my ($class) = @_; … … 57 55 my $self = shift (@_); 58 56 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 59 60 print STDERR "TEXTPlug: processing $file\n" 57 my $outhandle = $self->{'outhandle'}; 58 59 print $outhandle "TEXTPlug: processing $file\n" 61 60 if $self->{'verbosity'} > 1; 62 61 -
trunk/gsdl/perllib/plugins/ZIPPlug.pm
r1269 r1424 53 53 } 54 54 55 use strict;56 57 55 sub new { 58 56 my ($class) = @_; … … 75 73 my $self = shift (@_); 76 74 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 75 my $outhandle = $self->{'outhandle'}; 77 76 78 77 if ($file =~ /\.(gz|tgz|z|taz|bz|zip|tar)$/i) { … … 80 79 my $filename = &util::filename_cat ($base_dir, $file); 81 80 if (!-e $filename) { 82 print STDERR"ZIPPLug: WARNING: $filename does not exist\n";81 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n"; 83 82 return undef; 84 83 } … … 88 87 &util::mk_all_dir ($tmpdir); 89 88 90 print STDERR"ZIPPlug: extracting $file_only to $tmpdir\n";89 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"; 91 90 92 91 # save current working directory
Note:
See TracChangeset
for help on using the changeset viewer.