Changeset 1424 for trunk


Ignore:
Timestamp:
2000-08-18T17:37:15+12:00 (24 years ago)
Author:
sjboddie
Message:

Added a -out option to most of the perl building scripts to allow output
debug information to be directed to a file.

Location:
trunk/gsdl
Files:
19 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/build

    r1277 r1424  
    1515use parsargv;
    1616use util;
    17 
     17use FileHandle;
    1818
    1919if (!parsargv::parse(\@ARGV,
     20             'remove_archives', \$remove_archives,
     21             'remove_import', \$remove_import,
    2022             'buildtype/^(build|import)$/import', \$buildtype,
    21              'maxdocs/^\-?\d+/-1', \$maxdocs)) {
     23             'maxdocs/^\-?\d+/-1', \$maxdocs,
     24             'download/.+', \@download,
     25             'out/.*/STDERR', \$out)) {
     26   
    2227    &print_usage();
    2328    die "\n";
     
    3742    $maxdocs = "-maxdocs $maxdocs";
    3843}
    39 
    4044
    4145my $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", $collection);
     
    4650my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");
    4751
     52my $close_out = 0;
     53my $outfile = $out;
     54if ($out !~ /^(STDERR|STDOUT)$/i) {
     55    open (OUT, ">$out") || die "Couldn't open output file $out\n";
     56    $out = "OUT";
     57    $close_out = 1;
     58}
     59$out->autoflush(1);
     60
    4861&main();
     62
     63close OUT if $close_out;
    4964
    5065sub print_usage {
    5166    print STDERR "\n  usage: $0 [options] collection-name\n\n";
    5267    print STDERR "  options:\n";
     68    print STDERR "   -remove_archives        Remove archives directory after successfully\n";
     69    print STDERR "                           building the collection.\n";
     70    print STDERR "   -remove_import          Remove import directory after successfully\n";
     71    print STDERR "                           importing the collection.\n";
    5372    print STDERR "   -buildtype build|import If 'build' attempt to build directly\n";
    5473    print STDERR "                           from archives directory (bypassing import\n";
    5574    print STDERR "                           stage). Defaults to 'import'\n";
    56     print STDERR "   -maxdocs number         Maximum number of documents to build\n\n";
     75    print STDERR "   -maxdocs number         Maximum number of documents to build\n";
     76    print STDERR "   -download directory     Directory (or file) to get import documents from.\n";
     77    print STDERR "                           There may be multiple download directories and they\n";
     78    print STDERR "                           may be of type http://, ftp://, or file://\n.";
     79    print STDERR "                           Note that any existing import directory will be\n";
     80    print STDERR "                           deleted to make way for the downloaded data if\n";
     81    print STDERR "                           a -download option is supplied\n";
     82    print STDERR "   -out                    Filename or handle to print output status to.\n";
     83    print STDERR "                           The default is STDERR\n\n";
    5784}
    5885
    5986sub main {
     87   
     88    # do the download thing if we have any -download options
     89    if (scalar (@download)) {
     90    # remove any existing import data
     91    &util::rm_r ($importdir) if -d $importdir;
     92
     93    foreach $download_dir (@download) {
     94       
     95        if ($download_dir =~ /^http:\/\//) {
     96        # http download
     97
     98        } elsif ($download_dir =~ /^ftp:\/\//) {
     99        # ftp download
     100
     101        } else {
     102        # we assume anything not beginning with http:// or ftp://
     103        # is a file or directory on the local file system.
     104        $download_dir =~ s/^file:\/\///;
     105       
     106        if (-e $download_dir) {
     107            # copy download_dir and all it contains to the import directory
     108            my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl");
     109            $download_cmd .= " -out \"$outfile\" \"" . $download_dir . "\" " . $collection;
     110            system ($download_cmd);
     111        } else {
     112            print $out "WARNING: $download_dir does not exist\n";
     113        }
     114        }
     115    }
     116    }
    60117
    61118    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
     
    69126    } else {
    70127        # there are archives but no import, build directly from archives
    71         print STDERR "build: no import material was found, building directly\n";
    72         print STDERR "       from archives\n";
     128        print $out "build: no import material was found, building directly\n";
     129        print $out "       from archives\n";
    73130        &gsdl_build();
    74131    }
     
    76133    if (&has_content ($importdir)) {
    77134        if ($buildtype eq "build") {
    78         print STDERR "build: can't build directly from archives as no\n";
    79         print STDERR "       imported archives exist (did you forget to\n";
    80         print STDERR "       move the contents of $collection/import to\n";
    81         print STDERR "       collection/archives?)\n";
     135        print $out "build: can't build directly from archives as no\n";
     136        print $out "       imported archives exist (did you forget to\n";
     137        print $out "       move the contents of $collection/import to\n";
     138        print $out "       collection/archives?)\n";
    82139        }
    83140        &gsdl_import();
     
    85142    } else {
    86143        # no import or archives
    87         print STDERR "build: ERROR: The $collection collection has no import\n";
    88         print STDERR "       or archives data. Try downloading an unbuilt version\n";
    89         print STDERR "       of the collection from www.nzdl.org\n";
     144        print $out "build: ERROR: The $collection collection has no import\n";
     145        print $out "       or archives data. Try downloading an unbuilt version\n";
     146        print $out "       of the collection from www.nzdl.org\n";
    90147        die "\n";
    91148    }
     
    95152sub gsdl_import {
    96153
    97     print STDERR "importing the $collection collection\n\n";
     154    print $out "importing the $collection collection\n\n";
    98155
    99156    my $import = &util::filename_cat ($bindir, "script", "import.pl");
    100     system ("perl $import -removeold $maxdocs $collection");
     157    system ("perl $import -removeold $maxdocs -out \"$outfile\" $collection");
    101158    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
    102     print STDERR "$collection collection imported successfully\n\n";
     159    print $out "$collection collection imported successfully\n\n";
     160    if ($remove_import) {
     161        print $out "removing import directory ($importdir)\n";
     162        &util::rm_r ($importdir);
     163    }
    103164    } else {
    104165    die "\nimport.pl failed\n";
     
    108169sub gsdl_build {
    109170
    110     print STDERR "building the $collection collection\n\n";
     171    print $out "building the $collection collection\n\n";
    111172
    112173    my $buildcol = &util::filename_cat ($bindir, "script", "buildcol.pl");
    113     system ("perl $buildcol $maxdocs $collection");
     174    system ("perl $buildcol $maxdocs -out \"$outfile\" $collection");
    114175    if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
    115176    -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
    116     print STDERR "$collection collection built successfully\n\n";
     177    print $out "$collection collection built successfully\n\n";
     178    if ($remove_archives) {
     179        print $out "removing archives directory ($archivedir)\n";
     180        &util::rm_r ($archivedir);
     181    }
    117182    } else {
    118183    die "\nbuildcol.pl failed\n";
     
    121186    # replace old indexes with new ones
    122187    if (&has_content ($indexdir)) {
    123     print STDERR "removing old indexes\n";
     188    print $out "removing old indexes\n";
    124189    &util::rm_r ($indexdir);
    125190    }
  • trunk/gsdl/bin/script/buildcol.pl

    r1383 r1424  
    2525#
    2626###########################################################################
     27
     28package buildcol;
    2729
    2830BEGIN {
     
    3739use parsargv;
    3840use util;
     41use FileHandle;
    3942
    4043&main();
     
    5861    print STDERR "                         collection. This relies on the Gimp being\n";
    5962    print STDERR "                         installed along with relevant perl modules\n";
    60     print STDERR "                         to allow scripting from perl\n\n";
     63    print STDERR "                         to allow scripting from perl\n";
     64    print STDERR "   -out                  Filename or handle to print output status to.\n";
     65    print STDERR "                         The default is STDERR\n\n";
    6166}
    6267
     
    6671    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
    6772    $debug, $mode, $indexname, $keepold, $allclassifications,
    68     $create_images);
     73    $create_images, $out);
    6974    if (!parsargv::parse(\@ARGV,
    7075             'verbosity/\d+/2', \$verbosity,
     
    7883             'keepold', \$keepold,
    7984             'allclassifications', \$allclassifications,
    80              'create_images', \$create_images)) {
     85             'create_images', \$create_images,
     86             'out/.*/STDERR', \$out)) {
    8187    &print_usage();
    8288    die "\n";
    8389    }
     90
     91    my $close_out = 0;
     92    if ($out !~ /^(STDERR|STDOUT)$/i) {
     93    open (OUT, ">>$out") || die "Couldn't open output file $out\n";
     94    $out = "buildcol::OUT";
     95    $close_out = 1;
     96    }
     97    $out->autoflush(1);
    8498
    8599    # get and check the collection
     
    127141    # update the archive cache if needed
    128142    if ($cachedir) {
    129     print STDERR "Updating archive cache\n" if ($verbosity >= 1);
     143    print $out "Updating archive cache\n" if ($verbosity >= 1);
    130144
    131145    $cachedir =~ s/[\\\/]+$//;
     
    162176    eval("\$builder = new $buildertype(\$collection, " .
    163177     "\$realarchivedir, \$realbuilddir, \$verbosity, " .
    164      "\$maxdocs, \$debug, \$keepold, \$allclassifications)");
     178     "\$maxdocs, \$debug, \$keepold, \$allclassifications, \$out)");
    165179    die "$@" if $@;
    166180
     
    186200   
    187201    if (($realbuilddir ne $builddir) && !$debug) {
    188     print STDERR "Copying back the cached build\n" if ($verbosity >= 1);
     202    print $out "Copying back the cached build\n" if ($verbosity >= 1);
    189203    &util::rm_r ($builddir);
    190204    &util::cp_r ($realbuilddir, $builddir);
    191205    }
     206
     207    close OUT if $close_out;
    192208}
    193209
     
    197213    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    198214    if (!-e $image_script) {
    199     print STDERR "WARNING: Image making script ($image_script) could not be found\n";
    200     print STDERR "         Default images will not be generated\n\n";
     215    print $out "WARNING: Image making script ($image_script) could not be found\n";
     216    print $out "         Default images will not be generated\n\n";
    201217    return;
    202218    }
     
    213229    # to be changed when the config file format changes)
    214230    if (!open (CFGFILE, $configfilename)) {
    215     print STDERR "WARNING: Couldn't open config file ($configfilename)\n";
    216     print STDERR "         for updating so collection images may not be linked correctly\n";
     231    print $out "WARNING: Couldn't open config file ($configfilename)\n";
     232    print $out "         for updating so collection images may not be linked correctly\n";
    217233    return;
    218234    }
     
    236252
    237253    if (!open (CFGFILE, ">$configfilename")) {
    238     print STDERR "WARNING: Couldn't open config file ($configfilename)\n";
    239     print STDERR "         for updating so collection images may not be linked correctly\n";
     254    print $out "WARNING: Couldn't open config file ($configfilename)\n";
     255    print $out "         for updating so collection images may not be linked correctly\n";
    240256    return;
    241257    }
  • trunk/gsdl/bin/script/filecopy.pl

    r1179 r1424  
    3535
    3636use util;
     37use parsargv;
    3738use File::stat;
     39use FileHandle;
    3840
    39 sub print_usage
    40 {
    41     print STDERR "\n  usage: $0 [filenames] [directories] collection-name\n\n";
     41sub print_usage {
     42    print STDERR "\n  usage: $0 [options] [directories] collection-name\n\n";
     43
     44    print STDERR "  options:\n";
     45    print STDERR "   -out                  Filename or handle to print output status to.\n";
     46    print STDERR "                         The default is STDERR\n\n";
    4247}
    43 
    4448
    4549sub download_files
     
    7074    else
    7175    {
    72         print STDERR "Error: filename '$a' does not exist\n";
     76        print $out "Error: filename '$a' does not exist\n";
    7377    }
    7478    }
     
    101105        if ($do_copy eq "yes")
    102106        {
    103         print STDOUT "Copying $src_file-->$dst_file\n";
     107        print $out "Copying $src_file-->$dst_file\n";
    104108        &util::cp($src_file,$dst_file);
    105109        }
     
    124128            if (!opendir (INDIR, $d))
    125129        {
    126                 print STDERR "Error: Could not open directory $d\n";
     130                print $out "Error: Could not open directory $d\n";
    127131            }
    128132        else
     
    140144sub main
    141145{
    142     if (scalar(@ARGV)<2)
    143     {
    144     print_usage();
    145     exit(1);
     146    if (!parsargv::parse(\@ARGV, 'out/.*/STDERR', \$out)) {
     147    &print_usage();
     148    die "\n";
    146149    }
    147150
     151    my $close_out = 0;
     152    if ($out !~ /^(STDERR|STDOUT)$/i) {
     153    open (OUT, ">>$out") || die "Couldn't open output file $out\n";
     154    $out = "MAIN::OUT";
     155    $close_out = 1;
     156    }
     157    $out->autoflush(1);
     158
    148159    download_files(@ARGV);
     160
     161    close OUT if $close_out;
    149162    return 0;
    150163}
  • trunk/gsdl/bin/script/import.pl

    r1287 r1424  
    2828
    2929# This program will import a number of files into a particular collection
     30
     31package import;
    3032
    3133BEGIN {
     
    3739}
    3840
    39 use strict;
    4041use arcinfo;
    4142use colcfg;
     
    4445use util;
    4546use parsargv;
     47use FileHandle;
    4648
    4749sub print_usage {
     
    6264    print STDERR "   -sortmeta metadata     Sort documents alphabetically by metadata for\n";
    6365    print STDERR "                          building. This will be disabled if groupsize > 1\n";
    64     print STDERR "   -debug                 Print imported text to STDOUT\n\n";
     66    print STDERR "   -debug                 Print imported text to STDOUT\n";
     67    print STDERR "   -out                   Filename or handle to print output status to.\n";
     68    print STDERR "                          The default is STDERR\n\n";
    6569}
    6670
    67 
    68 &main ();
     71&main();
    6972
    7073sub main {
     
    7275    $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection,
    7376    $configfilename, $collectcfg, $pluginfo, $sortmeta,
    74     $archive_info_filename, $archive_info, $processor);
     77    $archive_info_filename, $archive_info, $processor, $out);
    7578    if (!parsargv::parse(\@ARGV,
    7679             'verbosity/\d+/2', \$verbosity,
     
    8386             'sortmeta/.*/', \$sortmeta,
    8487             'debug', \$debug,
    85              'maxdocs/^\-?\d+/-1', \$maxdocs)) {
     88             'maxdocs/^\-?\d+/-1', \$maxdocs,
     89             'out/.*/STDERR', \$out)) {
    8690    &print_usage();
    8791    die "\n";
    8892    }
     93
     94    my $close_out = 0;
     95    if ($out !~ /^(STDERR|STDOUT)$/i) {
     96    open (OUT, ">>$out") || die "Couldn't open output file $out\n";
     97    $out = 'import::OUT';
     98    $close_out = 1;
     99    }
     100    $out->autoflush(1);
    89101
    90102    # set removeold to false if it has been defined
     
    100112    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    101113    if (defined $sortmeta && $groupsize > 1) {
    102     print STDERR "WARNING: import.pl cannot sort documents when groupsize > 1\n";
    103     print STDERR "         sortmeta option will be ignored\n\n";
     114    print $out "WARNING: import.pl cannot sort documents when groupsize > 1\n";
     115    print $out "         sortmeta option will be ignored\n\n";
    104116    $sortmeta = undef;
    105117    }
     
    148160
    149161    # load all the plugins
    150     $pluginfo = &plugin::load_plugins ($plugins, $verbosity);
     162    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out);
    151163    if (scalar(@$pluginfo) == 0) {
    152     print STDERR "No plugins were loaded.\n";
     164    print $out "No plugins were loaded.\n";
    153165    die "\n";
    154166    }
     
    156168    # remove the old contents of the archives directory if needed
    157169    if ($removeold && -e $archivedir) {
    158     print STDERR "Warning - removing current contents of the archives directory\n";
    159     print STDERR "          in preparation for the import\n";
     170    print $out "Warning - removing current contents of the archives directory\n";
     171    print $out "          in preparation for the import\n";
    160172    sleep(5); # just in case...
    161173    &util::rm_r ($archivedir);
     
    169181
    170182    # create a docsave object to process the documents
    171     $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize);
     183    $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize, $out);
    172184    $processor->setarchivedir ($archivedir);
    173185    $processor->set_sortmeta ($sortmeta) if defined $sortmeta;
     
    188200    $archive_info->save_info($archive_info_filename);
    189201    }
     202    close OUT if $close_out;
    190203}
    191 
    192 
    193 
    194 
    195 
  • trunk/gsdl/perllib/docsave.pm

    r1287 r1424  
    4040
    4141sub new {
    42     my ($class, $collection,$archive_info,$verbosity,$gzip,$groupsize) = @_;
     42    my ($class, $collection, $archive_info, $verbosity,
     43    $gzip, $groupsize, $outhandle) = @_;
    4344    my $self = new docproc ();
    44 
    45 
     45   
     46   
    4647    $groupsize=1 unless defined $groupsize;
    4748    $self->{'collection'} = $collection;
     
    5253    $self->{'groupsize'} = $groupsize;
    5354    $self->{'gs_count'} = 0;
    54 
     55   
     56    $self->{'outhandle'} = STDERR;
     57    $self->{'outhandle'} = $outhandle if defined $outhandle;
     58   
    5559    # set a default for the archive directory
    5660    $self->{'archive_dir'} = "$ENV{'GSDLHOME'}/collect/$self->{'collection'}/archives";
    57 
     61   
    5862    $self->{'sortmeta'} = undef;
    59 
     63   
    6064    return bless $self, $class;
    6165}
     
    6468    my $self = shift (@_);
    6569    my ($archive_dir) = @_;
    66 
     70   
    6771    $self->{'archive_dir'} = $archive_dir;
    6872}
     
    7175    my $self = shift (@_);
    7276    my ($sortmeta) = @_;
    73 
     77   
    7478    $self->{'sortmeta'} = $sortmeta;
    7579}
     
    7882    my $self = shift (@_);
    7983    my ($doc_obj) = @_;
    80 
     84 
     85    my $outhandle = $self->{'outhandle'};
     86   
    8187    if ($self->{'groupsize'} > 1) {
    8288    $self->group_process ($doc_obj);
    83 
     89   
    8490    } else {
    8591    # groupsize is 1 (i.e. one document per GML file) so sortmeta
    8692    # may be used
    87 
     93   
    8894    my $OID = $doc_obj->get_OID();
    8995    $OID = "NULL" unless defined $OID;
     
    9197    # get document's directory
    9298    my $doc_dir = $self->get_doc_dir ($OID);
    93    
     99   
    94100    # copy all the associated files, add this information as metadata
    95101    # to the document
    96102    $self->process_assoc_files ($doc_obj, $doc_dir);
    97 
     103   
    98104    my $doc_file
    99105        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml");
    100106    my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml");
    101        
     107   
    102108    if (!open (OUTDOC, ">$doc_file")) {
    103         print STDERR "docsave::process could not write to file $doc_file\n";
     109        print $outhandle "docsave::process could not write to file $doc_file\n";
    104110        return;
    105111    }
     
    115121        $short_doc_file .= ".gz";
    116122        if (!-e $doc_file) {
    117         print STDERR "error while gzipping: $doc_file doesn't exist\n";
     123        print $outhandle "error while gzipping: $doc_file doesn't exist\n";
    118124        return 0;
    119125        }
     
    134140    my $self = shift (@_);
    135141    my ($doc_obj) = @_;
     142   
     143    my $outhandle = $self->{'outhandle'};
    136144
    137145    my $OID = $doc_obj->get_OID();
     
    165173       
    166174        if (!open (OUTDOC, ">$doc_file")) {
    167         print STDERR "docsave::group_process could not write to file $doc_file\n";
     175        print $outhandle "docsave::group_process could not write to file $doc_file\n";
    168176        return;
    169177        }
     
    219227    my ($doc_obj, $doc_dir) = @_;
    220228
     229    my $outhandle = $self->{'outhandle'};
     230
    221231    my @assoc_files = ();
    222232    foreach $assoc_file (@{$doc_obj->get_assoc_files()}) {
     
    230240                     "$afile:$assoc_file->[2]:$dir");
    231241    } else {
    232         print STDERR "docsave::process couldn't copy the associated file " .
     242        print $outhandle "docsave::process couldn't copy the associated file " .
    233243        "$assoc_file->[0] to $afile\n";
    234244    }
     
    240250{
    241251    my ($self) = @_;
    242 
     252   
    243253    close OUTDOC;
    244254
     
    252262    $short_doc_file .= ".gz";
    253263    if (!-e $doc_file) {
    254         print STDERR "error while gzipping: $doc_file doesn't exist\n";
     264        my $outhandle = $self->{'outhandle'};
     265        print $outhandle "error while gzipping: $doc_file doesn't exist\n";
    255266        return 0;
    256267    }
  • trunk/gsdl/perllib/mgbuilder.pm

    r1304 r1424  
    6161sub new {
    6262    my ($class, $collection, $source_dir, $build_dir, $verbosity,
    63     $maxdocs, $debug, $keepold, $allclassifications) = @_;
     63    $maxdocs, $debug, $keepold, $allclassifications, $outhandle) = @_;
     64
     65    $outhandle = STDERR unless defined $outhandle;
    6466
    6567    # create an mgbuilder object
     
    7274              'keepold'=>$keepold,
    7375              'allclassifications'=>$allclassifications,
     76              'outhandle'=>$outhandle,
    7477              'notbuilt'=>[]    # indexes not built
    7578              }, $class;
     
    112115   
    113116    # load all the plugins
    114     $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity);
     117    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle);
    115118    if (scalar(@{$self->{'pluginfo'}}) == 0) {
    116     print STDERR "No plugins were loaded.\n";
     119    print $outhandle "No plugins were loaded.\n";
    117120    die "\n";
    118121    }
     
    149152
    150153    eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " .
    151      "\$source_dir, \$build_dir, \$verbosity)");
     154     "\$source_dir, \$build_dir, \$verbosity, \$outhandle)");
    152155    die "$@" if $@;
    153156
     
    176179    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
    177180    my $mg_compression_dict_exe = &util::filename_cat($exedir, "mg_compression_dict$exe");
     181    my $outhandle = $self->{'outhandle'};
    178182
    179183    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, "text"));
     
    188192    }
    189193
    190     print STDERR "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
     194    print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
    191195
    192196    # collect the statistics for the text
    193197    # -b $maxdocsize sets the maximum document size to be 12 meg
    194     print STDERR "\n    collecting text statistics\n"  if ($self->{'verbosity'} >= 1);
     198    print $outhandle "\n    collecting text statistics\n"  if ($self->{'verbosity'} >= 1);
    195199
    196200    my ($handle);
     
    226230    # words being put into the dictionary first (-2 -k 5120)
    227231    if (!$self->{'debug'}) {
    228     print STDERR "\n    creating the compression dictionary\n"  if ($self->{'verbosity'} >= 1);
     232    print $outhandle "\n    creating the compression dictionary\n"  if ($self->{'verbosity'} >= 1);
    229233    if (!-e "$mg_compression_dict_exe") {
    230234        die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n";
     
    241245    $self->{'buildproc'}->reset();
    242246    # compress the text
    243     print STDERR "\n    compressing the text\n"  if ($self->{'verbosity'} >= 1);
     247    print $outhandle "\n    compressing the text\n"  if ($self->{'verbosity'} >= 1);
    244248    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    245249           "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
     
    268272    my $self = shift (@_);
    269273    my ($indexname) = @_;
     274    my $outhandle = $self->{'outhandle'};
    270275
    271276    my $indexes = [];
     
    283288    foreach $index (@$indexes) {
    284289    if ($self->want_built($index)) {
    285         print STDERR "\n*** building index $index in subdirectory " .
     290        print $outhandle "\n*** building index $index in subdirectory " .
    286291        "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1);
    287292        $self->build_index($index);
    288293    } else {
    289         print STDERR "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
     294        print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
    290295    }
    291296    }
     
    413418    my $self = shift (@_);
    414419    my ($index) = @_;
     420    my $outhandle = $self->{'outhandle'};
    415421
    416422    # get the full index directory path and make sure it exists
     
    472478
    473479    # Build index dictionary. Uses verbatim stem method
    474     print STDERR "\n    creating index dictionary\n"  if ($self->{'verbosity'} >= 1);
     480    print $outhandle "\n    creating index dictionary\n"  if ($self->{'verbosity'} >= 1);
    475481    my ($handle);
    476482    if ($self->{'debug'}) {
     
    513519   
    514520    # invert the text
    515     print STDERR "\n    inverting the text\n"  if ($self->{'verbosity'} >= 1);
     521    print $outhandle "\n    inverting the text\n"  if ($self->{'verbosity'} >= 1);
    516522
    517523    $self->{'buildproc'}->reset();
     
    526532   
    527533    # create the weights file
    528     print STDERR "\n    create the weights file\n"  if ($self->{'verbosity'} >= 1);
     534    print $outhandle "\n    create the weights file\n"  if ($self->{'verbosity'} >= 1);
    529535    if (!-e "$mg_weights_build_exe") {
    530536        die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n";
     
    533539
    534540    # create 'on-disk' stemmed dictionary
    535     print STDERR "\n    creating 'on-disk' stemmed dictionary\n"  if ($self->{'verbosity'} >= 1);
     541    print $outhandle "\n    creating 'on-disk' stemmed dictionary\n"  if ($self->{'verbosity'} >= 1);
    536542    if (!-e "$mg_invf_dict_exe") {
    537543        die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n";
     
    541547
    542548    # creates stem index files for the various stemming methods
    543     print STDERR "\n    creating stem indexes\n"  if ($self->{'verbosity'} >= 1);
     549    print $outhandle "\n    creating stem indexes\n"  if ($self->{'verbosity'} >= 1);
    544550    if (!-e "$mg_stem_idx_exe") {
    545551        die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n";
     
    559565        if (defined $suffix && !defined $wanted_index_files{$suffix}) {
    560566        # delete it!
    561         print STDERR "deleting $file\n" if $self->{'verbosity'} > 2;
     567        print $outhandle "deleting $file\n" if $self->{'verbosity'} > 2;
    562568        &util::rm (&util::filename_cat ($tmpdir, $file));
    563569        }
     
    569575sub make_infodatabase {
    570576    my $self = shift (@_);
     577    my $outhandle = $self->{'outhandle'};
     578
    571579    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    572580    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
     
    584592    my $txt2db_exe = &util::filename_cat($exedir, "txt2db$exe");
    585593
    586     print STDERR "\n*** creating the info database and processing associated files\n"
     594    print $outhandle "\n*** creating the info database and processing associated files\n"
    587595    if ($self->{'verbosity'} >= 1);
    588596
     
    624632            $self->{'collect_cfg'}->{'collectionmeta'}->{".$cmeta"} . "\n";
    625633        } else {
    626             print STDERR "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";
     634            print $outhandle "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";
    627635        }
    628636        } else {
     
    652660    my ($index);
    653661    my %build_cfg = ();
    654 
    655     print STDERR "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
     662    my $outhandle = $self->{'outhandle'};
     663
     664    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    656665
    657666    # get the text directory
     
    671680    my $input_file = &util::filename_cat ("text", $self->{'collection'});
    672681    if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) {
    673     print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
     682    print $outhandle "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
    674683    } else {
    675684    my $line = "";
     
    721730    my $self = shift (@_);
    722731
     732    my $outhandle = $self->{'outhandle'};
    723733    my $indexing_text = $self->{'buildproc'}->get_indexing_text();
    724734    my $index = $self->{'buildproc'}->get_index();
     
    727737
    728738    if ($indexing_text) {
    729     print STDERR "Stats (Creating index $index)\n";
    730     } else {
    731     print STDERR "Stats (Compressing text from $index)\n";
    732     }
    733     print STDERR "Total bytes in collection: $num_bytes\n";
    734     print STDERR "Total bytes in $index: $num_processed_bytes\n";
     739    print $outhandle "Stats (Creating index $index)\n";
     740    } else {
     741    print $outhandle "Stats (Compressing text from $index)\n";
     742    }
     743    print $outhandle "Total bytes in collection: $num_bytes\n";
     744    print $outhandle "Total bytes in $index: $num_processed_bytes\n";
    735745
    736746    if ($num_processed_bytes < 50) {
    737     print STDERR "***************\n";
    738     print STDERR "WARNING: There is very little or no text to process for $index\n";
     747    print $outhandle "***************\n";
     748    print $outhandle "WARNING: There is very little or no text to process for $index\n";
    739749    if ($indexing_text) {
    740         print STDERR "This may cause an error while attempting to build the index\n";
     750        print $outhandle "This may cause an error while attempting to build the index\n";
    741751    } else {
    742         print STDERR "This may cause an error while attempting to compress the text\n";
    743     }
    744     print STDERR "***************\n";
     752        print $outhandle "This may cause an error while attempting to compress the text\n";
     753    }
     754    print $outhandle "***************\n";
    745755    }
    746756}
  • trunk/gsdl/perllib/mgbuildproc.pm

    r1251 r1424  
    4141
    4242sub new {
    43     my ($class, $collection, $source_dir, $build_dir, $verbosity) = @_;
     43    my ($class, $collection, $source_dir, $build_dir,
     44    $verbosity, $outhandle) = @_;
    4445    my $self = new docproc ();
     46
     47    # outhandle is where all the debugging info goes
     48    # output_handle is where the output of the plugins is piped
     49    # to (i.e. mg, gdbm etc.)
     50    $outhandle = STDERR unless defined $outhandle;
    4551
    4652    $self->{'collection'} = $collection;
     
    5965    $self->{'num_bytes'} = 0;
    6066    $self->{'num_processed_bytes'} = 0;
     67    $self->{'outhandle'} = $outhandle;
    6168
    6269    $self->{'indexing_text'} = 0;
  • trunk/gsdl/perllib/plugin.pm

    r1244 r1424  
    2929
    3030sub load_plugins {
    31     my ($plugin_list, $verbosity) = @_;
     31    my ($plugin_list, $verbosity, $outhandle) = @_;
    3232    my @plugin_objects = ();
    3333
     
    5656   
    5757    # initialize plugin
    58     $plugobj->init($verbosity);
     58    $plugobj->init($verbosity, $outhandle);
    5959
    6060    # add this object to the list
  • trunk/gsdl/perllib/plugins/ArcPlug.pm

    r1244 r1424  
    3939}
    4040
    41 use strict;
    42 
    4341sub new {
    4442    my ($class) = @_;
     
    6159    my $self = shift (@_);
    6260    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     61    my $outhandle = $self->{'outhandle'};
    6362
    6463    my $count = 0;
     
    7069
    7170    # found an archives.inf file
    72     print STDERR "ArcPlug: processing $archive_info_filename\n";
     71    print $outhandle "ArcPlug: processing $archive_info_filename\n";
    7372
    7473    # read in the archives information file
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r1411 r1424  
    3131use acronym;
    3232use textcat;
    33 use strict;
    3433use doc;
    3534use diagnostics;
     
    8584    my $self = {};
    8685    my $encodings = "^(iso_8859_1|Latin1|ascii|gb|iso_8859_6|windows_1256|Arabic|utf8|unicode)\$";
     86    $self->{'outhandle'} = STDERR;
    8787    my $year = (localtime)[5]+1900;
     88
    8889    # general options available to all plugins
    8990    if (!parsargv::parse(\@_,
     
    114115sub init {
    115116    my $self = shift (@_);
    116     my ($verbosity) = @_;
     117    my ($verbosity, $outhandle) = @_;
    117118
    118119    # verbosity is passed through from the processor
    119120    $self->{'verbosity'} = $verbosity;
     121
     122    # as is the outhandle ...
     123    $self->{'outhandle'} = $outhandle if defined $outhandle;
    120124
    121125    # set process_exp and block_exp to defaults unless they were
     
    209213
    210214    if ($text !~ /\w/) {
    211     print STDERR "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
     215    my $outhandle = $self->{'outhandle'};
     216    print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
    212217    return 0;
    213218    }
     
    387392
    388393# extract acronyms from a section in a document. progress is
    389 # reported to STDERR based on the verbosity. both the Acronym
     394# reported to outhandle based on the verbosity. both the Acronym
    390395# and the AcronymKWIC metadata items are created.
    391396
     
    393398    my $self = shift (@_);
    394399    my ($textref, $doc_obj, $thissection) = @_;
    395 
    396     print STDERR " extracting acronyms ...\n"
     400    my $outhandle = $self->{'outhandle'};
     401
     402    print $outhandle " extracting acronyms ...\n"
    397403    if ($self->{'verbosity'} >= 2);
    398404
     
    408414        {
    409415        $seen_before = "true";
    410         print STDERR "  already seen ". $acro->to_string() . "\n"
     416        print $outhandle "  already seen ". $acro->to_string() . "\n"
    411417            if ($self->{'verbosity'} >= 2);
    412418        }       
     
    420426        #do the normal acronym
    421427        $doc_obj->add_utf8_metadata($thissection, "Acronym",  $acro->to_string());
    422         print STDERR "  adding ". $acro->to_string() . "\n"
     428        print $outhandle "  adding ". $acro->to_string() . "\n"
    423429            if ($self->{'verbosity'} >= 1);
    424430       
     
    432438    }
    433439    }
    434     print STDERR " done extracting acronyms. \n"
     440    print $outhandle " done extracting acronyms. \n"
    435441    if ($self->{'verbosity'} >= 2);
    436442}
     
    439445    my $self = shift (@_);
    440446    my ($text, $doc_obj, $thissection) = @_;
    441 
    442     print STDERR " marking up acronyms ...\n"
     447    my $outhandle = $self->{'outhandle'};
     448
     449    print $outhandle " marking up acronyms ...\n"
    443450    if ($self->{'verbosity'} >= 2);
    444451
     
    446453    $text = &acronym::markup_acronyms($text, $self);
    447454
    448     print STDERR " done marking up acronyms. \n"
     455    print $outhandle " done marking up acronyms. \n"
    449456    if ($self->{'verbosity'} >= 2);
    450457
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r1244 r1424  
    7070}
    7171
    72 use strict;
    73 
    7472# Create a new EMAILPlug object with which to parse a file.
    7573# Accomplished by creating a new BasPlug and using bless to
     
    9391    my $self = shift (@_);
    9492    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    95    
     93    my $outhandle = $self->{'outhandle'};
     94
    9695    # Check that we're dealing with a valid mail file
    9796    return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/));
    9897
    99     print STDERR "EMAILPlug: processing $file\n"
     98    print $outhandle "EMAILPlug: processing $file\n"
    10099    if $self->{'verbosity'} > 1;
    101100
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r1401 r1424  
    3737}
    3838
    39 use strict;
    40 
    4139sub new {
    4240    my ($class) = @_;
     
    5856    my $self = shift (@_);
    5957    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     58    my $outhandle = $self->{'outhandle'};
    6059
    6160    my $filename = &util::filename_cat($base_dir, $file);
     
    6665    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    6766
    68     print STDERR "GMLPlug: processing $file\n";
     67    print $outhandle "GMLPlug: processing $file\n";
    6968
    7069    my $parent_dir = $file;
     
    7372
    7473    if (!open (INFILE, $filename)) {
    75     print STDERR "GMLPlug::read - couldn't read $filename\n";
     74    print $outhandle "GMLPlug::read - couldn't read $filename\n";
    7675    return 0;
    7776    }
     
    107106
    108107        } else {
    109             print STDERR "GMLPlug::read - error in file $filename\n";
    110             print STDERR "text: \"$gml\"\n";
     108            print $outhandle "GMLPlug::read - error in file $filename\n";
     109            print $outhandle "text: \"$gml\"\n";
    111110            last;
    112111        }
  • trunk/gsdl/perllib/plugins/HBPlug.pm

    r1244 r1424  
    5959sub init {
    6060    my $self = shift (@_);
    61     my ($verbosity) = @_;
    62 
    63     $self->BasPlug::init();
     61    my ($verbosity, $outhandle) = @_;
     62
     63    $self->BasPlug::init($verbosity, $outhandle);
    6464
    6565    # this plugin only handles ascii encodings
     
    8585    # load in the file
    8686    if (!open (FILE, $htmlfile)) {
    87     print STDERR "ERROR - could not open $htmlfile\n";
     87    my $outhandle = $self->{'outhandle'};
     88    print $outhandle "ERROR - could not open $htmlfile\n";
    8889    return;
    8990    }
     
    105106    my $self = shift (@_);
    106107    my ($foundbody, $text, $handle) = @_;
     108    my $outhandle = $self->{'outhandle'};
    107109
    108110    my $line = "";
     
    120122    if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) {
    121123        my $font = $1;
    122         print STDERR "HBPlug::HB_gettext - warning removed font $font\n"
     124        print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"
    123125        if ($font !~ /^arial$/i);
    124126    }
     
    206208    my $self = shift (@_);
    207209    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
     210    my $outhandle = $self->{'outhandle'};
    208211
    209212    # get the html filename and see if this is an HTML Book...
     
    216219    return undef unless -e $htmlfile;
    217220
    218     print STDERR "HBPlug: processing $file\n";
     221    print $outhandle "HBPlug: processing $file\n";
    219222
    220223    # read in the file and do basic html cleaning (removing header etc)
     
    270273        }
    271274        if ($curtoclevel+1 < $toclevel) {
    272         print STDERR "WARNING - jump in toc levels in $htmlfile " .
     275        print $outhandle "WARNING - jump in toc levels in $htmlfile " .
    273276            "from $curtoclevel to $toclevel\n";
    274277        }
     
    297300        }
    298301    } else {
    299         print STDERR "WARNING - leftover text\n" , $self->shorten($html),
     302        print $outhandle "WARNING - leftover text\n" , $self->shorten($html),
    300303        "\nin $htmlfile\n";
    301304        last;
  • trunk/gsdl/perllib/plugins/HBSPlug.pm

    r1244 r1424  
    5656}
    5757
    58 use strict;
    59 
    6058sub new {
    6159    my ($class) = @_;
     
    8179    my $self = shift (@_);
    8280    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    83    
    84     print STDERR "HBSPlug: processing $file\n"
     81    my $outhandle = $self->{'outhandle'};
     82
     83    print $outhandle "HBSPlug: processing $file\n"
    8584    if $self->{'verbosity'} > 1;
    8685   
     
    122121    }
    123122    if ($curtoclevel+1 < $toclevel) {
    124         print STDERR "WARNING - jump in toc levels in $filename " .
     123        print $outhandle "WARNING - jump in toc levels in $filename " .
    125124        "from $curtoclevel to $toclevel\n";
    126125    }
     
    166165
    167166sub replace_image_links {
    168 
    169167    my ($dir, $doc_obj, $front, $link, $back) = @_;
     168    my $outhandle = $self->{'outhandle'};
    170169
    171170    my ($filename, $error);
     
    177176    if ($imagetype eq "jpg") {$imagetype = "jpeg";}
    178177    if ($imagetype !~ /^(jpeg|gif|png)$/) {
    179     print STDERR "HBSPlug: Warning - unknown image type ($imagetype)\n";
     178    print $outhandle "HBSPlug: Warning - unknown image type ($imagetype)\n";
    180179    }
    181180    my ($imagefile) = $link =~ /([^\/]*)$/;
     
    199198        $foundimage = 1;
    200199    } elsif (defined $error) {
    201         print STDERR "$error $filename\n";
     200        print $outhandle "$error $filename\n";
    202201    } else {
    203         print STDERR "HBSPlug: Warning - couldn't find image file $imagefile in $filename\n";
     202        print $outhandle "HBSPlug: Warning - couldn't find image file $imagefile in $filename\n";
    204203    }
    205204    }
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r1410 r1424  
    4646    @ISA = ('ConvertToBasPlug');
    4747}
    48 
    49 use strict;
    5048
    5149sub print_usage {
     
    121119    my $self = shift (@_);
    122120    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    123 
    124     print STDERR "HTMLPlug: processing $file\n"
     121    my $outhandle = $self->{'outhandle'}
     122
     123    print $outhandle "HTMLPlug: processing $file\n"
    125124    if $self->{'verbosity'} > 1;
    126125
     
    264263    $hash_part = "" if !defined $hash_part;
    265264    if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) {
    266     print STDERR "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n"
     265    my $outhandle = $self->{'outhandle'};
     266    print $outhandle "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n"
    267267        if $self->{'verbosity'};
    268268    return ($link, "", 0);
  • trunk/gsdl/perllib/plugins/IndexPlug.pm

    r1244 r1424  
    6262}
    6363
    64 use strict;
    65 
    6664sub new {
    6765    my ($class) = @_;
     
    8482    my $self = shift (@_);
    8583    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     84    my $outhandle = $self->{'outhandle'};
    8685
    8786    my $indexfile = &util::filename_cat($base_dir, $file, "index.txt");
     
    9291
    9392    # found an index.txt file
    94     print STDERR "IndexPlug: processing $indexfile\n";
     93    print $outhandle "IndexPlug: processing $indexfile\n";
    9594
    9695    # read in the index.txt
  • trunk/gsdl/perllib/plugins/RecPlug.pm

    r1244 r1424  
    3838}
    3939
    40 use strict;
    41 
    4240sub new {
    4341    my ($class) = @_;
     
    6361    my $self = shift (@_);
    6462    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     63    my $outhandle = $self->{'outhandle'};
    6564
    6665    foreach my $etd ( @{$self->{'exclude_tail_dirs'}} )
     
    8079        if (!opendir (DIR, $dirname))
    8180        {
    82             print STDERR "RecPlug: WARNING - couldn't read directory $dirname\n";
     81            print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
    8382            return;
    8483        }
     
    8786    closedir (DIR);
    8887
    89     print STDERR "RecPlug: getting directory $dirname\n";
     88    print $outhandle "RecPlug: getting directory $dirname\n";
    9089
    9190    # process each file
  • trunk/gsdl/perllib/plugins/TEXTPlug.pm

    r1410 r1424  
    3535}
    3636
    37 use strict;
    38 
    3937sub new {
    4038    my ($class) = @_;
     
    5755    my $self = shift (@_);
    5856    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    59    
    60     print STDERR "TEXTPlug: processing $file\n"
     57    my $outhandle = $self->{'outhandle'};
     58
     59    print $outhandle "TEXTPlug: processing $file\n"
    6160    if $self->{'verbosity'} > 1;
    6261   
  • trunk/gsdl/perllib/plugins/ZIPPlug.pm

    r1269 r1424  
    5353}
    5454
    55 use strict;
    56 
    5755sub new {
    5856    my ($class) = @_;
     
    7573    my $self = shift (@_);
    7674    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     75    my $outhandle = $self->{'outhandle'};
    7776
    7877    if ($file =~ /\.(gz|tgz|z|taz|bz|zip|tar)$/i) {
     
    8079    my $filename = &util::filename_cat ($base_dir, $file);
    8180    if (!-e $filename) {
    82         print STDERR "ZIPPLug: WARNING: $filename does not exist\n";
     81        print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
    8382        return undef;
    8483    }
     
    8887    &util::mk_all_dir ($tmpdir);
    8988
    90     print STDERR "ZIPPlug: extracting $file_only to $tmpdir\n";
     89    print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n";
    9190
    9291    # save current working directory
Note: See TracChangeset for help on using the changeset viewer.