Changeset 1839


Ignore:
Timestamp:
2001-01-16T11:50:05+13:00 (23 years ago)
Author:
paynter
Message:

Updated classifiers to use the parsargv library instead of ad-hoc
"x=y" style parsing. (Backwards compatibility maintained through
a quick hack to the load_classifiers function in classify.pm.)

Location:
trunk/gsdl/perllib
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify.pm

    r1515 r1839  
    3434
    3535sub load_classifiers {
    36     my ($classify_list) = @_;
     36    my ($classify_list, $build_dir, $outhandle) = @_;
    3737    my @classify_objects = ();
    3838   
    39     my $outhandle = pop (@_);
    40 
    4139    foreach $classifyoption (@$classify_list) {
    4240
     
    4442    my $classname = shift @$classifyoption;
    4543    next unless defined $classname;
    46    
     44
    4745    # find the classifier
    4846    my $colclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/classify",
     
    5755    # create the classify object
    5856    my ($classobj);
    59     map { $_ = "\"$_\""; } @$classifyoption;
    60     my $options = join (",", @$classifyoption, $outhandle);
     57
     58    # backwards compatability hack: if the classifier options are
     59    # in "x=y" format, convert them to parsearg ("-x y") format.
     60    my ($opt, $key, $value);
     61    my @newoptions;
     62    foreach $opt (@$classifyoption) {
     63        if ($opt =~ /^(\w+)=(.*)$/) {
     64        push @newoptions, "-$1", $2;
     65        } else {
     66        push @newoptions, $opt;
     67        }
     68    }
     69    push @newoptions, "-builddir", "$build_dir";
     70    push @newoptions, "-outhandle", "$outhandle";
     71    push @newoptions, "-verbosity", "2";
     72
     73    map { $_ = "\"$_\""; } @newoptions;
     74    my $options .= join (",", @newoptions);
     75
     76    # print STDERR "$classname\noptions: $options\n";
    6177    eval ("\$classobj = new \$classname($options)");
    6278    die "$@" if $@;
    63    
    64     # add this object to the list
     79
     80        # add this object to the list
    6581    push (@classify_objects, $classobj);
    6682    }
  • trunk/gsdl/perllib/classify/AZCompactList.pm

    r1716 r1839  
    5050}
    5151
     52sub print_usage {
     53    print STDERR "
     54  usage: classify AZCompactList [options]
     55  options:
     56   -metadata     Metadata field used for classification
     57   -title        Title to use on web pages
     58
     59   -mingroup
     60   -minnesting
     61   -mincompact
     62   -maxcompact
     63   -doclevel
     64   -onlyfirst
     65   -recopt
     66";
     67}
     68
    5269sub new {
    53     my ($class, @options) = @_;
    54     my $self = new BasClas ($class, @_);
     70    my $class = shift (@_);
     71    my $self = new BasClas($class, @_);
    5572
    5673    my ($metaname, $title);
     
    6380    my $recopt   = undef;
    6481
    65     foreach $option (@options) {
    66     if ($option =~ /^metadata=(.*)$/i) {
    67         $metaname = $1;
    68     } elsif ($option =~ /^title=(.*)$/i) {
    69         $title = $1;
    70     } elsif ($option =~ /^mingroup(size)?=(.*)$/i) {
    71         $mingroup = $2;
    72     } elsif ($option =~ /^minnesting=(.*)$/i) {
    73         $minnesting = $1;
    74     } elsif ($option =~ /^mincompact=(.*)$/i) {
    75         $mincompact = $1;
    76     } elsif ($option =~ /^maxcompact=(.*)$/i) {
    77         $maxcompact = $1;
    78     } elsif ($option =~ /^doclevel=(.*)$/i) {
    79         $doclevel = $1;
    80     } elsif ($option =~ /^onlyfirst$/i) {
    81         $onlyfirst = 1;
    82     } elsif ($option =~ /^recopt$/i) {
    83         $recopt = "on";
    84     }
    85     }
     82     if (!parsargv::parse(\@_,
     83             q^metadata/.*/^, \$metaname,
     84             q^title/.*/^, \$title,
     85
     86             q^mingroup/.*/2^, \$mingroup,
     87             q^minnesting/.*/20^, \$minnesting,
     88             q^mincompact/.*/10^, \$mincompact,
     89             q^maxcompact/.*/30^, \$maxcompact,
     90             q^doclevel/.*/top^, \$doclevel,
     91             q^onlyfirst/.*/0^, \$onlyfirst,
     92             q^recopt/.*/-1^, \$recopt,
     93
     94             "allow_extra_options")) {
     95   
     96    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     97    &print_usage();
     98    die "\n";
     99    }
     100
     101# These are the old-fashioned optins to AZCompactList.
     102# They should be deleted once we're sure the parsearg version is working.
     103#    foreach $option (@options) {
     104#   if ($option =~ /^metadata=(.*)$/i) {
     105#       $metaname = $1;
     106#   } elsif ($option =~ /^title=(.*)$/i) {
     107#       $title = $1;
     108#   } elsif ($option =~ /^mingroup(size)?=(.*)$/i) {
     109#       $mingroup = $2;
     110#   } elsif ($option =~ /^minnesting=(.*)$/i) {
     111#       $minnesting = $1;
     112#   } elsif ($option =~ /^mincompact=(.*)$/i) {
     113#       $mincompact = $1;
     114#   } elsif ($option =~ /^maxcompact=(.*)$/i) {
     115#       $maxcompact = $1;
     116#   } elsif ($option =~ /^doclevel=(.*)$/i) {
     117#       $doclevel = $1;
     118#   } elsif ($option =~ /^onlyfirst$/i) {
     119#       $onlyfirst = 1;
     120#   } elsif ($option =~ /^recopt$/i) {
     121#       $recopt = "on";
     122#   }
     123#    }
    86124
    87125    if (!defined $metaname) {
     
    91129    }
    92130
    93     $title = $metaname unless defined $title;
     131    $title = $metaname unless ($title);
    94132
    95133    $self->{'list'} = {};
     
    104142    $self->{'maxcompact'} = $maxcompact;
    105143    $self->{'doclevel'} = $doclevel;
     144
     145    if ($onlyfirst != 0) {
     146    $onlyfirst = 1;
     147    }
    106148    $self->{'onlyfirst'} = $onlyfirst;
     149 
     150    if ($recopt == -1) {
     151    $recopt = undef;
     152    } else {
     153    $recopt = "on";
     154    }
    107155    $self->{'recopt'} = $recopt;
    108156
  • trunk/gsdl/perllib/classify/AZList.pm

    r1483 r1839  
    4141}
    4242
     43sub print_usage {
     44    print STDERR "
     45  usage: classify AZList [options]
     46  options:
     47 
     48  -metadata X     (erquired) Metadata field used for classification.
     49                  List will be sorted by this element.
     50
     51   -title X       (optional) Title field for this classification.
     52                  defaults to metadata name.
     53";
     54}
     55
    4356sub new {
    44     my ($class, @options) = @_;
    45     my $self = new BasClas ($class, @_);
    46 
     57    my $class = shift (@_);
     58    my $self = new BasClas($class, @_);
     59   
    4760    my ($metaname, $title);
    48 
    49     foreach $option (@options) {
    50     if ($option =~ /^metadata=(.*)$/i) {
    51         $metaname = $1;
    52     } elsif ($option =~ /^title=(.*)$/i) {
    53         $title = $1;
    54     }
     61   
     62    if (!parsargv::parse(\@_,
     63             q^metadata/.*/^, \$metaname,
     64             q^title/.*/^, \$title,
     65             "allow_extra_options")) {
     66   
     67    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     68    &print_usage();
     69    die "\n";
    5570    }
    5671
    5772    if (!defined $metaname) {
    58     my $outhandle = $self->{'outhandle'};
    59     print $outhandle "AZList used with no metadata name to classify by\n";
     73    &print_usage;
     74    print STDERR "AZList used with no metadata name\n";
    6075    die "\n";
    6176    }
    62 
    63     $title = $metaname unless defined $title;
    64 
     77   
     78    $title = $metaname unless ($title);
     79   
    6580    $self->{'list'} = {};
    6681    $self->{'metaname'} = $metaname;
  • trunk/gsdl/perllib/classify/AZSectionList.pm

    r1611 r1839  
    4343}
    4444
     45sub print_usage {
     46    print STDERR "
     47  usage: classify AZSectionList [options]
     48  options:
     49
     50   -metadata X    Metadata field used for classification,
     51                  list will be sorted by this element.
     52
     53   -title X       (OPTIONAL) Title field for this classification.
     54                  if not included title field will be Metaname.
     55
     56This is very similar to AZList except it sorts by section level metadata
     57(excluding the top level) instead of just top level metadata.
     58";
     59}
     60
    4561sub new {
    46     my ($class, @options) = @_;
    47     my $self = new AZList ($class, @_);
     62    my $class = shift (@_);
     63    my $self = new AZList($class, @_);
     64
    4865    return bless $self, $class;
    4966}
  • trunk/gsdl/perllib/classify/BasClas.pm

    r1483 r1839  
    2626package BasClas;
    2727
     28use parsargv;
     29
     30
     31sub print_general_usage {
     32    my ($plugin_name) = @_;
     33    print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
     34}
     35
     36# print_usage should be overridden for any sub-classes
     37sub print_usage {
     38    print STDERR "\nThis classifier has no classifier-specific options\n\n";
     39
     40}
    2841
    2942sub new {
    30     my ($class, @options) = @_;
     43    my $class = shift (@_);
     44    my $name = shift (@_);
    3145
    3246    my $self = {};
    3347
    34     # this relies on the outpit handle always being
    35     # the last thing in the list - it should be tidied
    36     # up when the options are fixed up to use parseargv
    37     $self->{'outhandle'} = pop @options;
    38     if (!defined $self->{'outhandle'}) {
    39     $self->{'outhandle'} = STDERR;
     48    $self->{'outhandle'} = STDERR;
     49   
     50    # general options available to all classifiers
     51    if (!parsargv::parse(\@_,
     52             q^builddir/.*/^, \$self->{'builddir'},
     53             q^outhandle/.*/STDERR^, \$self->{'outhandle'},
     54             q^verbosity/\d/2^, \$self->{'verbosity'},
     55             "allow_extra_options")) {
     56   
     57    print STDERR "\nThe $name classifier uses an incorrect general option\n";
     58    print STDERR "(general options are those available to all classifiers).\n";
     59    print STDERR "Check your collect.cfg configuration file.\n";
     60        &print_general_usage($plugin_name);
     61    die "\n";
    4062    }
     63
     64
    4165
    4266    return bless $self, $class;
  • trunk/gsdl/perllib/classify/DateList.pm

    r1483 r1839  
    4141}
    4242
     43sub print_usage {
     44    print STDERR "
     45  usage: classify DateList
     46
     47Classifier plugin for sorting by date.
     48No options - always sorts by 'Date' metadata
     49Date is assumed to be in the form yyyymmdd
     50At present dates are split by year - this should change
     51
     52Any errors are Dana's problem.
     53";
     54}
     55
    4356sub new {
    44     my ($class, @options) = @_;
    45     my $self = new BasClas ($class, @_);
     57    my $class = shift (@_);
     58    my $self = new BasClas($class, @_);
    4659
    4760    $self->{'list'} = {};
  • trunk/gsdl/perllib/classify/HTML.pm

    r1483 r1839  
    3939}
    4040
     41sub print_usage {
     42    print STDERR "
     43  usage: classify AZList [options]
     44  options:
     45   -url X         The url of the web page to link to.
     46   -title X       (optional) the title field for this classification.
     47                  The default is 'Browse'
     48
     49HTML classifier plugin - creates classifier that is a link to a web page
     50
     51";
     52}
     53
    4154sub new {
    42     my ($class, @options) = @_;
    43     my $self = new BasClas ($class, @_);
    44 
     55    my $class = shift (@_);
     56    my $self = new BasClas($class, @_);
     57   
    4558    my ($title, $url);
    46 
    47     foreach $option (@options) {
    48     if ($option =~ /^url=(.*)$/i) {
    49         $url = $1;
    50     } elsif ($option =~ /^title=(.*)$/i) {
    51         $title = $1;
    52     }
     59   
     60    if (!parsargv::parse(\@_,
     61             q^url/.*/^, \$url,
     62             q^title/.*/Browse^, \$title,
     63             "allow_extra_options")) {
     64   
     65    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     66    &print_usage();
     67    die "\n";
    5368    }
    5469
     
    5873    die "\n";
    5974    }
    60 
    61     $title = 'Browse' unless defined $title;
    6275
    6376    $self->{'url'} = $url;
  • trunk/gsdl/perllib/classify/Hierarchy.pm

    r1608 r1839  
    5050}
    5151
     52sub print_usage {
     53    print STDERR "
     54  usage: classify Hierarchy [options]
     55  options:
     56
     57   -title X       Title field for this classification.
     58                  Defaults to metadata name.
     59
     60   -metadata X    Metadata field used for classification,
     61                  list will be sorted by this element.
     62
     63   -hfile X       The classification structure file
     64
     65   -sort X        Metadata field to sort by (defaults to none)
     66";
     67}
     68
     69
    5270sub new {
    53     my ($class, @options) = @_;
    54     my $self = new BasClas ($class, @_);
    55 
     71    my $class = shift (@_);
     72    my $self = new BasClas($class, @_);
     73   
    5674    my $sortname = "Title";
    5775    my ($hfile, $metadata, $title);
    5876
    59     foreach $option (@options) {
    60     if ($option =~ /^sort=(.*)$/i) {
    61         $sortname = $1;
    62         $sortname = undef if $sortname =~ /^nosort$/i;
    63     } elsif ($option =~ /^hfile=(.*)$/i) {
    64         $hfile = $1;
    65     } elsif ($option =~ /^metadata=(.*)$/i) {
    66         $metadata = $1;
    67     } elsif ($option =~ /^title=(.*)$/i) {
    68         $title = $1;
    69     }
    70     }
    71 
    72     if (!defined ($hfile) || !defined ($metadata)) {
    73     my $outhandle = $self->{'outhandle'};
    74     print $outhandle "Error in options passed to Hierarchy classification\n";
    75     print $outhandle "hfile and metadata options aren't optional\n\n";
    76     print $outhandle "Options:\n";
    77     print $outhandle "hfile -- the classification structure file\n";
    78     print $outhandle "metadata -- metadata field to test against hfile\n";
    79     print $outhandle "sort -- metadata field to sort by\n";
    80     print $outhandle "title -- title of classification\n\n";
     77    if (!parsargv::parse(\@_,
     78             q^title/.*/^, \$title,
     79             q^sort/.*/nosort^, \$sortname,
     80             q^hfile/.*/^, \$hfile,
     81             q^metadata/.*/^, \$metadata,
     82             "allow_extra_options")) {
     83   
     84    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     85    &print_usage();
    8186    die "\n";
    8287    }
    8388
    84     $title = $metadata unless defined $title;
     89    if (!$metadata) {
     90    &print_usage;
     91    print STDERR "\nHierarchy error: no metadata supplied\n";
     92    die "\n";
     93    }
     94
     95    $title = $metadata unless ($title);
     96
     97    $sortname = undef if $sortname =~ /^nosort$/;
     98
     99    if (!$hfile) {
     100    &print_usage;
     101    print STDERR "\nHierarchy error: No -hfile supplied\n";
     102    die "\n";
     103    }
    85104   
    86105    my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
     
    90109    if (!-e $subjectfile) {
    91110        my $outhandle = $self->{'outhandle'};
    92         print $outhandle "Hierarchy Error: Can't locate subject file $hfile\n";
    93         print $outhandle "This file should be in $collfile or $subjectfile\n";
     111        &print_usage;
     112        print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
     113        print STDERR "This file should be in $collfile or $subjectfile\n";
    94114        die "\n";
    95115    }
  • trunk/gsdl/perllib/classify/List.pm

    r1483 r1839  
    3838#                      if metadata is also not included title will be 'List'
    3939
     40use BasClas;
    4041package List;
    4142
     
    4647}
    4748
     49sub print_usage {
     50    print STDERR "
     51  usage: classify List [options]
     52  options:
     53
     54   -metadata X    Metadata field used for classification,
     55                  list will be sorted by this element.
     56
     57   -title X       (optional) Title field for this classification.
     58                  if not included title field will be Metaname.
     59
     60   -sort X        (optional) Sort documents in list by this metadata field.
     61                  By default it will sort by Metaname, or (if this is not
     62                  set) in build (random) order.
     63";
     64}
     65
    4866sub new {
    49     my ($class, @options) = @_;
    50     my $self = new BasClas ($class, @_);
    51 
    52     my $list = [];
    53     my ($metaname, $title, $sortname);
    54 
    55     foreach $option (@options) {
    56     if ($option =~ /^metadata=(.*)$/i) {
    57         $metaname = $1;
    58         $list = {};
    59     } elsif ($option =~ /^title=(.*)$/i) {
    60         $title = $1;
    61     } elsif ($option =~ /^sort=(.*)$/i) {
    62         $sortname = $1;
    63     }
    64     }
    65 
    66     if (!defined $title) {
    67     if (defined $metaname) {
     67    my $class = shift (@_);
     68    my $self = new BasClas($class, @_);
     69   
     70    my ($metaname, $title, $sortname, $list);
     71
     72    if (!parsargv::parse(\@_,
     73             q^metadata/.*/^, \$metaname,
     74             q^title/.*/^, \$title,
     75             q^sort/.*/^, \$sortname,
     76             "allow_extra_options")) {
     77   
     78    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     79    &print_usage();
     80    die "\n";
     81    }
     82
     83    if (!$title) {
     84    if ($metaname) {
    6885        $title = $metaname;
    6986    } else {
     
    7289    }
    7390
    74     if (defined $sortname && $sortname =~ /^nosort$/i) {
    75     $sortname = undef;
    76     } elsif (!defined $sortname && defined $metaname) {
    77     $sortname = $metaname;
     91    if (!$sortname) {
     92    if ($metaname) {
     93        $sortname = $metaname;
     94    } else {
     95        $sortname = undef;
     96    }
     97    }
     98
     99    if ($metaname) {
     100    $list = {};
     101    } else {
     102    $list = [];
     103    $metaname = undef;
    78104    }
    79105
  • trunk/gsdl/perllib/classify/SectionList.pm

    r1611 r1839  
    3737}
    3838
     39sub print_usage {
     40    print STDERR "
     41  usage: classify SectionList [options]
     42  options:
     43
     44   -metadata X    Metadata field used for classification,
     45                  list will be sorted by this element.
     46
     47   -title X       (optional) Title field for this classification.
     48                  if not included title field will be Metaname.
     49
     50   -sort X        (optional) Sort documents in list by this metadata field.
     51                  By default it will sort by Metaname, or (if this is not
     52                  set) in build (random) order.
     53
     54Same as List classifier but includes all sections of document
     55(excluding top level) rather than just top level document.
     56";
     57}
     58
    3959sub new {
    40     my ($class, @options) = @_;
    41     my $self = new List ($class, @_);
     60    my $class = shift (@_);
     61    my $self = new List($class, @_);
     62 
    4263    return bless $self, $class;
    4364}
  • trunk/gsdl/perllib/classify/phind.pm

    r1829 r1839  
    8585my @delimiters = ($colstart, $colend, $doclimit, $senlimit);
    8686
    87 # Create a new phind browser based on the options in collect.cfg
     87
     88sub print_usage {
     89    print STDERR "
     90  usage: classify phind [options]
     91
     92  options:
     93   -title        Title to use on web pages
     94   -text
     95   -title
     96   -button
     97   -language
     98   -savephrases
     99   -suffixsize
     100   -suffixmode
     101   -thesaurus
     102   -untidy
     103";
     104}
     105
     106# Create a new phind browser based on collect.cfg
    88107
    89108sub new {
    90     my ($class, @options) = @_;
    91     my $self = new BasClas ($class, @_);
    92     my $out = pop @options;
     109    my $class = shift (@_);
     110    my $self = new BasClas($class, @_);
     111
     112    my $out = $self->{'outhandle'};
     113
    93114
    94115    # Phind installation check
     
    98119    # and get all the files in place, then we let them proceed.
    99120   
    100     print $out "The Phind classifier for Greenstone.\n";
    101     print $out "Checking the phind phrase browser requirements...\n";
     121    print $out "Checking Phind phrase browser requirements...\n";
    102122
    103123    # Make sure we're not in windows
    104124    if ($ENV{'GSDLOS'} =~ /windows/i) {
    105     print STDERR "Phind currently only works under Unix";
     125    print STDERR "Sorry - Phind currently only works under Unix";
    106126    exit(1);
    107127    }
     
    144164    }
    145165
    146     # The installation appears OK - set up the classifier
    147     my $collection = $ENV{'GSDLCOLLECTION'};
    148     my $language = "english";
    149 
    150     my $button = "Phrase";
    151     my $title = "Title";
    152     my $indexes = "section:Title,section:text";
    153 
     166    # Parse classifier arguments
    154167    my $builddir = "";
    155168    my $phinddir = "";
    156 
    157     my $suffixmode = 1;
    158     my $suffixsize = 40000000;
    159     my $savephrases = 0;
    160 
    161     my $verbosity = 2;
    162     my $untidy = 0;
    163 
    164     my $thesaurus = "";
    165 
    166     # parse the options
    167     foreach $option (@options) {
    168 
    169     print STDERR "option: $option\n";
    170 
    171     if ($option =~ /^text=(.*)$/i) {
    172         $indexes = $1;
    173     } elsif ($option =~ /^title=(.*)$/i) {
    174         $title = $1;
    175     } elsif ($option =~ /^button=(.*)$/i) {
    176         $button = $1;
    177     } elsif ($option =~ /^builddir=(.*)$/i) {
    178         $builddir = $1;
    179     } elsif ($option =~ /^phinddir=(.*)$/i) {
    180         $phinddir = $1;
    181     } elsif ($option =~ /^savephrases=(.*)$/i) {
    182         $savephrases = $1;
    183     } elsif ($option =~ /^suffixsize=(.*)$/i) {
    184         $suffixsize = $1;
    185     } elsif ($option =~ /^suffixmode=(.*)$/i) {
    186         $suffixmode = $1;
    187     } elsif ($option =~ /^thesaurus=(.*)$/i) {
    188         $thesaurus = $1;
    189     } elsif ($option =~ /^untidy/i) {
    190         $untidy = 1;
    191     } elsif ($option =~ /^verbosity=(.*)$/i) {
    192         $verbosity = $1;
    193     }
    194     }
    195 
     169    if (!parsargv::parse(\@_,
     170             q^text/.*/section:Title,section:text^, \$self->{'indexes'},
     171             q^title/.*/Title^, \$self->{'titlefield'},
     172             q^button/.*/Phrase^, \$self->{'buttonname'},
     173             q^language/.*/english^, \$language,
     174             q^builddir/.*/^, \$builddir,
     175             q^savephrases/\d/0^, \$self->{'savephrases'},
     176             q^suffixsize/\d+/40000^, \$self->{'suffixsize'},
     177             q^suffixmode/\d/1^, \$self->{'suffixmode'},
     178             q^thesaurus/.*/^, \$self->{'thesaurus'},
     179             q^untidy^, \$self->{'untidy'},
     180             "allow_extra_options")) {
     181   
     182    print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
     183    &print_usage();
     184    die "\n";
     185    }
     186   
    196187    # classifier information
    197     $self->{'collection'} = $collection;
    198     $self->{'titlefield'} = $title;
    199     $self->{'buttonname'} = $button;
    200     $self->{'indexes'} = $indexes;
    201 
    202     # phrase extraction options
    203     $self->{'suffixmode'} = $suffixmode;
    204     $self->{'suffixsize'} = $suffixsize;
    205     $self->{'savephrases'} = $savephrases;
    206     $self->{'thesaurus'} = $thesaurus;
     188    $self->{'collection'} = $ENV{'GSDLCOLLECTION'};
    207189
    208190    # limit languages
    209191    $language =~ s/,/\|/g;
    210192    $self->{'language_exp'} = $language;
    211     $self->{'delimiter'} = $delimiter;
    212 
    213     # collection directory
     193   
     194    # collection directories
    214195    $self->{'collectiondir'} = $ENV{'GSDLCOLLECTDIR'};
    215 
    216     # build directory
    217196    if (!$builddir) {
    218197    $builddir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "building");
    219198    }
    220199    $self->{'builddir'} = $builddir;
    221 
    222     # phind directory
    223     if (!$phinddir) {
    224         $phinddir = &util::filename_cat($builddir, "phind");
    225     }
    226     $self->{'phinddir'} = $phinddir;
    227 
    228     # debugging levels
    229     $self->{'verbosity'} = $verbosity;
    230     $self->{'untidy'} = $untidy;
    231     $self->{'out'} = $out;
    232 
     200    $self->{'phinddir'} = &util::filename_cat($builddir, "phind");
     201   
    233202    return bless $self, $class;
    234203}
     
    372341
    373342    my $verbosity = $self->{'verbosity'};
     343    my $out = $self->{'outhandle'};
     344
    374345    my $phinddir = $self->{'phinddir'};
    375346    my $language = "english";
    376347   
    377348    if ($verbosity) {
    378     print STDERR "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n";
     349    print $out "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n";
    379350    }
    380351
     
    386357    # Generate the vocabulary, symbol statistics, and numbers file
    387358    # from the clauses file
    388     print "\nExtracting vocabulary and statistics\n" if $verbosity;
     359    print $out "\nExtracting vocabulary and statistics\n" if $verbosity;
    389360    &extract_vocabulary($self);
    390361
    391362    # Use the suffix program to generate the phind/phrases file
    392     print "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;
    393     &execute("suffix $phinddir $suffixsize $suffixmode", $verbosity);
     363    print $out "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;
     364    &execute("suffix $phinddir $suffixsize $suffixmode", $verbosity, $out);
    394365
    395366    # Create the phrase file and put phrase numbers in phind/phrases
    396     print "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity;
     367    print $out "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity;
    397368    &renumber_phrases($self);
    398369   
     
    407378    my $mg_stem_idx = &util::filename_cat($mgpp, "text", "mg_stem_idx");
    408379
    409     print "\nCreating phrase databases\n";
     380    print $out "\nCreating phrase databases\n";
    410381    my $mg_input = &util::filename_cat($phinddir, "pdata.txt");
    411382    my $mg_stem = "pdata";
    412383
    413     &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity);
    414     &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity);
    415     &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity);
     384    &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out);
     385    &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
     386    &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out);
    416387
    417388    # create the mg index of words
    418     print "\nCreating word-level search indexes\n";
     389    print $out "\nCreating word-level search indexes\n";
    419390    $mg_input = &util::filename_cat($phinddir, "pword.txt");
    420391    $mg_stem = "pword";
    421392
    422     &execute("$mg_passes -d $phinddir -f $mg_stem -T1 -I1 $mg_input", $verbosity);
    423     &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity);
    424     &execute("$mg_perf_hash_build -d $phinddir -f $mg_stem", $verbosity);
    425     &execute("$mg_passes -d $phinddir -f $mg_stem -T2 -I2 $mg_input", $verbosity);
    426     &execute("$mg_weights_build -d $phinddir -f $mg_stem", $verbosity);
    427     &execute("$mg_invf_dict -d $phinddir -f $mg_stem", $verbosity);
    428 
    429     &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 1", $verbosity);
    430     &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 2", $verbosity);
    431     &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 3", $verbosity);
     393    &execute("$mg_passes -d $phinddir -f $mg_stem -T1 -I1 $mg_input", $verbosity, $out);
     394    &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
     395    &execute("$mg_perf_hash_build -d $phinddir -f $mg_stem", $verbosity, $out);
     396    &execute("$mg_passes -d $phinddir -f $mg_stem -T2 -I2 $mg_input", $verbosity, $out);
     397    &execute("$mg_weights_build -d $phinddir -f $mg_stem", $verbosity, $out);
     398    &execute("$mg_invf_dict -d $phinddir -f $mg_stem", $verbosity, $out);
     399
     400    &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 1", $verbosity, $out);
     401    &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 2", $verbosity, $out);
     402    &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 3", $verbosity, $out);
    432403
    433404    # create the mg document information database
    434     print "\nCreating document information databases\n";
     405    print $out "\nCreating document information databases\n";
    435406    $mg_input = &util::filename_cat($phinddir, "docs.txt");
    436407    $mg_stem = "docs";
    437408
    438     &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity);
    439     &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity);
    440     &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity);
     409    &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out);
     410    &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
     411    &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out);
    441412
    442413
    443414    # Tidy up stray files
    444415    if (!$self->{'untidy'}) {
    445     print "\nCleaning up\n" if ($verbosity > 2);
     416    print $out "\nCleaning up\n" if ($verbosity > 2);
    446417    &util::rm("$phinddir/clauses", "$phinddir/clauses.numbers",
    447418          "$phinddir/clauses.vocab", "$phinddir/clauses.stats",
     
    563534
    564535sub execute {
    565     my ($command, $verbosity) = @_;
    566     print "Executing: $command\n"  if ($verbosity > 2);
     536    my ($command, $verbosity, $outhandle) = @_;
     537    print $outhandle "Executing: $command\n"  if ($verbosity > 2);
    567538    my $status = system($command);
    568539    if ($status != 0) {
     
    581552   
    582553    my $verbosity = $self->{'verbosity'};
    583     my $out = $self->{'out'};
     554    my $out = $self->{'outhandle'};
    584555
    585556    my $language = "english"; # $self->{'language'};
     
    917888   
    918889    my $verbosity = $self->{'verbosity'};
    919     my $out = $self->{'out'};
     890    my $out = $self->{'outhandle'};
    920891    print $out "Translate phrases: suffix-ids become phind-id's\n"
    921892    if ($verbosity);
     
    1028999    my ($self) = @_;
    10291000 
    1030     my $out = $self->{'out'};
     1001    my $out = $self->{'outhandle'};
    10311002    my $verbosity = $self->{'verbosity'};
    10321003    my $thesaurus = $self->{'thesaurus'};
     
    10691040
    10701041    # 2.
    1071     # Read phrases file and note all thesaurus entries that already
    1072     # have a phindid
     1042    # Read phrases file to find thesaurus entries that already
     1043    # have a phindid.  Store their phind-ids for later translation.
    10731044    my %thesaurustophindid;
    10741045    my ($phindid);
     
    12331204    my ($self) = @_;
    12341205 
    1235     my $out = $self->{'out'};
     1206    my $out = $self->{'outhandle'};
    12361207    my $verbosity = $self->{'verbosity'};
    12371208    print $out "Translate phrases.3: restore vocabulary\n" if ($verbosity);
     
    13011272    # output the phrases to a text file
    13021273    if ($savephrases) {
    1303         print SAVE "$tf\t$countdocs\t$text\n";
     1274        print SAVE $fields[0], "\t", $fields[2], "\t", "$text\n";
    13041275    }
    13051276    }
Note: See TracChangeset for help on using the changeset viewer.