Changeset 1839
- Timestamp:
- 2001-01-16T11:50:05+13:00 (23 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/classify.pm
r1515 r1839 34 34 35 35 sub load_classifiers { 36 my ($classify_list ) = @_;36 my ($classify_list, $build_dir, $outhandle) = @_; 37 37 my @classify_objects = (); 38 38 39 my $outhandle = pop (@_);40 41 39 foreach $classifyoption (@$classify_list) { 42 40 … … 44 42 my $classname = shift @$classifyoption; 45 43 next unless defined $classname; 46 44 47 45 # find the classifier 48 46 my $colclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/classify", … … 57 55 # create the classify object 58 56 my ($classobj); 59 map { $_ = "\"$_\""; } @$classifyoption; 60 my $options = join (",", @$classifyoption, $outhandle); 57 58 # backwards compatability hack: if the classifier options are 59 # in "x=y" format, convert them to parsearg ("-x y") format. 60 my ($opt, $key, $value); 61 my @newoptions; 62 foreach $opt (@$classifyoption) { 63 if ($opt =~ /^(\w+)=(.*)$/) { 64 push @newoptions, "-$1", $2; 65 } else { 66 push @newoptions, $opt; 67 } 68 } 69 push @newoptions, "-builddir", "$build_dir"; 70 push @newoptions, "-outhandle", "$outhandle"; 71 push @newoptions, "-verbosity", "2"; 72 73 map { $_ = "\"$_\""; } @newoptions; 74 my $options .= join (",", @newoptions); 75 76 # print STDERR "$classname\noptions: $options\n"; 61 77 eval ("\$classobj = new \$classname($options)"); 62 78 die "$@" if $@; 63 64 # add this object to the list79 80 # add this object to the list 65 81 push (@classify_objects, $classobj); 66 82 } -
trunk/gsdl/perllib/classify/AZCompactList.pm
r1716 r1839 50 50 } 51 51 52 sub print_usage { 53 print STDERR " 54 usage: classify AZCompactList [options] 55 options: 56 -metadata Metadata field used for classification 57 -title Title to use on web pages 58 59 -mingroup 60 -minnesting 61 -mincompact 62 -maxcompact 63 -doclevel 64 -onlyfirst 65 -recopt 66 "; 67 } 68 52 69 sub new { 53 my ($class, @options) = @_;54 my $self = new BasClas 70 my $class = shift (@_); 71 my $self = new BasClas($class, @_); 55 72 56 73 my ($metaname, $title); … … 63 80 my $recopt = undef; 64 81 65 foreach $option (@options) { 66 if ($option =~ /^metadata=(.*)$/i) { 67 $metaname = $1; 68 } elsif ($option =~ /^title=(.*)$/i) { 69 $title = $1; 70 } elsif ($option =~ /^mingroup(size)?=(.*)$/i) { 71 $mingroup = $2; 72 } elsif ($option =~ /^minnesting=(.*)$/i) { 73 $minnesting = $1; 74 } elsif ($option =~ /^mincompact=(.*)$/i) { 75 $mincompact = $1; 76 } elsif ($option =~ /^maxcompact=(.*)$/i) { 77 $maxcompact = $1; 78 } elsif ($option =~ /^doclevel=(.*)$/i) { 79 $doclevel = $1; 80 } elsif ($option =~ /^onlyfirst$/i) { 81 $onlyfirst = 1; 82 } elsif ($option =~ /^recopt$/i) { 83 $recopt = "on"; 84 } 85 } 82 if (!parsargv::parse(\@_, 83 q^metadata/.*/^, \$metaname, 84 q^title/.*/^, \$title, 85 86 q^mingroup/.*/2^, \$mingroup, 87 q^minnesting/.*/20^, \$minnesting, 88 q^mincompact/.*/10^, \$mincompact, 89 q^maxcompact/.*/30^, \$maxcompact, 90 q^doclevel/.*/top^, \$doclevel, 91 q^onlyfirst/.*/0^, \$onlyfirst, 92 q^recopt/.*/-1^, \$recopt, 93 94 "allow_extra_options")) { 95 96 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 97 &print_usage(); 98 die "\n"; 99 } 100 101 # These are the old-fashioned optins to AZCompactList. 102 # They should be deleted once we're sure the parsearg version is working. 103 # foreach $option (@options) { 104 # if ($option =~ /^metadata=(.*)$/i) { 105 # $metaname = $1; 106 # } elsif ($option =~ /^title=(.*)$/i) { 107 # $title = $1; 108 # } elsif ($option =~ /^mingroup(size)?=(.*)$/i) { 109 # $mingroup = $2; 110 # } elsif ($option =~ /^minnesting=(.*)$/i) { 111 # $minnesting = $1; 112 # } elsif ($option =~ /^mincompact=(.*)$/i) { 113 # $mincompact = $1; 114 # } elsif ($option =~ /^maxcompact=(.*)$/i) { 115 # $maxcompact = $1; 116 # } elsif ($option =~ /^doclevel=(.*)$/i) { 117 # $doclevel = $1; 118 # } elsif ($option =~ /^onlyfirst$/i) { 119 # $onlyfirst = 1; 120 # } elsif ($option =~ /^recopt$/i) { 121 # $recopt = "on"; 122 # } 123 # } 86 124 87 125 if (!defined $metaname) { … … 91 129 } 92 130 93 $title = $metaname unless defined $title;131 $title = $metaname unless ($title); 94 132 95 133 $self->{'list'} = {}; … … 104 142 $self->{'maxcompact'} = $maxcompact; 105 143 $self->{'doclevel'} = $doclevel; 144 145 if ($onlyfirst != 0) { 146 $onlyfirst = 1; 147 } 106 148 $self->{'onlyfirst'} = $onlyfirst; 149 150 if ($recopt == -1) { 151 $recopt = undef; 152 } else { 153 $recopt = "on"; 154 } 107 155 $self->{'recopt'} = $recopt; 108 156 -
trunk/gsdl/perllib/classify/AZList.pm
r1483 r1839 41 41 } 42 42 43 sub print_usage { 44 print STDERR " 45 usage: classify AZList [options] 46 options: 47 48 -metadata X (erquired) Metadata field used for classification. 49 List will be sorted by this element. 50 51 -title X (optional) Title field for this classification. 52 defaults to metadata name. 53 "; 54 } 55 43 56 sub new { 44 my ($class, @options) = @_;45 my $self = new BasClas 46 57 my $class = shift (@_); 58 my $self = new BasClas($class, @_); 59 47 60 my ($metaname, $title); 48 49 foreach $option (@options) { 50 if ($option =~ /^metadata=(.*)$/i) { 51 $metaname = $1; 52 } elsif ($option =~ /^title=(.*)$/i) { 53 $title = $1; 54 } 61 62 if (!parsargv::parse(\@_, 63 q^metadata/.*/^, \$metaname, 64 q^title/.*/^, \$title, 65 "allow_extra_options")) { 66 67 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 68 &print_usage(); 69 die "\n"; 55 70 } 56 71 57 72 if (!defined $metaname) { 58 my $outhandle = $self->{'outhandle'};59 print $outhandle "AZList used with no metadata name to classify by\n";73 &print_usage; 74 print STDERR "AZList used with no metadata name\n"; 60 75 die "\n"; 61 76 } 62 63 $title = $metaname unless defined $title;64 77 78 $title = $metaname unless ($title); 79 65 80 $self->{'list'} = {}; 66 81 $self->{'metaname'} = $metaname; -
trunk/gsdl/perllib/classify/AZSectionList.pm
r1611 r1839 43 43 } 44 44 45 sub print_usage { 46 print STDERR " 47 usage: classify AZSectionList [options] 48 options: 49 50 -metadata X Metadata field used for classification, 51 list will be sorted by this element. 52 53 -title X (OPTIONAL) Title field for this classification. 54 if not included title field will be Metaname. 55 56 This is very similar to AZList except it sorts by section level metadata 57 (excluding the top level) instead of just top level metadata. 58 "; 59 } 60 45 61 sub new { 46 my ($class, @options) = @_; 47 my $self = new AZList ($class, @_); 62 my $class = shift (@_); 63 my $self = new AZList($class, @_); 64 48 65 return bless $self, $class; 49 66 } -
trunk/gsdl/perllib/classify/BasClas.pm
r1483 r1839 26 26 package BasClas; 27 27 28 use parsargv; 29 30 31 sub print_general_usage { 32 my ($plugin_name) = @_; 33 print STDERR "\n usage: plugin $plugin_name [options]\n\n"; 34 } 35 36 # print_usage should be overridden for any sub-classes 37 sub print_usage { 38 print STDERR "\nThis classifier has no classifier-specific options\n\n"; 39 40 } 28 41 29 42 sub new { 30 my ($class, @options) = @_; 43 my $class = shift (@_); 44 my $name = shift (@_); 31 45 32 46 my $self = {}; 33 47 34 # this relies on the outpit handle always being 35 # the last thing in the list - it should be tidied 36 # up when the options are fixed up to use parseargv 37 $self->{'outhandle'} = pop @options; 38 if (!defined $self->{'outhandle'}) { 39 $self->{'outhandle'} = STDERR; 48 $self->{'outhandle'} = STDERR; 49 50 # general options available to all classifiers 51 if (!parsargv::parse(\@_, 52 q^builddir/.*/^, \$self->{'builddir'}, 53 q^outhandle/.*/STDERR^, \$self->{'outhandle'}, 54 q^verbosity/\d/2^, \$self->{'verbosity'}, 55 "allow_extra_options")) { 56 57 print STDERR "\nThe $name classifier uses an incorrect general option\n"; 58 print STDERR "(general options are those available to all classifiers).\n"; 59 print STDERR "Check your collect.cfg configuration file.\n"; 60 &print_general_usage($plugin_name); 61 die "\n"; 40 62 } 63 64 41 65 42 66 return bless $self, $class; -
trunk/gsdl/perllib/classify/DateList.pm
r1483 r1839 41 41 } 42 42 43 sub print_usage { 44 print STDERR " 45 usage: classify DateList 46 47 Classifier plugin for sorting by date. 48 No options - always sorts by 'Date' metadata 49 Date is assumed to be in the form yyyymmdd 50 At present dates are split by year - this should change 51 52 Any errors are Dana's problem. 53 "; 54 } 55 43 56 sub new { 44 my ($class, @options) = @_;45 my $self = new BasClas 57 my $class = shift (@_); 58 my $self = new BasClas($class, @_); 46 59 47 60 $self->{'list'} = {}; -
trunk/gsdl/perllib/classify/HTML.pm
r1483 r1839 39 39 } 40 40 41 sub print_usage { 42 print STDERR " 43 usage: classify AZList [options] 44 options: 45 -url X The url of the web page to link to. 46 -title X (optional) the title field for this classification. 47 The default is 'Browse' 48 49 HTML classifier plugin - creates classifier that is a link to a web page 50 51 "; 52 } 53 41 54 sub new { 42 my ($class, @options) = @_;43 my $self = new BasClas 44 55 my $class = shift (@_); 56 my $self = new BasClas($class, @_); 57 45 58 my ($title, $url); 46 47 foreach $option (@options) { 48 if ($option =~ /^url=(.*)$/i) { 49 $url = $1; 50 } elsif ($option =~ /^title=(.*)$/i) { 51 $title = $1; 52 } 59 60 if (!parsargv::parse(\@_, 61 q^url/.*/^, \$url, 62 q^title/.*/Browse^, \$title, 63 "allow_extra_options")) { 64 65 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 66 &print_usage(); 67 die "\n"; 53 68 } 54 69 … … 58 73 die "\n"; 59 74 } 60 61 $title = 'Browse' unless defined $title;62 75 63 76 $self->{'url'} = $url; -
trunk/gsdl/perllib/classify/Hierarchy.pm
r1608 r1839 50 50 } 51 51 52 sub print_usage { 53 print STDERR " 54 usage: classify Hierarchy [options] 55 options: 56 57 -title X Title field for this classification. 58 Defaults to metadata name. 59 60 -metadata X Metadata field used for classification, 61 list will be sorted by this element. 62 63 -hfile X The classification structure file 64 65 -sort X Metadata field to sort by (defaults to none) 66 "; 67 } 68 69 52 70 sub new { 53 my ($class, @options) = @_;54 my $self = new BasClas 55 71 my $class = shift (@_); 72 my $self = new BasClas($class, @_); 73 56 74 my $sortname = "Title"; 57 75 my ($hfile, $metadata, $title); 58 76 59 foreach $option (@options) { 60 if ($option =~ /^sort=(.*)$/i) { 61 $sortname = $1; 62 $sortname = undef if $sortname =~ /^nosort$/i; 63 } elsif ($option =~ /^hfile=(.*)$/i) { 64 $hfile = $1; 65 } elsif ($option =~ /^metadata=(.*)$/i) { 66 $metadata = $1; 67 } elsif ($option =~ /^title=(.*)$/i) { 68 $title = $1; 69 } 70 } 71 72 if (!defined ($hfile) || !defined ($metadata)) { 73 my $outhandle = $self->{'outhandle'}; 74 print $outhandle "Error in options passed to Hierarchy classification\n"; 75 print $outhandle "hfile and metadata options aren't optional\n\n"; 76 print $outhandle "Options:\n"; 77 print $outhandle "hfile -- the classification structure file\n"; 78 print $outhandle "metadata -- metadata field to test against hfile\n"; 79 print $outhandle "sort -- metadata field to sort by\n"; 80 print $outhandle "title -- title of classification\n\n"; 77 if (!parsargv::parse(\@_, 78 q^title/.*/^, \$title, 79 q^sort/.*/nosort^, \$sortname, 80 q^hfile/.*/^, \$hfile, 81 q^metadata/.*/^, \$metadata, 82 "allow_extra_options")) { 83 84 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 85 &print_usage(); 81 86 die "\n"; 82 87 } 83 88 84 $title = $metadata unless defined $title; 89 if (!$metadata) { 90 &print_usage; 91 print STDERR "\nHierarchy error: no metadata supplied\n"; 92 die "\n"; 93 } 94 95 $title = $metadata unless ($title); 96 97 $sortname = undef if $sortname =~ /^nosort$/; 98 99 if (!$hfile) { 100 &print_usage; 101 print STDERR "\nHierarchy error: No -hfile supplied\n"; 102 die "\n"; 103 } 85 104 86 105 my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile); … … 90 109 if (!-e $subjectfile) { 91 110 my $outhandle = $self->{'outhandle'}; 92 print $outhandle "Hierarchy Error: Can't locate subject file $hfile\n"; 93 print $outhandle "This file should be in $collfile or $subjectfile\n"; 111 &print_usage; 112 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n"; 113 print STDERR "This file should be in $collfile or $subjectfile\n"; 94 114 die "\n"; 95 115 } -
trunk/gsdl/perllib/classify/List.pm
r1483 r1839 38 38 # if metadata is also not included title will be 'List' 39 39 40 use BasClas; 40 41 package List; 41 42 … … 46 47 } 47 48 49 sub print_usage { 50 print STDERR " 51 usage: classify List [options] 52 options: 53 54 -metadata X Metadata field used for classification, 55 list will be sorted by this element. 56 57 -title X (optional) Title field for this classification. 58 if not included title field will be Metaname. 59 60 -sort X (optional) Sort documents in list by this metadata field. 61 By default it will sort by Metaname, or (if this is not 62 set) in build (random) order. 63 "; 64 } 65 48 66 sub new { 49 my ($class, @options) = @_; 50 my $self = new BasClas ($class, @_); 51 52 my $list = []; 53 my ($metaname, $title, $sortname); 54 55 foreach $option (@options) { 56 if ($option =~ /^metadata=(.*)$/i) { 57 $metaname = $1; 58 $list = {}; 59 } elsif ($option =~ /^title=(.*)$/i) { 60 $title = $1; 61 } elsif ($option =~ /^sort=(.*)$/i) { 62 $sortname = $1; 63 } 64 } 65 66 if (!defined $title) { 67 if (defined $metaname) { 67 my $class = shift (@_); 68 my $self = new BasClas($class, @_); 69 70 my ($metaname, $title, $sortname, $list); 71 72 if (!parsargv::parse(\@_, 73 q^metadata/.*/^, \$metaname, 74 q^title/.*/^, \$title, 75 q^sort/.*/^, \$sortname, 76 "allow_extra_options")) { 77 78 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 79 &print_usage(); 80 die "\n"; 81 } 82 83 if (!$title) { 84 if ($metaname) { 68 85 $title = $metaname; 69 86 } else { … … 72 89 } 73 90 74 if (defined $sortname && $sortname =~ /^nosort$/i) { 75 $sortname = undef; 76 } elsif (!defined $sortname && defined $metaname) { 77 $sortname = $metaname; 91 if (!$sortname) { 92 if ($metaname) { 93 $sortname = $metaname; 94 } else { 95 $sortname = undef; 96 } 97 } 98 99 if ($metaname) { 100 $list = {}; 101 } else { 102 $list = []; 103 $metaname = undef; 78 104 } 79 105 -
trunk/gsdl/perllib/classify/SectionList.pm
r1611 r1839 37 37 } 38 38 39 sub print_usage { 40 print STDERR " 41 usage: classify SectionList [options] 42 options: 43 44 -metadata X Metadata field used for classification, 45 list will be sorted by this element. 46 47 -title X (optional) Title field for this classification. 48 if not included title field will be Metaname. 49 50 -sort X (optional) Sort documents in list by this metadata field. 51 By default it will sort by Metaname, or (if this is not 52 set) in build (random) order. 53 54 Same as List classifier but includes all sections of document 55 (excluding top level) rather than just top level document. 56 "; 57 } 58 39 59 sub new { 40 my ($class, @options) = @_; 41 my $self = new List ($class, @_); 60 my $class = shift (@_); 61 my $self = new List($class, @_); 62 42 63 return bless $self, $class; 43 64 } -
trunk/gsdl/perllib/classify/phind.pm
r1829 r1839 85 85 my @delimiters = ($colstart, $colend, $doclimit, $senlimit); 86 86 87 # Create a new phind browser based on the options in collect.cfg 87 88 sub print_usage { 89 print STDERR " 90 usage: classify phind [options] 91 92 options: 93 -title Title to use on web pages 94 -text 95 -title 96 -button 97 -language 98 -savephrases 99 -suffixsize 100 -suffixmode 101 -thesaurus 102 -untidy 103 "; 104 } 105 106 # Create a new phind browser based on collect.cfg 88 107 89 108 sub new { 90 my ($class, @options) = @_; 91 my $self = new BasClas ($class, @_); 92 my $out = pop @options; 109 my $class = shift (@_); 110 my $self = new BasClas($class, @_); 111 112 my $out = $self->{'outhandle'}; 113 93 114 94 115 # Phind installation check … … 98 119 # and get all the files in place, then we let them proceed. 99 120 100 print $out "The Phind classifier for Greenstone.\n"; 101 print $out "Checking the phind phrase browser requirements...\n"; 121 print $out "Checking Phind phrase browser requirements...\n"; 102 122 103 123 # Make sure we're not in windows 104 124 if ($ENV{'GSDLOS'} =~ /windows/i) { 105 print STDERR " Phind currently only works under Unix";125 print STDERR "Sorry - Phind currently only works under Unix"; 106 126 exit(1); 107 127 } … … 144 164 } 145 165 146 # The installation appears OK - set up the classifier 147 my $collection = $ENV{'GSDLCOLLECTION'}; 148 my $language = "english"; 149 150 my $button = "Phrase"; 151 my $title = "Title"; 152 my $indexes = "section:Title,section:text"; 153 166 # Parse classifier arguments 154 167 my $builddir = ""; 155 168 my $phinddir = ""; 156 157 my $suffixmode = 1; 158 my $suffixsize = 40000000; 159 my $savephrases = 0; 160 161 my $verbosity = 2; 162 my $untidy = 0; 163 164 my $thesaurus = ""; 165 166 # parse the options 167 foreach $option (@options) { 168 169 print STDERR "option: $option\n"; 170 171 if ($option =~ /^text=(.*)$/i) { 172 $indexes = $1; 173 } elsif ($option =~ /^title=(.*)$/i) { 174 $title = $1; 175 } elsif ($option =~ /^button=(.*)$/i) { 176 $button = $1; 177 } elsif ($option =~ /^builddir=(.*)$/i) { 178 $builddir = $1; 179 } elsif ($option =~ /^phinddir=(.*)$/i) { 180 $phinddir = $1; 181 } elsif ($option =~ /^savephrases=(.*)$/i) { 182 $savephrases = $1; 183 } elsif ($option =~ /^suffixsize=(.*)$/i) { 184 $suffixsize = $1; 185 } elsif ($option =~ /^suffixmode=(.*)$/i) { 186 $suffixmode = $1; 187 } elsif ($option =~ /^thesaurus=(.*)$/i) { 188 $thesaurus = $1; 189 } elsif ($option =~ /^untidy/i) { 190 $untidy = 1; 191 } elsif ($option =~ /^verbosity=(.*)$/i) { 192 $verbosity = $1; 193 } 194 } 195 169 if (!parsargv::parse(\@_, 170 q^text/.*/section:Title,section:text^, \$self->{'indexes'}, 171 q^title/.*/Title^, \$self->{'titlefield'}, 172 q^button/.*/Phrase^, \$self->{'buttonname'}, 173 q^language/.*/english^, \$language, 174 q^builddir/.*/^, \$builddir, 175 q^savephrases/\d/0^, \$self->{'savephrases'}, 176 q^suffixsize/\d+/40000^, \$self->{'suffixsize'}, 177 q^suffixmode/\d/1^, \$self->{'suffixmode'}, 178 q^thesaurus/.*/^, \$self->{'thesaurus'}, 179 q^untidy^, \$self->{'untidy'}, 180 "allow_extra_options")) { 181 182 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 183 &print_usage(); 184 die "\n"; 185 } 186 196 187 # classifier information 197 $self->{'collection'} = $collection; 198 $self->{'titlefield'} = $title; 199 $self->{'buttonname'} = $button; 200 $self->{'indexes'} = $indexes; 201 202 # phrase extraction options 203 $self->{'suffixmode'} = $suffixmode; 204 $self->{'suffixsize'} = $suffixsize; 205 $self->{'savephrases'} = $savephrases; 206 $self->{'thesaurus'} = $thesaurus; 188 $self->{'collection'} = $ENV{'GSDLCOLLECTION'}; 207 189 208 190 # limit languages 209 191 $language =~ s/,/\|/g; 210 192 $self->{'language_exp'} = $language; 211 $self->{'delimiter'} = $delimiter; 212 213 # collection directory 193 194 # collection directories 214 195 $self->{'collectiondir'} = $ENV{'GSDLCOLLECTDIR'}; 215 216 # build directory217 196 if (!$builddir) { 218 197 $builddir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "building"); 219 198 } 220 199 $self->{'builddir'} = $builddir; 221 222 # phind directory 223 if (!$phinddir) { 224 $phinddir = &util::filename_cat($builddir, "phind"); 225 } 226 $self->{'phinddir'} = $phinddir; 227 228 # debugging levels 229 $self->{'verbosity'} = $verbosity; 230 $self->{'untidy'} = $untidy; 231 $self->{'out'} = $out; 232 200 $self->{'phinddir'} = &util::filename_cat($builddir, "phind"); 201 233 202 return bless $self, $class; 234 203 } … … 372 341 373 342 my $verbosity = $self->{'verbosity'}; 343 my $out = $self->{'outhandle'}; 344 374 345 my $phinddir = $self->{'phinddir'}; 375 346 my $language = "english"; 376 347 377 348 if ($verbosity) { 378 print STDERR"\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n";349 print $out "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n"; 379 350 } 380 351 … … 386 357 # Generate the vocabulary, symbol statistics, and numbers file 387 358 # from the clauses file 388 print "\nExtracting vocabulary and statistics\n" if $verbosity;359 print $out "\nExtracting vocabulary and statistics\n" if $verbosity; 389 360 &extract_vocabulary($self); 390 361 391 362 # Use the suffix program to generate the phind/phrases file 392 print "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;393 &execute("suffix $phinddir $suffixsize $suffixmode", $verbosity );363 print $out "\nExtracting phrases from processed text (with suffix)\n" if $verbosity; 364 &execute("suffix $phinddir $suffixsize $suffixmode", $verbosity, $out); 394 365 395 366 # Create the phrase file and put phrase numbers in phind/phrases 396 print "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity;367 print $out "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity; 397 368 &renumber_phrases($self); 398 369 … … 407 378 my $mg_stem_idx = &util::filename_cat($mgpp, "text", "mg_stem_idx"); 408 379 409 print "\nCreating phrase databases\n";380 print $out "\nCreating phrase databases\n"; 410 381 my $mg_input = &util::filename_cat($phinddir, "pdata.txt"); 411 382 my $mg_stem = "pdata"; 412 383 413 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity );414 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity );415 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity );384 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out); 385 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out); 386 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out); 416 387 417 388 # create the mg index of words 418 print "\nCreating word-level search indexes\n";389 print $out "\nCreating word-level search indexes\n"; 419 390 $mg_input = &util::filename_cat($phinddir, "pword.txt"); 420 391 $mg_stem = "pword"; 421 392 422 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 -I1 $mg_input", $verbosity );423 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity );424 &execute("$mg_perf_hash_build -d $phinddir -f $mg_stem", $verbosity );425 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 -I2 $mg_input", $verbosity );426 &execute("$mg_weights_build -d $phinddir -f $mg_stem", $verbosity );427 &execute("$mg_invf_dict -d $phinddir -f $mg_stem", $verbosity );428 429 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 1", $verbosity );430 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 2", $verbosity );431 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 3", $verbosity );393 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 -I1 $mg_input", $verbosity, $out); 394 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out); 395 &execute("$mg_perf_hash_build -d $phinddir -f $mg_stem", $verbosity, $out); 396 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 -I2 $mg_input", $verbosity, $out); 397 &execute("$mg_weights_build -d $phinddir -f $mg_stem", $verbosity, $out); 398 &execute("$mg_invf_dict -d $phinddir -f $mg_stem", $verbosity, $out); 399 400 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 1", $verbosity, $out); 401 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 2", $verbosity, $out); 402 &execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 3", $verbosity, $out); 432 403 433 404 # create the mg document information database 434 print "\nCreating document information databases\n";405 print $out "\nCreating document information databases\n"; 435 406 $mg_input = &util::filename_cat($phinddir, "docs.txt"); 436 407 $mg_stem = "docs"; 437 408 438 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity );439 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity );440 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity );409 &execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out); 410 &execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out); 411 &execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out); 441 412 442 413 443 414 # Tidy up stray files 444 415 if (!$self->{'untidy'}) { 445 print "\nCleaning up\n" if ($verbosity > 2);416 print $out "\nCleaning up\n" if ($verbosity > 2); 446 417 &util::rm("$phinddir/clauses", "$phinddir/clauses.numbers", 447 418 "$phinddir/clauses.vocab", "$phinddir/clauses.stats", … … 563 534 564 535 sub execute { 565 my ($command, $verbosity ) = @_;566 print "Executing: $command\n" if ($verbosity > 2);536 my ($command, $verbosity, $outhandle) = @_; 537 print $outhandle "Executing: $command\n" if ($verbosity > 2); 567 538 my $status = system($command); 568 539 if ($status != 0) { … … 581 552 582 553 my $verbosity = $self->{'verbosity'}; 583 my $out = $self->{'out '};554 my $out = $self->{'outhandle'}; 584 555 585 556 my $language = "english"; # $self->{'language'}; … … 917 888 918 889 my $verbosity = $self->{'verbosity'}; 919 my $out = $self->{'out '};890 my $out = $self->{'outhandle'}; 920 891 print $out "Translate phrases: suffix-ids become phind-id's\n" 921 892 if ($verbosity); … … 1028 999 my ($self) = @_; 1029 1000 1030 my $out = $self->{'out '};1001 my $out = $self->{'outhandle'}; 1031 1002 my $verbosity = $self->{'verbosity'}; 1032 1003 my $thesaurus = $self->{'thesaurus'}; … … 1069 1040 1070 1041 # 2. 1071 # Read phrases file and note allthesaurus entries that already1072 # have a phindid 1042 # Read phrases file to find thesaurus entries that already 1043 # have a phindid. Store their phind-ids for later translation. 1073 1044 my %thesaurustophindid; 1074 1045 my ($phindid); … … 1233 1204 my ($self) = @_; 1234 1205 1235 my $out = $self->{'out '};1206 my $out = $self->{'outhandle'}; 1236 1207 my $verbosity = $self->{'verbosity'}; 1237 1208 print $out "Translate phrases.3: restore vocabulary\n" if ($verbosity); … … 1301 1272 # output the phrases to a text file 1302 1273 if ($savephrases) { 1303 print SAVE "$tf\t$countdocs\t$text\n";1274 print SAVE $fields[0], "\t", $fields[2], "\t", "$text\n"; 1304 1275 } 1305 1276 }
Note:
See TracChangeset
for help on using the changeset viewer.