Context Navigation

source: trunk/gsdl/perllib/classify/phind.pm@ 2083

Last change on this file since 2083 was 2083, checked in by paynter, 23 years ago
Fixed a stupid mistake that I know I've fixed before.
Property svn:keywords set to `Author Date Id Revision`
File size: 39.3 KB

Line
1	###########################################################################
2	#
3	# phind.pm -- the Phind classifier
4	#
5	# Copyright (C) 2000 Gordon W. Paynter
6	# Copyright (C) 2000 New Zealand Digital Library Project
7	#
8	#
9	# A component of the Greenstone digital library software
10	# from the New Zealand Digital Library Project at the
11	# University of Waikato, New Zealand.
12	#
13	# This program is free software; you can redistribute it and/or modify
14	# it under the terms of the GNU General Public License as published by
15	# the Free Software Foundation; either version 2 of the License, or
16	# (at your option) any later version.
17	#
18	# This program is distributed in the hope that it will be useful,
19	# but WITHOUT ANY WARRANTY; without even the implied warranty of
20	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21	# GNU General Public License for more details.
22	#
23	# You should have received a copy of the GNU General Public License
24	# along with this program; if not, write to the Free Software
25	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26	#
27	###########################################################################
28
29	# The phind clasifier plugin.
30	# Options are dexcribed in the print_usage function.
31	# Type "classinfo.pl phind" at the command line for a summary.
32
33	package phind;
34
35	use BasClas;
36	use util;
37	use ghtml;
38	use unicode;
39
40	sub BEGIN {
41	@ISA = ('BasClas');
42	}
43
44
45	sub print_usage {
46	print STDERR "
47	usage: classify phind [options]
48
49	options:
50	-text Fields The text used to build the phrase hierarchy.
51	(default: 'section:Title,section:text')
52
53	-title Title The metadata field used to describe each document.
54	(default: 'Title')
55
56	-button Name The label for the classifier screen and button in
57	navigation bar.
58	(default: 'Phrase')
59
60	-language Regex Language or languages to use building hierarchy.
61	Languages are identified by two-letter country codes
62	like en (English), es (Spanish), and fr (French).
63	Language is a regular expression, so 'en\|fr' (English or
64	French) and '..' (match any language) are valid.
65	(default: 'en'.)
66
67	-savephrases File If set, the phrase infomation will be stored in
68	the given file as text. It is probably a good idea
69	to use an absolute path.
70	(defualt: not set)
71
72	-suffixmode N The smode parameter to the phrase extraction program. A
73	value of 0 means that stopwords are ignored, and of 1
74	means that stopwords are used.
75	(default: 1)
76
77	-thesaurus Name Name of a thesaurus stored in phind format in the
78	collection's etc directory.
79	(default: not set)
80
81	-untidy Don't remove working files.
82
83	"; }
84
85
86	# Phrase delimiter symbols - these should be abstracted out someplace
87
88	my $colstart = "COLLECTIONSTART";
89	my $colend = "COLLECTIONEND";
90	my $doclimit = "DOCUMENTLIMIT";
91	my $senlimit = "SENTENCELIMIT";
92	my @delimiters = ($colstart, $colend, $doclimit, $senlimit);
93
94
95	# Create a new phind browser based on collect.cfg
96
97	sub new {
98	my $class = shift (@_);
99	my $self = new BasClas($class, @_);
100
101	my $out = $self->{'outhandle'};
102
103
104	# Phind installation check
105	# The phind phrase browser is research software and is not installed
106	# by defualt. If the user attepts to use it we warn them that it's a
107	# bit dodgy, then tell them how to install it. If they can do that
108	# and get all the files in place, then we let them proceed.
109
110	print $out "Checking Phind phrase browser requirements...\n";
111
112	# Make sure we're not in windows
113	if ($ENV{'GSDLOS'} =~ /windows/i) {
114	print STDERR "Sorry - Phind currently only works under Unix";
115	exit(1);
116	}
117
118	# Ensure the Phind generate scripts are in place
119	my $file1 = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "suffix");
120	my $src = &util::filename_cat($ENV{'GSDLHOME'}, "src", "phind", "generate");
121
122	if (!(-e $file1)) {
123	print STDERR "The phind \"suffix\" program is not installed. ";
124	print STDERR "To install it, change to the directory\n";
125	print STDERR " $src\n";
126	print STDERR "and type \"make install-phind\".\n\n";
127	exit(1);
128	}
129
130	# Ensure the Phind CGI script is in place
131	$file1 = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "phindcgi");
132	$src = &util::filename_cat($ENV{'GSDLHOME'}, "src", "phind", "host");
133
134	if (!(-e $file1)) {
135	print STDERR "The phind CGI program is not installed. ";
136	print STDERR "To install it, change to the directory\n";
137	print STDERR " $src\n";
138	print STDERR "and type \"make install-phind\".\n\n";
139	exit(1);
140	}
141
142	# Ensure the Phind Java applet is in place
143	$src = &util::filename_cat($ENV{'GSDLHOME'}, "src", "phind", "client");
144	$file1 = &util::filename_cat($src, "Phind.class");
145
146	if (!(-e $file1)) {
147	print STDERR "The phind Java classes are not compiled. ";
148	print STDERR "To compile them, change to the directory\n";
149	print STDERR " $src\n";
150	print STDERR "and use your Java compiler to compile Phind.java.\n";
151	print STDERR "(if you have Java 1.2 installed, type \"javac Phind.java\")\n\n";
152	exit(1);
153	}
154
155	# Parse classifier arguments
156	my $builddir = "";
157	if (!parsargv::parse(\@_,
158	q^text/.*/section:Title,section:text^, \$self->{'indexes'},
159	q^title/.*/Title^, \$self->{'titlefield'},
160	q^button/.*/Phrase^, \$self->{'buttonname'},
161	q^language/.*/en^, \$language,
162	q^builddir/.*/^, \$builddir,
163	q^savephrases/\d/0^, \$self->{'savephrases'},
164	q^suffixmode/\d/1^, \$self->{'suffixmode'},
165	q^thesaurus/.*/^, \$self->{'thesaurus'},
166	q^untidy^, \$self->{'untidy'},
167	"allow_extra_options")) {
168
169	print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
170	&print_usage();
171	die "\n";
172	}
173
174	# classifier information
175	$self->{'collection'} = $ENV{'GSDLCOLLECTION'};
176
177	# limit languages
178	$self->{'language_exp'} = $language;
179
180	# collection directories
181	$self->{'collectiondir'} = $ENV{'GSDLCOLLECTDIR'};
182	if (!$builddir) {
183	$builddir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "building");
184	}
185	$self->{'builddir'} = $builddir;
186
187	return bless $self, $class;
188	}
189
190
191	# Initialise the phind classifier
192
193	sub init {
194	my $self = shift (@_);
195
196	# ensure we have a build directory
197	my $builddir = $self->{'builddir'};
198	die unless (-e "$builddir");
199
200	# create phind directory
201	my $phnumber = 1;
202	my $phinddir = &util::filename_cat($builddir, "phind1");
203	while (-e "$phinddir") {
204	$phnumber++;
205	$phinddir = &util::filename_cat($builddir, "phind$phnumber");
206	}
207	&util::mk_dir("$phinddir");
208	$self->{'phinddir'} = $phinddir;
209	$self->{'phindnumber'} = $phnumber;
210
211	# open filehandles for documents and text
212	my $clausefile = &util::filename_cat("$phinddir", "clauses");
213	&util::rm($clausefile) if (-e $clausefile);
214
215	my $txthandle = 'TEXT' . $phnumber;
216	open($txthandle, ">$clausefile") \|\| die "Cannot open $clausefile: $!";
217	$self->{'txthandle'} = $txthandle;
218
219	my $docfile = &util::filename_cat("$phinddir", "docs.txt");
220	&util::rm($docfile) if (-e $docfile);
221
222	my $dochandle = 'DOC' . $phnumber;
223	open($dochandle, ">$docfile") \|\| die "Cannot open $docfile: $!";
224	$self->{'dochandle'} = $dochandle;
225
226	}
227
228
229	# Classify each document.
230	#
231	# Each document is passed here in turn. The classifier extracts the
232	# text of each and stores it in the clauses file. Document details are
233	# stored in the docs.txt file.
234
235	sub classify {
236	my $self = shift (@_);
237	my $doc_obj = shift @_;
238
239	my $verbosity = $self->{'verbosity'};
240	my $top_section = $doc_obj->get_top_section();
241
242	my $titlefield = $self->{'titlefield'};
243
244	my $title = $doc_obj->get_metadata_element ($top_section, $titlefield);
245	print "process: $title\n" if ($verbosity > 2);
246
247	# Only consider the file if it is in the correct language
248	my $doclanguage = $doc_obj->get_metadata_element ($top_section, "Language");
249	my $phrlanguage = $self->{'language_exp'};
250	return if ($doclanguage && ($doclanguage !~ /$phrlanguage/i));
251
252	# record this file
253	my $total++;
254	print "file $total: $file\n" if ($self->{'$verbosity'});
255
256
257	# Store document details
258	my $OID = $doc_obj->get_OID();
259	$OID = "NULL" unless defined $OID;
260	my $dochandle = $self->{'dochandle'};
261	print $dochandle "<Document>\t$OID\t$title\n";
262
263	# Store the text occuring in this object
264
265	# output the document delimiter
266	my $txthandle = $self->{'txthandle'};
267	print $txthandle "$doclimit\n";
268
269	# iterarate over the required indexes and store their text
270	my $indexes = $self->{'indexes'};
271	my $text = "";
272	my ($part, $level, $field, $section, $data, $dataref);
273
274	foreach $part (split(/,/, $indexes)) {
275
276	# Each field has a level and a data element ((e.g. document:Title)
277	($level, $field) = split(/:/, $part);
278	die unless ($level && $field);
279
280	# Extract the text from every section
281	# (In phind, document:text and section:text are equivalent)
282	if ($field eq "text") {
283	$data = "";
284	$section = $doc_obj->get_top_section();
285	while (defined($section)) {
286	$data .= $doc_obj->get_text($section) . "\n";
287	$section = $doc_obj->get_next_section($section);
288	}
289	$text .= convert_gml_to_tokens($phrlanguage, $data) . "\n";
290	}
291
292	# Extract a metadata field from a document
293	# (If ther eis more than one element of the given type, get them all.)
294	elsif ($level eq "document") {
295	$dataref = $doc_obj->get_metadata($doc_obj->get_top_section(), $field);
296	foreach $data ($@dataref) {
297	$text .= convert_gml_to_tokens($phrlanguage, $data) . "\n";
298	}
299	}
300
301	# Extract metadata from every section in a document
302	elsif ($level eq "section") {
303	$data = "";
304	$section = $doc_obj->get_top_section();
305	while (defined($section)) {
306	$dataref = $doc_obj->get_metadata($section, $field);
307	$data .= join("\n", @$dataref) . "\n";
308	$section = $doc_obj->get_next_section($section);
309	}
310	$text .= convert_gml_to_tokens($phrlanguage, $data) . "\n";
311	}
312
313	# Some sort of specification which I don't understand
314	else {
315	die "Unknown level ($level) in phind index ($part)\n";
316	}
317
318	}
319
320	# output the text
321	$text =~ tr/\n//s;
322	print $txthandle "$text";
323	}
324
325
326	# Construct the classifier from the information already gathered
327	#
328	# When get_classify_info is called, the clauses and docs.txt files have
329	# already been constructed in the phind directory. This function will
330	# translate them into compressed, indexed MGPP files that can be read by
331	# the phindcgi script. It will also register our classifier so that it
332	# shows up in the navigation bar.
333
334	sub get_classify_info {
335	my $self = shift (@_);
336
337	my $verbosity = $self->{'verbosity'};
338	my $out = $self->{'outhandle'};
339	my $phinddir = $self->{'phinddir'};
340
341	if ($verbosity) {
342	print $out "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n";
343	print $out "*** in", $self->{'phinddir'}, "\n";
344	}
345
346	# Construct phind indexes
347	my $suffixmode = $self->{'suffixmode'};
348	my ($command, $status);
349
350	# Generate the vocabulary, symbol statistics, and numbers file
351	# from the clauses file
352	print $out "\nExtracting vocabulary and statistics\n" if $verbosity;
353	&extract_vocabulary($self);
354
355	# Use the suffix program to generate the phind/phrases file
356	print $out "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;
357	&execute("suffix $phinddir $suffixmode $verbosity", $verbosity, $out);
358
359	# Create the phrase file and put phrase numbers in phind/phrases
360	print $out "\nSorting and renumbering phrases for input to mgpp\n" if $verbosity;
361	&renumber_phrases($self);
362
363	# Create the mg phrase database
364	my $mgpp = &util::filename_cat($ENV{'GSDLHOME'}, "src", "mgpp");
365	my $mg_passes = &util::filename_cat($mgpp, "text", "mg_passes");
366	my $mg_compression_dict = &util::filename_cat($mgpp, "text", "mg_compression_dict");
367
368	my $mg_perf_hash_build = &util::filename_cat($mgpp, "text", "mg_perf_hash_build");
369	my $mg_weights_build = &util::filename_cat($mgpp, "text", "mg_weights_build");
370	my $mg_invf_dict = &util::filename_cat($mgpp, "text", "mg_invf_dict");
371	my $mg_stem_idx = &util::filename_cat($mgpp, "text", "mg_stem_idx");
372
373	print $out "\nCreating phrase databases\n";
374	my $mg_input = &util::filename_cat($phinddir, "pdata.txt");
375	my $mg_stem = "pdata";
376
377	&execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out);
378	&execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
379	&execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out);
380
381	# create the mg index of words
382	print $out "\nCreating word-level search indexes\n";
383	$mg_input = &util::filename_cat($phinddir, "pword.txt");
384	$mg_stem = "pword";
385
386	&execute("$mg_passes -d $phinddir -f $mg_stem -T1 -I1 $mg_input", $verbosity, $out);
387	&execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
388	&execute("$mg_perf_hash_build -d $phinddir -f $mg_stem", $verbosity, $out);
389	&execute("$mg_passes -d $phinddir -f $mg_stem -T2 -I2 $mg_input", $verbosity, $out);
390	&execute("$mg_weights_build -d $phinddir -f $mg_stem", $verbosity, $out);
391	&execute("$mg_invf_dict -d $phinddir -f $mg_stem", $verbosity, $out);
392
393	&execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 1", $verbosity, $out);
394	&execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 2", $verbosity, $out);
395	&execute("$mg_stem_idx -d $phinddir -f $mg_stem -s 3", $verbosity, $out);
396
397	# create the mg document information database
398	print $out "\nCreating document information databases\n";
399	$mg_input = &util::filename_cat($phinddir, "docs.txt");
400	$mg_stem = "docs";
401
402	&execute("$mg_passes -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out);
403	&execute("$mg_compression_dict -d $phinddir -f $mg_stem", $verbosity, $out);
404	&execute("$mg_passes -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out);
405
406
407	# Tidy up stray files
408	if (!$self->{'untidy'}) {
409	print $out "\nCleaning up\n" if ($verbosity > 2);
410	&util::rm("$phinddir/clauses", "$phinddir/clauses.numbers",
411	"$phinddir/clauses.vocab", "$phinddir/clauses.stats",
412	"$phinddir/phrases", "$phinddir/phrases.3", "$phinddir/docs.txt",
413	"$phinddir/pdata.txt", "$phinddir/pword.txt");
414	my $outfile = 1;
415	while (-e "$phinddir/outPhrase.$outfile") {
416	&util::rm("$phinddir/outPhrase.$outfile");
417	$outfile++;
418	}
419	}
420
421
422	# Return the information about the classifier that we'll later want to
423	# use to create macros when the Phind classifier document is displayed.
424	my %classifyinfo = ('thistype'=>'Invisible',
425	'childtype'=>'Phind',
426	'Title'=>$self->{'buttonname'},
427	'parameters'=>"phindnumber=$self->{'phindnumber'}",
428	'contains'=>[]);
429
430	my $collection = $self->{'collection'};
431	my $url = "library?a=p&p=phind&c=$collection";
432	push (@{$classifyinfo{'contains'}}, {'OID'=>$url});
433
434	return \%classifyinfo;
435	}
436
437
438
439	sub convert_gml_to_tokens {
440
441	my ($language_exp, $text) = @_;
442
443	if ($language_exp =~ /en/) {
444	return &convert_gml_to_tokens_EN($text);
445	}
446
447	$_ = $text;
448
449	# 1. remove GML tags
450
451	# Remove everything that is in a tag
452	s/\s<p>\s/ PARAGRAPHBREAK /isgo;
453	s/\s<br>\s/ LINEBREAK /isgo;
454	s/<[^>]*>/ /sgo;
455
456	# Now we have the text, but it may contain HTML
457	# elements coded as > etc. Remove these tags.
458	s/&/&/sgo;
459	s/</</sgo;
460	s/>/>/sgo;
461	s/\s<p>\s/ PARAGRAPHBREAK /isgo;
462	s/\s<br>\s/ LINEBREAK /isgo;
463	s/<[^>]*>/ /sgo;
464
465	# replace<p> and <br> placeholders with clause break symbol (\n)
466	s/\s+/ /gso;
467	s/PARAGRAPHBREAK/\n/sgo;
468	s/LINEBREAK/\n/sgo;
469
470
471
472
473	# 2. Split the remaining text into space-delimited tokens
474
475	# Convert any HTML special characters (like ") to their UTF8 equivalent
476	s/&([^;]+);/&unicode::ascii2utf8(\&ghtml::getcharequiv($1,1))/gse;
477
478	# Split text at word boundaries
479	s/\b/ /go;
480
481	# 3. Convert the remaining text to "clause format"
482
483	# Insert newline if the end of a sentence is detected
484	# (delimter is: "[\.\?\!]\s")
485	# s/\s*[\.\?\!]\s+/\n/go;
486
487	# remove unnecessary punctuation and replace with clause break symbol (\n)
488	s/[^\w ]/\n/go;
489
490	# remove extraneous whitespace
491	s/ +/ /sgo;
492	s/^\s+//mgo;
493	s/\s*$/\n/mgo;
494
495	# remove lines that contain one word or less
496	s/^\S*$//mgo;
497	s/^\s*$//mgo;
498	tr/\n//s;
499
500	return $_;
501	}
502
503	# A version of convert_gml_to_tokens that is fine-tuned to the English language.
504
505	sub convert_gml_to_tokens_EN {
506	$_ = shift @_;
507
508	# FIRST, remove GML tags
509
510	# Replace all whitespace with a simple space
511	s/\s+/ /gs;
512
513	# Remove everything that is in a tag
514	s/\s<p>\s/ PARAGRAPHBREAK /isg;
515	s/\s<br>\s/ LINEBREAK /isg;
516	s/<[^>]*>/ /sg;
517
518	# Now we have the text, but it may contain HTML
519	# elements coded as > etc. Remove these tags.
520	s/</</sg;
521	s/>/>/sg;
522
523	s/\s+/ /sg;
524	s/\s<p>\s/ PARAGRAPHBREAK /isg;
525	s/\s<br>\s/ LINEBREAK /isg;
526	s/<[^>]*>/ /sg;
527
528	# remove & and other miscellaneous markup tags
529	s/&/&/sg;
530	s/</</sg;
531	s/>/>/sg;
532	s/&/&/sg;
533
534	# replace<p> and <br> placeholders with carriage returns
535	s/PARAGRAPHBREAK/\n/sg;
536	s/LINEBREAK/\n/sg;
537
538
539	# Exceptional punctuation
540	#
541	# We make special cases of some punctuation
542
543	# remove any apostrophe that indicates omitted letters
544	s/(\w+)\'(\w*\s)/ $1$2 /g;
545
546	# remove period that appears in a person's initals
547	s/\s([A-Z])\./ $1 /g;
548
549	# replace hyphens in hypheanted words and names with a space
550	s/([A-Za-z])-\s*([A-Za-z])/$1 $2/g;
551
552	# Convert the remaining text to "clause format",
553	# This means removing all excess punctuation and garbage text,
554	# normalising valid punctuation to fullstops and commas,
555	# then putting one cluse on each line.
556
557	# Insert newline when the end of a sentence is detected
558	# (delimter is: "[\.\?\!]\s")
559	s/\s*[\.\?\!]\s+/\n/g;
560
561	# split numbers after four digits
562	s/(\d\d\d\d)/$1 /g;
563
564	# split words after 32 characters
565
566	# squash repeated punctuation
567	tr/A-Za-z0-9 //cs;
568
569	# save email addresses
570	# s/\w+@\w+\.[\w\.]+/EMAIL/g;
571
572	# normalise clause breaks (mostly punctuation symbols) to commas
573	s/[^A-Za-z0-9 \n]+/ , /g;
574
575	# Remove repeated commas, and replace with newline
576	s/\s*,[, ]+/\n/g;
577
578	# remove extra whitespace
579	s/ +/ /sg;
580	s/^\s+//mg;
581	s/\s*$/\n/mg;
582
583	# remove lines that contain one word or less
584	s/^\w*$//mg;
585	s/^\s*$//mg;
586	tr/\n//s;
587
588	return $_;
589
590	}
591
592
593
594	# Execute a system command
595
596	sub execute {
597	my ($command, $verbosity, $outhandle) = @_;
598	print $outhandle "Executing: $command\n" if ($verbosity > 2);
599	my $status = system($command);
600	if ($status != 0) {
601	print STDERR "phind - Error executing $command: $!\n";
602	exit($status);
603	}
604	}
605
606
607	# Generate the vocabulary, symbol statistics, and numbers file from the
608	# clauses file. This is legacy code, so is a bit messy and probably wont
609	# run under windows.
610
611	sub extract_vocabulary {
612	my ($self) = @_;
613
614	my $verbosity = $self->{'verbosity'};
615	my $out = $self->{'outhandle'};
616
617	my $collectiondir = $self->{'collectiondir'};
618	my $phinddir = $self->{'phinddir'};
619
620	my $language_exp = $self->{'language_exp'};
621
622	my ($w, $l, $line, $word);
623
624	my ($first_delimiter, $last_delimiter,
625	$first_stopword, $last_stopword,
626	$first_extractword, $last_extractword,
627	$first_contentword, $last_contentword,
628	$phrasedelimiter);
629
630	my $thesaurus = $self->{'thesaurus'};
631	my ($thesaurus_links, $thesaurus_terms,
632	%thesaurus, $first_thesaurusword, $last_thesaurusword);
633
634	my %symbol;
635	my (%freq);
636
637	print $out "Calculating vocabulary\n" if ($verbosity > 1);
638
639	# Read and store the stopwords
640	my $stopdir = &util::filename_cat($ENV{'GSDLHOME'}, "etc", "stopwords");
641	my $stopword_files = ();
642	my ($language, $language_dir, $file, $file_name);
643	my %stopwords;
644
645	# Examine each directory in the stopword directory
646	opendir(STOPDIR, $stopdir);
647	foreach $language (readdir STOPDIR) {
648
649	# Ignore entries that do not match the classifier's language
650	next unless ($language =~ /$language_exp/);
651	$language_dir = &util::filename_cat($stopdir, $language);
652	next unless (-d "$language_dir");
653
654	opendir(LANGDIR, $language_dir);
655	foreach $file (readdir LANGDIR) {
656
657	# Ignore entries that are not stopword files
658	next unless ($file =~ /sw$/);
659	$file_name = &util::filename_cat($language_dir, $file);
660	next unless (-f "$file_name");
661
662	# Read the stopwords
663	open(STOPFILE, "<$file_name");
664	while (<STOPFILE>) {
665	s/^\s+//;
666	s/\s.*//;
667	$word = $_;
668	$l = lc($word);
669	$stopwords{$l} = $word;
670	}
671	close STOPFILE;
672
673	}
674	}
675
676	# Read thesaurus information
677	if ($thesaurus) {
678
679	# link file exists
680	$thesaurus_links = &util::filename_cat($collectiondir, "etc", "$thesaurus.lnk");
681	die "Cannot find thesaurus link file" unless (-e "$thesaurus_links");
682
683	# ensure term file exists in the correct language
684	if ($language_exp =~ /^([a-z][a-z])/) {
685	$language = $1;
686	} else {
687	$language = 'en';
688	}
689	$thesaurus_terms = &util::filename_cat($collectiondir, "etc", "$thesaurus.$language");
690	die "Cannot find thesaurus term file" unless (-e "$thesaurus_terms");
691
692
693	# Read the thesaurus terms
694	open(TH, "<$thesaurus_terms");
695	while(<TH>) {
696	s/^\d+ //;
697	s/\(.*\)//;
698	foreach $w (split(/\s+/, $_)) {
699	$thesaurus{lc($w)} = $w;
700	}
701	}
702	close TH;
703	}
704
705	# Read words in the text and count occurences
706	open(TXT, "<$phinddir/clauses");
707	my @words;
708
709	while(<TXT>) {
710	$line = $_;
711	next unless ($line =~ /./);
712
713	@words = split(/\s+/, $line);
714	foreach $w (@words) {
715	$l = lc($w);
716	$w = $l if ((defined $stopwords{$l}) \|\| (defined $thesaurus{$l}));
717	$freq{$w}++;
718	}
719	$freq{$senlimit}++;
720	}
721
722	# Calculate the "best" form of each word
723	my (%bestform, %totalfreq, %bestfreq);
724
725	foreach $w (sort (keys %freq)) {
726	$l = lc($w);
727
728	# totalfreq is the number of times a term appears in any form
729	$totalfreq{$l} += $freq{$w};
730
731	if (defined $stopwords{$l}) {
732	$bestform{$l} = $stopwords{$l};
733
734	} elsif (defined $thesaurus{$l}) {
735	$bestform{$l} = $thesaurus{$l};
736
737	} elsif (!$bestform{$l} \|\| ($freq{$w} > $bestfreq{$l})) {
738	$bestfreq{$l} = $freq{$w};
739	$bestform{$l} = $w;
740	}
741	}
742
743	undef %freq;
744	undef %bestfreq;
745
746
747	# Assign symbol numbers to tokens
748	my $nextsymbol = 1;
749	my (@vocab);
750
751	# Delimiters
752	$first_delimiter = 1;
753
754	foreach $word (@delimiters) {
755
756	$word = lc($word);
757	$bestform{$word} = uc($word);
758	$vocab[$nextsymbol] = $word;
759	$symbol{$word} = $nextsymbol;
760	$nextsymbol++;
761	}
762	$last_delimiter = $nextsymbol - 1;
763
764	# Stopwords
765	$first_stopword = $nextsymbol;
766
767	foreach my $word (sort keys %stopwords) {
768
769	# don't incluse stopword unless it occurs in the text
770	$word = lc($word);
771	next unless ($totalfreq{$word});
772	next if ($symbol{$word});
773
774	$vocab[$nextsymbol] = $word;
775	$symbol{$word} = $nextsymbol;
776	$nextsymbol++;
777	}
778	$last_stopword = $nextsymbol - 1;
779	$first_contentword = $nextsymbol;
780
781	# Thesaurus terms
782	if ($thesaurus) {
783	$first_thesaurusword = $nextsymbol;
784
785	foreach my $word (sort keys %thesaurus) {
786
787	$word = lc($word);
788	next if ($symbol{$word});
789	$bestform{$word} = $thesaurus{$word};
790
791	$vocab[$nextsymbol] = $word;
792	$symbol{$word} = $nextsymbol;
793	$nextsymbol++;
794
795	}
796	$last_thesaurusword = $nextsymbol - 1;
797	}
798
799	# Other content words
800	$first_extractword = $nextsymbol;
801
802	foreach my $word (sort (keys %bestform)) {
803
804	next if ($symbol{$word});
805
806	$vocab[$nextsymbol] = $word;
807	$symbol{$word} = $nextsymbol;
808	$nextsymbol++;
809	}
810	$last_extractword = $nextsymbol - 1;
811	$last_contentword = $nextsymbol - 1;
812
813
814	# Outut the words
815	print $out "Saving vocabulary in $phinddir/clauses.vocab\n" if ($verbosity > 1);
816	open(VOC, ">$phinddir/clauses.vocab");
817
818	for (my $i = 1; $i < $nextsymbol; $i++) {
819	$w = $vocab[$i];
820
821	print VOC "$bestform{$w}\n";
822	$totalfreq{$w} = 0 unless ($totalfreq{$w});
823	}
824	close VOC;
825
826
827	# Create statistics file
828	# Output statistics about the vocablary
829	print $out "Saving statistics in $phinddir/clauses.stats\n" if ($verbosity > 1);
830	&util::rm("$phinddir/clauses.stats") if (-e "$phinddir/clauses.stats");
831
832	open(STAT, ">$phinddir/clauses.stats")
833	\|\| die "Cannot open $phinddir/clauses.stats: $!";
834
835	print STAT "first_delimiter $first_delimiter\n";
836	print STAT "last_delimiter $last_delimiter\n";
837	print STAT "first_stopword $first_stopword\n";
838	print STAT "last_stopword $last_stopword\n";
839	if ($thesaurus) {
840	print STAT "first_thesaurusword $first_thesaurusword\n";
841	print STAT "last_thesaurusword $last_thesaurusword\n";
842	}
843	print STAT "first_extractword $first_extractword\n";
844	print STAT "last_extractword $last_extractword\n";
845	print STAT "first_contentword $first_contentword\n";
846	print STAT "last_contentword $last_contentword\n";
847	print STAT "first_symbol $first_delimiter\n";
848	print STAT "last_symbol $last_contentword\n";
849	print STAT "first_word $first_stopword\n";
850	print STAT "last_word $last_contentword\n";
851	close STAT;
852
853	undef @vocab;
854
855
856	# Create numbers file
857	# Save text as symbol numbers
858	print $out "Saving text as numbers in $phinddir/clauses.numbers\n" if ($verbosity > 1);
859
860	open(TXT, "<$phinddir/clauses");
861	open(NUM, ">$phinddir/clauses.numbers");
862
863	$phrasedelimiter = $symbol{lc($senlimit)};
864	print NUM "$symbol{lc($colstart)}\n";
865
866	# set up the special symbols that delimit documents and sentences
867	while(<TXT>) {
868
869	# split sentence into a list of tokens
870	$line = $_;
871	next unless ($line =~ /./);
872	@words = split(/\s+/, $line);
873
874	# output one token at a time
875	foreach $word (@words) {
876	$word = lc($word);
877	print NUM "$symbol{$word}\n";
878	}
879
880	# output phrase delimiter
881	print NUM "$phrasedelimiter\n";
882	}
883
884	print NUM "$symbol{lc($colend)}\n";
885	close NUM;
886
887	# Save thesaurus data in one convienient file
888	if ($thesaurus) {
889
890	my $thesaurusfile = &util::filename_cat($phinddir, "$thesaurus.numbers");
891
892
893	print $out "Saving thesaurus as numbers in $thesaurusfile\n"
894	if ($verbosity > 1);
895
896	# Read the thesaurus terms
897	my ($num, $text, %thes_symbols);
898
899	open(TH, "<$thesaurus_terms");
900	while(<TH>) {
901	chomp;
902	@words = split(/\s+/, $_);
903	$num = shift @words;
904	$text = "";
905
906	# translate words into symbol numbers
907	foreach $word (@words) {
908	$word = lc($word);
909	if ($symbol{$word}) {
910	$text .= "s$symbol{$word} ";
911	} elsif ($verbosity) {
912	print $out "phind: No thesaurus symbol, ignoring \"$word\"\n";
913	}
914	}
915	$text =~ s/ $//;
916	$thes_symbols{$num} = $text;
917	}
918	close TH;
919
920	# Read the thesaurus links and write the corresponding data
921	open(TH, "<$thesaurus_links");
922	open(THOUT, ">$thesaurusfile");
923
924	while(<TH>) {
925	chomp;
926	($num, $text) = split(/:/, $_);
927
928	if (defined($thes_symbols{$num})) {
929	print THOUT "$num:$thes_symbols{$num}:$text\n";
930	} else {
931	print THOUT "$num:untranslated:$text\n";
932	}
933	}
934	close TH;
935	close THOUT;
936	}
937
938
939
940
941	}
942
943
944	# renumber_phrases
945	#
946	# Prepare the phrases file to be input to mgpp. The biggest problem is
947	# reconciling the phrase identifiers used by the suffix program (which
948	# we'll call suffix-id numbers) with the numbers used in the thesaurus
949	# (theesaurus-id) to create a ciommon set of phind id numbers (phind-id).
950	# Phind-id numbers must be sorted by frequency of occurance.
951	#
952	# Start creating a set of phind-id numbers from the sorted suffix-id
953	# numbers and (if required) the thesaurus-id numbers. Then add any other
954	# phrases occuring in the thesaurus.
955	#
956	# The last thing we have to do is restore the vocabulary information to the
957	# phrase file so that the phrases are stored as words, not as symbol
958	# numbers.
959
960	# The original phrases file looks something like this:
961	# 159396-1:s5175:4:1:116149-2:3:d2240,2;d2253;d2254
962	# 159409-1:s5263:6:1:159410-2:6:d2122;d2128;d2129;d2130;d2215;d2380
963	# 159415-1:s5267:9:1:159418-2:8:d3,2;d632;d633;d668;d1934;d2010;d2281;d2374
964	# 159426-1:s5273:5:2:159429-2,115168-17:5:d252;d815;d938;d939;d2361
965
966
967	sub renumber_phrases {
968	my ($self) = @_;
969
970	renumber_suffix_data($self);
971	renumber_thesaurus_data($self);
972	restore_vocabulary_data($self);
973
974	}
975
976
977
978	# renumber_suffix_data
979	#
980	# Translate phrases file to phrases.2 using phind keys instead
981	# of suffix keys and sorting the expansion data.
982
983	sub renumber_suffix_data {
984	my ($self) = @_;
985
986	my $verbosity = $self->{'verbosity'};
987	my $out = $self->{'outhandle'};
988	print $out "Translate phrases: suffix-ids become phind-id's\n"
989	if ($verbosity);
990
991	my $phinddir = $self->{'phinddir'};
992	my $infile = &util::filename_cat($phinddir, 'phrases');
993	my $outfile = &util::filename_cat($phinddir, 'phrases.2');
994
995	# Read the phrase file. Calculate initial set of phind-id
996	# numbers and store (suffixid -> frequency) relation.
997
998	my %suffixtophind;
999	my @phindfrequency;
1000	my (@fields, $suffixid);
1001	my $nextphind = 1;
1002
1003	open(IN, "<$infile");
1004	while(<IN>) {
1005
1006	chomp;
1007	@fields = split(/:/, $_);
1008
1009	# get next suffixid and phindid
1010	$suffixid = shift @fields;
1011	$suffixtophind{$suffixid} = $nextphind;
1012
1013	# store total frequency
1014	shift @fields;
1015	$totalfrequency[$nextphind] = shift @fields;
1016
1017	$nextphind++;
1018	}
1019	close IN;
1020
1021
1022	# Translate phrases file to phrases.2. Use phind keys (not suffix
1023	# keys), sort expansion and document occurance data in order of
1024	# descending frequency..
1025	open(IN, "<$infile");
1026	open(OUT, ">$outfile");
1027
1028	my ($phindid, $text, $tf, $countexp, $expansions, $countdocs, $documents);
1029	my (@documwents, @newexp, $k, $n);
1030	my $linenumber = 0;
1031
1032	while(<IN>) {
1033
1034	# read the line
1035	chomp;
1036	@fields = split(/:/, $_);
1037
1038	# get a phrase number for this line
1039	$suffixid = shift @fields;
1040	die unless (defined($suffixtophind{$suffixid}));
1041	$phindid = $suffixtophind{$suffixid};
1042
1043	# get the symbols in the phrase
1044	$text = shift @fields;
1045
1046	# output status information
1047	$linenumber++;
1048	if ($verbosity > 2) {
1049	if ($linenumber % 1000 == 0) {
1050	print $out "line $linenumber:\t$phindid\t$suffixid\t($text)\n";
1051	}
1052	print $out "$num: $key\t($text)\n" if ($verbosity > 3);
1053	}
1054
1055	# get the phrase frequency
1056	$tf = shift @fields;
1057
1058	# get the number of expansions
1059	$countexp = shift @fields;
1060
1061	# get the expansions, convert them into phind-id numbers, and sort them
1062	$expansions = shift @fields;
1063	@newexp = ();
1064	foreach $k (split(/,/, $expansions)) {
1065	die "ERROR - no phindid for: $k" unless (defined($suffixtophind{$k}));
1066	$n = $suffixtophind{$k};
1067	push @newexp, $n;
1068	}
1069	@newexp = sort {$totalfrequency[$b] <=> $totalfrequency[$a]} @newexp;
1070
1071	# get the number of documents
1072	$countdocs = shift @fields;
1073
1074	# get the documents and sort them
1075	$documents = shift @fields;
1076	$documents =~ s/d//g;
1077	@documents = split(/;/, $documents);
1078	@documents = sort by_doc_frequency @documents;
1079
1080	# output the phrase data
1081	print OUT "$phindid:$text:$tf:$countexp:$countdocs:";
1082	print OUT join(",", @newexp), ",:", join(";", @documents), ";\n";
1083
1084	}
1085
1086	close IN;
1087	close OUT;
1088	}
1089
1090
1091	# renumber_thesaurus_data
1092	#
1093	# Translate phrases.2 to phrases.3, adding thesaurus data if available.
1094
1095	sub renumber_thesaurus_data {
1096	my ($self) = @_;
1097
1098	my $out = $self->{'outhandle'};
1099	my $verbosity = $self->{'verbosity'};
1100	my $thesaurus = $self->{'thesaurus'};
1101
1102	my $phinddir = $self->{'phinddir'};
1103	my $infile = &util::filename_cat($phinddir, "phrases.2");
1104	my $outfile = &util::filename_cat($phinddir, "phrases.3");
1105
1106
1107	# If no thesaurus is defined, simply move the phrases file.
1108	if (!$thesaurus) {
1109	print $out "Translate phrases.2: no thesaurus data\n"
1110	if ($verbosity);
1111	&util::mv($infile, $outfile);
1112	return;
1113	}
1114
1115	print $out "Translate phrases.2: add thesaurus data\n"
1116	if ($verbosity);
1117
1118	# 1.
1119	# Read thesaurus file and store (symbols->thesaurusid) mapping
1120	my $thesaurusfile = &util::filename_cat($phinddir, "$thesaurus.numbers");
1121	my %symbolstothesid;
1122	my (@fields, $thesid, $symbols);
1123
1124	open(TH, "<$thesaurusfile");
1125
1126	while (<TH>) {
1127
1128	chomp;
1129	@fields = split(/:/, $_);
1130
1131	# get id and text
1132	$thesid = shift @fields;
1133	$symbols = shift @fields;
1134	$symbolstothesid{$symbols} = $thesid;
1135	}
1136	close TH;
1137
1138	# 2.
1139	# Read phrases file to find thesaurus entries that already
1140	# have a phindid. Store their phind-ids for later translation,
1141	# and store their frequency for later sorting.
1142	my %thesaurustophindid;
1143	my %phindidtofrequency;
1144	my ($phindid, $freq);
1145
1146	open(IN, "<$infile");
1147
1148	while(<IN>) {
1149
1150	chomp;
1151	@fields = split(/:/, $_);
1152
1153	# phindid and symbols for this line
1154	$phindid = shift @fields;
1155	$symbols = shift @fields;
1156	$freq = shift @fields;
1157
1158	# do we have a thesaurus id corresponding to this phrase?
1159	if (defined($symbolstothesid{$symbols})) {
1160	$thesid = $symbolstothesid{$symbols};
1161	$thesaurustophindid{$thesid} = $phindid;
1162	$phindidtofrequency{$phindid} = $freq;
1163	}
1164	}
1165	close IN;
1166
1167	undef %symbolstothesid;
1168
1169	# 3.
1170	# Create phind-id numbers for remaining thesaurus entries,
1171	# and note that their frequency is 0 for later sorting.
1172	my $nextphindid = $phindid + 1;
1173
1174	open(TH, "<$thesaurusfile");
1175	while(<TH>) {
1176
1177	chomp;
1178	@fields = split(/:/, $_);
1179
1180	# read thesaurus-id and ensure it has a corresponding phind-id
1181	$thesid = shift @fields;
1182	if (!defined($thesaurustophindid{$thesid})) {
1183	$thesaurustophindid{$thesid} = $nextphindid;
1184	$phindidtofrequency{$nextphindid} = 0;
1185	$nextphindid++;
1186	}
1187	}
1188	close TH;
1189
1190	# 4.
1191	# Translate thesaurus file, replacing thesaurus-id numbers with
1192	# phind-id numbers.
1193	my $newthesaurusfile = &util::filename_cat($phinddir, "$thesaurus.phindid");
1194	my ($relations, $linkcounter, $linktext, $linktype, @linkdata);
1195	my (@links, $linkid, %linkidtotype, $newrelation);
1196
1197	open(TH, "<$thesaurusfile");
1198	open(TO, ">$newthesaurusfile");
1199	while(<TH>) {
1200
1201	chomp;
1202	@fields = split(/:/, $_);
1203
1204	# phindid and symbols for this line
1205	($thesid, $symbols, $relations) = @fields;
1206
1207	die unless ($thesid && $symbols);
1208	die unless $thesaurustophindid{$thesid};
1209	$phindid = $thesaurustophindid{$thesid};
1210
1211	# convert each part of the relation string to use phind-id numbers
1212	# at the same time, we want to sort the list by frequency.
1213	undef %linkidtotype;
1214
1215	foreach $linktext (split(/;/, $relations)) {
1216	@linkdata = split(/,/, $linktext);
1217
1218	# remember the linktype (e.g. BT, NT)
1219	$linktype = shift @linkdata;
1220
1221	# store the type of each link
1222	foreach $thesid (@linkdata) {
1223	die unless (defined($thesaurustophindid{$thesid}));
1224	$linkidtotype{$thesaurustophindid{$thesid}} = $linktype;
1225	}
1226	}
1227
1228	# sort the list of links, first by frequency, then by type.
1229	@links = sort { ($phindidtofrequency{$b} <=> $phindidtofrequency{$a})
1230	or ($linkidtotype{$a} cmp $linkidtotype{$b}) } (keys %linkidtotype);
1231	$linkcounter = (scalar @links);
1232
1233	# create a string describing the link information
1234	$linktype = $linkidtotype{$links[0]};
1235	$newrelation = $linktype;
1236	foreach $linkid (@links) {
1237	if ($linkidtotype{$linkid} ne $linktype) {
1238	$linktype = $linkidtotype{$linkid};
1239	$newrelation .= ";" . $linktype;
1240	}
1241	$newrelation .= "," . $linkid;
1242	}
1243	$newrelation .= ";";
1244
1245
1246	# output the new line
1247	print TO "$phindid:$symbols:$linkcounter:$newrelation:\n";
1248	}
1249	close TH;
1250	close TO;
1251
1252	undef %thesaurustophindid;
1253	undef %linkidtotype;
1254	undef %phindidtofrequency;
1255
1256	# 5.
1257	# Read thesaurus data (in phind-id format) into memory
1258	my %thesaurusdata;
1259
1260	open(TH, "<$newthesaurusfile");
1261	while(<TH>) {
1262	chomp;
1263	($phindid, $symbols, $linkcounter, $relations) = split(/:/, $_);
1264	die unless ($phindid && $symbols);
1265	$thesaurusdata{$phindid} = "$symbols:$linkcounter:$relations";
1266	}
1267
1268	# 6.
1269	# Add thesaurus data to phrases file
1270	my ($text, $tf, $countexp, $expansions, $countdocs, $documents);
1271	my (@documwents, @newexp, $k, $n);
1272	my $linenumber = 0;
1273
1274	open(IN, "<$infile");
1275	open(OUT, ">$outfile");
1276
1277	# Update existing phrases
1278	while(<IN>) {
1279
1280	chomp;
1281	@fields = split(/:/, $_);
1282
1283	# get data for this line
1284	$phindid = shift @fields;
1285
1286	# output the phrase data, with thesaurus information
1287	print OUT "$phindid:", join(":", @fields);
1288
1289	# add thesaurus data
1290	if (defined($thesaurusdata{$phindid})) {
1291	@fields = split(/:/, $thesaurusdata{$phindid});
1292	shift @fields;
1293	$linkcounter = shift @fields;
1294	$relations = shift @fields;
1295
1296	print OUT ":$linkcounter:$relations";
1297	$thesaurusdata{$phindid} = "";
1298	}
1299	print OUT "\n";
1300	}
1301	close IN;
1302
1303	# Add phrases that aren't already in the file
1304	foreach $phindid (sort numerically keys %thesaurusdata) {
1305	next unless ($thesaurusdata{$phindid});
1306
1307	@fields = split(/:/, $thesaurusdata{$phindid});
1308	$symbols = shift @fields;
1309	$linkcounter = shift @fields;
1310	$relations = shift @fields;
1311
1312	print OUT "$phindid:$symbols:0:0:0:::$linkcounter:$relations\n";
1313	}
1314	close OUT;
1315
1316	}
1317
1318	# restore_vocabulary_data
1319	#
1320	# Read phrases.3 and restore vocabulary information. Then write
1321	# this data to the MGPP input files (pwrod.txt and pdata.txt) and
1322	# (if requested) to the saved phrases file.
1323
1324	sub restore_vocabulary_data {
1325	my ($self) = @_;
1326
1327	my $out = $self->{'outhandle'};
1328	my $verbosity = $self->{'verbosity'};
1329	print $out "Translate phrases.3: restore vocabulary\n" if ($verbosity);
1330
1331	my $phinddir = $self->{'phinddir'};
1332	my $infile = &util::filename_cat($phinddir, 'phrases.3');
1333	my $vocabfile = &util::filename_cat($phinddir, 'clauses.vocab');
1334	my $datafile = &util::filename_cat($phinddir, 'pdata.txt');
1335	my $wordfile = &util::filename_cat($phinddir, 'pword.txt');
1336
1337	my $savephrases = $self->{'savephrases'};
1338
1339	# 1.
1340	# Read the vocabulary file
1341	open(V, "<$vocabfile")
1342	\|\| die "Cannot open $vocabfile: $!";
1343	my @symbol;
1344	my $i = 1;
1345	while(<V>) {
1346	chomp;
1347	$symbol[$i++] = $_;
1348	}
1349
1350
1351	# 2.
1352	# Translate phrases.3 to MGPP input files
1353	my ($key, $text, $word, $isThesaurus);
1354	my @fields;
1355	my $linenumber = 0;
1356
1357	open(IN, "<$infile");
1358	open(DATA, ">$datafile");
1359	open(WORD, ">$wordfile");
1360
1361	# Save the phrases in a separate text file
1362	if ($savephrases) {
1363	print $out "Saving phrases in $savephrases\n" if ($verbosity);
1364	open(SAVE, ">$savephrases");
1365	}
1366
1367	while(<IN>) {
1368
1369	# read the line
1370	chomp;
1371	$line = $_;
1372	@fields = split(/:/, $line);
1373
1374	# get a phrase number for this line
1375	$key = shift @fields;
1376
1377	# restore the text of the phrase
1378	$text = shift @fields;
1379	$text =~ s/s(\d+)/$symbol[$1]/g;
1380	if ($text =~ / /) {
1381	$word = "";
1382	} elsif ($text ne 'untranslated') {
1383	$word = $text;
1384	}
1385
1386	# output the phrase data
1387	print DATA "<Document>";
1388	print DATA "$key:$text:", join(":", @fields), ":\n";
1389
1390	# output the word index search data
1391	print WORD "<Document>$word\n";
1392
1393	# output the phrases to a text file
1394	if ($savephrases) {
1395	if ((scalar @fields) == 7) {
1396	$isThesaurus = 1;
1397	} else {
1398	$isThesaurus = 0;
1399	}
1400	print SAVE $fields[0], "\t", $fields[2], "\t$isThesaurus\t$text\n";
1401	}
1402	}
1403	close IN;
1404	close WORD;
1405	close DATA;
1406	close SAVE if ($savephrases);
1407
1408	}
1409
1410
1411
1412	# sort routines used to renumber phrases
1413
1414	sub numerically { $a <=> $b }
1415
1416	sub by_doc_frequency {
1417	my $fa = 1;
1418	if ($a =~ /,/) {
1419	$fa = $a;
1420	$fa =~ s/\d+,//;
1421	}
1422	my $fb = 1;
1423	if ($b =~ /,/) {
1424	$fb = $b;
1425	$fb =~ s/\d+,//;
1426	}
1427
1428	return ($fb <=> $fa);
1429	}
1430
1431	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: