Context Navigation

source: trunk/gsdl/src/phind/generate/phrase.cpp@ 1873

Last change on this file since 1873 was 1873, checked in by paynter, 23 years ago
Fixed a bug iin the Phrase extraction alogrithm that had the "candidates" in the GetMinimalExpansions function sorted backwards.
Property svn:keywords set to `Author Date Id Revision`
File size: 19.6 KB

Rev	Line
[1562]	1	/**********************************************************************
	2	*
	3	* phrase.cpp -- implementation of the phrase object used by suffix.cpp
	4	*
[1631]	5	* Copyright 2000 Gordon W. Paynter
	6	* Copyright 2000 The New Zealand Digital Library Project
[1562]	7	*
	8	* A component of the Greenstone digital library software
	9	* from the New Zealand Digital Library Project at the
	10	* University of Waikato, New Zealand.
	11	*
	12	* This program is free software; you can redistribute it and/or modify
	13	* it under the terms of the GNU General Public License as published by
	14	* the Free Software Foundation; either version 2 of the License, or
	15	* (at your option) any later version.
	16	*
	17	* This program is distributed in the hope that it will be useful,
	18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	* GNU General Public License for more details.
	21	*
	22	* You should have received a copy of the GNU General Public License
	23	* along with this program; if not, write to the Free Software
	24	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	25	*
	26	*********************************************************************/
	27
	28	#include <iostream.h>
	29	#include <assert.h>
	30	#include <vector.h>
	31	#include <stdio.h>
	32
	33	#include "suffix.h"
	34	#include "phrase.h"
	35
	36	// Phrase constructor functions
	37
	38	Phrase::Phrase(symbol *words, cellcount size, int direction) {
	39	empty();
	40	length = size;
	41
	42	if (direction == SUFFIX) {
	43	forward = words;
	44	back = forward + size - 1;
	45	} else {
	46	back = words;
	47	forward = back - size + 1;
	48	}
	49	}
	50
	51	Phrase::Phrase() {
	52	empty();
	53	}
	54
	55
	56	// Empty the contents of a phrase
	57
	58	int Phrase::empty() {
	59
	60	forward = back = NULL;
	61	length = 0;
	62
	63	suffixFound = prefixFound = 0;
	64	firstSuffix = firstPrefix = NULL;
	65	lastSuffix = lastPrefix = NULL;
	66	suffixFrequency = prefixFrequency = 0;
	67	uniqueSuffixExtension = uniquePrefixExtension = -1;
	68
	69	return 0;
	70	}
	71
	72	// Remove the details of the location where a suffix/prefix
	73	// is found in the array.
	74
	75	int Phrase::clearSuffix() {
	76	suffixFound = 0;
	77	firstSuffix = NULL;
	78	lastSuffix = NULL;
	79	suffixFrequency = 0;
	80	uniqueSuffixExtension = -1;
	81	return 0;
	82	}
	83
	84	int Phrase::clearPrefix() {
	85	prefixFound = 0;
	86	firstPrefix = NULL;
	87	lastPrefix = NULL;
	88	prefixFrequency = 0;
	89	uniquePrefixExtension = -1;
	90	return 0;
	91	}
	92
	93	// Increase the length of s phrase
	94	//
	95	// We increase the length of a string "in place". When we expand
	96	// the suffix, however, we invalidate the prefix data, and vice-versa.
	97
	98	int Phrase::increaseSuffixLength(cellcount l) {
	99	length = l;
	100	clearPrefix();
	101	back = forward + l - 1;
	102	return 0;
	103	}
	104
	105	int Phrase::increasePrefixLength(cellcount l) {
	106	length = l;
	107	clearSuffix();
	108	forward = back - l + 1;
	109	return 0;
	110	}
	111
	112
	113	// Convert the phrase to a string
	114	char *Phrase::toString() {
	115
	116	assert(forward);
	117
	118	char *str;
	119	str = new char[length*20];
	120	symbol *s = forward;
	121	sprintf(str, "s%d", *s++);
	122
	123	for (cellcount i = 1; i < length; i++) {
	124	sprintf(str, "%s s%d", str, *s++);
	125	}
	126
	127	return str;
	128	}
	129
	130
	131	// Compare a phrase to an another array of symbols
	132	//
	133	// Given an array of words with a specified length,
	134	// compare those words to this phrase.
	135	//
	136	// Return 0 if the two phrases are the same over length length
	137	// Return 1 if the phrase is "greater" than tyhe words, or
	138	// Return -1 if the phrase is less.
	139
	140	int Phrase::compareSuffix(symbol *words, cellcount length) {
	141	assert(forward);
	142	symbol *p = forward;
	143
	144	for (cellcount i = 0; i < length; i++) {
	145	if (p > words) {
	146	return 1;
	147	} else if (p < words) {
	148	return -1;
	149	} else {
	150	*p++;
	151	*words++;
	152	}
	153	}
	154
	155	return 0;
	156
	157	}
	158
	159	int Phrase::comparePrefix(symbol *words, cellcount length) {
	160
	161	assert(back);
	162	symbol *p = back;
	163
	164	for (cellcount i = 0; i < length; i++) {
	165	if (p > words) {
	166	return -1;
	167	} else if (p < words) {
	168	return 1;
	169	} else {
	170	*p--;
	171	*words--;
	172	}
	173	}
	174
	175	return 0;
	176
	177	}
	178
	179
	180	// Does the phrase have a unique suffix/prefix extension?
	181	//
[1873]	182	// For suffix:
	183	// If uniqueSuffixExtensions is 1, then phrase has some unique expansion.
	184	// If it is 0 then it does not. If it is -1 then we don't know, and have
	185	// to calculate it from the suffix array. Same goes for prefixes.
[1562]	186
	187	int Phrase::hasUniqueSuffixExtension() {
	188
	189	// if we don't know, then work it out
	190	if (uniqueSuffixExtension == -1) {
	191
[1873]	192	ensureSuffixFound();
[1562]	193
	194	// pointers to the phrase's first and last occurances in te suffixArray
	195	symbol *fst = firstSuffix + length;
	196	symbol *lst = lastSuffix + length;
	197
	198	// in ANYPHRASE mode, simply check the next symbol
	199	if (phraseMode == ANYPHRASE) {
	200	if ((fst == lst) && (*fst > LASTDELIMITER)) {
	201	uniqueSuffixExtension = 1;
	202	} else {
	203	uniqueSuffixExtension = 0;
	204	}
	205	}
	206	// in STOPWORDS mode, make sure there is a unique next content symbol
	207	else {
	208	uniqueSuffixExtension = 0;
	209	while ((fst == lst) && (*fst > LASTDELIMITER)) {
	210	if (*fst >= firstContentSymbol) {
	211	uniqueSuffixExtension = 1;
	212	break;
	213	}
	214	fst++;
	215	lst++;
	216	}
	217	}
	218	}
	219
	220	return uniqueSuffixExtension;
	221	}
	222
	223	int Phrase::hasUniquePrefixExtension() {
	224
	225	// if we don't know, then work it out
	226	if (uniquePrefixExtension == -1) {
	227
	228	ensurePrefixFound();
	229
[1873]	230	// pointers to the phrase's first and last occurances in the prefixArray
[1562]	231	symbol *fst = firstPrefix - length;
	232	symbol *lst = lastPrefix - length;
	233
	234	// in ANYPHRASE mode, simply check the next symbol
	235	if (phraseMode == ANYPHRASE) {
	236	if ((fst == lst) && (*fst > LASTDELIMITER)) {
	237	uniquePrefixExtension = 1;
	238	} else {
	239	uniquePrefixExtension = 0;
	240	}
	241	}
	242	// in STOPWORDS mode, make sure there is a unique next content symbol
	243	else {
	244	uniquePrefixExtension = 0;
	245	while ((fst == lst) && (*fst > LASTDELIMITER)) {
	246	if (*fst >= firstContentSymbol) {
	247	uniquePrefixExtension = 1;
	248	break;
	249	}
	250	fst--;
	251	lst--;
	252	}
	253	}
	254	}
	255
	256	return uniquePrefixExtension;
	257	}
	258
	259
[1873]	260	// Expand a phrase with a unique suffix/prefix by 1 symbol
[1562]	261	//
	262	// Note that in STOPWORDS mode a "unique extension" means a unique
	263	// extension that ends on a content symbol. Although we may extend
	264	// such a phrase by 2 or more symbols, we will only extend it by a
	265	// single content symbol.
	266
	267	int Phrase::expandUniqueSuffixExtensionByOne() {
	268	assert(suffixFound);
	269	assert(uniqueSuffixExtension == 1);
	270
	271	// in ANYPHRASE mode, expand the phrase by one symbol
	272	if (phraseMode == ANYPHRASE) {
	273	increaseSuffixLength(length + 1);
	274	}
	275	// in STOPWORDS mode, expand the phrase up to the next content symbol
	276	else {
	277	symbol *next = firstSuffix + length;
	278	cellcount newlength = length + 1;
	279
	280	while (*next < firstContentSymbol) {
	281	next++;
	282	newlength++;
	283	}
	284	increaseSuffixLength(newlength);
	285	}
	286
	287	uniqueSuffixExtension = -1;
	288	return 0;
	289	}
	290
	291	int Phrase::expandUniquePrefixExtensionByOne() {
	292	assert(prefixFound);
	293	assert(uniquePrefixExtension == 1);
	294
	295	// in ANYPHRASE mode, expand the phrase by one symbol
	296	if (phraseMode == ANYPHRASE) {
	297	increasePrefixLength(length + 1);
	298	}
	299	// in STOPWORDS mode, expand the phrase to the next content symbol
	300	else {
	301	symbol *next = firstPrefix - length;
	302	cellcount newlength = length + 1;
	303	while (*next < firstContentSymbol) {
	304	next--;
	305	newlength++;
	306	}
	307	increasePrefixLength(newlength);
	308
	309	}
	310
	311	uniquePrefixExtension = -1;
	312	return 0;
	313	}
	314
	315
	316	// Find the phrase in a suffixArray
	317	//
	318	// Given a suffix array, find the first and last occurances of the phrase.
	319	// The begin and end parameters identify which cells to search (inclusive).
	320
	321	int Phrase::findFirstAndLastSuffix() {
	322	return findFirstAndLastSuffix(0, inputLength-1);
	323	}
	324
	325	int Phrase::findFirstAndLastSuffix(cellindex begin, cellindex end) {
	326
	327	// First find any occurance of the phrase
	328	assert(begin <= end);
	329
	330	// cout << "findFirstAndLastSuffix (" << begin << " - " << end << ") : " << toString() << endl;
	331
	332	// if we're only searching one cell, it is very easy
	333	if (begin == end) {
	334	assert(compareSuffix(suffixArray[begin], length) == 0);
	335	firstSuffix = lastSuffix = suffixArray[begin];
	336	firstSuffixIndex = lastSuffixIndex = begin;
	337	suffixFrequency = 1;
	338	suffixFound = 1;
	339	return 0;
	340	}
	341
	342	cellindex c;
	343	int cmp;
	344
	345	do {
	346	// cout << "Find anywhere in " << begin << " - " << end << endl;
	347	c = (end + begin) / 2;
	348	cmp = compareSuffix(suffixArray[c], length);
	349	if (cmp == 1) {
	350	// target phrase is lower than phrase at suffixArray[c]
	351	begin = c + 1;
	352	} else if (cmp == -1) {
	353	// target phrase is higher than phrase at suffixArray[c]
	354	end = c - 1;
	355	}
	356	} while (cmp != 0);
	357
	358	cellindex lastbegin = c;
	359	cellindex lastend = end;
	360
	361	// Next find the first occurance of the phrase.
	362	// We know that the first occurance must be between the
	363	// current value of begin and the current value of c.
	364	end = c;
	365
	366	do {
	367	// cout << "first-searching with " << begin << " - " << end << endl;
	368	if (begin == end) {
	369	c = begin;
	370	cmp = 0;
	371	} else {
	372	c = (begin + end) / 2;
	373	cmp = compareSuffix(suffixArray[c], length);
	374	if (cmp == 1) {
	375	// target phrase is lower than phrase at suffixArray[c]
	376	begin = c + 1;
	377	} else {
	378	assert(cmp == 0);
	379	// target phrase is the same as the phrase at suffixArray[c]. However,
	380	// to find the first occurance, suffixArray[c] must be the same as the
	381	// phrase, but suffixArray[c-1] must be different.
	382	cmp = compareSuffix(suffixArray[c-1], length);
	383	if (cmp == 0) {
	384	end = c - 1;
	385	assert(end >= begin);
	386	cmp = 1;
	387	} else {
	388	cmp = 0;
	389	}
	390	}
	391	}
	392	} while (cmp != 0);
	393
	394	// we have found the first location
	395	firstSuffixIndex = c;
	396	firstSuffix = suffixArray[c];
	397
	398	// Next find the last occurance of the phrase.
	399	// We previously stored some bounds for its loccation.
	400	begin = lastbegin;
	401	end = lastend;
	402
	403	do {
	404	// cout << "last-searching with " << begin << " - " << end << endl;
	405	if (begin == end) {
	406	c = begin;
	407	cmp = 0;
	408	} else {
	409	c = (begin + end) / 2;
	410	cmp = compareSuffix(suffixArray[c], length);
	411	if (cmp == -1) {
	412	// target phrase is greater than phrase at suffixArray[c]
	413	end = c - 1;
	414	} else {
	415	assert(cmp == 0);
	416	// target phrase is the same as the phrase at suffixArray[c]. However,
	417	// to find the last occurance, one of two additional condiditons must be met:
	418	// either c must be the very last cell in the array, or
	419	// suffixArray[c+1] must be different from phrase
	420	if (c < inputLength-1) {
	421	cmp = compareSuffix(suffixArray[c+1], length);
	422	if (cmp == 0) {
	423	begin = c + 1;
	424	cmp = -1;
	425	} else {
	426	cmp = 0;
	427	}
	428	}
	429	}
	430	}
	431	} while (cmp != 0);
	432
	433	lastSuffixIndex = c;
	434	lastSuffix = suffixArray[c];
	435	suffixFrequency = lastSuffixIndex - firstSuffixIndex + 1;
	436	suffixFound = 1;
	437
	438	return 0;
	439	}
	440
	441
	442	// Find the phrase in a prefix array
	443	//
	444	// Given a prefix array, find the first and last occurances of the phrase.
	445	// The begin and end parameters identify which cells to search (inclusive).
	446
	447	int Phrase::findFirstAndLastPrefix() {
	448	return findFirstAndLastPrefix(0, inputLength-1);
	449	}
	450
	451	int Phrase::findFirstAndLastPrefix(cellindex begin, cellindex end) {
	452
	453	// First find any occurance of the phrase
	454	assert(begin <= end);
	455
	456	// cout << "findFirstAndLastPrefix (" << begin << " - " << end << ") : " << toString() << endl;
	457
	458	// if we're only searching one cell, it is very easy
	459	if (begin == end) {
	460	assert(comparePrefix(prefixArray[begin], length) == 0);
	461	firstPrefix = lastPrefix = prefixArray[begin];
	462	firstPrefixIndex = lastPrefixIndex = begin;
	463	prefixFrequency = 1;
	464	prefixFound = 1;
	465	return 0;
	466	}
	467
	468	cellindex c;
	469	int cmp;
	470
	471	do {
	472	// cout << "Find anywhere in prefixarray " << begin << " - " << end << endl;
	473	c = (end + begin) / 2;
	474	cmp = comparePrefix(prefixArray[c], length);
	475	if (cmp == 1) {
	476	begin = c + 1;
	477	} else if (cmp == -1) {
	478	end = c - 1;
	479	}
	480	} while (cmp != 0);
	481
	482	cellindex lastbegin = c;
	483	cellindex lastend = end;
	484
	485	// Next find the first occurance of the phrase.
	486	// We know that the first occurance must be between the
	487	// current value of begin and the cureent value of c.
	488	end = c;
	489
	490	do {
	491	// cout << "first-prefix-searching with " << begin << " - " << end << endl;
	492	if (begin == end) {
	493	c = begin;
	494	cmp = 0;
	495	} else {
	496	c = (begin + end) / 2;
	497	cmp = comparePrefix(prefixArray[c], length);
	498	// cout << "cmp: " << cmp << ", c: " << c << endl;
	499	if (cmp == 1) {
	500	// target phrase is lower than phrase at prefixArray[c]
	501	begin = c + 1;
	502	} else {
	503	assert(cmp == 0);
	504	// target phrase is the same as the phrase at prefixArray[c]. However, to
	505	// find the first occurance, one of two additional condiditons must be met:
	506	// either c == the first cell in prefix array
	507	// or prefixArray[c-1] != phrase.
	508	if (c > 0) {
	509	cmp = comparePrefix(prefixArray[c-1], length);
	510	if (cmp == 0) {
	511	end = c - 1;
	512	assert(end >= begin);
	513	cmp = 1;
	514	} else {
	515	cmp = 0;
	516	}
	517	}
	518	}
	519	}
	520	} while (cmp != 0);
	521
	522	// we have found the first location
	523	firstPrefixIndex = c;
	524	firstPrefix = prefixArray[c];
	525
	526	// Next find the last occurance of the phrase.
	527	// We previously stored some bounds for its loccation.
	528	begin = lastbegin;
	529	end = lastend;
	530
	531	do {
	532	// cout << "last-prefix-searching with " << begin << " - " << end << endl;
	533	if (begin == end) {
	534	c = begin;
	535	cmp = 0;
	536	} else {
	537	c = (begin + end) / 2;
	538	cmp = comparePrefix(prefixArray[c], length);
	539	if (cmp == -1) {
	540	// target phrase is greater than phrase at prefixArray[c]
	541	end = c - 1;
	542	} else {
	543	assert(cmp == 0);
	544	// target phrase is the same as the phrase at prefixArray[c]. However,
	545	// to find the last occurance, prefixArray[c] must be the same as the
	546	// phrase, but prefixArray[c+1] must be different.
	547	cmp = comparePrefix(prefixArray[c+1], length);
	548	if (cmp == 0) {
	549	begin = c + 1;
	550	cmp = -1;
	551	} else {
	552	cmp = 0;
	553	}
	554	}
	555	}
	556	} while (cmp != 0);
	557
	558	lastPrefixIndex = c;
	559	lastPrefix = prefixArray[c];
	560	prefixFrequency = lastPrefixIndex - firstPrefixIndex + 1;
	561	prefixFound = 1;
	562
	563	return 0;
	564	}
	565
	566
	567	// Ensure that the phrase has been found in the suffix & prefix arrays
	568
	569	int Phrase::ensureSuffixFound() {
	570	if (!suffixFound) {
	571	findFirstAndLastSuffix();
	572	}
	573	return 0;
	574	}
	575
	576	int Phrase::ensurePrefixFound() {
	577	if (!prefixFound) {
	578	findFirstAndLastPrefix();
	579	}
	580	return 0;
	581	}
	582
	583
	584	// Calculate a set of initial suffix/prefix candidates
	585	//
	586	// Expand the phrase to find the initial
	587	// set of candidates that may be extensions of that phrase,
	588	// and add them to the end of the results vector
	589
	590	int Phrase::initialSuffixCandidates(vector<Phrase> &results) {
	591
	592	ensureSuffixFound();
	593	Phrase next;
	594	cellindex i = firstSuffixIndex;
	595
	596	// Find all the expansions of Phrase p
	597	while (i <= lastSuffixIndex) {
	598
	599	// create new phrase exactly one longer than the current one
	600	next = newPhraseShortestSuffixExpansion(i);
	601
	602	// If the expansion occurs more than once and is not delimited, expand it
	603	if ((*(next.back) > LASTDELIMITER) && (next.suffixFrequency >= 2)) {
	604	next.expandWhileUniqueSuffixExtension();
	605	results.push_back(next);
	606	}
	607
	608	// Move onto the next expansion
	609	i = next.lastSuffixIndex + 1;
	610
	611	}
	612	return 0;
	613	}
	614
	615
	616	int Phrase::initialPrefixCandidates(vector<Phrase> &results) {
	617
	618	ensurePrefixFound();
	619	Phrase next;
	620	cellindex i = firstPrefixIndex;
	621
	622	// Find all the expansions of Phrase p
	623	while (i <= lastPrefixIndex) {
	624
	625	// create new phrase exactly one longer than the current one
	626	next = newPhraseShortestPrefixExpansion(i);
	627
	628	// If the expansion occurs more than once and is not delimited, expand it
	629	if ((*(next.forward) > LASTDELIMITER) && (next.prefixFrequency >= 2)) {
	630	next.expandWhileUniquePrefixExtension();
	631	results.push_back(next);
	632	}
	633
	634	// Move onto the next expansion
	635	i = next.lastPrefixIndex + 1;
	636
	637	}
	638	return 0;
	639	}
	640
	641
	642	// Create a new phrase that is longer then the current one, but as short
	643	// as possible. In ANYPHRASE mode this means the new phrase is exactly
	644	// one symbol longer than the current one, but in STOPWORD mode the expansion
	645	// must introduce a new content symbol
	646	//
	647	// Note that we never expand across delimiters; if we encounter a delimiter
	648	// then this becomes the "end" symbol of the phrase. The calling function
	649	// should check for this condition.
	650
	651	Phrase Phrase::newPhraseShortestSuffixExpansion(cellindex i) {
	652
	653	// create a new phrase exactly one symbol longer than the current one
	654	Phrase p(suffixArray[i], length + 1, SUFFIX);
	655
	656	if (phraseMode == STOPWORDS) {
	657	// in STOPWORDS mode we must introduce a new content symbol
	658	while ((*(p.back) >= firstStopSymbol) &&
	659	(*(p.back) <= lastStopSymbol)) {
	660	p.increaseSuffixLength(p.length + 1);
	661	}
	662	}
	663
	664	p.findFirstAndLastSuffix(i, lastSuffixIndex);
	665	return p;
	666	}
	667
	668	Phrase Phrase::newPhraseShortestPrefixExpansion(cellindex i) {
	669
	670	// create a new phrase exactly one symbol longer than the current one
	671	Phrase p(prefixArray[i], length + 1, PREFIX);
	672
	673	if (phraseMode == STOPWORDS) {
	674	// in STOPWORDS mode we must introduce a new content symbol
	675	while ((*(p.forward) >= firstStopSymbol) &&
	676	(*(p.forward) <= lastStopSymbol)) {
	677	p.increasePrefixLength(p.length + 1);
	678	}
	679	}
	680
	681	p.findFirstAndLastPrefix(i, lastPrefixIndex);
	682	return p;
	683	}
	684
	685
	686	// Expand a phrase until it no longer has a unique suffix extension
	687	//
	688	// If the phrase only occurs once in the suffix array, do nothing.
	689	//
	690	// If we are in stopwords mode then we expand if there is a unique
	691	// suffix expansion and this expansion is a content word. We do this
	692	// with the content_len variable, which holds the length of the longest
	693	// phrase ending in a content word.
	694
	695	int Phrase::expandWhileUniqueSuffixExtension() {
	696	assert(forward);
	697	assert(suffixFound);
	698
	699	// if the phrase occurs only once, do nothing
	700	if (suffixFrequency < 2) {
	701	return 0;
	702	}
	703
	704	// count the length over which the cells are the same
	705	// and no delimiter is crossed.
	706	symbol *fst = firstSuffix;
	707	symbol *lst = lastSuffix;
	708	cellindex len = 0;
	709	cellindex content_len = 0;
	710
	711	// Calculate the length over which the phrases match
	712	while((fst == lst) && (*fst > LASTDELIMITER)) {
	713	len++;
	714	if (*fst > lastStopSymbol) content_len = len;
	715	fst++;
	716	lst++;
	717	}
	718
	719	// in ANYPHRASE mode, expand the phrase for as long as possible
	720	if (phraseMode == ANYPHRASE) {
	721	if (len > length) {
	722	increaseSuffixLength(len);
	723	}
	724	}
	725	// in STOPWORDS mode, expand only as far as the last contentword
	726	else {
	727	if (content_len > length) {
	728	increaseSuffixLength(content_len);
	729	}
	730	}
	731
	732	uniqueSuffixExtension = 0;
	733	return 0;
	734	}
	735
	736	int Phrase::expandWhileUniquePrefixExtension() {
	737	assert(prefixFound);
	738
	739	// if the phrase occurs only once, do nothing
	740	if (prefixFrequency < 2) {
	741	return 0;
	742	}
	743
	744	// count the length over which the cells are the same
	745	// and no delimiter is crossed.
	746	symbol *fst = firstPrefix;
	747	symbol *lst = lastPrefix;
	748	cellindex len = 0;
	749	cellindex content_len = 0;
	750
	751	// Calculate the length over which the phrases match
	752	while((fst == lst) && (*fst > LASTDELIMITER)) {
	753	len++;
	754	if (*fst > lastStopSymbol) content_len = len;
	755	fst--;
	756	lst--;
	757	}
	758
	759	// in ANYPHRASE mode, expand the phrase for as long as possible
	760	if (phraseMode == ANYPHRASE) {
	761	if (len > length) {
	762	increasePrefixLength(len);
	763	}
	764	}
	765	// in STOPWORDS mode, expand only as far as the last contentword
	766	else {
	767	if (content_len > length) {
	768	increasePrefixLength(content_len);
	769	}
	770	}
	771
	772	uniquePrefixExtension = 0;
	773	return 0;
	774	}
	775
	776
	777
	778
	779
	780
	781
	782
	783
	784
	785
	786
	787	// Compare the length of two phrases
	788	//
[1873]	789	// Given two phrases, return true if the first is shorter/longer,
	790	// otherwise return false. For use in various sort functions.
[1562]	791
	792	bool isShorter(Phrase p1, Phrase p2) {
	793	if (p1.length < p2.length) {
	794	return true;
	795	}
	796	return false;
	797	}
	798
[1873]	799	bool isLonger(Phrase p1, Phrase p2) {
	800	if (p1.length > p2.length) {
	801	return true;
	802	}
	803	return false;
	804	}
[1562]	805
[1873]	806

Note: See TracBrowser for help on using the repository browser.

Download in other formats: