source: trunk/gsdl/src/phind/generate/phrase.h@ 1873

Last change on this file since 1873 was 1873, checked in by paynter, 23 years ago

Fixed a bug in the Phrase extraction algorithm that had the
"candidates" in the GetMinimalExpansions function sorted backwards.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.2 KB
Line 
1/**********************************************************************
2 *
3 * phrase.h -- definition of the phrase object used by suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28class Phrase {
29
30public:
31
32 // The phrase itself is stored with two pointers: forward
33 // points to its first cell, back points to its last.
34 // The length is always stored in length.
35 // If one of these is set, all must be set, and it must
36 // be true that (forward + length - 1) = back
37 symbol *forward;
38 symbol *back;
39 cellcount length;
40
41 // Phrase location in the suffix array
42 int suffixFound;
43 symbol *firstSuffix;
44 symbol *lastSuffix;
45 cellindex firstSuffixIndex;
46 cellindex lastSuffixIndex;
47 cellcount suffixFrequency;
48
49 // Phrase location in the prefix array
50 int prefixFound;
51 symbol* firstPrefix;
52 symbol* lastPrefix;
53 cellindex firstPrefixIndex;
54 cellindex lastPrefixIndex;
55 cellcount prefixFrequency;
56
57 // Constructor function
58 // First argument is an array of words, second is the length of
59 // the phrase, third is the direction (SUFFIX of PREFIX) in
60 // which the words should be read (defaults to forwards).
61 Phrase(symbol *words, cellcount size, int direction);
62
63 // Represent the phrase as a string
64 char *toString();
65
66 // Find an initial set of candidate phrases in the suffix/prefix array
67 int initialSuffixCandidates(vector<Phrase> &results);
68 int initialPrefixCandidates(vector<Phrase> &results);
69
70 // Does the phrase have a unique extension?
71 int hasUniqueSuffixExtension();
72 int hasUniquePrefixExtension();
73
74 // Extend a phrase by exactly one symbol
75 int expandUniquePrefixExtensionByOne();
76 int expandUniqueSuffixExtensionByOne();
77
78 // Find the phrase in the suffix/prefix array
79 int findFirstAndLastSuffix();
80 int findFirstAndLastPrefix();
81 int findFirstAndLastSuffix(cellindex begin, cellindex end);
82 int findFirstAndLastPrefix(cellindex begin, cellindex end);
83
84 // Make sure the phrase location in the suffix/prefix array is known
85 int ensureSuffixFound();
86 int ensurePrefixFound();
87
88private:
89
90 // An empty phrase can be created without arguments, but is
91 // good for nothing and may not be used with any public fuctions.
92 // We therefore only use it internally.
93 Phrase();
94
95 // Does the phrase have a unique suffix/prefix extension?
96 // if yes, then 1; if no then 0; if unknown then -1;
97 int uniqueSuffixExtension;
98 int uniquePrefixExtension;
99
100 // reset a phrase
101 int empty();
102
103 // reset phrase information relating to location in suffix/prefix array
104 int clearSuffix();
105 int clearPrefix();
106
107 // increase the length of a phrase "in place"
108 int Phrase::increaseSuffixLength(cellcount l);
109 int Phrase::increasePrefixLength(cellcount l);
110
111 // Compare the phrase to a given array of symbols
112 int compareSuffix(symbol *words, cellcount length);
113 int comparePrefix(symbol *words, cellcount length);
114
115 // Create a new phrase that is longer than this one, yet as short as possible.
116 Phrase Phrase::newPhraseShortestSuffixExpansion(cellindex i);
117 Phrase Phrase::newPhraseShortestPrefixExpansion(cellindex i);
118
119 // Extend a phrase until it no longer has a unique extanesion
120 int expandWhileUniqueSuffixExtension();
121 int expandWhileUniquePrefixExtension();
122
123
124};
125
126
// Free-standing predicates over two phrases, each taken by value.
// NOTE(review): the definitions are not in this header.  Going by the
// names they presumably compare the phrases' length fields and serve
// as sort comparators -- confirm against the implementation before
// relying on the exact ordering or on tie behaviour.
127bool isShorter(Phrase p1, Phrase p2);
128bool isLonger(Phrase p1, Phrase p2);
129
130
131
132
Note: See TracBrowser for help on using the repository browser.