source: main/trunk/greenstone2/build-src/src/phind/generate/phrase.h@ 21356

Last change on this file since 21356 was 12793, checked in by kjdon, 18 years ago

gcc-4 compilation patch, thanks to Juan Grigera

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1/**********************************************************************
2 *
3 * phrase.h -- definition of the phrase object used by suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef PHRASE_H
29#define PHRASE_H
30
// Pull in the container header before the using-directive below, so
// that namespace std has already been declared when the directive
// names it.  Naming std before any standard header has been seen is
// only accepted by compilers that predeclare the namespace.
#if defined(GSDL_USE_STL_H)
#  include <vector.h>
#else
#  include <vector>
#endif

// Use the standard namespace.  The declarations in this header spell
// vector and ostream without qualification, so the directive has to
// stay at file scope here; be aware that it is therefore also in
// effect for every file that includes this header.
#if !defined(GSDL_NAMESPACE_BROKEN)
#  if defined(GSDL_USE_OBJECTSPACE)
using namespace ospace::std;
#  else
using namespace std;
#  endif
#endif
45
46#include "suffix.h"
47
48class Phrase {
49
50public:
51
52 // The phrase itself is stored with two pointers: forward points to
53 // its first cell, back points to its last. The length is always
54 // stored in length. If one of these is set, all must be set, and
55 // it must be true that (forward + length - 1) = back.
56 symbol *forward;
57 symbol *back;
58 cellcount length;
59
60 // Phrase location in the suffix array
61 int suffixFound;
62 symbol *firstSuffix;
63 symbol *lastSuffix;
64 cellindex firstSuffixIndex;
65 cellindex lastSuffixIndex;
66 cellcount suffixFrequency;
67
68 // Phrase location in the prefix array
69 int prefixFound;
70 symbol* firstPrefix;
71 symbol* lastPrefix;
72 cellindex firstPrefixIndex;
73 cellindex lastPrefixIndex;
74 cellcount prefixFrequency;
75
76 // Constructor functions
77 Phrase();
78 Phrase(const Phrase &p);
79
80 // A "partial" constructor: the first argument is an array of words,
81 // second is its length, third is the direction (SUFFIX or PREFIX)
82 // in which the words should be read (defaults to SUFFIX).
83 Phrase(symbol *words, cellcount size, int direction);
84
85 // Represent the phrase as an arracy of characters
86 // You will have to call "delete []" on the array returned.
87 char *toString();
88
89 // Find an initial set of candidate phrases in the suffix/prefix array
90 void initialSuffixCandidates(vector<Phrase> &results);
91 void initialPrefixCandidates(vector<Phrase> &results);
92
93 // Does the phrase have a unique extension?
94 int hasUniqueSuffixExtension();
95 int hasUniquePrefixExtension();
96
97 // Extend a phrase by exactly one symbol
98 int expandUniquePrefixExtensionByOne();
99 int expandUniqueSuffixExtensionByOne();
100
101 // Extend a phrase until it no longer has a unique extanesion
102 int expandWhileUniqueSuffixExtension();
103 int expandWhileUniquePrefixExtension();
104
105 // Shorten a phrase by one symbol
106 int shortenByOneAtSuffix();
107 int shortenByOneAtPrefix();
108
109 // Find the phrase in the suffix/prefix array
110 int findFirstAndLastSuffix();
111 int findFirstAndLastPrefix();
112 int findFirstAndLastSuffix(cellindex begin, cellindex end);
113 int findFirstAndLastPrefix(cellindex begin, cellindex end);
114
115 // Make sure the phrase location in the suffix/prefix array is known
116 inline void ensureSuffixFound() {
117 if (!suffixFound)
118 findFirstAndLastSuffix();
119 }
120 inline void ensurePrefixFound() {
121 if (!prefixFound)
122 findFirstAndLastPrefix();
123 }
124
125 // Output a phrase to a stream
126 friend ostream &operator<<(ostream &stream, const Phrase &phrase);
127
128 int uniqueSuffixExtension;
129 int uniquePrefixExtension;
130
131private:
132
133 // Does the phrase have a unique suffix/prefix extension?
134 // if yes, then 1; if no then 0; if unknown then -1;
135
136 // reset a phrase
137 int empty();
138
139 // reset phrase information relating to location in suffix/prefix array
140 int clearSuffix();
141 int clearPrefix();
142
143 // increase the length of a phrase "in place"
144 int increaseSuffixLength(cellcount l);
145 int increasePrefixLength(cellcount l);
146
147 // Compare the phrase to a given array of symbols
148 int compareSuffix(symbol *words, cellcount length);
149 int comparePrefix(symbol *words, cellcount length);
150
151 // Create a new phrase that is longer than this one, yet as short as possible.
152 Phrase newPhraseShortestSuffixExpansion(cellindex i);
153 Phrase newPhraseShortestPrefixExpansion(cellindex i);
154
155};
156
157
158bool isShorter(Phrase p1, Phrase p2);
159bool isLonger(Phrase p1, Phrase p2);
160
161#endif
162
Note: See TracBrowser for help on using the repository browser.