source: trunk/gsdl/src/phind/generate/phrase.h@ 2801

Last change on this file since 2801 was 2801, checked in by kjm18, 23 years ago

New version of suffix, based on suffix2 (Gordon and Craig's simpler version),
with Kath's improvements.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1/**********************************************************************
2 *
3 * phrase.h -- definition of the phrase object used by suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef PHRASE_H
29#define PHRASE_H
30
31// use the standard namespace
32#if !defined (GSDL_NAMESPACE_BROKEN)
33#if defined(GSDL_USE_OBJECTSPACE)
34using namespace ospace::std;
35#else
36using namespace std;
37#endif
38#endif
39
40#if defined(GSDL_USE_STL_H)
41# include <vector.h>
42#else
43# include <vector>
44#endif
45
46#include "suffix.h"
47
48class Phrase {
49
50public:
51
52 // The phrase itself is stored with two pointers: forward points to
53 // its first cell, back points to its last. The length is always
54 // stored in length. If one of these is set, all must be set, and
55 // it must be true that (forward + length - 1) = back.
56 symbol *forward;
57 symbol *back;
58 cellcount length;
59
60 // Phrase location in the suffix array
61 int suffixFound;
62 symbol *firstSuffix;
63 symbol *lastSuffix;
64 cellindex firstSuffixIndex;
65 cellindex lastSuffixIndex;
66 cellcount suffixFrequency;
67
68 // Phrase location in the prefix array
69 int prefixFound;
70 symbol* firstPrefix;
71 symbol* lastPrefix;
72 cellindex firstPrefixIndex;
73 cellindex lastPrefixIndex;
74 cellcount prefixFrequency;
75
76 // Constructor functions
77 Phrase();
78 Phrase(const Phrase &p);
79
80 // A "partial" constructor: the first argument is an array of words,
81 // second is its length, third is the direction (SUFFIX or PREFIX)
82 // in which the words should be read (defaults to SUFFIX).
83 Phrase(symbol *words, cellcount size, int direction);
84
85 // Represent the phrase as an arracy of characters
86 // You will have to call "delete []" on the array returned.
87 char *toString();
88
89 // Find an initial set of candidate phrases in the suffix/prefix array
90 void initialSuffixCandidates(vector<Phrase> &results);
91 void initialPrefixCandidates(vector<Phrase> &results);
92
93 // Does the phrase have a unique extension?
94 int hasUniqueSuffixExtension();
95 int hasUniquePrefixExtension();
96
97 // Extend a phrase by exactly one symbol
98 int expandUniquePrefixExtensionByOne();
99 int expandUniqueSuffixExtensionByOne();
100
101 // Extend a phrase until it no longer has a unique extanesion
102 int expandWhileUniqueSuffixExtension();
103 int expandWhileUniquePrefixExtension();
104
105 // Shorten a phrase by one symbol
106 int shortenByOneAtSuffix();
107 int shortenByOneAtPrefix();
108
109 // Find the phrase in the suffix/prefix array
110 int findFirstAndLastSuffix();
111 int findFirstAndLastPrefix();
112 int findFirstAndLastSuffix(cellindex begin, cellindex end);
113 int findFirstAndLastPrefix(cellindex begin, cellindex end);
114
115 // Make sure the phrase location in the suffix/prefix array is known
116 inline void Phrase::ensureSuffixFound() {
117 if (!suffixFound)
118 findFirstAndLastSuffix();
119 }
120 inline void Phrase::ensurePrefixFound() {
121 if (!prefixFound)
122 findFirstAndLastPrefix();
123 }
124
125 // Output a phrase to a stream
126 friend std::ostream &operator<<(std::ostream &stream, const Phrase &phrase);
127
128 int uniqueSuffixExtension;
129 int uniquePrefixExtension;
130
131private:
132
133 // Does the phrase have a unique suffix/prefix extension?
134 // if yes, then 1; if no then 0; if unknown then -1;
135
136 // reset a phrase
137 int empty();
138
139 // reset phrase information relating to location in suffix/prefix array
140 int clearSuffix();
141 int clearPrefix();
142
143 // increase the length of a phrase "in place"
144 int Phrase::increaseSuffixLength(cellcount l);
145 int Phrase::increasePrefixLength(cellcount l);
146
147 // Compare the phrase to a given array of symbols
148 int compareSuffix(symbol *words, cellcount length);
149 int comparePrefix(symbol *words, cellcount length);
150
151 // Create a new phrase that is longer than this one, yet as short as possible.
152 Phrase Phrase::newPhraseShortestSuffixExpansion(cellindex i);
153 Phrase Phrase::newPhraseShortestPrefixExpansion(cellindex i);
154
155};
156
157
// Free-standing predicates over two phrases; both arguments are taken by value.
// NOTE(review): presumably these compare the phrases' length fields (e.g. for
// use as ordering predicates when sorting) -- the definitions are not in this
// header, so confirm against the implementation file.
158bool isShorter(Phrase p1, Phrase p2);
159bool isLonger(Phrase p1, Phrase p2);
160
161#endif
162
Note: See TracBrowser for help on using the repository browser.