source: trunk/gsdl/src/phind/generate/phrase.h@ 2498

Last change on this file since 2498 was 2498, checked in by sjboddie, 23 years ago

A couple of small changes to phind's generate code to get it working
under windows 95

  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**********************************************************************
2 *
3 * phrase.h -- definition of the phrase object used by suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef PHRASE_H
29#define PHRASE_H
30
31// use the standard namespace
32#if !defined (GSDL_NAMESPACE_BROKEN)
33#if defined(GSDL_USE_OBJECTSPACE)
34using namespace ospace::std;
35#else
36using namespace std;
37#endif
38#endif
39
40#if defined(GSDL_USE_STL_H)
41# include <vector.h>
42#else
43# include <vector>
44#endif
45
46#include "suffix.h"
47
48class Phrase {
49 // The phrase object used by suffix.cpp.
50public:
51
52 // The phrase itself is stored with two pointers: forward
53 // points to its first cell, back points to its last.
54 // The length is always stored in length.
55 // If one of these is set, all must be set, and it must
56 // be true that (forward + length - 1) = back
57 symbol *forward;
58 symbol *back;
59 cellcount length;
60
61 // Phrase location in the suffix array
62 int suffixFound;
63 symbol *firstSuffix;
64 symbol *lastSuffix;
65 cellindex firstSuffixIndex;
66 cellindex lastSuffixIndex;
67 cellcount suffixFrequency;
68
69 // Phrase location in the prefix array
70 int prefixFound;
71 symbol* firstPrefix;
72 symbol* lastPrefix;
73 cellindex firstPrefixIndex;
74 cellindex lastPrefixIndex;
75 cellcount prefixFrequency;
76
77 // Constructor function
78 // First argument is an array of words, second is the length of
79 // the phrase, third is the direction (SUFFIX or PREFIX) in
80 // which the words should be read (no default is declared here).
81 Phrase(symbol *words, cellcount size, int direction);
82
83 // An empty phrase can be created without arguments, but is
84 // good for nothing and may not be used with any public functions.
85 // We therefore only use it internally.
86 Phrase();
87
88 // Represent the phrase as a string
89 char *toString();
90
91 // Find an initial set of candidate phrases in the suffix/prefix array
92 int initialSuffixCandidates(vector<Phrase> &results);
93 int initialPrefixCandidates(vector<Phrase> &results);
94
95 // Does the phrase have a unique extension?
96 int hasUniqueSuffixExtension();
97 int hasUniquePrefixExtension();
98
99 // Extend a phrase by exactly one symbol
100 int expandUniquePrefixExtensionByOne();
101 int expandUniqueSuffixExtensionByOne();
102
103 // Find the phrase in the suffix/prefix array
104 int findFirstAndLastSuffix();
105 int findFirstAndLastPrefix();
106 int findFirstAndLastSuffix(cellindex begin, cellindex end);
107 int findFirstAndLastPrefix(cellindex begin, cellindex end);
108
109 // Make sure the phrase location in the suffix/prefix array is known
110 int ensureSuffixFound();
111 int ensurePrefixFound();
112
113private:
114
115 // Does the phrase have a unique suffix/prefix extension?
116 // if yes, then 1; if no then 0; if unknown then -1;
117 int uniqueSuffixExtension;
118 int uniquePrefixExtension;
119
120 // reset a phrase
121 int empty();
122
123 // reset phrase information relating to location in suffix/prefix array
124 int clearSuffix();
125 int clearPrefix();
126
127 // increase the length of a phrase "in place"
128 int increaseSuffixLength(cellcount l);
129 int increasePrefixLength(cellcount l);
130
131 // Compare the phrase to a given array of symbols
132 int compareSuffix(symbol *words, cellcount length);
133 int comparePrefix(symbol *words, cellcount length);
134
135 // Create a new phrase that is longer than this one, yet as short as possible.
136 Phrase newPhraseShortestSuffixExpansion(cellindex i);
137 Phrase newPhraseShortestPrefixExpansion(cellindex i);
138
139 // Extend a phrase until it no longer has a unique extension
140 int expandWhileUniqueSuffixExtension();
141 int expandWhileUniquePrefixExtension();
142};
143
144
145bool isShorter(Phrase p1, Phrase p2); // takes both phrases by value; presumably true when p1 is shorter than p2 -- confirm against the definition
146bool isLonger(Phrase p1, Phrase p2); // takes both phrases by value; presumably true when p1 is longer than p2 -- confirm against the definition
147
148#endif
Note: See TracBrowser for help on using the repository browser.