source: trunk/gsdl/src/phind/generate/phrase.h@ 2487

Last change on this file since 2487 was 2487, checked in by sjboddie, 23 years ago

Changes to get phind working under windows

  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1/**********************************************************************
2 *
3 * phrase.h -- definition of the phrase object used by suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef PHRASE_H
29#define PHRASE_H
30
31#if defined(GSDL_USE_STL_H)
32# include <vector.h>
33#else
34# include <vector>
35#endif
36
37#include "suffix.h"
38
39class Phrase {
40
41public:
42
43 // The phrase itself is stored with two pointers: forward
44 // points to its first cell, back points to its last.
45 // The length is always stored in length.
46 // If one of these is set, all must be set, and it must
47 // be true that (forward + length - 1) = back
48 symbol *forward;
49 symbol *back;
50 cellcount length;
51
52 // Phrase location in the suffix array
53 int suffixFound;
54 symbol *firstSuffix;
55 symbol *lastSuffix;
56 cellindex firstSuffixIndex;
57 cellindex lastSuffixIndex;
58 cellcount suffixFrequency;
59
60 // Phrase location in the prefix array
61 int prefixFound;
62 symbol* firstPrefix;
63 symbol* lastPrefix;
64 cellindex firstPrefixIndex;
65 cellindex lastPrefixIndex;
66 cellcount prefixFrequency;
67
68 // Constructor function
69 // First argument is an array of words, second is the length of
70 // the phrase, third is the direction (SUFFIX of PREFIX) in
71 // which the words should be read (defaults to forwards).
72 Phrase(symbol *words, cellcount size, int direction);
73
74 // An empty phrase can be created without arguments, but is
75 // good for nothing and may not be used with any public fuctions.
76 // We therefore only use it internally.
77 Phrase();
78
79 // Represent the phrase as a string
80 char *toString();
81
82 // Find an initial set of candidate phrases in the suffix/prefix array
83 int initialSuffixCandidates(vector<Phrase> &results);
84 int initialPrefixCandidates(vector<Phrase> &results);
85
86 // Does the phrase have a unique extension?
87 int hasUniqueSuffixExtension();
88 int hasUniquePrefixExtension();
89
90 // Extend a phrase by exactly one symbol
91 int expandUniquePrefixExtensionByOne();
92 int expandUniqueSuffixExtensionByOne();
93
94 // Find the phrase in the suffix/prefix array
95 int findFirstAndLastSuffix();
96 int findFirstAndLastPrefix();
97 int findFirstAndLastSuffix(cellindex begin, cellindex end);
98 int findFirstAndLastPrefix(cellindex begin, cellindex end);
99
100 // Make sure the phrase location in the suffix/prefix array is known
101 int ensureSuffixFound();
102 int ensurePrefixFound();
103
104private:
105
106 // Does the phrase have a unique suffix/prefix extension?
107 // if yes, then 1; if no then 0; if unknown then -1;
108 int uniqueSuffixExtension;
109 int uniquePrefixExtension;
110
111 // reset a phrase
112 int empty();
113
114 // reset phrase information relating to location in suffix/prefix array
115 int clearSuffix();
116 int clearPrefix();
117
118 // increase the length of a phrase "in place"
119 int Phrase::increaseSuffixLength(cellcount l);
120 int Phrase::increasePrefixLength(cellcount l);
121
122 // Compare the phrase to a given array of symbols
123 int compareSuffix(symbol *words, cellcount length);
124 int comparePrefix(symbol *words, cellcount length);
125
126 // Create a new phrase that is longer than this one, yet as short as possible.
127 Phrase Phrase::newPhraseShortestSuffixExpansion(cellindex i);
128 Phrase Phrase::newPhraseShortestPrefixExpansion(cellindex i);
129
130 // Extend a phrase until it no longer has a unique extanesion
131 int expandWhileUniqueSuffixExtension();
132 int expandWhileUniquePrefixExtension();
133};
134
135
// Free-function predicate over two phrases; only the declaration is
// visible in this header.
// NOTE(review): presumably true when p1 has a smaller `length` than p2 --
// confirm against the definition.  Both arguments are passed by value,
// so each call copies two Phrase objects.
136bool isShorter(Phrase p1, Phrase p2);
// Free-function predicate over two phrases; only the declaration is
// visible in this header.
// NOTE(review): presumably true when p1 has a greater `length` than p2 --
// confirm against the definition.  Both arguments are passed by value,
// so each call copies two Phrase objects.
137bool isLonger(Phrase p1, Phrase p2);
138
139#endif
Note: See TracBrowser for help on using the repository browser.