source: trunk/gsdl/src/phind/generate/suffix.h@ 1631

Last change on this file since 1631 was 1631, checked in by paynter, 24 years ago

Changed copyrights to include NZDLP.

  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
1/**********************************************************************
2 *
3 * suffix.h -- definitions used in suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28
29// Types
30
31// The input words are stored as an array of type symbol.
32// Required range: 0 - vocabulary size (typically hundreds of thousands)
33typedef unsigned int symbol;
34
35// The cellindex and cellcount types are used when we store indexes
36// into, and ranges of, cells in large arrays.
37// Required range: 0 - length of input (typically millions)
38typedef unsigned int cellindex;
39typedef unsigned int cellcount;
40
41// The frequency type is used when we want to store the frequency with
42// which a phrase (or some other thing) occurs.
43// Required range: 0 - frequency of most common symbol (often "the")
44typedef unsigned int frequency;
45
46// The check type is used when we want to store low frequency values.
47// Required range: 0 - 8 (could be recoded to use booleans)
48typedef unsigned char check;
49
50
51// Global variables
52// (extern declarations only; the definitions are not in this header)
53// The symbol array holds the input words
54extern cellcount symbol_array_length;
55extern symbol *symbols;
56
57// The number of words read is stored in inputLength.
58extern cellcount inputLength;
59
60// Suffix and prefix arrays are used to extract phrases
61extern symbol **suffixArray;
62extern check *suffixCheck;
63extern symbol **prefixArray;
64extern check *prefixCheck;
65
66// Collection-specific information about the first and last stopword/content symbols
67extern symbol firstStopSymbol;
68extern symbol lastStopSymbol;
69extern symbol firstContentSymbol;
70extern symbol lastContentSymbol;
71
72
73// Are we allowed to terminate a phrase on a stopword?
74extern int phraseMode;
75// NOTE(review): ANYPHRASE and STOPWORDS appear to be the values phraseMode takes -- confirm in suffix.cpp
76#define ANYPHRASE 0
77#define STOPWORDS 1
78
79
80// Direction values
81#define SUFFIX 0
82#define PREFIX 1
83
84
85// Special symbol values
86#define COLLECTIONSTART 1
87#define COLLECTIONEND 2
88#define DOCUMENTSTART 3
89#define PHRASELIMIT 4
90// LASTDELIMITER has the same value as PHRASELIMIT, the largest special symbol value above
91#define LASTDELIMITER 4
92
93
94
Note: See TracBrowser for help on using the repository browser.