source: trunk/gsdl/src/phind/generate/suffix.h@ 2487

Last change on this file since 2487 was 2487, checked in by sjboddie, 23 years ago

Changes to get phind working under windows

  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
1/**********************************************************************
2 *
3 * suffix.h -- definitions used in suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef SUFFIX_H
29#define SUFFIX_H
30
31// Types
32
33// The input words are stored as an array of type symbol.
34// Required range: 0 - vocabulary size (typically hundreds of thousands)
35typedef unsigned int symbol;  // element type of the symbols array declared below
36
37// The cellindex and cellcount types are used when we store indexes
38// to, and ranges of, cells in large arrays.
39// Required range: 0 - length of input (typically millions)
40typedef unsigned int cellindex;
41typedef unsigned int cellcount;  // type of symbol_array_length and inputLength below
42
43// The frequency type is used when we want to store the frequency with
44// which a phrase (or some other thing) occurs.
45// Required range: 0 - frequency of most common symbol (often "the")
46typedef unsigned int frequency;
47
48// The check type is used when we want to store low frequency values.
49// Required range: 0 - 8 (could be recoded to use booleans)
50typedef unsigned char check;  // element type of suffixCheck and prefixCheck below
51
52
53// Global variables
54
55// The symbol array (symbols) holds the input words.
56extern cellcount symbol_array_length;  // NOTE(review): presumably the allocated length of symbols - confirm in suffix.cpp
57extern symbol *symbols;
58
59// The number of words read is stored in inputLength.
60extern cellcount inputLength;
61
62// Suffix and prefix arrays are used to extract phrases.
63extern symbol **suffixArray;  // each element is a pointer to a symbol
64extern check *suffixCheck;  // NOTE(review): presumably one check value per suffixArray entry - confirm
65extern symbol **prefixArray;  // each element is a pointer to a symbol
66extern check *prefixCheck;  // NOTE(review): presumably one check value per prefixArray entry - confirm
67
68// Collection-specific information about the first and last stopword/content symbols
69extern symbol firstStopSymbol;
70extern symbol lastStopSymbol;
71extern symbol firstContentSymbol;
72extern symbol lastContentSymbol;
73
74
75// Are we allowed to terminate a phrase on a stopword?
76extern int phraseMode;  // NOTE(review): presumably holds ANYPHRASE or STOPWORDS (defined below) - confirm
77
78#define ANYPHRASE 0
79#define STOPWORDS 1
80
81
82// Direction values (NOTE(review): presumably select the suffix or prefix array - confirm usage in suffix.cpp)
83#define SUFFIX 0
84#define PREFIX 1
85
86
87// Special symbol values (LASTDELIMITER has the same value as PHRASELIMIT, the largest one defined here)
88#define COLLECTIONSTART 1
89#define COLLECTIONEND 2
90#define DOCUMENTSTART 3
91#define PHRASELIMIT 4
92
93#define LASTDELIMITER 4
94
95#endif
Note: See TracBrowser for help on using the repository browser.