source: trunk/gsdl/src/phind/generate/suffix.h@ 2867

Last change on this file since 2867 was 2867, checked in by paynter, 22 years ago

Moved all the sufficCheck functionality into the check.h header and
inlined it.

  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
1/**********************************************************************
2 *
3 * suffix.h -- definitions used in suffix.cpp
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 * A component of the Greenstone digital library software
9 * from the New Zealand Digital Library Project at the
10 * University of Waikato, New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#ifndef SUFFIX_H
29#define SUFFIX_H
30
31// Types
32
33// The input words are stored as an array of type symbol.
34// Required range: 0 - vocabulary size (typically hundreds of thousands)
35typedef unsigned int symbol;
36
37// The cellindex and cellcount types are used when we store indexes
38// into, and ranges of, cells in large arrays.
39// Required range: 0 - length of input (typically millions)
40typedef unsigned int cellindex;
41typedef unsigned int cellcount;
42
43// The frequency type is used when we want to store the frequency with
44// which a phrase (or some other thing) occurs.
45// Required range: 0 - frequency of most common symbol (often "the")
46typedef unsigned int frequency;
47
48// Global variables (declared here only; presumably defined in suffix.cpp -- confirm)
49
50// The symbol array holds the input words
51extern cellcount symbol_array_length;
52extern symbol *symbols;
53
54// The number of words read is stored in inputLength.
55extern cellcount inputLength;
56
57// Suffix and prefix arrays are used to extract phrases
58extern symbol **suffixArray;
59extern symbol **prefixArray;
60
61// Collection-specific information about the first and last stopword/content symbols
62extern symbol firstStopSymbol;
63extern symbol lastStopSymbol;
64extern symbol firstContentSymbol;
65extern symbol lastContentSymbol;
66
67
68// Are we allowed to terminate a phrase on a stopword?
69extern int phraseMode;
70
71#define ANYPHRASE 0 // NOTE(review): presumably a phraseMode value -- confirm in suffix.cpp
72#define STOPWORDS 1 // NOTE(review): presumably a phraseMode value -- confirm in suffix.cpp
73
74
75// Direction values: select between the suffix and the prefix direction
76#define SUFFIX 0
77#define PREFIX 1
78
79
80// Special symbol values (small constants of the kind stored in a symbol)
81#define COLLECTIONSTART 1
82#define COLLECTIONEND 2
83#define DOCUMENTSTART 3
84#define PHRASELIMIT 4
85
86#define LASTDELIMITER 4 // same value as PHRASELIMIT, the largest special symbol value defined above
87
88#endif
Note: See TracBrowser for help on using the repository browser.