/**********************************************************************
 *
 * suffix.h -- definitions used in suffix.cpp
 *
 * Copyright 2000 Gordon W. Paynter
 * Copyright 2000 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#ifndef SUFFIX_H
#define SUFFIX_H


// ---------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------

// The input words are stored as an array of type symbol.
// Required range: 0 - vocabulary size (typically hundreds of thousands)
typedef unsigned int symbol;

// The cellindex and cellcount types are used when we store indexes
// to, and ranges of, cells in large arrays.
// Required range: 0 - length of input (typically millions)
typedef unsigned int cellindex;
typedef unsigned int cellcount;

// The frequency type is used when we want to store the frequency with
// which a phrase (or some other thing) occurs.
// Required range: 0 - frequency of most common symbol (often "the")
typedef unsigned int frequency;

// The check type is used when we want to store low frequency values.
// Required range: 0 - 8 (could be recoded to use booleans)
typedef unsigned char check;


// ---------------------------------------------------------------------
// Global variables
//
// These are declarations only; this header does not define any storage.
// NOTE(review): the definitions are presumably in suffix.cpp -- confirm.
// ---------------------------------------------------------------------

// The symbol array holds the input words.
// NOTE(review): symbol_array_length is presumably the number of cells
// allocated for symbols -- confirm against the allocation code.
extern cellcount symbol_array_length;
extern symbol *symbols;

// The number of words read is stored in inputLength.
extern cellcount inputLength;

// Suffix and prefix arrays are used to extract phrases.
// Each array holds pointers to symbol, and each has a companion array
// of check values.
// NOTE(review): the pointers presumably refer to cells of symbols, and
// each check array presumably runs parallel to its pointer array --
// confirm in suffix.cpp.
extern symbol **suffixArray;
extern check *suffixCheck;
extern symbol **prefixArray;
extern check *prefixCheck;

// Collection-specific information about the first stopword/content symbols
extern symbol firstStopSymbol;
extern symbol lastStopSymbol;
extern symbol firstContentSymbol;
extern symbol lastContentSymbol;

// Are we allowed to terminate a phrase on a stopword?
// phraseMode is expected to hold one of the two mode values below.
extern int phraseMode;
#define ANYPHRASE 0
#define STOPWORDS 1


// ---------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------

// Direction values
#define SUFFIX 0
#define PREFIX 1

// Special symbol values.
// LASTDELIMITER has the same value as PHRASELIMIT, the highest-numbered
// special symbol defined here.
#define COLLECTIONSTART 1
#define COLLECTIONEND 2
#define DOCUMENTSTART 3
#define PHRASELIMIT 4
#define LASTDELIMITER 4

#endif