source: trunk/gsdl/src/mgpp/text/term_lists.h@ 655

Last change on this file since 655 was 655, checked in by cs025, 25 years ago

Base install of MG-PP

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1/**************************************************************************
2 *
3 * term_lists.h -- description
4 * Copyright (C) 1994 Authors
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: term_lists.h 655 1999-10-11 02:58:42Z cs025 $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 1999/10/11 02:58:36 cs025
27 Base install of MG-PP
28
29 Revision 1.1 1999/08/10 21:18:24 sjboddie
30 renamed mg-1.3d directory mg
31
32 Revision 1.2 1998/11/25 07:55:52 rjmcnab
33
34 Modified mg so that you can specify the stemmer you want
35 to use via a command line option. You specify it to
36 mg_passes during the build process. The number of the
37 stemmer that you used is stored within the inverted
38 dictionary header and the stemmed dictionary header so
39 the correct stemmer is used in later stages of building
40 and querying.
41
42 Revision 1.1 1998/11/17 09:35:44 rjmcnab
43 *** empty log message ***
44
45 * Revision 1.1 1994/10/20 03:57:08 tes
46 * I have rewritten the boolean query optimiser and abstracted out the
47 * components of the boolean query.
48 *
49 */
50
51#ifndef TERM_LISTS_H
52#define TERM_LISTS_H
53
54#include "sysfuncs.h"
55
/* Upper limit on the number of characters in a term string. */
#define MAXTERMSTRLEN 1023
57
/*
 * WordEntry -- per-word statistics plus the location of the word's
 * entry in the inverted file.
 */
typedef struct WordEntry {
  /* Unique number for each different word. */
  int word_num;

  /* Number of times the word occurs in the text. */
  u_long count;

  /* Number of documents that contain the word. */
  u_long doc_count;

  /* [RPAP - Jan 97: Stem Index Change] */
  u_long max_doc_count;

  /* Byte position of the inverted file entry corresponding to the word. */
  u_long invf_ptr;

  /* Length, in bytes, of the inverted file entry. */
  u_long invf_len;
} WordEntry;
70
71typedef struct TermEntry
72 {
73 WordEntry WE;
74 int Count; /* The number of times the word occurs in the query */
75 u_char *Word; /* The word. */
76 u_char *Stem; /* [RPAP - Jan 97: Stem Index Change] - The stemmed non-cf version of Word */
77 int require_match; /* [RJM 07/97: Ranked Required Terms] 0=optional match, 1=must match */
78
79 inline int wordLength()
80 {
81 return Word[0];
82 }
83
84 inline unsigned char *word()
85 {
86 return Word + 1;
87 }
88 }
89TermEntry;
90
91typedef struct TermList
92 {
93 int list_size;
94 int num;
95 TermEntry *TE;
96
97 inline TermEntry *entry(int term)
98 { return &TE[term];
99 }
100
101 inline int size()
102 {
103 return num;
104 }
105 }
106TermList;
107
/*
 * Yields a pointer to the WordEntry embedded in the n-th TermEntry of
 * term_list (a pointer to a TermList). No bounds check is performed on n.
 */
#define GetNthWE(term_list, n) (&(term_list)->TE[(n)].WE)
109
110/* --- prototypes --- */
111void TermList_toString (TermList * query_term_list, char *str);
112int TermList_AddTermEntry (TermList * query_term_list, TermEntry * te);
113int TermList_AddTerm (TermList * query_term_list, u_char * Word, int Count, int word_num,
114 u_long count, u_long doc_count, u_long invf_ptr, u_long invf_len, /* [RPAP - Feb 97: Term Frequency] */
115 int stemmer_num);
116#ifdef __cplusplus
117extern "C"
118#endif
119void TermList_reset(TermList **tl);
120void TermList_destroy (TermList ** the_tl);
121void TermList_print (TermList * tl, FILE * file);
122TermList *TermList_create (int n);
123
124#ifdef __cplusplus
125inline int TermList_size(TermList *tl)
126{ return tl->num;
127}
128#endif
129
130#endif
Note: See TracBrowser for help on using the repository browser.