source: trunk/gsdl/packages/mg/src/text/term_lists.c@ 1014

Last change on this file since 1014 was 439, checked in by sjboddie, 25 years ago

renamed mg-1.3d directory mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/**************************************************************************
2 *
3 * term_lists.c -- creation, growth, printing and freeing of query term lists
4 * Copyright (C) 1994 Authors
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: term_lists.c 439 1999-08-10 21:23:37Z sjboddie $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 1999/08/10 21:18:24 sjboddie
27 renamed mg-1.3d directory mg
28
29 Revision 1.2 1998/11/25 07:55:52 rjmcnab
30
31 Modified mg so that you can specify the stemmer you want
32 to use via a command line option. You specify it to
33 mg_passes during the build process. The number of the
34 stemmer that you used is stored within the inverted
35 dictionary header and the stemmed dictionary header so
36 the correct stemmer is used in later stages of building
37 and querying.
38
39 Revision 1.1 1998/11/17 09:35:43 rjmcnab
40 *** empty log message ***
41
42 * Revision 1.1 1994/10/20 03:57:07 tes
43 * I have rewritten the boolean query optimiser and abstracted out the
44 * components of the boolean query.
45 *
46 */
47
48static char *RCSID = "$Id: term_lists.c 439 1999-08-10 21:23:37Z sjboddie $";
49
50#include "sysfuncs.h"
51
52#include "memlib.h"
53#include "local_strings.h"
54#include "term_lists.h"
55#include "messages.h"
56#include "stemmer.h"
57
58TermList *query_term_list = NULL;
59
60/* =========================================================================
61 * Function: MakeTermList
62 * Description:
63 * Input:
64 * Output:
65 * ========================================================================= */
66TermList *
67MakeTermList (int n)
68{
69 TermList *t;
70 int list_size = (n == 0 ? 1 : n); /* always allocate at least one node */
71
72 t = Xmalloc (sizeof (TermList) + (list_size - 1) * sizeof (TermEntry));
73 if (!t)
74 FatalError (1, "Unable to allocate term list");
75
76 t->num = n;
77 t->list_size = list_size;
78
79 return t;
80}
81
82/* =========================================================================
83 * Function: ResizeTermList
84 * Description:
85 * Input:
86 * Output:
87 * ========================================================================= */
88
89#define GROWTH_FACTOR 2
90#define MIN_SIZE 2
91
92static void
93ResizeTermList (TermList ** term_list)
94{
95 TermList *tl = *term_list;
96
97 if (tl->num > tl->list_size)
98 {
99 if (tl->list_size)
100 tl->list_size *= GROWTH_FACTOR;
101 else
102 tl->list_size = MIN_SIZE;
103 }
104 tl = Xrealloc (tl, sizeof (TermList) + (tl->list_size - 1) * sizeof (TermEntry));
105
106 if (!tl)
107 FatalError (1, "Unable to resize term list");
108
109 *term_list = tl;
110}
111
112/* =========================================================================
113 * Function: ConvertTermsToString
114 * Description:
115 * Convert term list into null-terminated string
116 * Input:
117 * query_term_list = term list
118 * Output:
119 * str = term string
120 * ========================================================================= */
121
122void
123ConvertTermsToString (TermList * query_term_list, char *str)
124{
125 int i = 0;
126 int total_len = 0;
127
128 /* terms_str should be preallocated */
129 if (!str)
130 return;
131
132 for (i = 0; i < query_term_list->num; i++)
133 {
134 unsigned char *word = query_term_list->TE[i].Word;
135 int len = word[0];
136 total_len += len + 1; /* +1 for space */
137 if (total_len > MAXTERMSTRLEN)
138 break;
139 strncpy (str, (char *) word + 1, len);
140 str += len;
141 if (i != (query_term_list->num) - 1)
142 {
143 *str = ' ';
144 str++; /* add space gap */
145 }
146
147 }
148 *str = '\0';
149}
150
151/* =========================================================================
152 * Function: ResetTermList
153 * Description:
154 * Input:
155 * Output:
156 * ========================================================================= */
157
158void
159ResetTermList (TermList ** tl)
160{
161 if (*tl)
162 FreeTermList (tl);
163 *tl = MakeTermList (0);
164}
165
166/* =========================================================================
167 * Function: AddTermEntry
168 * Description:
169 * Input:
170 * Output:
171 * ========================================================================= */
172
173int
174AddTermEntry (TermList ** query_term_list, TermEntry * te)
175{
176 TermList *tl = *query_term_list;
177
178 tl->num++;
179 ResizeTermList (query_term_list);
180 tl = *query_term_list;
181
182 /* copy the structure contents */
183 bcopy ((char *) te, (char *) &(tl->TE[tl->num - 1]), sizeof (TermEntry));
184
185 return tl->num - 1;
186}
187
188
189
190/* =========================================================================
191 * Function: AddTerm
192 * Description: Used in boolean parser - see bool_tree [RPAP - Feb 97: Term Frequency]
193 * Input:
194 * Output:
195 * ========================================================================= */
196
197int
198AddTerm (TermList ** query_term_list, u_char * Word, int Count, int word_num,
199 u_long count, u_long doc_count, u_long invf_ptr, u_long invf_len, int stemmer_num) /* [RPAP - Feb 97: Term Frequency] */
200{
201 int j;
202 TermList *tl = *query_term_list;
203
204 /* Look for the word in the already identified terms */
205 for (j = 0; j < tl->num; j++)
206 {
207 TermEntry *te = &(tl->TE[j]);
208 if (compare (te->Word, Word) == 0)
209 {
210 te->Count++;
211 return j;
212 }
213 }
214
215
216 {
217 /* Create a new entry in the list for the new word */
218 TermEntry te;
219
220 /* [RPAP - Feb 97: Term Frequency] */
221 te.WE.word_num = word_num;
222 te.WE.count = count;
223 te.WE.doc_count = doc_count;
224 te.WE.max_doc_count = doc_count; /* [RPAP - Jan 97: Stem Index Change] */
225 te.WE.invf_ptr = invf_ptr;
226 te.WE.invf_len = invf_len;
227 te.Count = Count;
228 te.Word = copy_string (Word);
229 if (!te.Word)
230 FatalError (1, "Could NOT create memory to add term");
231
232 /* [RPAP - Jan 97: Stem Index Change] */
233 te.Stem = copy_string (Word);
234 if (!te.Stem)
235 FatalError (1, "Could NOT create memory to add term");
236 stemmer (2, stemmer_num, te.Stem);
237
238 te.require_match = 0; /* [RJM - 07/97: Ranked Required Terms] */
239
240 return AddTermEntry (query_term_list, &te);
241 }
242
243}
244
245/* =========================================================================
246 * Function: FreeTermList
247 * Description:
248 * Input:
249 * Output:
250 * ========================================================================= */
251
252void
253FreeTermList (TermList ** the_tl)
254{
255 int j;
256 TermList *tl = *the_tl;
257
258 for (j = 0; j < tl->num; j++)
259 {
260 if (tl->TE[j].Word)
261 Xfree (tl->TE[j].Word);
262 /* [RPAP - Jan 97: Stem Index Change] */
263 if (tl->TE[j].Stem)
264 Xfree (tl->TE[j].Stem);
265 }
266 Xfree (tl);
267
268 *the_tl = NULL;
269}
270
271/* =========================================================================
272 * Function: PrintWordEntry
273 * Description:
274 * Input:
275 * Output:
276 * ========================================================================= */
277
278void
279PrintWordEntry (WordEntry * we, FILE * file)
280{
281 fprintf (file, "we->word_num = %d\n", we->word_num);
282 fprintf (file, "we->count = %ld\n", we->count);
283 fprintf (file, "we->doc_count = %ld\n", we->doc_count);
284 fprintf (file, "we->max_doc_count = %ld\n", we->max_doc_count);
285 fprintf (file, "we->invf_ptr = %ld\n", we->invf_ptr);
286 fprintf (file, "we->invf_len = %ld\n", we->invf_len);
287}
288
289/* =========================================================================
290 * Function: PrintTermEntry
291 * Description:
292 * Input:
293 * Output:
294 * ========================================================================= */
295
296void
297PrintTermEntry (TermEntry * te, FILE * file)
298{
299
300 fprintf (file, "Term Entry\n");
301 fprintf (file, "te->Count = %d\n", te->Count);
302 fprintf (file, "te->Word = %s\n", str255_to_string (te->Word, NULL));
303 if (te->Stem != NULL)
304 fprintf (file, "te->Stem = %s\n", str255_to_string (te->Stem, NULL)); /* [RPAP - Jan 97: Stem Index Change] */
305 fprintf (file, "te->require_match = %i\n", te->require_match); /* [RJM 07/97: Ranked Required Terms] */
306 PrintWordEntry (&(te->WE), file);
307
308}
309
310/* =========================================================================
311 * Function: PrintTermList
312 * Description:
313 * Input:
314 * Output:
315 * ========================================================================= */
316
317void
318PrintTermList (TermList * tl, FILE * file)
319{
320 int i;
321
322 fprintf (file, "Term List\n");
323 fprintf (file, "tl->list_size = %d\n", tl->list_size);
324 fprintf (file, "tl->num = %d\n", tl->num);
325
326 for (i = 0; i < tl->num; i++)
327 {
328 fprintf (file, "[%d]\n", i);
329 PrintTermEntry (&(tl->TE[i]), file);
330 }
331}
Note: See TracBrowser for help on using the repository browser.