source: trunk/gsdl/packages/mg/src/text/term_lists.h@ 1014

Last change on this file since 1014 was 439, checked in by sjboddie, 25 years ago

renamed mg-1.3d directory mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.4 KB
Line 
1/**************************************************************************
2 *
3 * term_lists.h -- query term list structures (WordEntry, TermEntry, TermList) and their prototypes
4 * Copyright (C) 1994 Authors
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: term_lists.h 439 1999-08-10 21:23:37Z sjboddie $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 1999/08/10 21:18:24 sjboddie
27 renamed mg-1.3d directory mg
28
29 Revision 1.2 1998/11/25 07:55:52 rjmcnab
30
31 Modified mg so that you can specify the stemmer you want
32 to use via a command line option. You specify it to
33 mg_passes during the build process. The number of the
34 stemmer that you used is stored within the inverted
35 dictionary header and the stemmed dictionary header so
36 the correct stemmer is used in later stages of building
37 and querying.
38
39 Revision 1.1 1998/11/17 09:35:44 rjmcnab
40 *** empty log message ***
41
42 * Revision 1.1 1994/10/20 03:57:08 tes
43 * I have rewritten the boolean query optimiser and abstracted out the
44 * components of the boolean query.
45 *
46 */
47
48#ifndef TERM_LISTS_H
49#define TERM_LISTS_H
50
51#include "sysfuncs.h"
52
/*
 * Maximum number of characters in a term string.
 * NOTE(review): whether this count includes a terminating NUL is not
 * stated here -- confirm before sizing buffers with it.
 */
#define MAXTERMSTRLEN 1023
54
55typedef struct WordEntry
56 {
57 int word_num; /* Unique number for each different word */
58 u_long count; /* Number of times the word occurs in the text */
59 u_long doc_count; /* Number of documents that contain the word */
60 u_long max_doc_count; /* [RPAP - Jan 97: Stem Index Change] */
61 u_long invf_ptr; /* This is a byte position of the
62 inverted file entry corresponding to the word */
63 u_long invf_len; /* This is the length of the inverted
64 file entry in bytes */
65 }
66WordEntry;
67
68typedef struct TermEntry
69 {
70 WordEntry WE;
71 int Count; /* The number of times the word occurs in the query */
72 u_char *Word; /* The word. */
73 u_char *Stem; /* [RPAP - Jan 97: Stem Index Change] - The stemmed non-cf version of Word */
74 int require_match; /* [RJM 07/97: Ranked Required Terms] 0=optional match, 1=must match */
75 }
76TermEntry;
77
78typedef struct TermList
79 {
80 int list_size;
81 int num;
82 TermEntry TE[1];
83 }
84TermList;
85
/*
 * GetNthWE -- address of the WordEntry embedded in entry n of
 * term_list's TE array.  term_list must be a pointer; n is used as a
 * plain array index with no bounds check.  Each argument is expanded
 * exactly once.
 */
#define GetNthWE(term_list, n) (&(term_list)->TE[n].WE)
87
88/* --- prototypes --- */
89void ConvertTermsToString (TermList * query_term_list, char *str);
90int AddTermEntry (TermList ** query_term_list, TermEntry * te);
91int AddTerm (TermList ** query_term_list, u_char * Word, int Count, int word_num,
92 u_long count, u_long doc_count, u_long invf_ptr, u_long invf_len, /* [RPAP - Feb 97: Term Frequency] */
93 int stemmer_num);
94void ResetTermList (TermList ** tl);
95void FreeTermList (TermList ** the_tl);
96void PrintWordEntry (WordEntry * we, FILE * file);
97void PrintTermEntry (TermEntry * te, FILE * file);
98void PrintTermList (TermList * tl, FILE * file);
99TermList *MakeTermList (int n);
100
101
102#endif
Note: See TracBrowser for help on using the repository browser.