source: trunk/indexers/mg/src/text/term_lists.h@ 3745

Last change on this file since 3745 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.5 KB
Line 
1/**************************************************************************
2 *
3 * term_lists.h -- term list data structures and function prototypes
4 * Copyright (C) 1994 Authors
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: term_lists.h 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 2003/02/20 21:18:24 mdewsnip
27 Addition of MG package for search and retrieval
28
29 Revision 1.1 1999/08/10 21:18:24 sjboddie
30 renamed mg-1.3d directory mg
31
32 Revision 1.2 1998/11/25 07:55:52 rjmcnab
33
34 Modified mg so that you can specify the stemmer you want
35 to use via a command line option. You specify it to
36 mg_passes during the build process. The number of the
37 stemmer that you used is stored within the inverted
38 dictionary header and the stemmed dictionary header so
39 the correct stemmer is used in later stages of building
40 and querying.
41
42 Revision 1.1 1998/11/17 09:35:44 rjmcnab
43 *** empty log message ***
44
45 * Revision 1.1 1994/10/20 03:57:08 tes
46 * I have rewritten the boolean query optimiser and abstracted out the
47 * components of the boolean query.
48 *
49 */
50
51#ifndef TERM_LISTS_H
52#define TERM_LISTS_H
53
54#include "sysfuncs.h"
55
/* NOTE(review): the comment on this constant does not say whether the count
   includes a terminator or length byte -- confirm wherever buffers are sized
   with it. */
56#define MAXTERMSTRLEN 1023 /* maximum number of characters in term string */
57
/* WordEntry: per-word record holding the word's number, its occurrence and
   document counts, and the position and length of its inverted file entry. */
58typedef struct WordEntry
59 {
60 int word_num; /* Unique number for each different word */
61 u_long count; /* Number of times the word occurs in the text */
62 u_long doc_count; /* Number of documents that contain the word */
63 u_long max_doc_count; /* [RPAP - Jan 97: Stem Index Change] -- meaning is not stated in this header; TODO confirm against the implementation */
64 u_long invf_ptr; /* This is a byte position of the
65 inverted file entry corresponding to the word */
66 u_long invf_len; /* This is the length of the inverted
67 file entry in bytes */
68 }
69WordEntry;
70
/* TermEntry: one query term -- the word, its stem, how often it occurs in the
   query, whether it must match, and an embedded WordEntry.
   NOTE(review): ownership of the Word and Stem buffers is not visible in this
   header -- confirm who allocates and frees them. */
71typedef struct TermEntry
72 {
73 WordEntry WE; /* Embedded by value; GetNthWE() yields a pointer to this member */
74 int Count; /* The number of times the word occurs in the query */
75 u_char *Word; /* The word. */
76 u_char *Stem; /* [RPAP - Jan 97: Stem Index Change] - The stemmed non-cf version of Word (NOTE(review): "non-cf" presumably means non-case-folded -- confirm) */
77 int require_match; /* [RJM 07/97: Ranked Required Terms] 0=optional match, 1=must match */
78 }
79TermEntry;
80
/* TermList: a header of two ints followed by an array of TermEntry.
   NOTE(review): TE is declared with a single element; this looks like the
   pre-C99 trailing-array idiom where the structure is over-allocated to hold
   more entries (see MakeTermList) -- confirm against the implementation
   before changing the declaration, since sizeof(TermList) includes one
   TermEntry. */
81typedef struct TermList
82 {
83 int list_size; /* NOTE(review): presumably the number of TE slots allocated -- confirm */
84 int num; /* NOTE(review): presumably the number of TE slots in use -- confirm */
85 TermEntry TE[1]; /* Term entries; elements are reached by index, e.g. via GetNthWE() */
86 }
87TermList;
88
/* GetNthWE(term_list, n): yields a pointer to the WordEntry (WE member) of
   entry n in term_list->TE.  term_list must be a pointer to a TermList (it is
   dereferenced with ->).  Each argument is evaluated once; no bounds check is
   made on n. */
89#define GetNthWE(term_list, n) (&((term_list)->TE[(n)].WE))
90
91/* --- prototypes --- */
/* Operations on TermList / TermEntry / WordEntry.  Only the signatures are
   visible in this header; the notes below are limited to what they show.

   - ConvertTermsToString takes an output buffer `str` with no size parameter.
     NOTE(review): the caller presumably must supply a large enough buffer --
     confirm the required size in the implementation.
   - AddTermEntry, AddTerm, ResetTermList and FreeTermList take the list as
     TermList **.  NOTE(review): presumably so they can replace or clear the
     caller's pointer -- confirm.
   - AddTerm's parameters Word, Count, word_num, count, doc_count, invf_ptr and
     invf_len share their names with fields of TermEntry and WordEntry.
   - NOTE(review): the meaning of the int return values of AddTermEntry and
     AddTerm is not documented here -- confirm.
   - The Print* functions take the FILE * to write to.
   - MakeTermList returns a TermList *.  NOTE(review): `n` is presumably the
     number of entries to allocate room for -- confirm. */
92void ConvertTermsToString (TermList * query_term_list, char *str);
93int AddTermEntry (TermList ** query_term_list, TermEntry * te);
94int AddTerm (TermList ** query_term_list, u_char * Word, int Count, int word_num,
95 u_long count, u_long doc_count, u_long invf_ptr, u_long invf_len, /* [RPAP - Feb 97: Term Frequency] */
96 int stemmer_num);
97void ResetTermList (TermList ** tl);
98void FreeTermList (TermList ** the_tl);
99void PrintWordEntry (WordEntry * we, FILE * file);
100void PrintTermEntry (TermEntry * te, FILE * file);
101void PrintTermList (TermList * tl, FILE * file);
102TermList *MakeTermList (int n);
103
104
105#endif
Note: See TracBrowser for help on using the repository browser.