source: main/tags/2.80/indexers/mg/src/text/mgstemidxlist.c@ 24541

Last change on this file since 24541 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.7 KB
Line 
1/**************************************************************************
2 *
3 * mgstemidxlist.c -- Text dumper for the stem indexes
4 * Copyright (C) 1997 Ross Peeters
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "sysfuncs.h"
23
24#include "messages.h"
25#include "memlib.h"
26#include "local_strings.h"
27#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
28
29#include "mg_files.h"
30#include "mg.h"
31#include "invf.h"
32#include "words.h"
33#include "backend.h"
34
35void
36read_3_in_4 (FILE * idbi)
37{
38 unsigned long i;
39 stemmed_idx *si;
40 u_char *buffer;
41 int block = 0;
42
43 if (!(si = Xmalloc (sizeof (stemmed_idx))))
44 {
45 return;
46 }
47
48 si->MemForStemIdx = 0;
49
50 fread (&(si->sih), sizeof (si->sih), 1, idbi);
51
52 /* [RPAP - Jan 97: Endian Ordering] */
53 NTOHUL(si->sih.lookback);
54 NTOHUL(si->sih.block_size);
55 NTOHUL(si->sih.num_blocks);
56 NTOHUL(si->sih.blocks_start);
57 NTOHUL(si->sih.index_chars);
58 NTOHUL(si->sih.num_of_words);
59
60 if (!(buffer = Xmalloc (si->sih.index_chars)))
61 {
62 Xfree (si);
63 return;
64 };
65 si->MemForStemIdx += si->sih.index_chars;
66
67 if (!(si->index = Xmalloc (si->sih.num_blocks * sizeof (*si->index))))
68 {
69 Xfree (si);
70 Xfree (buffer);
71 return;
72 };
73 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->index);
74
75 if (!(si->pos = Xmalloc (si->sih.num_blocks * sizeof (*si->pos))))
76 {
77 Xfree (si->index);
78 Xfree (si);
79 Xfree (buffer);
80 return;
81 };
82 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->pos);
83
84 if (!(si->buffer = Xmalloc (si->sih.block_size * sizeof (*si->buffer))))
85 {
86 Xfree (buffer);
87 Xfree (si->index);
88 Xfree (si->buffer);
89 Xfree (si);
90 return;
91 };
92 si->MemForStemIdx += si->sih.block_size * sizeof (*si->buffer);
93
94 si->active = -1;
95
96 for (i = 0; i < si->sih.num_blocks; i++)
97 {
98 register u_char len;
99 si->index[i] = buffer;
100 len = fgetc (idbi);
101 *buffer++ = len;
102 fread (buffer, sizeof (u_char), len, idbi);
103 buffer += len;
104 fread (&si->pos[i], sizeof (*si->pos), 1, idbi);
105 NTOHUL(si->pos[i]); /* [RPAP - Jan 97: Endian Ordering] */
106 }
107
108 printf ("# lookback = %lu\n", si->sih.lookback);
109 printf ("# block_size = %lu\n", si->sih.block_size);
110 printf ("# num_blocks = %lu\n", si->sih.num_blocks);
111 printf ("# blocks_start = %lu\n", si->sih.blocks_start);
112 printf ("# index_chars = %lu\n", si->sih.index_chars);
113 printf ("# num_of_words = %lu\n", si->sih.num_of_words);
114
115 block = 0;
116 while (block < si->sih.num_blocks)
117 {
118 unsigned long *first_word;
119 unsigned short *num_words;
120 unsigned short *index;
121 long res;
122 u_char *base;
123 int num_indexes;
124
125 /* Read in next block */
126 fseek (idbi, si->pos[block] + si->sih.blocks_start, 0);
127 fread (si->buffer, si->sih.block_size, sizeof (u_char), idbi);
128 si->active = si->pos[block];
129
130 first_word = (unsigned long *) (si->buffer);
131 NTOHUL(*first_word); /* [RPAP - Jan 97: Endian Ordering] */
132 num_words = (unsigned short *) (first_word + 1);
133 NTOHUS(*num_words); /* [RPAP - Jan 97: Endian Ordering] */
134 index = num_words + 1;
135 num_indexes = ((*num_words - 1) / si->sih.lookback) + 1;
136
137 /* [RPAP - Jan 97: Endian Ordering] */
138 for (i = 0; i < num_indexes; i++)
139 NTOHUS(index[i]);
140
141 base = (u_char *) (index + num_indexes);
142 base += index[0];
143
144 printf ("\n# block = %d\n", block);
145 printf ("# first_word = %lu\n", *first_word);
146 printf ("# num_words = %u\n", *num_words);
147
148 res = 0;
149 while (res < *num_words)
150 {
151 unsigned copy, suff;
152 u_char prev[MAXSTEMLEN + 1];
153 unsigned int num_entries, num_cases, blk;
154 unsigned short blk_index, offset;
155
156 /* Read word entry */
157 copy = *base++;
158 suff = *base++;
159 bcopy ((char *) base, (char *) (prev + copy + 1), suff);
160 *prev = copy + suff;
161 base += suff;
162 bcopy ((char *) base, (char *) &num_entries, sizeof (num_entries));
163 base += sizeof (num_entries);
164 NTOHUI(num_entries); /* [RPAP - Jan 97: Endian Ordering] */
165 printf ("%u \"%s\"\n", num_entries, word2str (prev));
166
167 /* For all the PosEntries for the word... */
168 for (i = 0; i < num_entries; i++)
169 {
170 bcopy ((char *) base, (char *) &num_cases, sizeof (num_cases));
171 NTOHUI(num_cases); /* [RPAP - Jan 97: Endian Ordering] */
172 base += sizeof (num_cases);
173 bcopy ((char *) base, (char *) &blk, sizeof (blk));
174 NTOHUI(blk); /* [RPAP - Jan 97: Endian Ordering] */
175 base += sizeof (blk);
176 bcopy ((char *) base, (char *) &blk_index, sizeof (blk_index));
177 NTOHUS(blk_index); /* [RPAP - Jan 97: Endian Ordering] */
178 base += sizeof (blk_index);
179 bcopy ((char *) base, (char *) &offset, sizeof (offset));
180 NTOHUS(offset); /* [RPAP - Jan 97: Endian Ordering] */
181 base += sizeof (offset);
182
183 printf (" -> %4u %4u %4u %4u\n", num_cases, blk, blk_index, offset);
184 }
185 res++;
186 }
187 block++;
188 }
189 fclose (idbi);
190}
191
192int
193main (int argc, char **argv)
194{
195 FILE *idbi;
196 char *filename = "";
197 int ch;
198 int stem_method = 0;
199
200 msg_prefix = argv[0];
201 opterr = 0;
202 while ((ch = getopt (argc, argv, "f:d:hs:")) != -1)
203 switch (ch)
204 {
205 case 'f': /* input file */
206 filename = optarg;
207 break;
208 case 'd':
209 set_basepath (optarg);
210 break;
211 case 's':
212 stem_method = atoi (optarg);
213 break;
214 case 'h':
215 case '?':
216 fprintf (stderr, "usage: %s [-d data directory] [-h] -s 1|2|3 -f name\n", argv[0]);
217 exit (1);
218 }
219
220 /* Open required files */
221 switch (stem_method)
222 {
223 case (1):
224 idbi = open_file (filename, INVF_DICT_BLOCKED_1_SUFFIX, "rb", MAGIC_STEM_1,
225 MG_ABORT);
226 break;
227 case (2):
228 idbi = open_file (filename, INVF_DICT_BLOCKED_2_SUFFIX, "rb", MAGIC_STEM_2,
229 MG_ABORT);
230 break;
231 case (3):
232 idbi = open_file (filename, INVF_DICT_BLOCKED_3_SUFFIX, "rb", MAGIC_STEM_3,
233 MG_ABORT);
234 break;
235 default:
236 FatalError (1, "Stem method must be 1, 2 or 3\n");
237 }
238
239 if (!idbi)
240 FatalError (1, "Could NOT open file");
241
242 read_3_in_4 (idbi);
243
244 return 0;
245}
Note: See TracBrowser for help on using the repository browser.