source: trunk/gsdl/packages/mg-1.3d/src/text/mgdictlist.c@ 13

Last change on this file since 13 was 13, checked in by rjmcnab, 26 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1/**************************************************************************
2 *
3 * mgdictlist.c -- Program to list a dictionary
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: mgdictlist.c 13 1998-11-17 09:36:00Z rjmcnab $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "messages.h"
27#include "memlib.h"
28#include "local_strings.h"
29#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
30
31#include "mg_files.h"
32#include "text.h"
33#include "invf.h"
34#include "locallib.h"
35#include "words.h"
36
37/*
38 $Log$
39 Revision 1.1 1998/11/17 09:35:24 rjmcnab
40 *** empty log message ***
41
42 * Revision 1.4 1994/11/29 00:32:07 tes
43 * Committing the new merged files and changes.
44 *
45 * Revision 1.3 1994/10/20 03:57:01 tes
46 * I have rewritten the boolean query optimiser and abstracted out the
47 * components of the boolean query.
48 *
49 * Revision 1.2 1994/09/20 04:41:56 tes
50 * For version 1.1
51 *
52 */
53
54static char *RCSID = "$Id: mgdictlist.c 13 1998-11-17 09:36:00Z rjmcnab $";
55
56
57int quick = 0;
58int no_of_words[2];
59u_long maxcodelen[2];
60
61char *dictname = "";
62
63
64
65
66void
67DumpStemDict (FILE * f)
68{
69 struct invf_dict_header idh;
70 int i;
71 u_char prev[MAXSTEMLEN + 1];
72
73 fread (&idh, sizeof (idh), 1, f);
74
75 /* [RPAP - Jan 97: Endian Ordering] */
76 NTOHUL(idh.lookback);
77 NTOHUL(idh.dict_size);
78 NTOHUL(idh.total_bytes);
79 NTOHUL(idh.index_string_bytes);
80 NTOHD(idh.input_bytes); /* [RJM 07/97: 4G limit] */
81 NTOHUL(idh.num_of_docs);
82 NTOHUL(idh.static_num_of_docs);
83 NTOHUL(idh.num_of_words);
84 NTOHUL(idh.stem_method);
85
86 if (quick)
87 printf ("%ld\n", idh.dict_size);
88 else
89 {
90 printf ("# lookback = %lu\n", idh.lookback);
91 printf ("# dict size = %lu\n", idh.dict_size);
92 printf ("# total bytes = %lu\n", idh.total_bytes);
93 printf ("# index string bytes = %lu\n", idh.index_string_bytes);
94 printf ("# input bytes = %.0f\n", idh.input_bytes); /* [RJM 07/97: 4G limit] */
95 printf ("# num of docs = %lu\n", idh.num_of_docs);
96 printf ("# static num of docs = %lu\n", idh.static_num_of_docs);
97 printf ("# num of words = %lu\n", idh.num_of_words);
98 printf ("#\n");
99 }
100
101 for (i = 0; i < idh.dict_size; i++)
102 {
103 register unsigned long copy, suff;
104 unsigned long wcnt, fcnt;
105
106 /* build a new word on top of prev */
107 copy = getc (f);
108 suff = getc (f);
109 *prev = copy + suff;
110 fread (prev + copy + 1, sizeof (u_char), suff, f);
111
112 /* read other data, but no need to store it */
113 fread (&fcnt, sizeof (fcnt), 1, f);
114 fread (&wcnt, sizeof (wcnt), 1, f);
115
116 /* [RPAP - Jan 97: Endian Ordering] */
117 NTOHUL(fcnt);
118 NTOHUL(wcnt);
119
120 if (!quick)
121 {
122 printf ("%d: %8ld ", i, wcnt);
123 printf ("/ %5ld ", fcnt);
124 printf ("%2d %2ld\t\"", *prev, copy);
125 }
126 printf ("%s", word2str (prev));
127 if (quick)
128 printf (" %ld %ld\n", wcnt, fcnt);
129 else
130 {
131 putchar ('"');
132 putchar ('\n');
133 }
134 }
135}
136
137
138
139
140void
141ReadInWords (FILE * f)
142{
143 comp_frags_header cfh;
144 u_long *codes;
145 u_char prev[MAXSTEMLEN + 1];
146 int i;
147
148 if (Read_cfh (f, &cfh, NULL, NULL) == -1)
149 FatalError (1, "Unable to read in the dictionary");
150
151 printf ("#\n");
152 printf ("# max code len = %u\n", cfh.hd.maxcodelen);
153 printf ("# total bytes = %lu\n", cfh.uncompressed_size);
154 printf ("#\n");
155
156 if (!(codes = Generate_Huffman_Codes (&cfh.hd, NULL)))
157 FatalError (1, "no memory for huffman codes\n");
158
159 for (i = 0; i < cfh.hd.num_codes; i++)
160 {
161 register int val, copy, j, k;
162 char code[33];
163 val = fgetc (f);
164 copy = (val >> 4) & 0xf;
165 val &= 0xf;
166
167 fread (prev + copy + 1, sizeof (u_char), val, f);
168 *prev = val + copy;
169
170 for (k = 0, j = cfh.hd.clens[i] - 1; j >= 0; j--, k++)
171 code[k] = '0' + ((codes[i] >> j) & 1);
172 code[k] = '\0';
173
174 printf ("%d: %2d : %*s : \"%s\"\n", i, cfh.hd.clens[i],
175 cfh.hd.maxcodelen, code, word2str (prev));
176 }
177 Xfree (codes);
178 Xfree (cfh.hd.clens);
179}
180
181
182void
183ReadCharHuffman (FILE * f, char *title)
184{
185 int i;
186 huff_data hd;
187 u_long *codes;
188
189 if (Read_Huffman_Data (f, &hd, NULL, NULL) == -1)
190 FatalError (1, "Unable to read huffman data");
191
192 if (!(codes = Generate_Huffman_Codes (&hd, NULL)))
193 FatalError (1, "no memory for huffman codes\n");
194
195 printf ("#\n# %s\n#\n", title);
196 for (i = 0; i < hd.num_codes; i++)
197 if (hd.clens[i])
198 {
199 int j, k;
200 char code[33];
201 for (k = 0, j = hd.clens[i] - 1; j >= 0; j--, k++)
202 code[k] = '0' + ((codes[i] >> j) & 1);
203 code[k] = '\0';
204 printf ("%2d : %*s : \"%s\"\n", hd.clens[i],
205 hd.maxcodelen, code, char2str (i));
206 }
207 Xfree (codes);
208 Xfree (hd.clens);
209}
210
211
212void
213ReadLenHuffman (FILE * f, char *title)
214{
215 int i;
216 huff_data hd;
217 u_long *codes;
218
219 if (Read_Huffman_Data (f, &hd, NULL, NULL) == -1)
220 FatalError (1, "Unable to read huffman data");
221
222 if (!(codes = Generate_Huffman_Codes (&hd, NULL)))
223 FatalError (1, "no memory for huffman codes\n");
224
225 printf ("#\n# %s\n#\n", title);
226 for (i = 0; i < hd.num_codes; i++)
227 if (hd.clens[i])
228 {
229 int j, k;
230 char code[33];
231 for (k = 0, j = hd.clens[i] - 1; j >= 0; j--, k++)
232 code[k] = '0' + ((codes[i] >> j) & 1);
233 code[k] = '\0';
234 printf ("%2d : %*s : %d\n", hd.clens[i],
235 hd.maxcodelen, code, i);
236 }
237 Xfree (codes);
238 Xfree (hd.clens);
239}
240
241
242
243
244
245void
246DumpTextDict (FILE * f)
247{
248 struct compression_dict_header cdh;
249 int which;
250
251 if (Read_cdh (f, &cdh, NULL, NULL) == -1)
252 FatalError (1, "Unable to read dictionary header");
253 switch (cdh.dict_type)
254 {
255 case MG_COMPLETE_DICTIONARY:
256 printf ("# COMPLETE DICTIONARY\n");
257 break;
258 case MG_PARTIAL_DICTIONARY:
259 printf ("# PARTIAL DICTIONARY\n");
260 break;
261 case MG_SEED_DICTIONARY:
262 printf ("# SEED DICTIONARY\n");
263 break;
264 }
265 printf ("# num words = %lu\n", cdh.num_words[1]);
266 printf ("# num word chars = %lu\n", cdh.num_word_chars[1]);
267 printf ("# num non-words = %lu\n", cdh.num_words[0]);
268 printf ("# num non-word chars = %lu\n", cdh.num_word_chars[0]);
269 printf ("# lookback = %lu\n", cdh.lookback);
270
271 for (which = 0; which < 2; which++)
272 switch (cdh.dict_type)
273 {
274 case MG_COMPLETE_DICTIONARY:
275 {
276 ReadInWords (f);
277 }
278 break;
279 case MG_PARTIAL_DICTIONARY:
280 {
281 if (cdh.num_words[which])
282 ReadInWords (f);
283
284 ReadCharHuffman (f, "Characters");
285 ReadLenHuffman (f, "Lengths");
286 }
287 break;
288 case MG_SEED_DICTIONARY:
289 {
290 if (cdh.num_words[which])
291 ReadInWords (f);
292
293 ReadCharHuffman (f, "Characters");
294 ReadLenHuffman (f, "Lengths");
295 }
296 break;
297 }
298}
299
300
301
302
303void
304DumpStatsDict (FILE * f)
305{
306 int i;
307 compression_stats_header csh;
308
309 fread (&csh, sizeof (csh), 1, f);
310
311 for (i = 0; i < 2; i++)
312 {
313 int j;
314 frags_stats_header fsh;
315
316 fread (&fsh, sizeof (fsh), 1, f);
317
318 /* [RPAP - Jan 97: Endian Ordering] */
319 NTOHUL(fsh.num_frags);
320 NTOHUL(fsh.mem_for_frags);
321
322 if (!quick)
323 printf ("#\n# num %9s = %lu\n#\n", i ? "words" : "non-words",
324 fsh.num_frags);
325
326 for (j = 0; j < fsh.num_frags; j++)
327 {
328 u_char Word[16];
329 u_long freq, occur_num;
330
331 fread (&freq, sizeof (freq), 1, f);
332 fread (&occur_num, sizeof (occur_num), 1, f);
333
334 /* [RPAP - Jan 97: Endian Ordering] */
335 NTOHUL(freq);
336 NTOHUL(occur_num);
337
338 Word[0] = fgetc (f);
339 fread (Word + 1, Word[0], 1, f);
340 printf ("%d: %7ld : %7ld : \"%s\"\n", j, freq,
341 occur_num, word2str (Word));
342 }
343 }
344}
345
346
347void
348main (int argc, char **argv)
349{
350 FILE *fp;
351 unsigned long magic = 0;
352
353 if (argc < 2)
354 FatalError (1, "A file name must be specified");
355 dictname = argv[1];
356 if (strcmp (dictname, "-q") == 0)
357 {
358 quick = 1;
359 if (argc < 3)
360 FatalError (1, "A file name must be specified");
361 dictname = argv[2];
362 }
363 if (!(fp = fopen (dictname, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
364 FatalError (1, "Unable to open \"%s\"", dictname);
365
366 fread (&magic, sizeof (magic), 1, fp);
367
368 NTOHUL(magic); /* [RPAP - Jan 97: Endian Ordering] */
369
370 switch (magic)
371 {
372 case MAGIC_STEM_BUILD:
373 if (!quick)
374 printf ("# Contents of STEM file \"%s\"\n#\n", dictname);
375 DumpStemDict (fp);
376 break;
377 case MAGIC_DICT:
378 if (!quick)
379 printf ("# Contents of DICT file \"%s\"\n#\n", dictname);
380 DumpTextDict (fp);
381 break;
382 case MAGIC_STATS_DICT:
383 if (!quick)
384 printf ("# Contents of STATS file \"%s\"\n#\n", dictname);
385 DumpStatsDict (fp);
386 break;
387 default:
388 FatalError (1, "Bad magic number. \"%s\" cannot be dumped", dictname);
389 }
390 fclose (fp);
391 exit (0);
392}
Note: See TracBrowser for help on using the repository browser.