source: main/tags/2.80/indexers/mg/src/text/mg_fast_comp_dict.c@ 24541

Last change on this file since 24541 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/**************************************************************************
2 *
3 * mg_fast_comp_dict.c -- Program to generate a fast compression dictionary
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: mg_fast_comp_dict.c 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24
25/*
26 $Log$
27 Revision 1.1 2003/02/20 21:18:24 mdewsnip
28 Addition of MG package for search and retrieval
29
30 Revision 1.1 1999/08/10 21:18:06 sjboddie
31 renamed mg-1.3d directory mg
32
33 Revision 1.2 1998/11/25 07:55:44 rjmcnab
34
35 Modified mg so that you can specify the stemmer you want
36 to use via a command line option. You specify it to
37 mg_passes during the build process. The number of the
38 stemmer that you used is stored within the inverted
39 dictionary header and the stemmed dictionary header so
40 the correct stemmer is used in later stages of building
41 and querying.
42
43 Revision 1.1 1998/11/17 09:34:57 rjmcnab
44 *** empty log message ***
45
46 * Revision 1.3 1994/10/20 03:56:55 tes
47 * I have rewritten the boolean query optimiser and abstracted out the
48 * components of the boolean query.
49 *
50 * Revision 1.2 1994/09/20 04:41:47 tes
51 * For version 1.1
52 *
53 */
54
/* Revision-control identification string for this source file. */
static char *RCSID =
  "$Id: mg_fast_comp_dict.c 3745 2003-02-20 21:20:24Z mdewsnip $";
56
57#include "sysfuncs.h"
58
59#include "filestats.h"
60#include "huffman.h"
61#include "timing.h"
62#include "messages.h"
63#include "memlib.h"
64#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
65
66#include "local_strings.h"
67#include "mg.h"
68#include "text.h"
69#include "invf.h"
70#include "lists.h"
71#include "backend.h"
72#include "mg_files.h"
73#include "locallib.h"
74#include "words.h"
75
76
77
/* Round the size P up to the next multiple of sizeof(T).  The mask
   arithmetic only gives that result when sizeof(T) is a power of two. */
#define ALIGN_SIZE(p,t) (((p) + (sizeof(t)-1)) & (~(sizeof(t)-1)))

/* Index of the pointer-sized word at address P, counted from address BASE. */
#define WORDNO(p, base) ((((char*)(p))-((char*)(base)))/sizeof(u_char*))

/* Set the bit belonging to the word at address P in the file-scope `fixup'
   bitmap; word numbers are taken relative to the file-scope `buffer'. */
#define FIXUP(p) (fixup[WORDNO(p,buffer)/8] |= (1<<(WORDNO(p, buffer) & 7)))

/* Non-zero when the bit for the word at address P is set in `fixup'. */
#define IS_FIXUP(p) ((fixup[WORDNO(p,buffer)/8] & (1<<(WORDNO(p, buffer) & 7))) != 0)

/* Flag each of the MAX_HUFFCODE_LEN+1 entries of the array VALS.  The
   argument is parenthesised and the loop index has a reserved-looking name
   so that an argument expression mentioning `i' is not captured. */
#define FIXUP_VALS(vals) do { \
    int fixup_vals_idx_; \
    for (fixup_vals_idx_ = 0; fixup_vals_idx_ < MAX_HUFFCODE_LEN+1; \
         fixup_vals_idx_++) \
      FIXUP(&(vals)[fixup_vals_idx_]); \
  } while(0)
90
91
92
93static u_long mem_for_comp_dict (char *filename);
94static void load_comp_dict (char *filename);
95static void save_fast_dict (char *filename);
96static void unfixup_buffer (void);
97
98
99static void *buffer;
100static void *cur;
101static u_char *fixup;
102static u_long mem, fixup_mem;
103
104int main (int argc, char **argv)
105{
106 ProgTime StartTime;
107 int ch;
108 char *filename = "";
109 opterr = 0;
110 msg_prefix = argv[0];
111 while ((ch = getopt (argc, argv, "f:d:h")) != -1)
112 switch (ch)
113 {
114 case 'f': /* input file */
115 filename = optarg;
116 break;
117 case 'd':
118 set_basepath (optarg);
119 break;
120 case 'h':
121 case '?':
122 fprintf (stderr, "usage: %s [-f input_file] [-d data directory] [-h]\n",
123 argv[0]);
124 exit (1);
125 }
126 GetTime (&StartTime);
127
128 mem = mem_for_comp_dict (filename);
129
130 fixup_mem = (ALIGN_SIZE (mem, u_char *) / sizeof (u_char *) + 7) / 8;
131
132 cur = buffer = Xmalloc (mem);
133 bzero (buffer, mem);
134 fixup = Xmalloc (fixup_mem);
135 bzero (fixup, fixup_mem);
136
137 Message ("Estimated memory for fast_dict %u", mem);
138 Message ("Estimated memory for fixups %u", fixup_mem);
139
140 load_comp_dict (filename);
141
142 Message ("Actual memory for fast_dict %u", (char *) cur - (char *) buffer);
143
144 if ((u_long) cur > (u_long) buffer + mem)
145 FatalError (1, "The buffer was not big enough for the dictionary");
146
147 {
148 /* [RPAP - Jan 97: Endian Ordering] */
149 compression_dict *cd = (compression_dict *) buffer;
150 int i, which;
151
152 /* cfh */
153 for (which = 0; which <= 1; which++)
154 {
155 int j;
156
157 HTONSI(cd->cfh[which]->hd.num_codes);
158 HTONSI(cd->cfh[which]->hd.mincodelen);
159 HTONSI(cd->cfh[which]->hd.maxcodelen);
160 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
161 {
162 HTONSI(cd->cfh[which]->hd.lencount[j]);
163 HTONUL(cd->cfh[which]->hd.min_code[j]);
164 }
165 HTONUL(cd->cfh[which]->uncompressed_size);
166 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
167 HTONUL(cd->cfh[which]->huff_words_size[j]);
168 }
169 HTONUL(cd->MemForCompDict);
170 /* ad */
171 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
172 cd->cdh.novel_method == MG_NOVEL_DELTA ||
173 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
174 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
175 for (which = 0; which <= 1; which++)
176 {
177 int j;
178
179 HTONUL(cd->ad->afh[which].num_frags);
180 HTONUL(cd->ad->afh[which].mem_for_frags);
181 for (j = 0; j < 33; j++)
182 {
183 HTONSI(cd->ad->blk_start[which][j]);
184 HTONSI(cd->ad->blk_end[which][j]);
185 }
186 }
187 /* cdh */
188 HTONUL(cd->cdh.dict_type);
189 HTONUL(cd->cdh.novel_method);
190 for (i = 0; i < TEXT_PARAMS; i++)
191 HTONUL(cd->cdh.params[which]);
192 HTONUL(cd->cdh.num_words[0]);
193 HTONUL(cd->cdh.num_words[1]);
194 HTONUL(cd->cdh.num_word_chars[0]);
195 HTONUL(cd->cdh.num_word_chars[1]);
196 HTONUL(cd->cdh.lookback);
197 HTONSI(cd->fast_loaded);
198 }
199
200 unfixup_buffer ();
201
202 save_fast_dict (filename);
203
204 Message ("%s", ElapsedTime (&StartTime, NULL));
205 return 0;
206}
207
208
209
210static void
211unfixup_buffer ()
212{
213 u_long *p;
214 for (p = buffer; (u_long) p < (u_long) cur; p++)
215 {
216 if (IS_FIXUP (p))
217 *p = *p - (u_long) buffer;
218 }
219}
220
221
222
223
224static u_long
225mem_for_aux_dict (compression_dict_header * cdh, char *filename)
226{
227 int i;
228 u_long mem = sizeof (auxiliary_dict);
229 FILE *aux;
230
231 aux = open_file (filename, TEXT_DICT_AUX_SUFFIX, "rb",
232 MAGIC_AUX_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
233
234 for (i = 0; i <= 1; i++)
235 {
236 aux_frags_header afh;
237 fread (&afh, sizeof (afh), 1, aux);
238 NTOHUL(afh.num_frags); /* [RPAP - Jan 97: Endian Ordering] */
239 NTOHUL(afh.mem_for_frags); /* [RPAP - Jan 97: Endian Ordering] */
240 mem += afh.num_frags * sizeof (u_char *);
241 mem = ALIGN_SIZE (mem + afh.mem_for_frags, u_char *);
242 fseek (aux, afh.mem_for_frags, SEEK_CUR);
243 }
244 fclose (aux);
245
246 return mem;
247}
248
249
250
251static u_long
252mem_for_words (FILE * dict, compression_dict_header * cdh,
253 comp_frags_header * cfh)
254{
255 u_long mem = 0;
256 u_long i, lookback;
257 int ptrs_reqd = 0;
258 int mem_reqd = 0;
259
260 lookback = cdh->lookback;
261
262 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
263 {
264 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
265 mem_reqd += cfh->huff_words_size[i];
266 }
267
268 mem += ptrs_reqd * sizeof (u_char *);
269 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (u_char **);
270 mem += mem_reqd;
271
272 for (i = 0; i < cfh->hd.num_codes; i++)
273 {
274 register int val;
275 for (val = getc (dict) & 0xf; val; val--)
276 getc (dict);
277 }
278 return ALIGN_SIZE (mem, u_char *);
279}
280
281
282
283
284static u_long
285mem_for_comp_dict (char *filename)
286{
287 u_long mem = sizeof (compression_dict);
288 compression_dict_header cdh;
289 comp_frags_header cfh;
290 FILE *dict;
291 int which;
292 int i; /* [RPAP - Jan 97: Endian Ordering] */
293
294 dict = open_file (filename, TEXT_DICT_SUFFIX, "rb",
295 MAGIC_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
296
297 fread (&cdh, sizeof (cdh), 1, dict);
298 /* [RPAP - Jan 97: Endian Ordering] */
299 NTOHUL(cdh.dict_type);
300 NTOHUL(cdh.novel_method);
301 for (i = 0; i < TEXT_PARAMS; i++)
302 NTOHUL(cdh.params[i]);
303 NTOHUL(cdh.num_words[0]);
304 NTOHUL(cdh.num_words[1]);
305 NTOHUL(cdh.num_word_chars[0]);
306 NTOHUL(cdh.num_word_chars[1]);
307 NTOHUL(cdh.lookback);
308
309 for (which = 0; which < 2; which++)
310 switch (cdh.dict_type)
311 {
312 case MG_COMPLETE_DICTIONARY:
313 {
314 mem += sizeof (comp_frags_header);
315 Read_cfh (dict, &cfh, NULL, NULL);
316
317 /* Don't need to count the space for the clens of the huffman data */
318
319 mem += mem_for_words (dict, &cdh, &cfh);
320 if (cfh.hd.clens)
321 Xfree (cfh.hd.clens);
322 }
323 break;
324 case MG_PARTIAL_DICTIONARY:
325 {
326 huff_data hd;
327 if (cdh.num_words[which])
328 {
329 mem += sizeof (comp_frags_header);
330 Read_cfh (dict, &cfh, NULL, NULL);
331
332 /* Don't need to count the space for the clens of the
333 huffman data */
334
335 mem += mem_for_words (dict, &cdh, &cfh);
336 if (cfh.hd.clens)
337 Xfree (cfh.hd.clens);
338
339 }
340
341 mem += sizeof (hd);
342 Read_Huffman_Data (dict, &hd, NULL, NULL);
343 if (hd.clens)
344 Xfree (hd.clens);
345 mem += hd.num_codes * sizeof (unsigned long);
346 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
347
348 mem += sizeof (hd);
349 Read_Huffman_Data (dict, &hd, NULL, NULL);
350 if (hd.clens)
351 Xfree (hd.clens);
352 mem += hd.num_codes * sizeof (unsigned long);
353 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
354 }
355 break;
356 case MG_SEED_DICTIONARY:
357 {
358 huff_data hd;
359 if (cdh.num_words[which])
360 {
361 mem += sizeof (comp_frags_header);
362 Read_cfh (dict, &cfh, NULL, NULL);
363
364 /* Don't need to count the space for the clens of the
365 huffman data */
366
367 mem += mem_for_words (dict, &cdh, &cfh);
368 if (cfh.hd.clens)
369 Xfree (cfh.hd.clens);
370
371 }
372 switch (cdh.novel_method)
373 {
374 case MG_NOVEL_HUFFMAN_CHARS:
375 mem += sizeof (hd);
376 Read_Huffman_Data (dict, &hd, NULL, NULL);
377 if (hd.clens)
378 Xfree (hd.clens);
379 mem += hd.num_codes * sizeof (unsigned long);
380 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
381
382 mem += sizeof (hd);
383 Read_Huffman_Data (dict, &hd, NULL, NULL);
384 if (hd.clens)
385 Xfree (hd.clens);
386 mem += hd.num_codes * sizeof (unsigned long);
387 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
388 break;
389 case MG_NOVEL_BINARY:
390 break;
391 case MG_NOVEL_DELTA:
392 break;
393 case MG_NOVEL_HYBRID:
394 break;
395 case MG_NOVEL_HYBRID_MTF:
396 break;
397 }
398 break;
399 }
400 }
401 fclose (dict);
402
403 if (cdh.novel_method == MG_NOVEL_BINARY ||
404 cdh.novel_method == MG_NOVEL_DELTA ||
405 cdh.novel_method == MG_NOVEL_HYBRID ||
406 cdh.novel_method == MG_NOVEL_HYBRID_MTF)
407 mem += mem_for_aux_dict (&cdh, filename);
408
409 return ALIGN_SIZE (mem, u_char *);
410}
411
412
413
414void *
415getmem (u_long size, int align)
416{
417 void *res;
418 cur = (void *) (((u_long) cur + (align - 1)) & (~(align - 1)));
419 res = cur;
420 cur = (char *) cur + size;
421 if ((long) cur > (long) buffer + mem)
422 FatalError (1, "The buffer was not big enough for the dictionary");
423 return res;
424}
425
426
427
428
429
430
431
432
433
434
435
436static auxiliary_dict *
437LoadAuxDict (compression_dict_header * cdh,
438 char *filename)
439{
440 auxiliary_dict *ad;
441 int i;
442 FILE *aux;
443
444 aux = open_file (filename, TEXT_DICT_AUX_SUFFIX, "rb",
445 MAGIC_AUX_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
446
447 ad = getmem (sizeof (auxiliary_dict), sizeof (u_char *));
448
449 for (i = 0; i <= 1; i++)
450 {
451 int j;
452 u_char *pos;
453
454 fread (&ad->afh[i], sizeof (aux_frags_header), 1, aux);
455
456 /* [RPAP - Jan 97: Endian Ordering] */
457 NTOHUL(ad->afh[i].num_frags);
458 NTOHUL(ad->afh[i].mem_for_frags);
459
460 ad->word_data[i] = getmem (ad->afh[i].mem_for_frags, 1);
461 FIXUP (&ad->word_data[i]);
462
463 ad->words[i] = getmem (ad->afh[i].num_frags * sizeof (u_char *),
464 sizeof (u_char *));
465 FIXUP (&ad->words[i]);
466
467 fread (ad->word_data[i], ad->afh[i].mem_for_frags, sizeof (u_char), aux);
468
469 pos = ad->word_data[i];
470 for (j = 0; j < ad->afh[i].num_frags; j++)
471 {
472 ad->words[i][j] = pos;
473 FIXUP (&ad->words[i][j]);
474 pos += *pos + 1;
475 }
476 if (cdh->novel_method == MG_NOVEL_HYBRID ||
477 cdh->novel_method == MG_NOVEL_HYBRID_MTF)
478 {
479 int num;
480 num = 1;
481 ad->blk_start[i][0] = 0;
482 ad->blk_end[i][0] = cdh->num_words[i] - 1;
483 while (num < 33)
484 {
485 ad->blk_start[i][num] = ad->blk_end[i][num - 1] + 1;
486 ad->blk_end[i][num] = ad->blk_start[i][num] +
487 (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
488 num++;
489 }
490 }
491 }
492 fclose (aux);
493 return (ad);
494}
495
496
497
498
499
500static u_char ***
501ReadInWords (FILE * dict, compression_dict * cd,
502 comp_frags_header * cfh, u_char ** escape)
503{
504 int i, lookback;
505 int ptrs_reqd = 0;
506 int mem_reqd = 0;
507 int num_set[MAX_HUFFCODE_LEN + 1];
508 u_char *next_word[MAX_HUFFCODE_LEN + 1];
509 u_char **vals;
510 u_char ***values;
511 u_char word[MAXWORDLEN + 1];
512 u_char last_word[MAX_HUFFCODE_LEN + 1][MAXWORDLEN + 1];
513
514 lookback = cd->cdh.lookback;
515
516 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
517 {
518 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
519 mem_reqd += cfh->huff_words_size[i];
520 }
521
522 vals = getmem (ptrs_reqd * sizeof (*vals), sizeof (u_char *));
523
524 values = getmem ((MAX_HUFFCODE_LEN + 1) * sizeof (u_char **), sizeof (u_char **));
525
526 next_word[0] = getmem (mem_reqd, sizeof (char));
527
528 cd->MemForCompDict += ptrs_reqd * sizeof (*vals) +
529 (MAX_HUFFCODE_LEN + 1) * sizeof (u_char **) +
530 mem_reqd;
531
532 values[0] = vals;
533 FIXUP (&values[0]);
534 values[0][0] = next_word[0];
535 FIXUP (&values[0][0]);
536 for (i = 1; i <= cfh->hd.maxcodelen; i++)
537 {
538 int next_start = (values[i - 1] - vals) +
539 ((cfh->hd.lencount[i - 1] + ((1 << lookback) - 1)) >> lookback);
540 values[i] = &vals[next_start];
541 FIXUP (&values[i]);
542 next_word[i] = next_word[i - 1] + cfh->huff_words_size[i - 1];
543 values[i][0] = next_word[i];
544 FIXUP (&values[i][0]);
545 }
546
547 bzero ((char *) num_set, sizeof (num_set));
548
549 for (i = 0; i < cfh->hd.num_codes; i++)
550 {
551 register int val, copy;
552 register int len = cfh->hd.clens[i];
553 val = getc (dict);
554 copy = (val >> 4) & 0xf;
555 val &= 0xf;
556
557 fread (word + copy + 1, sizeof (u_char), val, dict);
558 *word = val + copy;
559
560 if ((num_set[len] & ((1 << lookback) - 1)) == 0)
561 {
562 values[len][num_set[len] >> lookback] = next_word[len];
563 FIXUP (&values[len][num_set[len] >> lookback]);
564 memcpy (next_word[len], word, *word + 1);
565 if (escape && i == cfh->hd.num_codes - 1)
566 {
567 *escape = next_word[len];
568 FIXUP (escape);
569 }
570 next_word[len] += *word + 1;
571 }
572 else
573 {
574 copy = prefixlen (last_word[len], word);
575 memcpy (next_word[len] + 1, word + copy + 1, *word - copy);
576 *next_word[len] = (copy << 4) + (*word - copy);
577 if (escape && i == cfh->hd.num_codes - 1)
578 {
579 *escape = next_word[len];
580 FIXUP (escape);
581 }
582 next_word[len] += (*word - copy) + 1;
583 }
584 memcpy (last_word[len], word, *word + 1);
585 num_set[len]++;
586 }
587 if (cfh->hd.clens)
588 Xfree (cfh->hd.clens);
589 cfh->hd.clens = NULL;
590 return values;
591}
592
593
594static unsigned long **
595Generate_Fast_Huffman_Vals (huff_data * data, u_long * mem)
596{
597 int i;
598 unsigned long *fcode[MAX_HUFFCODE_LEN + 1];
599 unsigned long **values;
600 unsigned long *vals;
601
602 if (!data)
603 return (NULL);
604 vals = getmem (data->num_codes * sizeof (*vals), sizeof (long *));
605 values = getmem ((MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *),
606 sizeof (long *));
607
608 bzero ((char *) values, (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *));
609
610 if (mem)
611 *mem += data->num_codes * sizeof (*vals) +
612 (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
613
614 fcode[0] = values[0] = &vals[0];
615 FIXUP (&values[0]);
616 for (i = 1; i <= data->maxcodelen; i++)
617 {
618 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
619 FIXUP (&values[i]);
620 }
621
622 for (i = 0; i < data->num_codes; i++)
623 if (data->clens[i])
624 *fcode[(int) (data->clens[i])]++ = i;
625 return (values);
626}
627
628
629
630static void
631load_comp_dict (char *filename)
632{
633 FILE *dict;
634 int which;
635 compression_dict *cd;
636
637 cd = getmem (sizeof (compression_dict), sizeof (u_char *));
638 cd->fast_loaded = 1;
639
640 dict = open_file (filename, TEXT_DICT_SUFFIX, "rb",
641 MAGIC_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
642
643 Read_cdh (dict, &cd->cdh, NULL, NULL);
644
645 for (which = 0; which < 2; which++)
646 switch (cd->cdh.dict_type)
647 {
648 case MG_COMPLETE_DICTIONARY:
649 {
650 cd->cfh[which] = getmem (sizeof (*cd->cfh[which]), sizeof (u_char *));
651 cd->MemForCompDict += sizeof (*cd->cfh[which]);
652 Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL);
653
654 cd->values[which] = ReadInWords (dict, cd, cd->cfh[which], NULL);
655 FIXUP (&cd->cfh[which]);
656 FIXUP (&cd->values[which]);
657 cd->escape[which] = NULL;
658 }
659 break;
660 case MG_PARTIAL_DICTIONARY:
661 {
662 huff_data *hd;
663 u_long **vals;
664 if (cd->cdh.num_words[which])
665 {
666 cd->cfh[which] = getmem (sizeof (*cd->cfh[which]), sizeof (u_char *));
667 cd->MemForCompDict += sizeof (*cd->cfh[which]);
668 Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL);
669 cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
670 &cd->escape[which]);
671 FIXUP (&cd->cfh[which]);
672 FIXUP (&cd->values[which]);
673 FIXUP (&cd->escape[which]);
674 }
675
676 hd = getmem (sizeof (huff_data), sizeof (char *));
677 cd->MemForCompDict += sizeof (huff_data);
678 Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL);
679 vals = Generate_Fast_Huffman_Vals (hd, &cd->MemForCompDict);
680 cd->chars_huff[which] = hd;
681 FIXUP (&cd->chars_huff[which]);
682 cd->chars_vals[which] = vals;
683 FIXUP (&cd->chars_vals[which]);
684 if (hd->clens)
685 Xfree (hd->clens);
686 hd->clens = NULL;
687
688 hd = getmem (sizeof (huff_data), sizeof (char *));
689 cd->MemForCompDict += sizeof (huff_data);
690 Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL);
691 vals = Generate_Fast_Huffman_Vals (hd, &cd->MemForCompDict);
692 cd->lens_huff[which] = hd;
693 FIXUP (&cd->lens_huff[which]);
694 cd->lens_vals[which] = vals;
695 FIXUP (&cd->lens_vals[which]);
696 if (hd->clens)
697 Xfree (hd->clens);
698 hd->clens = NULL;
699 }
700 break;
701 case MG_SEED_DICTIONARY:
702 {
703 huff_data *hd;
704 u_long **vals;
705 if (cd->cdh.num_words[which])
706 {
707 cd->cfh[which] = getmem (sizeof (*cd->cfh[which]), sizeof (u_char *));
708 cd->MemForCompDict += sizeof (*cd->cfh[which]);
709 Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL);
710 cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
711 &cd->escape[which]);
712 FIXUP (&cd->cfh[which]);
713 FIXUP (&cd->values[which]);
714 FIXUP (&cd->escape[which]);
715 }
716 switch (cd->cdh.novel_method)
717 {
718 case MG_NOVEL_HUFFMAN_CHARS:
719 hd = getmem (sizeof (huff_data), sizeof (char *));
720 cd->MemForCompDict += sizeof (huff_data);
721 Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL);
722 vals = Generate_Fast_Huffman_Vals (hd, &cd->MemForCompDict);
723 cd->chars_huff[which] = hd;
724 FIXUP (&cd->chars_huff[which]);
725 cd->chars_vals[which] = vals;
726 FIXUP (&cd->chars_vals[which]);
727 if (hd->clens)
728 Xfree (hd->clens);
729 hd->clens = NULL;
730
731 hd = getmem (sizeof (huff_data), sizeof (char *));
732 cd->MemForCompDict += sizeof (huff_data);
733 Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL);
734 vals = Generate_Fast_Huffman_Vals (hd, &cd->MemForCompDict);
735 cd->lens_huff[which] = hd;
736 FIXUP (&cd->lens_huff[which]);
737 cd->lens_vals[which] = vals;
738 FIXUP (&cd->lens_vals[which]);
739 if (hd->clens)
740 Xfree (hd->clens);
741 hd->clens = NULL;
742 break;
743 case MG_NOVEL_BINARY:
744 break;
745 case MG_NOVEL_DELTA:
746 break;
747 case MG_NOVEL_HYBRID:
748 break;
749 case MG_NOVEL_HYBRID_MTF:
750 break;
751 }
752 break;
753 }
754 }
755 fclose (dict);
756
757
758 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
759 cd->cdh.novel_method == MG_NOVEL_DELTA ||
760 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
761 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
762 {
763 cd->ad = LoadAuxDict (&cd->cdh, filename);
764 FIXUP (&cd->ad);
765 }
766}
767
768
769static void
770save_fast_dict (char *filename)
771{
772 FILE *fdict;
773
774 fdict = create_file (filename, TEXT_DICT_FAST_SUFFIX, "wb",
775 MAGIC_FAST_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
776
777 {
778 u_long *p;
779 for (p = buffer; (u_long) p < (u_long) cur; p++)
780 {
781 if (IS_FIXUP (p))
782 HTONUL(*p);
783 }
784 }
785
786 /* [RPAP - Jan 97: Endian Ordering] */
787 HTONUL(mem);
788 HTONUL(fixup_mem);
789
790 fwrite (&mem, sizeof (mem), 1, fdict);
791 fwrite (&fixup_mem, sizeof (fixup_mem), 1, fdict);
792
793 /* [RPAP - Jan 97: Endian Ordering] */
794 NTOHUL(mem);
795 NTOHUL(fixup_mem);
796
797 fwrite (buffer, sizeof (u_char), mem, fdict);
798 fwrite (fixup, sizeof (u_char), fixup_mem, fdict);
799
800 fclose (fdict);
801}
Note: See TracBrowser for help on using the repository browser.