source: trunk/gsdl/packages/mg/src/text/text.pass2.c@ 439

Last change on this file since 439 was 439, checked in by sjboddie, 25 years ago

renamed mg-1.3d directory mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 18.6 KB
Line 
1/**************************************************************************
2 *
3 * text.pass2.c -- Text compression (Pass 2)
4 * Copyright (C) 1994 Neil Sharman, Gary Eddy and Alistair Moffat
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: text.pass2.c 439 1999-08-10 21:23:37Z sjboddie $
21 *
22 **************************************************************************/
23
24
25#include "sysfuncs.h"
26
27#include "memlib.h"
28#include "messages.h"
29#include "local_strings.h"
30#include "bitio_m_mem.h"
31#include "bitio_m.h"
32#include "huffman.h"
33#include "bitio_stdio.h"
34#include "huffman_stdio.h"
35#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
36
37#include "mg.h"
38#include "mg_files.h"
39#include "build.h"
40#include "words.h"
41#include "text.h"
42#include "hash.h"
43#include "locallib.h"
44#include "comp_dict.h"
45
46
47
48
49/*
50 $Log$
51 Revision 1.1 1999/08/10 21:18:25 sjboddie
52 renamed mg-1.3d directory mg
53
54 Revision 1.2 1998/12/17 09:12:54 rjmcnab
55
56 Altered mg to process utf-8 encoded Unicode. The main changes
57 are in the parsing of the input, the casefolding, and the stemming.
58
59 Revision 1.1 1998/11/17 09:35:47 rjmcnab
60 *** empty log message ***
61
62 * Revision 1.3 1994/10/20 03:57:10 tes
63 * I have rewritten the boolean query optimiser and abstracted out the
64 * components of the boolean query.
65 *
66 * Revision 1.2 1994/09/20 04:42:14 tes
67 * For version 1.1
68 *
69 */
70
static char *RCSID = "$Id: text.pass2.c 439 1999-08-10 21:23:37Z sjboddie $";

/* Size in bytes of one allocation arena for novel-word strings */
#define POOL_SIZE 1024*256

/* Bump-pointer arena for storing novel (not-in-dictionary) words.
   Arenas are chained via `next'; a new one is added when `left'
   runs out (see new_pool ()). */
typedef struct char_pool
  {
    struct char_pool *next;	/* next arena in the chain, NULL at tail */
    u_long left;		/* bytes still unused in pool[] */
    u_char *ptr;		/* first free byte inside pool[] */
    u_char pool[POOL_SIZE];	/* raw storage for length-prefixed words */
  }
char_pool;

/* One entry in the novel-word hash table */
typedef struct novel_hash_rec
  {
    u_long ordinal_num;		/* 1-based order of first appearance */
    u_char *word;		/* length-prefixed word, stored in a char_pool */
  }
novel_hash_rec;


#define INITIAL_HASH_SIZE 7927	/* prime; table doubles (re-primed) when half full */
#define MAX_SWAPS 10000		/* per-document cap on MTF swap undo log */

/* Open-addressed hash table of novel words, one per parser state
   (word / non-word).  code_to_nhr is only used by the HYBRID_MTF
   novel method to map ordinal codes back to their hash records. */
typedef struct novel_hash_table
  {
    novel_hash_rec *HashTable;	/* open-addressed slot array */
    u_long HashSize, HashUsed;	/* capacity and occupied slot count */
    char_pool *first_pool;	/* head of arena chain (for later traversal) */
    char_pool *pool;		/* current arena new words are copied into */
    u_long next_num, binary_start;	/* next ordinal; MG_NOVEL_BINARY range bound */
    novel_hash_rec **code_to_nhr;	/* ordinal-1 -> record (HYBRID_MTF only) */
  }
novel_hash_table;


static FILE *text, *text_idx;	/* compressed text file and its index */

static u_char *comp_buffer;	/* per-document bit-encoding output buffer */

static u_long text_length;	/* running byte offset written to text_idx */

/* [RJM 07/97: 4G limit] */
/* compression statistics accumulated as doubles to avoid 32-bit overflow */
static double stats_in_tot_bytes = 0.0;
static double stats_in_bytes = 0.0;
static double stats_out_bytes = 0.0;


static novel_hash_table nht[2];	/* [0] = non-words, [1] = words */

static u_long prefix_len = 0;

/* Block boundaries for the HYBRID novel-word coders; 33 geometrically
   growing blocks per parser state, set up in init_text_2 () */
int blk_start[2][33], blk_end[2][33];
124
125
126static char_pool *
127new_pool (char_pool * pool)
128{
129 char_pool *p = Xmalloc (sizeof (char_pool));
130 if (!p)
131 FatalError (1, "Unable to allocate memory for pool");
132 if (pool)
133 pool->next = p;
134 p->next = NULL;
135 p->left = POOL_SIZE;
136 p->ptr = p->pool;
137 return p;
138}
139
140
141
142int
143init_text_2 (char *file_name)
144{
145 char path[512];
146 int i;
147
148 if (LoadCompressionDictionary (make_name (file_name, TEXT_DICT_SUFFIX,
149 path)) == COMPERROR)
150 return COMPERROR;
151
152 if (!(text = create_file (file_name, TEXT_SUFFIX, "w+b",
153 MAGIC_TEXT, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
154 return COMPERROR;
155
156 bzero ((char *) &cth, sizeof (cth));
157
158 if (fwrite (&cth, sizeof (cth), 1, text) != 1)
159 return COMPERROR;
160
161 text_length = sizeof (u_long) + sizeof (cth);
162
163 if (!(text_idx = create_file (file_name, TEXT_IDX_SUFFIX, "w+b",
164 MAGIC_TEXI, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
165 return COMPERROR;
166
167 if (fwrite (&cth, sizeof (cth), 1, text_idx) != 1)
168 return COMPERROR;
169
170 if (!(comp_buffer = Xmalloc (sizeof (u_char) * buf_size)))
171 {
172 Message ("No memory for compression buffer");
173 return (COMPERROR);
174 }
175
176#if 0
177 MaxMemInUse += sizeof (u_char) * buf_size;
178#endif
179
180 if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS)
181 for (i = 0; i <= 1; i++)
182 {
183 nht[i].HashSize = INITIAL_HASH_SIZE;
184 nht[i].HashTable = Xmalloc (sizeof (novel_hash_rec) * nht[i].HashSize);
185 bzero ((char *) nht[i].HashTable,
186 sizeof (novel_hash_rec) * nht[i].HashSize);
187 nht[i].HashUsed = 0;
188 nht[i].HashSize = INITIAL_HASH_SIZE;
189 nht[i].pool = nht[i].first_pool = new_pool (NULL);
190 nht[i].next_num = 1;
191 nht[i].binary_start = 1;
192 if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
193 nht[i].code_to_nhr = Xmalloc (sizeof (novel_hash_rec *) *
194 ((nht[i].HashSize >> 1) + 2));
195 else
196 nht[i].code_to_nhr = NULL;
197 if (cdh.novel_method == MG_NOVEL_HYBRID ||
198 cdh.novel_method == MG_NOVEL_HYBRID_MTF)
199 {
200 int num;
201 num = 1;
202 blk_start[i][0] = 0;
203 blk_end[i][0] = cdh.num_words[i] - 1;
204 while (num < 33)
205 {
206 blk_start[i][num] = blk_end[i][num - 1] + 1;
207 blk_end[i][num] = blk_start[i][num] +
208 (blk_end[i][num - 1] - blk_start[i][num - 1]) * 2;
209 num++;
210 }
211 }
212 }
213
214 return (COMPALLOK);
215}
216
217
218
/*
 * ic -- qsort-style comparator for two ints.
 *
 * Returns a negative value, zero, or a positive value as *a is less
 * than, equal to, or greater than *b.
 *
 * [FIX] The original returned `*a - *b', which overflows (undefined
 * behavior) and can report the wrong sign for widely separated values
 * such as INT_MIN vs INT_MAX.  The (x > y) - (x < y) idiom cannot
 * overflow and yields the same ordering for all inputs.
 */
int
ic (void *a, void *b)
{
  int x = *(int *) a;
  int y = *(int *) b;
  return (x > y) - (x < y);
}
224
225
226
227/* #define DOCDUMP 477 */
228
/*
 * process_text_2 -- compress one document (pass 2).
 *
 * s_in/l_in is the raw document text.  The document is parsed into a
 * strictly alternating sequence of words and non-words; each token is
 * looked up in the pass-1 dictionary (ht[which]) and Huffman coded, and
 * tokens missing from the dictionary are handled according to
 * cdh.dict_type / cdh.novel_method.  The encoded bitstream is built in
 * comp_buffer (ENCODE_* macros from bitio_m_mem.h / bitio_m.h), then
 * appended to `text' with its starting offset recorded in `text_idx'.
 * Global totals in `cth' and the Comp_Stats counters are updated.
 *
 * Returns COMPALLOK on success, COMPERROR on an unknown word (complete
 * dictionary), buffer overrun or allocation failure.
 */
int
process_text_2 (u_char * s_in, int l_in)
{
  int which, byte_length;
  u_char *end = s_in + l_in - 1;	/* last byte of the document */
  int novels_used[2];		/* # MTF swaps logged per parser state */
  int swaps[2][MAX_SWAPS];	/* MTF swap log, undone after encoding */

  /* does the document open with a word (1) or a non-word (0)? */
  which = inaword (s_in, end);

  ENCODE_START (comp_buffer, buf_size)

  /* first bit of the stream records the starting parser state */
  ENCODE_BIT (which);

  if (cdh.novel_method == MG_NOVEL_BINARY)
    {
      /* record the current binary-coding ranges so the decoder can
         mirror them */
      DELTA_ENCODE_L (nht[0].binary_start, prefix_len);
      DELTA_ENCODE_L (nht[1].binary_start, prefix_len);
    }

  novels_used[0] = novels_used[1] = 0;

#ifdef DOCDUMP
  if (cth.num_of_docs == DOCDUMP)
    {
      printf ("---------------------------------------------------\n");
      printf ("which = %d\n", which);
    }
#endif

  /* words and non-words strictly alternate, hence `which = !which' */
  for (; s_in <= end; which = !which)
    {
      u_char Word[MAXWORDLEN + 1];	/* length-prefixed token: Word[0] = length */
      int res;

      if (which)
	cth.num_of_words++;

      /* First parse a word or non-word out of the string */
      if (which)
	PARSE_WORD (Word, s_in, end);
      else
	PARSE_NON_WORD (Word, s_in, end);

#ifdef DOCDUMP
      if (cth.num_of_docs == DOCDUMP)
	{
	  printf ("%sword : \"%.*s\"", which ? " " : "non-", Word[0], Word + 1);
	}
#endif

      /* Search the hash table for Word */
      if (ht[which])
	{
	  register unsigned long hashval, step;
	  /* NOTE(review): tsize is a signed int compared against the
	     unsigned hashval below -- relies on size being positive */
	  register int tsize = ht[which]->size;
	  register u_char **wptr;
	  HASH (hashval, step, Word, tsize);
	  for (;;)
	    {
	      register u_char *s1;
	      register u_char *s2;
	      register int len;
	      wptr = ht[which]->table[hashval];
	      if (wptr == NULL)
		{
		  /* empty slot: not in the dictionary */
		  res = COMPERROR;
		  break;
		}

	      /* Compare the words */
	      s1 = Word;
	      s2 = *wptr;
	      len = *s1 + 1;	/* include the length byte in the compare */
	      for (; len; len--)
		if (*s1++ != *s2++)
		  break;

	      if (len)
		{
		  /* mismatch: advance by the double-hash step, wrapping */
		  hashval += step;
		  if (hashval >= tsize)
		    hashval -= tsize;
		}
	      else
		{
		  /* found: res is the word's index in the dictionary */
		  res = ht[which]->table[hashval] - ht[which]->words;
		  break;
		}
	    }
	}
      else
	res = COMPERROR;
      /* Check that the word was found in the dictionary */
      if (res == COMPERROR)
	{
	  if (cdh.dict_type == MG_COMPLETE_DICTIONARY)
	    {
	      /* a complete dictionary must contain every token */
	      Message ("Unknown word \"%.*s\"\n", *Word, Word + 1);
	      return (COMPERROR);
	    }
	  if (cdh.dict_type == MG_PARTIAL_DICTIONARY)
	    {
	      u_long i;
	      if (ht[which])
		{
		  /* emit the escape code (last code in the table) ... */
		  res = ht[which]->hd->num_codes - 1;
		  HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
		}
	      /* ... then spell the token out with Huffman-coded length
	         and characters */
	      HUFF_ENCODE (Word[0], lens_codes[which], lens_huff[which].clens);
	      for (i = 0; i < Word[0]; i++)
		HUFF_ENCODE (Word[i + 1], char_codes[which],
			     char_huff[which].clens);
	    }
	  if (cdh.dict_type == MG_SEED_DICTIONARY)
	    {
	      if (ht[which])
		{
		  /* escape code, as above */
		  res = ht[which]->hd->num_codes - 1;
		  HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
		}
	      switch (cdh.novel_method)
		{
		case MG_NOVEL_HUFFMAN_CHARS:
		  {
		    u_long i;
		    HUFF_ENCODE (Word[0], lens_codes[which],
				 lens_huff[which].clens);
		    for (i = 0; i < Word[0]; i++)
		      HUFF_ENCODE (Word[i + 1], char_codes[which],
				   char_huff[which].clens);
		  }
		  break;
		case MG_NOVEL_BINARY:
		case MG_NOVEL_DELTA:
		case MG_NOVEL_HYBRID:
		case MG_NOVEL_HYBRID_MTF:
		  {
		    /* Look the token up in (or insert it into) the
		       novel-word hash table, then encode its ordinal */
		    register unsigned long hashval, step;
		    register novel_hash_table *h = &nht[which];
		    register int hsize = h->HashSize;
		    register novel_hash_rec *ent;
		    HASH (hashval, step, Word, hsize);
		    for (;;)
		      {
			register u_char *s1, *s2;
			register int len;
			ent = h->HashTable + hashval;
			if (!ent->word)
			  {
			    /* empty slot: first sighting -- copy the word
			       into the arena and assign the next ordinal.
			       NOTE(review): this `len' shadows the outer
			       one declared above */
			    int len = *Word + 1;
			    if (len > h->pool->left)
			      h->pool = new_pool (h->pool);
			    ent->word = h->pool->ptr;
			    ent->ordinal_num = h->next_num++;
			    if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
			      h->code_to_nhr[ent->ordinal_num - 1] = ent;
			    memcpy (h->pool->ptr, Word, len);
			    h->pool->ptr += len;
			    h->pool->left -= len;
			    h->HashUsed++;
			    break;
			  }
			/* Compare the words */
			s1 = Word;
			s2 = ent->word;
			len = *s1 + 1;
			for (; len; len--)
			  if (*s1++ != *s2++)
			    break;

			if (!len)
			  break;	/* match found */

			/* probe the next slot */
			hashval = (hashval + step);
			if (hashval >= hsize)
			  hashval -= hsize;
		      }

		    /* encode ent->ordinal_num with the selected coder */
		    switch (cdh.novel_method)
		      {
		      case MG_NOVEL_BINARY:
			{
			  BINARY_ENCODE (ent->ordinal_num, h->binary_start);
			  /* widen the binary range when the newest
			     ordinal is used */
			  if (ent->ordinal_num == h->binary_start)
			    h->binary_start++;
			}
			break;
		      case MG_NOVEL_DELTA:
			{
			  DELTA_ENCODE (ent->ordinal_num);
			}
			break;
		      case MG_NOVEL_HYBRID:
			{
			  /* gamma-code the block number, then binary-code
			     the offset within the block */
			  int k = 0;
			  int j = ent->ordinal_num - 1;
			  while (j > blk_end[which][k])
			    k++;
			  assert (j - blk_start[which][k] + 1 >= 1 &&
				  j - blk_start[which][k] + 1 <=
				  blk_end[which][k] - blk_start[which][k] + 1);

			  GAMMA_ENCODE (k + 1);
			  BINARY_ENCODE (j - blk_start[which][k] + 1,
					 blk_end[which][k] -
					 blk_start[which][k] + 1);
			}
			break;
		      case MG_NOVEL_HYBRID_MTF:
			{
			  /* as HYBRID, plus move-to-front of the ordinal */
			  int k = 0;
			  int j = ent->ordinal_num - 1;
			  while (j > blk_end[which][k])
			    k++;
			  assert (j - blk_start[which][k] + 1 >= 1 &&
				  j - blk_start[which][k] + 1 <=
				  blk_end[which][k] - blk_start[which][k] + 1);
			  GAMMA_ENCODE (k + 1);
			  BINARY_ENCODE (j - blk_start[which][k] + 1,
					 blk_end[which][k] -
					 blk_start[which][k] + 1);

			  if (ent->ordinal_num - 1 >= novels_used[which])
			    {
			      /* swap this ordinal to the front and log
			         the swap so it can be undone after the
			         document is encoded */
			      int a = novels_used[which];
			      int b = ent->ordinal_num - 1;
			      novel_hash_rec *temp;


/* fprintf(stderr, "a = %d , b = %d\n", a, b);
 */
			      temp = h->code_to_nhr[a];
			      h->code_to_nhr[a] = h->code_to_nhr[b];
			      h->code_to_nhr[b] = temp;
			      h->code_to_nhr[a]->ordinal_num = a + 1;
			      h->code_to_nhr[b]->ordinal_num = b + 1;
			      if (novels_used[which] == MAX_SWAPS)
				FatalError (1, "Not enough mem for swapping");
			      swaps[which][novels_used[which]] = b;
			      novels_used[which]++;
			    }
			}
			break;
		      }
		    /* grow the table (next prime above double) once it
		       is half full, rehashing every entry */
		    if (h->HashUsed >= h->HashSize >> 1)
		      {
			/* NOTE(review): this local `ht' shadows the
			   global dictionary hash-table array */
			novel_hash_rec *ht;
			unsigned long size;
			unsigned long i;
			size = prime (h->HashSize * 2);
			if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
			  {
			    Xfree (h->code_to_nhr);
			    h->code_to_nhr = Xmalloc (sizeof (novel_hash_rec *) *
						      ((size >> 1) + 2));
			  }
			if (!(ht = Xmalloc (sizeof (novel_hash_rec) * size)))
			  {
			    Message ("Unable to allocate memory for table");
			    return (COMPERROR);
			  }
			bzero ((char *) ht, sizeof (novel_hash_rec) * size);

			for (i = 0; i < h->HashSize; i++)
			  if (h->HashTable[i].word)
			    {
			      register u_char *wptr;
			      register unsigned long hashval, step;

			      wptr = h->HashTable[i].word;
			      HASH (hashval, step, wptr, size);
			      /* linear-probe to the first free slot */
			      wptr = (ht + hashval)->word;
			      while (wptr)
				{
				  hashval += step;
				  if (hashval >= size)
				    hashval -= size;
				  wptr = (ht + hashval)->word;
				}
			      ht[hashval] = h->HashTable[i];
			      if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
				h->code_to_nhr[ht[hashval].ordinal_num - 1] =
				  &ht[hashval];
			    }
			Xfree (h->HashTable);
			h->HashTable = ht;
			h->HashSize = size;
		      }
		  }
		  break;
		}
	    }
	}
      else
	{
	  /* token is in the dictionary: emit its Huffman code */
	  HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
#ifdef DOCDUMP
	  if (cth.num_of_docs == DOCDUMP)
	    {
	      printf (" %d %d\n", ht[which]->hd->clens[res],
		      ht[which]->codes[res]);
	    }
#endif
	}
    }


  /* Add a 1 bit onto the end of the buffer the remaining bits in the last
     byte will all be zero */

  ENCODE_BIT (1);

  ENCODE_FLUSH;

  byte_length = __pos - __base;	/* __pos/__base come from ENCODE_START */
  if (!__remaining)
    {
      Message ("The end of the buffer was probably overrun");
      return COMPERROR;
    }

  ENCODE_DONE

#ifdef DOCDUMP
  if (cth.num_of_docs == DOCDUMP)
    {
      printf ("unused bits = %d\n", bits_unused);
    }
#endif

  /* record this document's starting offset in the index file */
  HTONUL (text_length);		/* [RPAP - Jan 97: Endian Ordering] */
  fwrite (&text_length, sizeof (text_length), 1, text_idx);
  NTOHUL (text_length);		/* [RPAP - Jan 97: Endian Ordering] */
  text_length += byte_length;

#ifdef DOCDUMP
  if (cth.num_of_docs == DOCDUMP)
    {
      int i;
      for (i = 0; i < byte_length; i++)
	printf ("%02x ", comp_buffer[i]);
      printf ("\n");
    }
#endif

  /* undo the MTF swaps in reverse order so the per-document encoding
     starts from a canonical ordinal assignment */
  if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
    for (which = 0; which <= 1; which++)
      for (novels_used[which]--; novels_used[which] >= 0; novels_used[which]--)
	{
	  int a = novels_used[which];
	  int b = swaps[which][novels_used[which]];
	  novel_hash_rec *temp;
	  temp = nht[which].code_to_nhr[a];
	  nht[which].code_to_nhr[a] = nht[which].code_to_nhr[b];
	  nht[which].code_to_nhr[b] = temp;
	  nht[which].code_to_nhr[a]->ordinal_num = a + 1;
	  nht[which].code_to_nhr[b]->ordinal_num = b + 1;
	}


  fwrite (comp_buffer, sizeof (*comp_buffer), byte_length, text);

  /* track the best (largest) compression ratio seen */
  if ((double) l_in / (double) byte_length > cth.ratio)
    cth.ratio = (double) l_in / (double) byte_length;

  cth.num_of_docs++;
  if (l_in > cth.length_of_longest_doc)
    cth.length_of_longest_doc = l_in;

  cth.num_of_bytes += l_in;

  if (Comp_Stats)
    {
      stats_in_tot_bytes += l_in;
      stats_in_bytes += l_in;
      stats_out_bytes += byte_length;
      /* emit a stats line roughly every comp_stat_point input bytes */
      if (stats_in_bytes >= comp_stat_point)
	{
	  fprintf (Comp_Stats, "%10.0f %10.0f %10.0f %f\n", stats_in_tot_bytes,
		   stats_in_bytes, stats_out_bytes,
		   (double) stats_out_bytes / (double) stats_in_bytes);
	  stats_in_bytes = 0.0;
	  stats_out_bytes = 0.0;
	}
    }

  return COMPALLOK;
}
618
619
620
621
622
623
624int
625write_aux_dict (char *FileName)
626{
627 int i;
628 FILE *aux;
629 if (!(aux = create_file (FileName, TEXT_DICT_AUX_SUFFIX, "wb",
630 MAGIC_AUX_DICT, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
631 return COMPERROR;
632
633 for (i = 0; i <= 1; i++)
634 {
635 aux_frags_header afh;
636 char_pool *cp;
637
638 afh.num_frags = nht[i].HashUsed;
639 afh.mem_for_frags = 0;
640 for (cp = nht[i].first_pool; cp; cp = cp->next)
641 afh.mem_for_frags += POOL_SIZE - cp->left;
642
643 /* [RPAP - Jan 97: Endian Ordering] */
644 HTONUL(afh.num_frags);
645 HTONUL(afh.mem_for_frags);
646
647 fwrite (&afh, sizeof (afh), 1, aux);
648
649 for (cp = nht[i].first_pool; cp; cp = cp->next)
650 fwrite (cp->pool, POOL_SIZE - cp->left, sizeof (u_char), aux);
651 }
652 fclose (aux);
653 return COMPALLOK;
654}
655
656
657void
658estimate_compressed_aux_dict (void)
659{
660 int i;
661 u_long aux_compressed = 0, total_uncomp = 0;
662 for (i = 0; i <= 1; i++)
663 {
664 int j;
665 long chars[256], fchars[256];
666 long lens[16], flens[16];
667 char_pool *cp;
668 bzero ((char *) chars, sizeof (chars));
669 bzero ((char *) lens, sizeof (lens));
670 for (cp = nht[i].first_pool; cp; cp = cp->next)
671 {
672 u_char *buf = cp->pool;
673 while (buf != cp->ptr)
674 {
675 int len = *buf++;
676 lens[len]++;
677 total_uncomp += len + 4;
678 for (; len; len--)
679 chars[*buf++]++;
680 }
681 }
682 for (j = 0; j < 256; j++)
683 if (!chars[j] && PESINAWORD (j) == i)
684 fchars[j] = 1;
685 else
686 fchars[j] = chars[j];
687 for (j = 0; j < 16; j++)
688 if (!lens[j])
689 flens[j] = 1;
690 else
691 flens[j] = lens[j];
692
693 aux_compressed += (Calculate_Huffman_Size (16, flens, lens) +
694 Calculate_Huffman_Size (256, fchars, chars)) / 8;
695
696 }
697
698 Message ("Aux dictionary (Uncompressed) %.2f Mb ( %u bytes %0.3f %%)",
699 total_uncomp / 1024.0 / 1024, total_uncomp,
700 (total_uncomp * 100.0) / bytes_processed);
701 Message ("Aux dictionary (Compressed) %.2f Mb ( %.0f bytes %0.3f %%)",
702 aux_compressed / 1024.0 / 1024, aux_compressed * 1.0,
703 (aux_compressed * 100.0) / bytes_processed);
704}
705
706
707
708
709
710
711int
712done_text_2 (char *FileName)
713{
714 if (Comp_Stats)
715 fprintf (Comp_Stats, "%10.0f %10.0f %10.0f %f\n", stats_in_tot_bytes,
716 stats_in_bytes, stats_out_bytes,
717 (double) stats_out_bytes / (double) stats_in_bytes);
718
719 HTONUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
720 fwrite (&text_length, sizeof (text_length), 1, text_idx);
721 NTOHUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
722
723 /* [RPAP - Jan 97: Endian Ordering] */
724 HTONUL(cth.num_of_docs);
725 HTOND(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
726 HTONUL(cth.num_of_words);
727 HTONUL(cth.length_of_longest_doc);
728 HTOND(cth.ratio);
729
730 if (fseek (text_idx, sizeof (u_long), SEEK_SET) == -1 ||
731 fwrite (&cth, sizeof (cth), 1, text_idx) != 1)
732 return COMPERROR;
733 fclose (text_idx);
734
735 if (fseek (text, sizeof (u_long), SEEK_SET) == -1 ||
736 fwrite (&cth, sizeof (cth), 1, text) != 1)
737 return COMPERROR;
738 fclose (text);
739
740 /* [RPAP - Jan 97: Endian Ordering] */
741 NTOHUL(cth.num_of_docs);
742 NTOHD(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
743 NTOHUL(cth.num_of_words);
744 NTOHUL(cth.length_of_longest_doc);
745 NTOHD(cth.ratio);
746
747
748 Message ("Compressed Text %.2f Mb ( %u bytes %0.3f %%)",
749 text_length / 1024.0 / 1024.0, text_length,
750 (text_length * 100.0) / bytes_processed);
751 Message ("Words portion of the dictionary %.2f Mb ( %.0f bytes %0.3f %%)",
752 Words_disk / 1024.0 / 1024, Words_disk * 1.0,
753 (Words_disk * 100.0) / bytes_processed);
754
755 if (cdh.dict_type != MG_COMPLETE_DICTIONARY &&
756 (cdh.novel_method == MG_NOVEL_BINARY ||
757 cdh.novel_method == MG_NOVEL_DELTA ||
758 cdh.novel_method == MG_NOVEL_HYBRID ||
759 cdh.novel_method == MG_NOVEL_HYBRID_MTF))
760 {
761 if (write_aux_dict (FileName) == COMPERROR)
762 return COMPERROR;
763 estimate_compressed_aux_dict ();
764 }
765 else
766 {
767 if (cdh.dict_type != MG_COMPLETE_DICTIONARY)
768 Message ("Huffman info for chars in dictionary %.2f Mb"
769 " ( %u bytes %0.3f %%)",
770 Chars_disk / 1024.0 / 1024, Chars_disk,
771 (Chars_disk * 100.0) / bytes_processed);
772 unlink (make_name (FileName, TEXT_DICT_AUX_SUFFIX, NULL));
773 }
774
775 return (COMPALLOK);
776}
Note: See TracBrowser for help on using the repository browser.