Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/gsdl/packages/mg/src/text/text.pass2.c@ 439

Last change on this file since 439 was 439, checked in by sjboddie, 25 years ago
renamed mg-1.3d directory mg
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 18.6 KB

Line
1	/**************************************************************************
2	*
3	* text.pass2.c -- Text compression (Pass 2)
4	* Copyright (C) 1994 Neil Sharman, Gary Eddy and Alistair Moffat
5	*
6	* This program is free software; you can redistribute it and/or modify
7	* it under the terms of the GNU General Public License as published by
8	* the Free Software Foundation; either version 2 of the License, or
9	* (at your option) any later version.
10	*
11	* This program is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	* GNU General Public License for more details.
15	*
16	* You should have received a copy of the GNU General Public License
17	* along with this program; if not, write to the Free Software
18	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19	*
20	* $Id: text.pass2.c 439 1999-08-10 21:23:37Z sjboddie $
21	*
22	**************************************************************************/
23
24
25	#include "sysfuncs.h"
26
27	#include "memlib.h"
28	#include "messages.h"
29	#include "local_strings.h"
30	#include "bitio_m_mem.h"
31	#include "bitio_m.h"
32	#include "huffman.h"
33	#include "bitio_stdio.h"
34	#include "huffman_stdio.h"
35	#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
36
37	#include "mg.h"
38	#include "mg_files.h"
39	#include "build.h"
40	#include "words.h"
41	#include "text.h"
42	#include "hash.h"
43	#include "locallib.h"
44	#include "comp_dict.h"
45
46
47
48
49	/*
50	$Log$
51	Revision 1.1 1999/08/10 21:18:25 sjboddie
52	renamed mg-1.3d directory mg
53
54	Revision 1.2 1998/12/17 09:12:54 rjmcnab
55
56	Altered mg to process utf-8 encoded Unicode. The main changes
57	are in the parsing of the input, the casefolding, and the stemming.
58
59	Revision 1.1 1998/11/17 09:35:47 rjmcnab
60	*** empty log message ***
61
62	* Revision 1.3 1994/10/20 03:57:10 tes
63	* I have rewritten the boolean query optimiser and abstracted out the
64	* components of the boolean query.
65	*
66	* Revision 1.2 1994/09/20 04:42:14 tes
67	* For version 1.1
68	*
69	*/
70
/* Revision-control identification string for this translation unit.
   Points at a string literal, so the pointee is declared const. */
static const char *RCSID = "$Id: text.pass2.c 439 1999-08-10 21:23:37Z sjboddie $";
72
/* Number of bytes of word storage held inside each char_pool.
   The body is parenthesised so the macro expands as a single value inside
   larger expressions (e.g. x / POOL_SIZE). */
#define POOL_SIZE (1024 * 256)
74
75	typedef struct char_pool
76	{
77	struct char_pool *next;
78	u_long left;
79	u_char *ptr;
80	u_char pool[POOL_SIZE];
81	}
82	char_pool;
83
84	typedef struct novel_hash_rec
85	{
86	u_long ordinal_num;
87	u_char *word;
88	}
89	novel_hash_rec;
90
91
92	#define INITIAL_HASH_SIZE 7927
93	#define MAX_SWAPS 10000
94
95	typedef struct novel_hash_table
96	{
97	novel_hash_rec *HashTable;
98	u_long HashSize, HashUsed;
99	char_pool *first_pool;
100	char_pool *pool;
101	u_long next_num, binary_start;
102	novel_hash_rec **code_to_nhr;
103	}
104	novel_hash_table;
105
106
107	static FILE text, text_idx;
108
109	static u_char *comp_buffer;
110
111	static u_long text_length;
112
113	/* [RJM 07/97: 4G limit] */
114	static double stats_in_tot_bytes = 0.0;
115	static double stats_in_bytes = 0.0;
116	static double stats_out_bytes = 0.0;
117
118
119	static novel_hash_table nht[2];
120
121	static u_long prefix_len = 0;
122
123	int blk_start[2][33], blk_end[2][33];
124
125
126	static char_pool *
127	new_pool (char_pool * pool)
128	{
129	char_pool *p = Xmalloc (sizeof (char_pool));
130	if (!p)
131	FatalError (1, "Unable to allocate memory for pool");
132	if (pool)
133	pool->next = p;
134	p->next = NULL;
135	p->left = POOL_SIZE;
136	p->ptr = p->pool;
137	return p;
138	}
139
140
141
142	int
143	init_text_2 (char *file_name)
144	{
145	char path[512];
146	int i;
147
148	if (LoadCompressionDictionary (make_name (file_name, TEXT_DICT_SUFFIX,
149	path)) == COMPERROR)
150	return COMPERROR;
151
152	if (!(text = create_file (file_name, TEXT_SUFFIX, "w+b",
153	MAGIC_TEXT, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
154	return COMPERROR;
155
156	bzero ((char *) &cth, sizeof (cth));
157
158	if (fwrite (&cth, sizeof (cth), 1, text) != 1)
159	return COMPERROR;
160
161	text_length = sizeof (u_long) + sizeof (cth);
162
163	if (!(text_idx = create_file (file_name, TEXT_IDX_SUFFIX, "w+b",
164	MAGIC_TEXI, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
165	return COMPERROR;
166
167	if (fwrite (&cth, sizeof (cth), 1, text_idx) != 1)
168	return COMPERROR;
169
170	if (!(comp_buffer = Xmalloc (sizeof (u_char) * buf_size)))
171	{
172	Message ("No memory for compression buffer");
173	return (COMPERROR);
174	}
175
176	#if 0
177	MaxMemInUse += sizeof (u_char) * buf_size;
178	#endif
179
180	if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS)
181	for (i = 0; i <= 1; i++)
182	{
183	nht[i].HashSize = INITIAL_HASH_SIZE;
184	nht[i].HashTable = Xmalloc (sizeof (novel_hash_rec) * nht[i].HashSize);
185	bzero ((char *) nht[i].HashTable,
186	sizeof (novel_hash_rec) * nht[i].HashSize);
187	nht[i].HashUsed = 0;
188	nht[i].HashSize = INITIAL_HASH_SIZE;
189	nht[i].pool = nht[i].first_pool = new_pool (NULL);
190	nht[i].next_num = 1;
191	nht[i].binary_start = 1;
192	if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
193	nht[i].code_to_nhr = Xmalloc (sizeof (novel_hash_rec )
194	((nht[i].HashSize >> 1) + 2));
195	else
196	nht[i].code_to_nhr = NULL;
197	if (cdh.novel_method == MG_NOVEL_HYBRID \|\|
198	cdh.novel_method == MG_NOVEL_HYBRID_MTF)
199	{
200	int num;
201	num = 1;
202	blk_start[i][0] = 0;
203	blk_end[i][0] = cdh.num_words[i] - 1;
204	while (num < 33)
205	{
206	blk_start[i][num] = blk_end[i][num - 1] + 1;
207	blk_end[i][num] = blk_start[i][num] +
208	(blk_end[i][num - 1] - blk_start[i][num - 1]) * 2;
209	num++;
210	}
211	}
212	}
213
214	return (COMPALLOK);
215	}
216
217
218
/*
 * ic -- integer comparison.
 *
 * `a' and `b' each point at an int.  Returns the difference *a - *b:
 * negative, zero or positive as *a is less than, equal to or greater
 * than *b.
 */
int
ic (void *a, void *b)
{
  return *((int *) a) - *((int *) b);
}
224
225
226
227	/* #define DOCDUMP 477 */
228
229	int
230	process_text_2 (u_char * s_in, int l_in)
231	{
232	int which, byte_length;
233	u_char *end = s_in + l_in - 1;
234	int novels_used[2];
235	int swaps[2][MAX_SWAPS];
236
237	which = inaword (s_in, end);
238
239	ENCODE_START (comp_buffer, buf_size)
240
241	ENCODE_BIT (which);
242
243	if (cdh.novel_method == MG_NOVEL_BINARY)
244	{
245	DELTA_ENCODE_L (nht[0].binary_start, prefix_len);
246	DELTA_ENCODE_L (nht[1].binary_start, prefix_len);
247	}
248
249	novels_used[0] = novels_used[1] = 0;
250
251	#ifdef DOCDUMP
252	if (cth.num_of_docs == DOCDUMP)
253	{
254	printf ("---------------------------------------------------\n");
255	printf ("which = %d\n", which);
256	}
257	#endif
258
259	for (; s_in <= end; which = !which)
260	{
261	u_char Word[MAXWORDLEN + 1];
262	int res;
263
264	if (which)
265	cth.num_of_words++;
266
267	/* First parse a word or non-word out of the string */
268	if (which)
269	PARSE_WORD (Word, s_in, end);
270	else
271	PARSE_NON_WORD (Word, s_in, end);
272
273	#ifdef DOCDUMP
274	if (cth.num_of_docs == DOCDUMP)
275	{
276	printf ("%sword : \"%.*s\"", which ? " " : "non-", Word[0], Word + 1);
277	}
278	#endif
279
280	/* Search the hash table for Word */
281	if (ht[which])
282	{
283	register unsigned long hashval, step;
284	register int tsize = ht[which]->size;
285	register u_char **wptr;
286	HASH (hashval, step, Word, tsize);
287	for (;;)
288	{
289	register u_char *s1;
290	register u_char *s2;
291	register int len;
292	wptr = ht[which]->table[hashval];
293	if (wptr == NULL)
294	{
295	res = COMPERROR;
296	break;
297	}
298
299	/* Compare the words */
300	s1 = Word;
301	s2 = *wptr;
302	len = *s1 + 1;
303	for (; len; len--)
304	if (s1++ != s2++)
305	break;
306
307	if (len)
308	{
309	hashval += step;
310	if (hashval >= tsize)
311	hashval -= tsize;
312	}
313	else
314	{
315	res = ht[which]->table[hashval] - ht[which]->words;
316	break;
317	}
318	}
319	}
320	else
321	res = COMPERROR;
322	/* Check that the word was found in the dictionary */
323	if (res == COMPERROR)
324	{
325	if (cdh.dict_type == MG_COMPLETE_DICTIONARY)
326	{
327	Message ("Unknown word \"%.s\"\n", Word, Word + 1);
328	return (COMPERROR);
329	}
330	if (cdh.dict_type == MG_PARTIAL_DICTIONARY)
331	{
332	u_long i;
333	if (ht[which])
334	{
335	res = ht[which]->hd->num_codes - 1;
336	HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
337	}
338	HUFF_ENCODE (Word[0], lens_codes[which], lens_huff[which].clens);
339	for (i = 0; i < Word[0]; i++)
340	HUFF_ENCODE (Word[i + 1], char_codes[which],
341	char_huff[which].clens);
342	}
343	if (cdh.dict_type == MG_SEED_DICTIONARY)
344	{
345	if (ht[which])
346	{
347	res = ht[which]->hd->num_codes - 1;
348	HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
349	}
350	switch (cdh.novel_method)
351	{
352	case MG_NOVEL_HUFFMAN_CHARS:
353	{
354	u_long i;
355	HUFF_ENCODE (Word[0], lens_codes[which],
356	lens_huff[which].clens);
357	for (i = 0; i < Word[0]; i++)
358	HUFF_ENCODE (Word[i + 1], char_codes[which],
359	char_huff[which].clens);
360	}
361	break;
362	case MG_NOVEL_BINARY:
363	case MG_NOVEL_DELTA:
364	case MG_NOVEL_HYBRID:
365	case MG_NOVEL_HYBRID_MTF:
366	{
367	register unsigned long hashval, step;
368	register novel_hash_table *h = &nht[which];
369	register int hsize = h->HashSize;
370	register novel_hash_rec *ent;
371	HASH (hashval, step, Word, hsize);
372	for (;;)
373	{
374	register u_char s1, s2;
375	register int len;
376	ent = h->HashTable + hashval;
377	if (!ent->word)
378	{
379	int len = *Word + 1;
380	if (len > h->pool->left)
381	h->pool = new_pool (h->pool);
382	ent->word = h->pool->ptr;
383	ent->ordinal_num = h->next_num++;
384	if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
385	h->code_to_nhr[ent->ordinal_num - 1] = ent;
386	memcpy (h->pool->ptr, Word, len);
387	h->pool->ptr += len;
388	h->pool->left -= len;
389	h->HashUsed++;
390	break;
391	}
392	/* Compare the words */
393	s1 = Word;
394	s2 = ent->word;
395	len = *s1 + 1;
396	for (; len; len--)
397	if (s1++ != s2++)
398	break;
399
400	if (!len)
401	break;
402
403	hashval = (hashval + step);
404	if (hashval >= hsize)
405	hashval -= hsize;
406	}
407
408	switch (cdh.novel_method)
409	{
410	case MG_NOVEL_BINARY:
411	{
412	BINARY_ENCODE (ent->ordinal_num, h->binary_start);
413	if (ent->ordinal_num == h->binary_start)
414	h->binary_start++;
415	}
416	break;
417	case MG_NOVEL_DELTA:
418	{
419	DELTA_ENCODE (ent->ordinal_num);
420	}
421	break;
422	case MG_NOVEL_HYBRID:
423	{
424	int k = 0;
425	int j = ent->ordinal_num - 1;
426	while (j > blk_end[which][k])
427	k++;
428	assert (j - blk_start[which][k] + 1 >= 1 &&
429	j - blk_start[which][k] + 1 <=
430	blk_end[which][k] - blk_start[which][k] + 1);
431
432	GAMMA_ENCODE (k + 1);
433	BINARY_ENCODE (j - blk_start[which][k] + 1,
434	blk_end[which][k] -
435	blk_start[which][k] + 1);
436	}
437	break;
438	case MG_NOVEL_HYBRID_MTF:
439	{
440	int k = 0;
441	int j = ent->ordinal_num - 1;
442	while (j > blk_end[which][k])
443	k++;
444	assert (j - blk_start[which][k] + 1 >= 1 &&
445	j - blk_start[which][k] + 1 <=
446	blk_end[which][k] - blk_start[which][k] + 1);
447	GAMMA_ENCODE (k + 1);
448	BINARY_ENCODE (j - blk_start[which][k] + 1,
449	blk_end[which][k] -
450	blk_start[which][k] + 1);
451
452	if (ent->ordinal_num - 1 >= novels_used[which])
453	{
454	int a = novels_used[which];
455	int b = ent->ordinal_num - 1;
456	novel_hash_rec *temp;
457
458
459	/* fprintf(stderr, "a = %d , b = %d\n", a, b);
460	*/
461	temp = h->code_to_nhr[a];
462	h->code_to_nhr[a] = h->code_to_nhr[b];
463	h->code_to_nhr[b] = temp;
464	h->code_to_nhr[a]->ordinal_num = a + 1;
465	h->code_to_nhr[b]->ordinal_num = b + 1;
466	if (novels_used[which] == MAX_SWAPS)
467	FatalError (1, "Not enough mem for swapping");
468	swaps[which][novels_used[which]] = b;
469	novels_used[which]++;
470	}
471	}
472	break;
473	}
474	if (h->HashUsed >= h->HashSize >> 1)
475	{
476	novel_hash_rec *ht;
477	unsigned long size;
478	unsigned long i;
479	size = prime (h->HashSize * 2);
480	if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
481	{
482	Xfree (h->code_to_nhr);
483	h->code_to_nhr = Xmalloc (sizeof (novel_hash_rec )
484	((size >> 1) + 2));
485	}
486	if (!(ht = Xmalloc (sizeof (novel_hash_rec) * size)))
487	{
488	Message ("Unable to allocate memory for table");
489	return (COMPERROR);
490	}
491	bzero ((char ) ht, sizeof (novel_hash_rec) size);
492
493	for (i = 0; i < h->HashSize; i++)
494	if (h->HashTable[i].word)
495	{
496	register u_char *wptr;
497	register unsigned long hashval, step;
498
499	wptr = h->HashTable[i].word;
500	HASH (hashval, step, wptr, size);
501	wptr = (ht + hashval)->word;
502	while (wptr)
503	{
504	hashval += step;
505	if (hashval >= size)
506	hashval -= size;
507	wptr = (ht + hashval)->word;
508	}
509	ht[hashval] = h->HashTable[i];
510	if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
511	h->code_to_nhr[ht[hashval].ordinal_num - 1] =
512	&ht[hashval];
513	}
514	Xfree (h->HashTable);
515	h->HashTable = ht;
516	h->HashSize = size;
517	}
518	}
519	break;
520	}
521	}
522	}
523	else
524	{
525	HUFF_ENCODE (res, ht[which]->codes, ht[which]->hd->clens);
526	#ifdef DOCDUMP
527	if (cth.num_of_docs == DOCDUMP)
528	{
529	printf (" %d %d\n", ht[which]->hd->clens[res],
530	ht[which]->codes[res]);
531	}
532	#endif
533	}
534	}
535
536
537	/* Add a 1 bit onto the end of the buffer the remaining bits in the last
538	byte will all be zero */
539
540	ENCODE_BIT (1);
541
542	ENCODE_FLUSH;
543
544	byte_length = __pos - __base;
545	if (!__remaining)
546	{
547	Message ("The end of the buffer was probably overrun");
548	return COMPERROR;
549	}
550
551	ENCODE_DONE
552
553	#ifdef DOCDUMP
554	if (cth.num_of_docs == DOCDUMP)
555	{
556	printf ("unused bits = %d\n", bits_unused);
557	}
558	#endif
559
560	HTONUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
561	fwrite (&text_length, sizeof (text_length), 1, text_idx);
562	NTOHUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
563	text_length += byte_length;
564
565	#ifdef DOCDUMP
566	if (cth.num_of_docs == DOCDUMP)
567	{
568	int i;
569	for (i = 0; i < byte_length; i++)
570	printf ("%02x ", comp_buffer[i]);
571	printf ("\n");
572	}
573	#endif
574
575	if (cdh.novel_method == MG_NOVEL_HYBRID_MTF)
576	for (which = 0; which <= 1; which++)
577	for (novels_used[which]--; novels_used[which] >= 0; novels_used[which]--)
578	{
579	int a = novels_used[which];
580	int b = swaps[which][novels_used[which]];
581	novel_hash_rec *temp;
582	temp = nht[which].code_to_nhr[a];
583	nht[which].code_to_nhr[a] = nht[which].code_to_nhr[b];
584	nht[which].code_to_nhr[b] = temp;
585	nht[which].code_to_nhr[a]->ordinal_num = a + 1;
586	nht[which].code_to_nhr[b]->ordinal_num = b + 1;
587	}
588
589
590	fwrite (comp_buffer, sizeof (*comp_buffer), byte_length, text);
591
592	if ((double) l_in / (double) byte_length > cth.ratio)
593	cth.ratio = (double) l_in / (double) byte_length;
594
595	cth.num_of_docs++;
596	if (l_in > cth.length_of_longest_doc)
597	cth.length_of_longest_doc = l_in;
598
599	cth.num_of_bytes += l_in;
600
601	if (Comp_Stats)
602	{
603	stats_in_tot_bytes += l_in;
604	stats_in_bytes += l_in;
605	stats_out_bytes += byte_length;
606	if (stats_in_bytes >= comp_stat_point)
607	{
608	fprintf (Comp_Stats, "%10.0f %10.0f %10.0f %f\n", stats_in_tot_bytes,
609	stats_in_bytes, stats_out_bytes,
610	(double) stats_out_bytes / (double) stats_in_bytes);
611	stats_in_bytes = 0.0;
612	stats_out_bytes = 0.0;
613	}
614	}
615
616	return COMPALLOK;
617	}
618
619
620
621
622
623
624	int
625	write_aux_dict (char *FileName)
626	{
627	int i;
628	FILE *aux;
629	if (!(aux = create_file (FileName, TEXT_DICT_AUX_SUFFIX, "wb",
630	MAGIC_AUX_DICT, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
631	return COMPERROR;
632
633	for (i = 0; i <= 1; i++)
634	{
635	aux_frags_header afh;
636	char_pool *cp;
637
638	afh.num_frags = nht[i].HashUsed;
639	afh.mem_for_frags = 0;
640	for (cp = nht[i].first_pool; cp; cp = cp->next)
641	afh.mem_for_frags += POOL_SIZE - cp->left;
642
643	/* [RPAP - Jan 97: Endian Ordering] */
644	HTONUL(afh.num_frags);
645	HTONUL(afh.mem_for_frags);
646
647	fwrite (&afh, sizeof (afh), 1, aux);
648
649	for (cp = nht[i].first_pool; cp; cp = cp->next)
650	fwrite (cp->pool, POOL_SIZE - cp->left, sizeof (u_char), aux);
651	}
652	fclose (aux);
653	return COMPALLOK;
654	}
655
656
657	void
658	estimate_compressed_aux_dict (void)
659	{
660	int i;
661	u_long aux_compressed = 0, total_uncomp = 0;
662	for (i = 0; i <= 1; i++)
663	{
664	int j;
665	long chars[256], fchars[256];
666	long lens[16], flens[16];
667	char_pool *cp;
668	bzero ((char *) chars, sizeof (chars));
669	bzero ((char *) lens, sizeof (lens));
670	for (cp = nht[i].first_pool; cp; cp = cp->next)
671	{
672	u_char *buf = cp->pool;
673	while (buf != cp->ptr)
674	{
675	int len = *buf++;
676	lens[len]++;
677	total_uncomp += len + 4;
678	for (; len; len--)
679	chars[*buf++]++;
680	}
681	}
682	for (j = 0; j < 256; j++)
683	if (!chars[j] && PESINAWORD (j) == i)
684	fchars[j] = 1;
685	else
686	fchars[j] = chars[j];
687	for (j = 0; j < 16; j++)
688	if (!lens[j])
689	flens[j] = 1;
690	else
691	flens[j] = lens[j];
692
693	aux_compressed += (Calculate_Huffman_Size (16, flens, lens) +
694	Calculate_Huffman_Size (256, fchars, chars)) / 8;
695
696	}
697
698	Message ("Aux dictionary (Uncompressed) %.2f Mb ( %u bytes %0.3f %%)",
699	total_uncomp / 1024.0 / 1024, total_uncomp,
700	(total_uncomp * 100.0) / bytes_processed);
701	Message ("Aux dictionary (Compressed) %.2f Mb ( %.0f bytes %0.3f %%)",
702	aux_compressed / 1024.0 / 1024, aux_compressed * 1.0,
703	(aux_compressed * 100.0) / bytes_processed);
704	}
705
706
707
708
709
710
711	int
712	done_text_2 (char *FileName)
713	{
714	if (Comp_Stats)
715	fprintf (Comp_Stats, "%10.0f %10.0f %10.0f %f\n", stats_in_tot_bytes,
716	stats_in_bytes, stats_out_bytes,
717	(double) stats_out_bytes / (double) stats_in_bytes);
718
719	HTONUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
720	fwrite (&text_length, sizeof (text_length), 1, text_idx);
721	NTOHUL(text_length); /* [RPAP - Jan 97: Endian Ordering] */
722
723	/* [RPAP - Jan 97: Endian Ordering] */
724	HTONUL(cth.num_of_docs);
725	HTOND(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
726	HTONUL(cth.num_of_words);
727	HTONUL(cth.length_of_longest_doc);
728	HTOND(cth.ratio);
729
730	if (fseek (text_idx, sizeof (u_long), SEEK_SET) == -1 \|\|
731	fwrite (&cth, sizeof (cth), 1, text_idx) != 1)
732	return COMPERROR;
733	fclose (text_idx);
734
735	if (fseek (text, sizeof (u_long), SEEK_SET) == -1 \|\|
736	fwrite (&cth, sizeof (cth), 1, text) != 1)
737	return COMPERROR;
738	fclose (text);
739
740	/* [RPAP - Jan 97: Endian Ordering] */
741	NTOHUL(cth.num_of_docs);
742	NTOHD(cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
743	NTOHUL(cth.num_of_words);
744	NTOHUL(cth.length_of_longest_doc);
745	NTOHD(cth.ratio);
746
747
748	Message ("Compressed Text %.2f Mb ( %u bytes %0.3f %%)",
749	text_length / 1024.0 / 1024.0, text_length,
750	(text_length * 100.0) / bytes_processed);
751	Message ("Words portion of the dictionary %.2f Mb ( %.0f bytes %0.3f %%)",
752	Words_disk / 1024.0 / 1024, Words_disk * 1.0,
753	(Words_disk * 100.0) / bytes_processed);
754
755	if (cdh.dict_type != MG_COMPLETE_DICTIONARY &&
756	(cdh.novel_method == MG_NOVEL_BINARY \|\|
757	cdh.novel_method == MG_NOVEL_DELTA \|\|
758	cdh.novel_method == MG_NOVEL_HYBRID \|\|
759	cdh.novel_method == MG_NOVEL_HYBRID_MTF))
760	{
761	if (write_aux_dict (FileName) == COMPERROR)
762	return COMPERROR;
763	estimate_compressed_aux_dict ();
764	}
765	else
766	{
767	if (cdh.dict_type != MG_COMPLETE_DICTIONARY)
768	Message ("Huffman info for chars in dictionary %.2f Mb"
769	" ( %u bytes %0.3f %%)",
770	Chars_disk / 1024.0 / 1024, Chars_disk,
771	(Chars_disk * 100.0) / bytes_processed);
772	unlink (make_name (FileName, TEXT_DICT_AUX_SUFFIX, NULL));
773	}
774
775	return (COMPALLOK);
776	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: