Context Navigation

source: trunk/gsdl/src/mgpp/text/text_get.cpp@ 711

Last change on this file since 711 was 711, checked in by cs025, 25 years ago
Changes to eradicate Xmalloc
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 26.6 KB

Line
1	/**************************************************************************
2	*
3	* text_get.c -- Function for reading documents from the compressed text
4	* Copyright (C) 1994 Neil Sharman
5	*
6	* This program is free software; you can redistribute it and/or modify
7	* it under the terms of the GNU General Public License as published by
8	* the Free Software Foundation; either version 2 of the License, or
9	* (at your option) any later version.
10	*
11	* This program is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	* GNU General Public License for more details.
15	*
16	* You should have received a copy of the GNU General Public License
17	* along with this program; if not, write to the Free Software
18	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19	*
20	* $Id: text_get.cpp 711 1999-10-17 23:43:31Z cs025 $
21	*
22	**************************************************************************/
23
24	#include "sysfuncs.h"
25
26	#include "memlib.h"
27	#include "filestats.h"
28	#include "timing.h"
29	#include "messages.h"
30	#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
31
32	#include "huffman.h"
33	#include "bitio_m_abstract.h"
34	#include "bitio_m_mem.h"
35	#include "bitio_m.h"
36	#include "bitio_stdio.h"
37	#include "huffman_stdio.h"
38
39	#include "mg.h"
40	#include "invf.h"
41	#include "text.h"
42	#include "lists.h"
43	#include "backend.h"
44	#include "text_get.h"
45	#include "locallib.h"
46	#include "words.h"
47	#include "mg_errors.h"
48	#include "local_strings.h"
49	#include "DocEntry.h"
50
51	/*
52	$Log$
53	Revision 1.2 1999/10/17 23:43:30 cs025
54	Changes to eradicate Xmalloc
55
56	Revision 1.1 1999/10/11 02:58:39 cs025
57	Base install of MG-PP
58
59	Revision 1.1 1999/08/10 21:18:26 sjboddie
60	renamed mg-1.3d directory mg
61
62	Revision 1.1 1998/11/17 09:35:48 rjmcnab
63	* empty log message *
64
65	* Revision 1.3 1994/10/20 03:57:11 tes
66	* I have rewritten the boolean query optimiser and abstracted out the
67	* components of the boolean query.
68	*
69	* Revision 1.2 1994/09/20 04:42:15 tes
70	* For version 1.1
71	*
72	*/
73
/* Selects which pair of Huffman tables Load_Comp_HuffData () fills in:
 * the word-length tables or the character tables.  (The original
 * "typedef enum huff_type {...};" declared no typedef name, so the
 * typedef keyword was meaningless.) */
enum huff_type {lengths, chars};

/* RCS identification string for this file. */
static const char *RCSID = "$Id: text_get.cpp 711 1999-10-17 23:43:31Z cs025 $";
77
78
79
80
81
82	/* FetchDocStart ()
83	* Reads into DocEnt the starting position of the document in the *.text file
84	* Where the first document is document number 1
85	* It returns the true weight of the document.
86	*/
87
88
89
90
91	static double
92	FetchDocStartLev1 (text_data * td, u_long DN,
93	u_long * seek_pos, u_long * len)
94	{
95	unsigned long data[2];
96	/* [TS:Sep/94] Fixed up the seek call to give the correct offset */
97	Fseek (td->TextIdxFile,
98	sizeof (unsigned long) * (DN - 1) + /* the doc offsets */
99	sizeof (unsigned long) + /* the magic number */
100	sizeof (compressed_text_header), /* the header */
101	0);
102	Fread ((char *) &data, sizeof (data), 1, td->TextIdxFile);
103
104	/* [RPAP - Jan 97: Endian Ordering] */
105	NTOHUL(data[0]);
106	NTOHUL(data[1]);
107
108	*seek_pos = data[0];
109	*len = data[1] - data[0];
110	return (1.0);
111	}
112
113	#define MG_PAGE_SIZE 2048
114
115	static int
116	LoadIdx (text_data * td, unsigned long DN)
117	{
118	if (!td->idx_data)
119	{
120	td->idx_data = new index_data[MG_PAGE_SIZE];
121	if (!td->idx_data)
122	FatalError (1, "Out of memory in FDSL2");
123	}
124
125	if (td->current_pos == -1 \|\| DN >= td->current_pos + MG_PAGE_SIZE - 1 \|\|
126	DN < td->current_pos)
127	{
128	int i, num; /* [RPAP - Jan 97: Endian Ordering] */
129
130	long rn = (long) DN - (MG_PAGE_SIZE >> 1);
131	if (rn < 1)
132	rn = 1;
133
134	Fseek (td->TextIdxWgtFile, (sizeof (unsigned long) + sizeof (float)) *
135	(rn - 1) + sizeof (unsigned long), 0);
136
137	num = Fread ((char ) td->idx_data, sizeof ((td->idx_data)), MG_PAGE_SIZE, /* [RPAP - Jan 97: Endian Ordering] */
138	td->TextIdxWgtFile);
139
140	/* [RPAP - Jan 97: Endian Ordering] */
141	for (i = 0; i < num; i++)
142	{
143	NTOHUL(td->idx_data[i].Start);
144	NTOHF(td->idx_data[i].Weight);
145	}
146
147	td->current_pos = rn;
148	}
149	return DN - td->current_pos;
150	}
151
152	static double
153	FDSL2 (text_data * td, unsigned long DN, unsigned long *Pos)
154	{
155	unsigned long pos = LoadIdx (td, DN);
156	*Pos = td->idx_data[pos].Start;
157	return (td->idx_data[pos].Weight);
158	}
159
160
161	static double
162	FetchDocStartLev2 (text_data * td, u_long DN,
163	u_long * seek_pos, u_long * len)
164	{
165	double Weight;
166	unsigned long s1, s2;
167
168	Weight = FDSL2 (td, DN, &s1);
169	do
170	{
171	DN++;
172	FDSL2 (td, DN, &s2);
173	}
174	while (s2 == s1);
175	*seek_pos = s1;
176	*len = s2 - s1;
177	return (Weight);
178	}
179
180
181
182
183	double
184	FetchDocStart (query_data * qd, u_long DN, u_long * seek_pos, u_long * len)
185	{
186	qd->text_idx_lookups++;
187
188	if (qd->td->TextIdxWgtFile)
189	return FetchDocStartLev2 (qd->td, DN, seek_pos, len);
190	else
191	return FetchDocStartLev1 (qd->td, DN, seek_pos, len);
192	}
193
194	unsigned long
195	FetchInitialParagraph (text_data * td, unsigned long ParaNum)
196	{
197	if (td->TextIdxWgtFile)
198	{
199	unsigned long pos;
200	unsigned long start;
201	int PN = ParaNum - 1;
202	pos = LoadIdx (td, ParaNum);
203	start = td->idx_data[pos].Start;
204	while (PN > 0)
205	{
206	pos = LoadIdx (td, PN);
207	if (td->idx_data[pos].Start != start)
208	return PN + 1;
209	PN--;
210	}
211	return PN + 1;
212	}
213	else
214	return ParaNum;
215	}
216
217
218
219	/* FetchCompressed ()
220	* Reads into buffer DocBuff the compressed form of document DocNum.
221	* Where the first document is document number 1
222	*/
223	int
224	FetchCompressed (query_data * qd, char *DocBuff, DocEntry DocEnt)
225	{
226	if (!DocEnt->SeekPos)
227	DocEnt->FetchStart(qd);
228	// FetchDocStart (qd, DocEnt->DocNum, &DocEnt->SeekPos, &DocEnt->Len);
229	if (!(*DocBuff = new char[DocEnt->Len]))
230	return (-1);
231
232	if (Fseek (qd->td->TextFile, DocEnt->SeekPos, 0) == -1)
233	FatalError (1, "Error when seeking into text file");
234	#if 0
235	printf ("Loading compressed text %d %d\n", DocEnt->SeekPos, DocEnt->Len);
236	#endif
237	if (Fread (*DocBuff, 1, DocEnt->Len, qd->td->TextFile) != DocEnt->Len)
238	FatalError (1, "Error when reading data");
239
240	return (DocEnt->Len);
241
242	}
243
244
245	text_data *
246	LoadTextData (File * text, File * text_idx_wgt, File * text_idx)
247	{
248	text_data *td;
249
250	if (!(td = new text_data))
251	{
252	mg_errno = MG_NOMEM;
253	return (NULL);
254	}
255
256	td->TextFile = text;
257	td->TextIdxWgtFile = text_idx_wgt;
258	td->TextIdxFile = text_idx;
259	td->current_pos = -1;
260	td->idx_data = NULL;
261	Fread (&td->cth, sizeof (td->cth), 1, text);
262
263	/* [RPAP - Jan 97: Endian Ordering] */
264	NTOHUL(td->cth.num_of_docs);
265	NTOHD(td->cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
266	NTOHUL(td->cth.num_of_words);
267	NTOHUL(td->cth.length_of_longest_doc);
268	NTOHD(td->cth.ratio);
269
270	return (td);
271	}
272
273
274	void
275	FreeTextData (text_data * td)
276	{
277	if (td)
278	{
279	if (td->idx_data)
280	delete td->idx_data;
281	delete td;
282	}
283	}
284
285
286	static int
287	pts_comp (const void A, const void B)
288	{
289	const DocEntry const a = (DocEntry **) A;
290	const DocEntry const b = (DocEntry **) B;
291	return (a)->DocNum - (b)->DocNum;
292	}
293
294
295
296
297	int
298	GetPosLens (query_data * qd, DocEntry * Docs, int num)
299	{
300	DocEntry **pts;
301	int i, j;
302	if (!(pts = new (DocEntry *)[num]))
303	{
304	mg_errno = MG_NOMEM;
305	return (-1);
306	}
307	for (i = j = 0; i < num; i++, Docs++)
308	if (!Docs->SeekPos)
309	pts[j++] = Docs;
310
311	if (j)
312	{
313	qsort (pts, j, sizeof (DocEntry *), pts_comp);
314	for (i = 0; i < j; i++)
315	pts[i]->FetchStart(qd);
316	// FetchDocStart (qd, pts[i]->DocNum, &pts[i]->SeekPos, &pts[i]->Len);
317	}
318
319	delete pts;
320	return (0);
321	}
322
323
324
325
326
327	int
328	LoadBuffers (query_data * qd, DocEntry * Docs, int max_mem, int num)
329	{
330	DocEntry **pts;
331	int i, j;
332	int mem;
333
334	if (!num)
335	return (0);
336	if (!(pts = new (DocEntry *)[num]))
337	{
338	mg_errno = MG_NOMEM;
339	return (-1);
340	}
341
342	mem = i = 0;
343	do
344	{
345	pts[i] = Docs;
346	mem += Docs->Len;
347	i++;
348	Docs++;
349	}
350	while (i < num && mem < max_mem);
351	if (i > 1)
352	qsort (pts, i, sizeof (DocEntry *), pts_comp);
353	for (j = 0; j < i; j++)
354	{
355	if (FetchCompressed (qd, &pts[j]->CompTextBuffer, pts[j]) == -1)
356	return (-1);
357	ChangeMemInUse (qd, pts[j]->Len);
358	}
359
360	delete pts;
361
362	return (i);
363	}
364
365
366	/**
367	*
368	* GRB: Function removed 21/09/99 - wasn't being used; in any case used DocEntry_FreeTextBuffers instead
369	*
370	*
371	void
372	FreeBuffers (query_data * qd, DocEntry * Docs, int num)
373	{
374	int i;
375
376	for (i = 0; i < num; i++, Docs++)
377	if (Docs->CompTextBuffer)
378	{
379	delete Docs->CompTextBuffer;
380	Docs->CompTextBuffer = NULL;
381	ChangeMemInUse (qd, -Docs->Len);
382	}
383
384	}
385	*/
386
387
388	/****************************************************************************/
389
390	static void
391	FreeAuxDict (auxiliary_dict * ad)
392	{
393	if (!ad)
394	return;
395	if (ad->word_data[0])
396	delete ad->word_data[0];
397	if (ad->word_data[1])
398	delete ad->word_data[1];
399	if (ad->words[0])
400	delete ad->words[0];
401	if (ad->words[1])
402	delete ad->words[1];
403	delete ad;
404	}
405
406	static auxiliary_dict *
407	LoadAuxDict (compression_dict * cd, File * text_aux_dict)
408	{
409	auxiliary_dict *ad;
410	int i;
411
412	if (!(ad = new auxiliary_dict))
413	{
414	mg_errno = MG_NOMEM;
415	return (NULL);
416	}
417
418	bzero ((char ) ad, sizeof (ad));
419
420	for (i = 0; i <= 1; i++)
421	{
422	int j;
423	u_char *pos;
424
425	Fread (&ad->afh[i], sizeof (aux_frags_header), 1, text_aux_dict);
426
427	/* [RPAP - Jan 97: Endian Ordering] */
428	NTOHUL(ad->afh[i].num_frags);
429	NTOHUL(ad->afh[i].mem_for_frags);
430
431	if (!(ad->word_data[i] = new u_char[ad->afh[i].mem_for_frags]))
432	{
433	mg_errno = MG_NOMEM;
434	FreeAuxDict (ad);
435	return (NULL);
436	}
437	if (!(ad->words[i] = new (u_char *)[ad->afh[i].num_frags]))
438	{
439	mg_errno = MG_NOMEM;
440	FreeAuxDict (ad);
441	return (NULL);
442	}
443
444	Fread (ad->word_data[i], ad->afh[i].mem_for_frags, sizeof (u_char),
445	text_aux_dict);
446
447	pos = ad->word_data[i];
448	for (j = 0; j < ad->afh[i].num_frags; j++)
449	{
450	ad->words[i][j] = pos;
451	pos += *pos + 1;
452	}
453	if (cd->cdh.novel_method == MG_NOVEL_HYBRID \|\|
454	cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
455	{
456	int num;
457	num = 1;
458	ad->blk_start[i][0] = 0;
459	ad->blk_end[i][0] = cd->cdh.num_words[i] - 1;
460	while (num < 33)
461	{
462	ad->blk_start[i][num] = ad->blk_end[i][num - 1] + 1;
463	ad->blk_end[i][num] = ad->blk_start[i][num] +
464	(ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
465	num++;
466	}
467	}
468	}
469	return (ad);
470	}
471
472
473
474
475
476
477	static u_char ***
478	ReadInWords (File * dict, compression_dict * cd,
479	comp_frags_header * cfh, u_char ** escape)
480	{
481	int i, lookback;
482	int ptrs_reqd = 0;
483	int mem_reqd = 0;
484	int num_set[MAX_HUFFCODE_LEN + 1];
485	u_char *next_word[MAX_HUFFCODE_LEN + 1];
486	u_char **vals;
487	u_char ***values;
488	u_char word[MAXWORDLEN + 1];
489	u_char last_word[MAX_HUFFCODE_LEN + 1][MAXWORDLEN + 1];
490
491	lookback = cd->cdh.lookback;
492
493	for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
494	{
495	ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
496	mem_reqd += cfh->huff_words_size[i];
497	}
498
499	if (!(vals = new (u_char *)[ptrs_reqd]))
500	return (NULL);
501
502	if (!(values = new (u_char **)[MAX_HUFFCODE_LEN + 1]))
503	return (NULL);
504
505	if (!(next_word[0] = new u_char[mem_reqd]))
506	return (NULL);
507
508	cd->MemForCompDict += ptrs_reqd * sizeof (*vals) +
509	(MAX_HUFFCODE_LEN + 1) * sizeof (u_char **) +
510	mem_reqd;
511
512	values[0] = vals;
513	values[0][0] = next_word[0];
514	for (i = 1; i <= cfh->hd.maxcodelen; i++)
515	{
516	int next_start = (values[i - 1] - vals) +
517	((cfh->hd.lencount[i - 1] + ((1 << lookback) - 1)) >> lookback);
518	values[i] = &vals[next_start];
519	next_word[i] = next_word[i - 1] + cfh->huff_words_size[i - 1];
520	values[i][0] = next_word[i];
521	}
522
523	bzero ((char *) num_set, sizeof (num_set));
524
525	for (i = 0; i < cfh->hd.num_codes; i++)
526	{
527	register int val, copy;
528	register int len = cfh->hd.clens[i];
529	val = Getc (dict);
530	copy = (val >> 4) & 0xf;
531	val &= 0xf;
532
533	Fread (word + copy + 1, sizeof (u_char), val, dict);
534	*word = val + copy;
535
536	if ((num_set[len] & ((1 << lookback) - 1)) == 0)
537	{
538	values[len][num_set[len] >> lookback] = next_word[len];
539	memcpy (next_word[len], word, *word + 1);
540	if (escape && i == cfh->hd.num_codes - 1)
541	*escape = next_word[len];
542	next_word[len] += *word + 1;
543	}
544	else
545	{
546	copy = prefixlen (last_word[len], word);
547	memcpy (next_word[len] + 1, word + copy + 1, *word - copy);
548	next_word[len] = (copy << 4) + (word - copy);
549	if (escape && i == cfh->hd.num_codes - 1)
550	*escape = next_word[len];
551	next_word[len] += (*word - copy) + 1;
552	}
553	memcpy (last_word[len], word, *word + 1);
554	num_set[len]++;
555	}
556	if (cfh->hd.clens)
557	delete cfh->hd.clens;
558	cfh->hd.clens = NULL;
559	return values;
560	}
561
562	int Load_Comp_HuffData(compression_dict cd, int which, File dict,
563	huff_type type)
564	{
565	huff_data * hd;
566	u_long ** vals;
567
568	if (!(hd = new huff_data))
569	return 1;
570	cd->MemForCompDict += sizeof (huff_data);
571	if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
572	return 2;
573	if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
574	return 3;
575	if (hd->clens)
576	delete hd->clens;
577	hd->clens = NULL;
578	if (type == chars)
579	{
580	cd->chars_huff[which] = hd;
581	cd->chars_vals[which] = vals;
582	}
583	else
584	{
585	cd->lens_huff[which] = hd;
586	cd->lens_vals[which] = vals;
587	}
588
589	return 0;
590	}
591
592	int Load_Comp_FragsHeader(compression_dict cd, int which, int getEscape, File dict)
593	{
594	if (!(cd->cfh[which] = new comp_frags_header))
595	return 1;
596	cd->MemForCompDict += sizeof (*cd->cfh[which]);
597	if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
598	return 2;
599
600	if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
601	getEscape == 0 ? NULL : &cd->escape[which])))
602	return 3;
603	return 0;
604	}
605
606	static compression_dict *
607	Load_Comp_Dict (File * dict, File * aux_dict)
608	{
609	int which;
610	compression_dict *cd;
611
612	if (!(cd = new compression_dict))
613	{
614	mg_errno = MG_NOMEM;
615	return (NULL);
616	}
617
618	bzero ((char *) cd, sizeof (compression_dict));
619
620	cd->MemForCompDict = sizeof (compression_dict);
621
622	if (F_Read_cdh (dict, &cd->cdh, &cd->MemForCompDict, NULL) == -1)
623	return NULL;
624
625	for (which = 0; which < 2; which++)
626	switch (cd->cdh.dict_type)
627	{
628	case MG_COMPLETE_DICTIONARY:
629	{
630	if (Load_Comp_FragsHeader(cd, which, 0, dict) != 0)
631	return NULL;
632	/*
633	if (!(cd->cfh[which] = (comp_frags_header ) Xmalloc (sizeof (cd->cfh[which]))))
634	return NULL;
635	cd->MemForCompDict += sizeof (*cd->cfh[which]);
636	if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
637	return NULL;
638
639	if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
640	NULL)))
641	return NULL;
642	*/
643	cd->escape[which] = NULL;
644
645	}
646	break;
647	case MG_PARTIAL_DICTIONARY:
648	{
649	huff_data *hd;
650	u_long **vals;
651	if (cd->cdh.num_words[which])
652	{
653	if (Load_Comp_FragsHeader(cd, which, 1, dict) != 0)
654	return NULL;
655	/*
656	if (!(cd->cfh[which] = (comp_frags_header ) Xmalloc (sizeof (cd->cfh[which]))))
657	return NULL;
658	cd->MemForCompDict += sizeof (*cd->cfh[which]);
659	if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
660	return NULL;
661
662	if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
663	&cd->escape[which])))
664	return NULL;
665	*/
666	}
667	if (Load_Comp_HuffData(cd, which, dict, chars) != 0)
668	return NULL;
669	/*
670	if (!(hd = (huff_data *) Xmalloc (sizeof (huff_data))))
671	return NULL;
672	cd->MemForCompDict += sizeof (huff_data);
673	if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
674	return NULL;
675	if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
676	return NULL;
677	if (hd->clens)
678	Xfree (hd->clens);
679	hd->clens = NULL;
680	cd->chars_huff[which] = hd;
681	cd->chars_vals[which] = vals;
682	*/
683	if (Load_Comp_HuffData(cd, which, dict, lengths) != 0)
684	return NULL;
685	/*
686	if (!(hd = (huff_data *) Xmalloc (sizeof (huff_data))))
687	return NULL;
688	cd->MemForCompDict += sizeof (huff_data);
689	if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
690	return NULL;
691	if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
692	return NULL;
693	cd->lens_huff[which] = hd;
694	cd->lens_vals[which] = vals;
695	if (hd->clens)
696	Xfree (hd->clens);
697	hd->clens = NULL;
698	*/
699	}
700	break;
701	case MG_SEED_DICTIONARY:
702	{
703	huff_data *hd;
704	u_long **vals;
705	if (cd->cdh.num_words[which])
706	{
707	if (Load_Comp_FragsHeader(cd, which, 1, dict))
708	return NULL;
709	/*
710	if (!(cd->cfh[which] = (comp_frags_header ) Xmalloc (sizeof (cd->cfh[which]))))
711	return NULL;
712	cd->MemForCompDict += sizeof (*cd->cfh[which]);
713	if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
714	return NULL;
715
716	if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
717	&cd->escape[which])))
718	return NULL;
719	*/
720	}
721	switch (cd->cdh.novel_method)
722	{
723	case MG_NOVEL_HUFFMAN_CHARS:
724	if (Load_Comp_HuffData(cd, which, dict, chars) != 0)
725	return NULL;
726	/*
727	if (!(hd = (huff_data *) Xmalloc (sizeof (huff_data))))
728	return NULL;
729	cd->MemForCompDict += sizeof (huff_data);
730	if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict,
731	NULL) == -1)
732	return NULL;
733	if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
734	return NULL;
735	cd->chars_huff[which] = hd;
736	cd->chars_vals[which] = vals;
737	if (hd->clens)
738	Xfree (hd->clens);
739	hd->clens = NULL;
740	*/
741
742	if (Load_Comp_HuffData(cd, which, dict, lengths) != 0)
743	return NULL;
744	/*
745	if (!(hd = (huff_data *) Xmalloc (sizeof (huff_data))))
746	return NULL;
747	cd->MemForCompDict += sizeof (huff_data);
748	if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict
749	,NULL) == -1)
750	return NULL;
751	if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
752	return NULL;
753	cd->lens_huff[which] = hd;
754	cd->lens_vals[which] = vals;
755	if (hd->clens)
756	Xfree (hd->clens);
757	hd->clens = NULL;
758	*/
759	break;
760	case MG_NOVEL_BINARY:
761	break;
762	case MG_NOVEL_DELTA:
763	break;
764	case MG_NOVEL_HYBRID:
765	break;
766	case MG_NOVEL_HYBRID_MTF:
767	break;
768	}
769	break;
770	}
771	}
772
773	if (cd->cdh.novel_method == MG_NOVEL_BINARY \|\|
774	cd->cdh.novel_method == MG_NOVEL_DELTA \|\|
775	cd->cdh.novel_method == MG_NOVEL_HYBRID \|\|
776	cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
777	{
778	if (!aux_dict)
779	{
780	mg_errno = MG_NOFILE;
781	FreeCompDict (cd);
782	return (NULL);
783	}
784
785	if (!(cd->ad = LoadAuxDict (cd, aux_dict)))
786	{
787	FreeCompDict (cd);
788	return (NULL);
789	}
790	}
791
792
793	mg_errno = MG_NOERROR;
794
795	cd->fast_loaded = 0;
796	return (cd);
797	}
798
799	#define WORDNO(p, base) ((((char)(p))-((char)(base)))/sizeof(u_char*))
800
801	#define IS_FIXUP(p) ((fixup[WORDNO(p,cd)/8] & (1<<(WORDNO(p, cd) & 7))) != 0)
802
803
804	static compression_dict *
805	Load_Fast_Comp_Dict (File * text_fast_comp_dict)
806	{
807	compression_dict *cd;
808	u_long p, end;
809	u_char *fixup;
810	u_long mem;
811	u_long fixup_mem;
812	int i; /* [RPAP - Jan 97: Endian Ordering] */
813
814	Fread (&mem, sizeof (mem), 1, text_fast_comp_dict);
815	NTOHUL(mem); /* [RPAP - Jan 97: Endian Ordering] */
816	Fread (&fixup_mem, sizeof (fixup_mem), 1, text_fast_comp_dict);
817	NTOHUL(fixup_mem); /* [RPAP - Jan 97: Endian Ordering] */
818	if (!(cd = (compression_dict *) Xmalloc (mem)))
819	{
820	mg_errno = MG_NOMEM;
821	return (NULL);
822	}
823
824	end = (u_long ) (((u_char ) cd) + mem);
825	Fread (cd, sizeof (u_char), mem, text_fast_comp_dict);
826
827	if (!(fixup = new u_char[fixup_mem]))
828	{
829	mg_errno = MG_NOMEM;
830	return (NULL);
831	}
832
833	Fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
834
835	for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
836	if (IS_FIXUP (p))
837	{
838	NTOHUL(p); / [RPAP - Jan 97: Endian Ordering] */
839	p = p + (u_long) cd;
840	}
841
842	/* [RPAP - Jan 97: Endian Ordering] */
843	/* cdh */
844	NTOHUL(cd->cdh.dict_type);
845	NTOHUL(cd->cdh.novel_method);
846	for (i = 0; i < TEXT_PARAMS; i++)
847	NTOHUL(cd->cdh.params[i]);
848	NTOHUL(cd->cdh.num_words[0]);
849	NTOHUL(cd->cdh.num_words[1]);
850	NTOHUL(cd->cdh.num_word_chars[0]);
851	NTOHUL(cd->cdh.num_word_chars[1]);
852	NTOHUL(cd->cdh.lookback);
853	/* cfh */
854	for (i = 0; i <= 1; i++)
855	{
856	int j;
857
858	NTOHSI(cd->cfh[i]->hd.num_codes);
859	NTOHSI(cd->cfh[i]->hd.mincodelen);
860	NTOHSI(cd->cfh[i]->hd.maxcodelen);
861	for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
862	{
863	NTOHSI(cd->cfh[i]->hd.lencount[j]);
864	NTOHUL(cd->cfh[i]->hd.min_code[j]);
865	}
866	NTOHUL(cd->cfh[i]->uncompressed_size);
867	for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
868	NTOHUL(cd->cfh[i]->huff_words_size[j]);
869	}
870	NTOHUL(cd->MemForCompDict);
871	/* ad */
872	if (cd->cdh.novel_method == MG_NOVEL_BINARY \|\|
873	cd->cdh.novel_method == MG_NOVEL_DELTA \|\|
874	cd->cdh.novel_method == MG_NOVEL_HYBRID \|\|
875	cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
876	for (i = 0; i <= 1; i++)
877	{
878	int j;
879
880	NTOHUL(cd->ad->afh[i].num_frags);
881	NTOHUL(cd->ad->afh[i].mem_for_frags);
882	for (j = 0; j < 33; j++)
883	{
884	NTOHSI(cd->ad->blk_start[i][j]);
885	NTOHSI(cd->ad->blk_end[i][j]);
886	}
887	}
888	NTOHSI(cd->fast_loaded);
889
890	delete fixup;
891	return (cd);
892	}
893
894
895
896
897	compression_dict *
898	LoadCompDict (File * text_comp_dict,
899	File * text_aux_dict,
900	File * text_fast_comp_dict)
901	{
902	return text_fast_comp_dict ?
903	Load_Fast_Comp_Dict (text_fast_comp_dict) :
904	Load_Comp_Dict (text_comp_dict, text_aux_dict);
905	}
906
907
908
909
910	void
911	FreeCompDict (compression_dict * cd)
912	{
913	int which;
914
915	if (cd->fast_loaded)
916	{
917	delete cd;
918	return;
919	}
920	for (which = 0; which < 2; which++)
921	{
922	if (cd->cfh[which])
923	delete cd->cfh[which];
924	if (cd->chars_huff[which])
925	delete cd->chars_huff[which];
926	if (cd->lens_huff[which])
927	delete cd->lens_huff[which];
928	if (cd->values[which])
929	{
930	delete (cd->values[which][0][0]); /* [RJM 07/98: Memory Leak] */
931	delete cd->values[which][0];
932	delete (cd->values[which]);
933	}
934	if (cd->chars_vals[which])
935	{
936	delete cd->chars_vals[which][0];
937	delete cd->chars_vals[which];
938	}
939	if (cd->lens_vals[which])
940	{
941	delete cd->lens_vals[which][0];
942	delete cd->lens_vals[which];
943	}
944	}
945	if (cd->ad)
946	FreeAuxDict (cd->ad);
947	delete cd;
948	}
949
950
951
952
953
/* MY_HUFF_DECODE (len, code, mcodes)
 * Decodes one Huffman code from the in-scope `buffer' object, one bit
 * at a time.  `mcodes' is the table of minimum codes indexed by code
 * length.  On exit `len' is the number of bits consumed and `code' is
 * the offset of the decoded code from the minimum code of that length.
 * There is deliberately no ';' after the closing while (0): the caller
 * supplies it, so the macro behaves as a single statement.
 */
#define MY_HUFF_DECODE(len, code, mcodes)                               \
  do {                                                                  \
    unsigned long *huff_min_code_ = (mcodes);                           \
    unsigned long *huff_mclen_ = huff_min_code_;                        \
    unsigned long huff_code_ = 0;                                       \
    do                                                                  \
      {                                                                 \
        huff_code_ += huff_code_ + buffer.bit();                        \
      }                                                                 \
    while (huff_code_ < *++huff_mclen_);                                \
    (len) = huff_mclen_ - huff_min_code_;                               \
    (code) = huff_code_ - *huff_mclen_;                                 \
  } while(0)
967
968
969	/#define DUMPDOC /
970
971	#define MAX_SWAPS 10000
972
973	int
974	DecodeText (compression_dict * cd,
975	u_char * s_in, int l_in, u_char * s_out, int *l_out)
976	{
977	auxiliary_dict *ad = cd->ad;
978	int which;
979	u_long num_bits, bits;
980	u_char *ptr = s_out;
981	static int num = 0;
982	u_long binary_start[2];
983	int novels_used[2];
984	int swaps[2][MAX_SWAPS];
985	novels_used[0] = novels_used[1] = 0;
986
987	{
988	unsigned char bf = s_in[l_in - 1];
989	num_bits = 1;
990	while ((bf & 1) != 1)
991	{
992	num_bits++;
993	bf >>= 1;
994	}
995	num_bits = l_in * 8 - num_bits;
996	}
997
998	DECODE_START (s_in, l_in)
999
1000	which = buffer.bit();
1001	bits = 1;
1002
1003	if (cd->cdh.novel_method == MG_NOVEL_BINARY)
1004	{
1005	binary_start[0] = buffer.delta_decode (&bits);
1006	binary_start[1] = buffer.delta_decode (&bits);
1007	}
1008
1009
1010	while (bits < num_bits)
1011	{
1012	register unsigned code, len;
1013	register int r;
1014	register u_char t, b = NULL;
1015	u_char word[MAXWORDLEN + 1];
1016
1017	#ifdef DUMPDOC
1018	printf ("\n%d %d ", bits, num_bits);
1019	#endif
1020	if (cd->cfh[which])
1021	{
1022	MY_HUFF_DECODE (len, code, cd->cfh[which]->hd.min_code);
1023	bits += len;
1024
1025	r = code & ((1 << cd->cdh.lookback) - 1);
1026	t = cd->values[which][len][code >> cd->cdh.lookback];
1027
1028	/* step through from base pointer */
1029	b = word + 1;
1030	while (r--)
1031	{
1032	register int copy = *t >> 4;
1033	memcpy (word + copy + 1, t + 1, *t & 0xf);
1034	word[0] = copy + (*t & 0xf);
1035	t += ((*t) & 0xf) + 1;
1036	}
1037	}
1038	else
1039	t = NULL;
1040	if (t == cd->escape[which])
1041	{
1042	switch (cd->cdh.novel_method)
1043	{
1044	case MG_NOVEL_HUFFMAN_CHARS:
1045	{
1046	int len, i;
1047	int c;
1048	len = buffer.huff_decode(cd->lens_huff[which]->min_code,
1049	cd->lens_vals[which], &bits);
1050	for (i = 0; i < len; i++)
1051	{
1052	c = buffer.huff_decode(cd->chars_huff[which]->min_code,
1053	cd->chars_vals[which], &bits);
1054	*ptr++ = c;
1055	}
1056	}
1057	break;
1058	case MG_NOVEL_BINARY:
1059	case MG_NOVEL_DELTA:
1060	case MG_NOVEL_HYBRID:
1061	case MG_NOVEL_HYBRID_MTF:
1062	{
1063	int idx = 0, len;
1064	u_char *base;
1065	switch (cd->cdh.novel_method)
1066	{
1067	case MG_NOVEL_BINARY:
1068	{
1069	idx = buffer.binary_decode(binary_start[which], &bits);
1070	if (idx == binary_start[which])
1071	binary_start[which]++;
1072	idx--;
1073	}
1074	break;
1075	case MG_NOVEL_DELTA:
1076	{
1077	idx = buffer.delta_decode (&bits);
1078	idx--;
1079	}
1080	break;
1081	case MG_NOVEL_HYBRID:
1082	{
1083	int k;
1084	k = buffer.gamma_decode (&bits);
1085	k--;
1086	idx = buffer.binary_decode(ad->blk_end[which][k] -
1087	ad->blk_start[which][k] + 1,
1088	&bits);
1089	idx += ad->blk_start[which][k] - 1;
1090	}
1091	break;
1092	case MG_NOVEL_HYBRID_MTF:
1093	{
1094	int k;
1095	k = buffer.gamma_decode (&bits);
1096	k--;
1097	idx = buffer.binary_decode(ad->blk_end[which][k] -
1098	ad->blk_start[which][k] + 1,
1099	&bits);
1100	idx += ad->blk_start[which][k] - 1;
1101	if (idx >= novels_used[which])
1102	{
1103	u_char *temp;
1104	temp = ad->words[which][idx];
1105	ad->words[which][idx] =
1106	ad->words[which][novels_used[which]];
1107	ad->words[which][novels_used[which]] = temp;
1108	swaps[which][novels_used[which]] = idx;
1109	idx = novels_used[which]++;
1110	}
1111	}
1112	break;
1113	}
1114	base = ad->words[which][idx];
1115	len = *base++;
1116	#ifdef DUMPDOC
1117	printf ("[[");
1118	#endif
1119	for (; len; len--)
1120	{
1121	ptr++ = base++;
1122	#ifdef DUMPDOC
1123	putchar (*(base - 1));
1124	#endif
1125	}
1126	#ifdef DUMPDOC
1127	printf ("]]");
1128	#endif
1129	}
1130	break;
1131	}
1132	}
1133	else
1134	{
1135	/* copy over the matching prefix */
1136	r = (*t >> 4);
1137	while (r--)
1138	#ifndef DUMPDOC
1139	ptr++ = b++;
1140	#else
1141	{
1142	ptr = b++;
1143	putchar (*ptr);
1144	ptr++;
1145	}
1146	#endif
1147
1148	/* and the stored suffix */
1149	r = ((*t) & 0xf);
1150	while (r--)
1151	#ifndef DUMPDOC
1152	ptr++ = ++t;
1153	#else
1154	{
1155	ptr = ++t;
1156	putchar (*ptr);
1157	ptr++;
1158	}
1159	#endif
1160	}
1161	which = !which;
1162	}
1163
1164	DECODE_DONE
1165
1166	* l_out = ptr - s_out;
1167	num += *l_out + 1;
1168
1169	if (cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
1170	for (which = 0; which <= 1; which++)
1171	for (novels_used[which]--; novels_used[which] >= 0; novels_used[which]--)
1172	{
1173	int a = novels_used[which];
1174	int b = swaps[which][novels_used[which]];
1175	u_char *temp;
1176	temp = ad->words[which][a];
1177	ad->words[which][a] = ad->words[which][b];
1178	ad->words[which][b] = temp;
1179	}
1180	return (COMPALLOK);
1181	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: