source: trunk/gsdl/packages/mg/src/text/text_get.c@ 1014

Last change on this file since 1014 was 439, checked in by sjboddie, 25 years ago

renamed mg-1.3d directory mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.3 KB
Line 
1/**************************************************************************
2 *
3 * text_get.c -- Function for reading documents from the compressed text
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: text_get.c 439 1999-08-10 21:23:37Z sjboddie $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "filestats.h"
28#include "timing.h"
29#include "messages.h"
30#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
31
32#include "huffman.h"
33#include "bitio_m_mem.h"
34#include "bitio_m.h"
35#include "bitio_stdio.h"
36#include "huffman_stdio.h"
37
38#include "mg.h"
39#include "invf.h"
40#include "text.h"
41#include "lists.h"
42#include "backend.h"
43#include "text_get.h"
44#include "locallib.h"
45#include "words.h"
46#include "mg_errors.h"
47#include "local_strings.h"
48
49
50/*
51 $Log$
52 Revision 1.1 1999/08/10 21:18:26 sjboddie
53 renamed mg-1.3d directory mg
54
55 Revision 1.1 1998/11/17 09:35:48 rjmcnab
56 *** empty log message ***
57
58 * Revision 1.3 1994/10/20 03:57:11 tes
59 * I have rewritten the boolean query optimiser and abstracted out the
60 * components of the boolean query.
61 *
62 * Revision 1.2 1994/09/20 04:42:15 tes
63 * For version 1.1
64 *
65 */
66
/* Revision identification string for this file (expanded "$Id$" keyword).
 * Declared as pointer-to-const because it refers to a string literal, which
 * must never be written through. */
static const char *RCSID = "$Id: text_get.c 439 1999-08-10 21:23:37Z sjboddie $";
68
69
70
71
72
73/* FetchDocStart ()
74 * Reads into DocEnt the starting position of the document in the *.text file
75 * Where the first document is document number 1
76 * It returns the true weight of the document.
77 */
78
79
80
81
82static double
83FetchDocStartLev1 (text_data * td, u_long DN,
84 u_long * seek_pos, u_long * len)
85{
86 unsigned long data[2];
87 /* [TS:Sep/94] Fixed up the seek call to give the correct offset */
88 Fseek (td->TextIdxFile,
89 sizeof (unsigned long) * (DN - 1) + /* the doc offsets */
90 sizeof (unsigned long) + /* the magic number */
91 sizeof (compressed_text_header), /* the header */
92 0);
93 Fread ((char *) &data, sizeof (data), 1, td->TextIdxFile);
94
95 /* [RPAP - Jan 97: Endian Ordering] */
96 NTOHUL(data[0]);
97 NTOHUL(data[1]);
98
99 *seek_pos = data[0];
100 *len = data[1] - data[0];
101 return (1.0);
102}
103
104#define MG_PAGE_SIZE 2048
105
106static int
107LoadIdx (text_data * td, unsigned long DN)
108{
109 if (!td->idx_data)
110 {
111 td->idx_data = Xmalloc (sizeof (*(td->idx_data)) * MG_PAGE_SIZE);
112 if (!td->idx_data)
113 FatalError (1, "Out of memory in FDSL2");
114 }
115 if (td->current_pos == -1 || DN >= td->current_pos + MG_PAGE_SIZE - 1 ||
116 DN < td->current_pos)
117 {
118 int i, num; /* [RPAP - Jan 97: Endian Ordering] */
119
120 long rn = (long) DN - (MG_PAGE_SIZE >> 1);
121 if (rn < 1)
122 rn = 1;
123 Fseek (td->TextIdxWgtFile, (sizeof (unsigned long) + sizeof (float)) *
124 (rn - 1) + sizeof (unsigned long), 0);
125 num = Fread ((char *) td->idx_data, sizeof (*(td->idx_data)), MG_PAGE_SIZE, /* [RPAP - Jan 97: Endian Ordering] */
126 td->TextIdxWgtFile);
127
128 /* [RPAP - Jan 97: Endian Ordering] */
129 for (i = 0; i < num; i++)
130 {
131 NTOHUL(td->idx_data[i].Start);
132 NTOHF(td->idx_data[i].Weight);
133 }
134
135 td->current_pos = rn;
136 }
137 return DN - td->current_pos;
138}
139
140static double
141FDSL2 (text_data * td, unsigned long DN, unsigned long *Pos)
142{
143 unsigned long pos = LoadIdx (td, DN);
144 *Pos = td->idx_data[pos].Start;
145 return (td->idx_data[pos].Weight);
146}
147
148
149static double
150FetchDocStartLev2 (text_data * td, u_long DN,
151 u_long * seek_pos, u_long * len)
152{
153 double Weight;
154 unsigned long s1, s2;
155 Weight = FDSL2 (td, DN, &s1);
156 do
157 {
158 DN++;
159 FDSL2 (td, DN, &s2);
160 }
161 while (s2 == s1);
162 *seek_pos = s1;
163 *len = s2 - s1;
164 return (Weight);
165}
166
167
168
169
170double
171FetchDocStart (query_data * qd, u_long DN, u_long * seek_pos, u_long * len)
172{
173 qd->text_idx_lookups++;
174 if (qd->td->TextIdxWgtFile)
175 return FetchDocStartLev2 (qd->td, DN, seek_pos, len);
176 else
177 return FetchDocStartLev1 (qd->td, DN, seek_pos, len);
178}
179
180unsigned long
181FetchInitialParagraph (text_data * td, unsigned long ParaNum)
182{
183 if (td->TextIdxWgtFile)
184 {
185 unsigned long pos;
186 unsigned long start;
187 int PN = ParaNum - 1;
188 pos = LoadIdx (td, ParaNum);
189 start = td->idx_data[pos].Start;
190 while (PN > 0)
191 {
192 pos = LoadIdx (td, PN);
193 if (td->idx_data[pos].Start != start)
194 return PN + 1;
195 PN--;
196 }
197 return PN + 1;
198 }
199 else
200 return ParaNum;
201}
202
203
204
205/* FetchCompressed ()
206 * Reads into buffer DocBuff the compressed form of document DocNum.
207 * Where the first document is document number 1
208 */
209int
210FetchCompressed (query_data * qd, char **DocBuff, DocEntry * DocEnt)
211{
212 if (!DocEnt->SeekPos)
213 FetchDocStart (qd, DocEnt->DocNum, &DocEnt->SeekPos, &DocEnt->Len);
214 if (!(*DocBuff = Xmalloc (DocEnt->Len)))
215 return (-1);
216
217 if (Fseek (qd->td->TextFile, DocEnt->SeekPos, 0) == -1)
218 FatalError (1, "Error when seeking into text file");
219#if 0
220 printf ("Loading compressed text %d %d\n", DocEnt->SeekPos, DocEnt->Len);
221#endif
222 if (Fread (*DocBuff, 1, DocEnt->Len, qd->td->TextFile) != DocEnt->Len)
223 FatalError (1, "Error when reading data");
224
225 return (DocEnt->Len);
226
227}
228
229
230text_data *
231LoadTextData (File * text, File * text_idx_wgt, File * text_idx)
232{
233 text_data *td;
234
235 if (!(td = Xmalloc (sizeof (text_data))))
236 {
237 mg_errno = MG_NOMEM;
238 return (NULL);
239 }
240
241 td->TextFile = text;
242 td->TextIdxWgtFile = text_idx_wgt;
243 td->TextIdxFile = text_idx;
244 td->current_pos = -1;
245 td->idx_data = NULL;
246 Fread (&td->cth, sizeof (td->cth), 1, text);
247
248 /* [RPAP - Jan 97: Endian Ordering] */
249 NTOHUL(td->cth.num_of_docs);
250 NTOHD(td->cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
251 NTOHUL(td->cth.num_of_words);
252 NTOHUL(td->cth.length_of_longest_doc);
253 NTOHD(td->cth.ratio);
254
255 return (td);
256}
257
258
259void
260FreeTextData (text_data * td)
261{
262 if (td)
263 {
264 if (td->idx_data)
265 Xfree (td->idx_data);
266 Xfree (td);
267 }
268}
269
270
271static int
272pts_comp (const void *A, const void *B)
273{
274 const DocEntry *const *a = A;
275 const DocEntry *const *b = B;
276 return (*a)->DocNum - (*b)->DocNum;
277}
278
279
280
281
282int
283GetPosLens (query_data * qd, DocEntry * Docs, int num)
284{
285 DocEntry **pts;
286 int i, j;
287 if (!(pts = Xmalloc (num * sizeof (DocEntry *))))
288 {
289 mg_errno = MG_NOMEM;
290 return (-1);
291 }
292 for (i = j = 0; i < num; i++, Docs++)
293 if (!Docs->SeekPos)
294 pts[j++] = Docs;
295
296 if (j)
297 {
298 qsort (pts, j, sizeof (DocEntry *), pts_comp);
299 for (i = 0; i < j; i++)
300 FetchDocStart (qd, pts[i]->DocNum, &pts[i]->SeekPos, &pts[i]->Len);
301 }
302
303 Xfree (pts);
304 return (0);
305}
306
307
308
309
310
311int
312LoadBuffers (query_data * qd, DocEntry * Docs, int max_mem, int num)
313{
314 DocEntry **pts;
315 int i, j;
316 int mem;
317
318 if (!num)
319 return (0);
320 if (!(pts = Xmalloc (num * sizeof (DocEntry *))))
321 {
322 mg_errno = MG_NOMEM;
323 return (-1);
324 }
325
326 mem = i = 0;
327 do
328 {
329 pts[i] = Docs;
330 mem += Docs->Len;
331 i++;
332 Docs++;
333 }
334 while (i < num && mem < max_mem);
335 if (i > 1)
336 qsort (pts, i, sizeof (DocEntry *), pts_comp);
337 for (j = 0; j < i; j++)
338 {
339 if (FetchCompressed (qd, &pts[j]->CompTextBuffer, pts[j]) == -1)
340 return (-1);
341 ChangeMemInUse (qd, pts[j]->Len);
342 }
343
344 Xfree (pts);
345
346 return (i);
347}
348
349
350
351
352
353void
354FreeBuffers (query_data * qd, DocEntry * Docs, int num)
355{
356 int i;
357 for (i = 0; i < num; i++, Docs++)
358 if (Docs->CompTextBuffer)
359 {
360 Xfree (Docs->CompTextBuffer);
361 Docs->CompTextBuffer = NULL;
362 ChangeMemInUse (qd, -Docs->Len);
363 }
364}
365
366
367
368/****************************************************************************/
369
370static void
371FreeAuxDict (auxiliary_dict * ad)
372{
373 if (!ad)
374 return;
375 if (ad->word_data[0])
376 Xfree (ad->word_data[0]);
377 if (ad->word_data[1])
378 Xfree (ad->word_data[1]);
379 if (ad->words[0])
380 Xfree (ad->words[0]);
381 if (ad->words[1])
382 Xfree (ad->words[1]);
383 Xfree (ad);
384}
385
386static auxiliary_dict *
387LoadAuxDict (compression_dict * cd, File * text_aux_dict)
388{
389 auxiliary_dict *ad;
390 int i;
391
392 if (!(ad = Xmalloc (sizeof (auxiliary_dict))))
393 {
394 mg_errno = MG_NOMEM;
395 return (NULL);
396 }
397
398 bzero ((char *) ad, sizeof (*ad));
399
400 for (i = 0; i <= 1; i++)
401 {
402 int j;
403 u_char *pos;
404
405 Fread (&ad->afh[i], sizeof (aux_frags_header), 1, text_aux_dict);
406
407 /* [RPAP - Jan 97: Endian Ordering] */
408 NTOHUL(ad->afh[i].num_frags);
409 NTOHUL(ad->afh[i].mem_for_frags);
410
411 if (!(ad->word_data[i] = Xmalloc (ad->afh[i].mem_for_frags)))
412 {
413 mg_errno = MG_NOMEM;
414 FreeAuxDict (ad);
415 return (NULL);
416 }
417 if (!(ad->words[i] = Xmalloc (ad->afh[i].num_frags * sizeof (u_char *))))
418 {
419 mg_errno = MG_NOMEM;
420 FreeAuxDict (ad);
421 return (NULL);
422 }
423
424 Fread (ad->word_data[i], ad->afh[i].mem_for_frags, sizeof (u_char),
425 text_aux_dict);
426
427 pos = ad->word_data[i];
428 for (j = 0; j < ad->afh[i].num_frags; j++)
429 {
430 ad->words[i][j] = pos;
431 pos += *pos + 1;
432 }
433 if (cd->cdh.novel_method == MG_NOVEL_HYBRID ||
434 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
435 {
436 int num;
437 num = 1;
438 ad->blk_start[i][0] = 0;
439 ad->blk_end[i][0] = cd->cdh.num_words[i] - 1;
440 while (num < 33)
441 {
442 ad->blk_start[i][num] = ad->blk_end[i][num - 1] + 1;
443 ad->blk_end[i][num] = ad->blk_start[i][num] +
444 (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
445 num++;
446 }
447 }
448 }
449 return (ad);
450}
451
452
453
454
455
456
457static u_char ***
458ReadInWords (File * dict, compression_dict * cd,
459 comp_frags_header * cfh, u_char ** escape)
460{
461 int i, lookback;
462 int ptrs_reqd = 0;
463 int mem_reqd = 0;
464 int num_set[MAX_HUFFCODE_LEN + 1];
465 u_char *next_word[MAX_HUFFCODE_LEN + 1];
466 u_char **vals;
467 u_char ***values;
468 u_char word[MAXWORDLEN + 1];
469 u_char last_word[MAX_HUFFCODE_LEN + 1][MAXWORDLEN + 1];
470
471 lookback = cd->cdh.lookback;
472
473 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
474 {
475 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
476 mem_reqd += cfh->huff_words_size[i];
477 }
478
479 if (!(vals = Xmalloc (ptrs_reqd * sizeof (*vals))))
480 return (NULL);
481
482 if (!(values = Xmalloc ((MAX_HUFFCODE_LEN + 1) * sizeof (u_char **))))
483 return (NULL);
484
485 if (!(next_word[0] = Xmalloc (mem_reqd))) return (NULL);
486
487 cd->MemForCompDict += ptrs_reqd * sizeof (*vals) +
488 (MAX_HUFFCODE_LEN + 1) * sizeof (u_char **) +
489 mem_reqd;
490
491 values[0] = vals;
492 values[0][0] = next_word[0];
493 for (i = 1; i <= cfh->hd.maxcodelen; i++)
494 {
495 int next_start = (values[i - 1] - vals) +
496 ((cfh->hd.lencount[i - 1] + ((1 << lookback) - 1)) >> lookback);
497 values[i] = &vals[next_start];
498 next_word[i] = next_word[i - 1] + cfh->huff_words_size[i - 1];
499 values[i][0] = next_word[i];
500 }
501
502 bzero ((char *) num_set, sizeof (num_set));
503
504 for (i = 0; i < cfh->hd.num_codes; i++)
505 {
506 register int val, copy;
507 register int len = cfh->hd.clens[i];
508 val = Getc (dict);
509 copy = (val >> 4) & 0xf;
510 val &= 0xf;
511
512 Fread (word + copy + 1, sizeof (u_char), val, dict);
513 *word = val + copy;
514
515 if ((num_set[len] & ((1 << lookback) - 1)) == 0)
516 {
517 values[len][num_set[len] >> lookback] = next_word[len];
518 memcpy (next_word[len], word, *word + 1);
519 if (escape && i == cfh->hd.num_codes - 1)
520 *escape = next_word[len];
521 next_word[len] += *word + 1;
522 }
523 else
524 {
525 copy = prefixlen (last_word[len], word);
526 memcpy (next_word[len] + 1, word + copy + 1, *word - copy);
527 *next_word[len] = (copy << 4) + (*word - copy);
528 if (escape && i == cfh->hd.num_codes - 1)
529 *escape = next_word[len];
530 next_word[len] += (*word - copy) + 1;
531 }
532 memcpy (last_word[len], word, *word + 1);
533 num_set[len]++;
534 }
535 if (cfh->hd.clens)
536 Xfree (cfh->hd.clens);
537 cfh->hd.clens = NULL;
538 return values;
539}
540
541
542static compression_dict *
543Load_Comp_Dict (File * dict, File * aux_dict)
544{
545 int which;
546 compression_dict *cd;
547
548 if (!(cd = Xmalloc (sizeof (compression_dict))))
549 {
550 mg_errno = MG_NOMEM;
551 return (NULL);
552 }
553
554 bzero ((char *) cd, sizeof (compression_dict));
555
556 cd->MemForCompDict = sizeof (compression_dict);
557
558 if (F_Read_cdh (dict, &cd->cdh, &cd->MemForCompDict, NULL) == -1)
559 return NULL;
560
561 for (which = 0; which < 2; which++)
562 switch (cd->cdh.dict_type)
563 {
564 case MG_COMPLETE_DICTIONARY:
565 {
566 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
567 return NULL;
568 cd->MemForCompDict += sizeof (*cd->cfh[which]);
569 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
570 return NULL;
571
572 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
573 NULL)))
574 return NULL;
575 cd->escape[which] = NULL;
576
577 }
578 break;
579 case MG_PARTIAL_DICTIONARY:
580 {
581 huff_data *hd;
582 u_long **vals;
583 if (cd->cdh.num_words[which])
584 {
585 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
586 return NULL;
587 cd->MemForCompDict += sizeof (*cd->cfh[which]);
588 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
589 return NULL;
590
591 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
592 &cd->escape[which])))
593 return NULL;
594 }
595 if (!(hd = Xmalloc (sizeof (huff_data))))
596 return NULL;
597 cd->MemForCompDict += sizeof (huff_data);
598 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
599 return NULL;
600 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
601 return NULL;
602 if (hd->clens)
603 Xfree (hd->clens);
604 hd->clens = NULL;
605 cd->chars_huff[which] = hd;
606 cd->chars_vals[which] = vals;
607 if (!(hd = Xmalloc (sizeof (huff_data))))
608 return NULL;
609 cd->MemForCompDict += sizeof (huff_data);
610 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
611 return NULL;
612 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
613 return NULL;
614 cd->lens_huff[which] = hd;
615 cd->lens_vals[which] = vals;
616 if (hd->clens)
617 Xfree (hd->clens);
618 hd->clens = NULL;
619 }
620 break;
621 case MG_SEED_DICTIONARY:
622 {
623 huff_data *hd;
624 u_long **vals;
625 if (cd->cdh.num_words[which])
626 {
627 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
628 return NULL;
629 cd->MemForCompDict += sizeof (*cd->cfh[which]);
630 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
631 return NULL;
632
633 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
634 &cd->escape[which])))
635 return NULL;
636 }
637 switch (cd->cdh.novel_method)
638 {
639 case MG_NOVEL_HUFFMAN_CHARS:
640 if (!(hd = Xmalloc (sizeof (huff_data))))
641 return NULL;
642 cd->chars_huff[which] = hd;
643 cd->MemForCompDict += sizeof (huff_data);
644 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict,
645 NULL) == -1)
646 return NULL;
647 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
648 return NULL;
649 cd->chars_vals[which] = vals;
650 if (hd->clens)
651 Xfree (hd->clens);
652 hd->clens = NULL;
653 if (!(hd = Xmalloc (sizeof (huff_data))))
654 return NULL;
655 cd->MemForCompDict += sizeof (huff_data);
656 cd->lens_huff[which] = hd;
657 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict
658 ,NULL) == -1)
659 return NULL;
660 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
661 return NULL;
662 cd->lens_vals[which] = vals;
663 if (hd->clens)
664 Xfree (hd->clens);
665 hd->clens = NULL;
666 break;
667 case MG_NOVEL_BINARY:
668 break;
669 case MG_NOVEL_DELTA:
670 break;
671 case MG_NOVEL_HYBRID:
672 break;
673 case MG_NOVEL_HYBRID_MTF:
674 break;
675 }
676 break;
677 }
678 }
679
680 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
681 cd->cdh.novel_method == MG_NOVEL_DELTA ||
682 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
683 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
684 {
685 if (!aux_dict)
686 {
687 mg_errno = MG_NOFILE;
688 FreeCompDict (cd);
689 return (NULL);
690 }
691
692 if (!(cd->ad = LoadAuxDict (cd, aux_dict)))
693 {
694 FreeCompDict (cd);
695 return (NULL);
696 }
697 }
698
699
700 mg_errno = MG_NOERROR;
701
702 cd->fast_loaded = 0;
703 return (cd);
704}
705
706#define WORDNO(p, base) ((((char*)(p))-((char*)(base)))/sizeof(u_char*))
707
708#define IS_FIXUP(p) ((fixup[WORDNO(p,cd)/8] & (1<<(WORDNO(p, cd) & 7))) != 0)
709
710
711static compression_dict *
712Load_Fast_Comp_Dict (File * text_fast_comp_dict)
713{
714 compression_dict *cd;
715 u_long *p, *end;
716 u_char *fixup;
717 u_long mem;
718 u_long fixup_mem;
719 int i; /* [RPAP - Jan 97: Endian Ordering] */
720
721 Fread (&mem, sizeof (mem), 1, text_fast_comp_dict);
722 NTOHUL(mem); /* [RPAP - Jan 97: Endian Ordering] */
723 Fread (&fixup_mem, sizeof (fixup_mem), 1, text_fast_comp_dict);
724 NTOHUL(fixup_mem); /* [RPAP - Jan 97: Endian Ordering] */
725 if (!(cd = Xmalloc (mem)))
726 {
727 mg_errno = MG_NOMEM;
728 return (NULL);
729 }
730
731 end = (u_long *) (((u_char *) cd) + mem);
732 Fread (cd, sizeof (u_char), mem, text_fast_comp_dict);
733
734 if (!(fixup = Xmalloc (fixup_mem)))
735 {
736 mg_errno = MG_NOMEM;
737 return (NULL);
738 }
739
740 Fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
741
742 for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
743 if (IS_FIXUP (p))
744 {
745 NTOHUL(*p); /* [RPAP - Jan 97: Endian Ordering] */
746 *p = *p + (u_long) cd;
747 }
748
749 /* [RPAP - Jan 97: Endian Ordering] */
750 /* cdh */
751 NTOHUL(cd->cdh.dict_type);
752 NTOHUL(cd->cdh.novel_method);
753 for (i = 0; i < TEXT_PARAMS; i++)
754 NTOHUL(cd->cdh.params[i]);
755 NTOHUL(cd->cdh.num_words[0]);
756 NTOHUL(cd->cdh.num_words[1]);
757 NTOHUL(cd->cdh.num_word_chars[0]);
758 NTOHUL(cd->cdh.num_word_chars[1]);
759 NTOHUL(cd->cdh.lookback);
760 /* cfh */
761 for (i = 0; i <= 1; i++)
762 {
763 int j;
764
765 NTOHSI(cd->cfh[i]->hd.num_codes);
766 NTOHSI(cd->cfh[i]->hd.mincodelen);
767 NTOHSI(cd->cfh[i]->hd.maxcodelen);
768 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
769 {
770 NTOHSI(cd->cfh[i]->hd.lencount[j]);
771 NTOHUL(cd->cfh[i]->hd.min_code[j]);
772 }
773 NTOHUL(cd->cfh[i]->uncompressed_size);
774 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
775 NTOHUL(cd->cfh[i]->huff_words_size[j]);
776 }
777 NTOHUL(cd->MemForCompDict);
778 /* ad */
779 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
780 cd->cdh.novel_method == MG_NOVEL_DELTA ||
781 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
782 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
783 for (i = 0; i <= 1; i++)
784 {
785 int j;
786
787 NTOHUL(cd->ad->afh[i].num_frags);
788 NTOHUL(cd->ad->afh[i].mem_for_frags);
789 for (j = 0; j < 33; j++)
790 {
791 NTOHSI(cd->ad->blk_start[i][j]);
792 NTOHSI(cd->ad->blk_end[i][j]);
793 }
794 }
795 NTOHSI(cd->fast_loaded);
796
797 Xfree (fixup);
798 return (cd);
799}
800
801
802
803
804compression_dict *
805LoadCompDict (File * text_comp_dict,
806 File * text_aux_dict,
807 File * text_fast_comp_dict)
808{
809 return text_fast_comp_dict ?
810 Load_Fast_Comp_Dict (text_fast_comp_dict) :
811 Load_Comp_Dict (text_comp_dict, text_aux_dict);
812}
813
814
815
816
817void
818FreeCompDict (compression_dict * cd)
819{
820 int which;
821 if (cd->fast_loaded)
822 {
823 Xfree (cd);
824 return;
825 }
826 for (which = 0; which < 2; which++)
827 {
828 if (cd->cfh[which])
829 Xfree (cd->cfh[which]);
830 if (cd->chars_huff[which])
831 Xfree (cd->chars_huff[which]);
832 if (cd->lens_huff[which])
833 Xfree (cd->lens_huff[which]);
834 if (cd->values[which])
835 {
836 Xfree (cd->values[which][0][0]); /* [RJM 07/98: Memory Leak] */
837 Xfree (cd->values[which][0]);
838 Xfree (cd->values[which]);
839 }
840 if (cd->chars_vals[which])
841 {
842 Xfree (cd->chars_vals[which][0]);
843 Xfree (cd->chars_vals[which]);
844 }
845 if (cd->lens_vals[which])
846 {
847 Xfree (cd->lens_vals[which][0]);
848 Xfree (cd->lens_vals[which]);
849 }
850 }
851 if (cd->ad)
852 FreeAuxDict (cd->ad);
853 Xfree (cd);
854}
855
856
857
858
859
860#define MY_HUFF_DECODE(len, code, mcodes) \
861 do { \
862 register unsigned long *__min_code = (mcodes); \
863 register unsigned long *__mclen = __min_code; \
864 register unsigned long __code = 0; \
865 do \
866 { \
867 DECODE_ADD(__code); \
868 } \
869 while (__code < *++__mclen); \
870 (len) = __mclen - __min_code; \
871 (code) = __code - *__mclen; \
872 } while(0);
873
874
875/*#define DUMPDOC */
876
877#define MAX_SWAPS 10000
878
879int
880DecodeText (compression_dict * cd,
881 u_char * s_in, int l_in, u_char * s_out, int *l_out)
882{
883 auxiliary_dict *ad = cd->ad;
884 int which;
885 u_long num_bits, bits;
886 u_char *ptr = s_out;
887 static int num = 0;
888 u_long binary_start[2];
889 int novels_used[2];
890 int swaps[2][MAX_SWAPS];
891 novels_used[0] = novels_used[1] = 0;
892
893 {
894 unsigned char bf = s_in[l_in - 1];
895 num_bits = 1;
896 while ((bf & 1) != 1)
897 {
898 num_bits++;
899 bf >>= 1;
900 }
901 num_bits = l_in * 8 - num_bits;
902 }
903
904 DECODE_START (s_in, l_in)
905
906 which = DECODE_BIT;
907 bits = 1;
908
909 if (cd->cdh.novel_method == MG_NOVEL_BINARY)
910 {
911 DELTA_DECODE_L (binary_start[0], bits);
912 DELTA_DECODE_L (binary_start[1], bits);
913 }
914
915
916 while (bits < num_bits)
917 {
918 register unsigned code, len;
919 register int r;
920 register u_char *t, *b = NULL;
921 u_char word[MAXWORDLEN + 1];
922
923#ifdef DUMPDOC
924 printf ("\n%d %d ", bits, num_bits);
925#endif
926 if (cd->cfh[which])
927 {
928 MY_HUFF_DECODE (len, code, cd->cfh[which]->hd.min_code);
929 bits += len;
930
931 r = code & ((1 << cd->cdh.lookback) - 1);
932 t = cd->values[which][len][code >> cd->cdh.lookback];
933
934 /* step through from base pointer */
935 b = word + 1;
936 while (r--)
937 {
938 register int copy = *t >> 4;
939 memcpy (word + copy + 1, t + 1, *t & 0xf);
940 word[0] = copy + (*t & 0xf);
941 t += ((*t) & 0xf) + 1;
942 }
943 }
944 else
945 t = NULL;
946 if (t == cd->escape[which])
947 {
948 switch (cd->cdh.novel_method)
949 {
950 case MG_NOVEL_HUFFMAN_CHARS:
951 {
952 int len, i;
953 int c;
954 HUFF_DECODE_L (len, cd->lens_huff[which]->min_code,
955 cd->lens_vals[which], bits);
956 for (i = 0; i < len; i++)
957 {
958 HUFF_DECODE_L (c, cd->chars_huff[which]->min_code,
959 cd->chars_vals[which], bits);
960 *ptr++ = c;
961 }
962 }
963 break;
964 case MG_NOVEL_BINARY:
965 case MG_NOVEL_DELTA:
966 case MG_NOVEL_HYBRID:
967 case MG_NOVEL_HYBRID_MTF:
968 {
969 int idx = 0, len;
970 u_char *base;
971 switch (cd->cdh.novel_method)
972 {
973 case MG_NOVEL_BINARY:
974 {
975 BINARY_DECODE_L (idx, binary_start[which], bits);
976 if (idx == binary_start[which])
977 binary_start[which]++;
978 idx--;
979 }
980 break;
981 case MG_NOVEL_DELTA:
982 {
983 DELTA_DECODE_L (idx, bits);
984 idx--;
985 }
986 break;
987 case MG_NOVEL_HYBRID:
988 {
989 int k;
990 GAMMA_DECODE_L (k, bits);
991 k--;
992 BINARY_DECODE_L (idx,
993 ad->blk_end[which][k] -
994 ad->blk_start[which][k] + 1, bits);
995 idx += ad->blk_start[which][k] - 1;
996 }
997 break;
998 case MG_NOVEL_HYBRID_MTF:
999 {
1000 int k;
1001 GAMMA_DECODE_L (k, bits);
1002 k--;
1003 BINARY_DECODE_L (idx,
1004 ad->blk_end[which][k] -
1005 ad->blk_start[which][k] + 1, bits);
1006 idx += ad->blk_start[which][k] - 1;
1007 if (idx >= novels_used[which])
1008 {
1009 u_char *temp;
1010 temp = ad->words[which][idx];
1011 ad->words[which][idx] =
1012 ad->words[which][novels_used[which]];
1013 ad->words[which][novels_used[which]] = temp;
1014 swaps[which][novels_used[which]] = idx;
1015 idx = novels_used[which]++;
1016 }
1017 }
1018 break;
1019 }
1020 base = ad->words[which][idx];
1021 len = *base++;
1022#ifdef DUMPDOC
1023 printf ("[[");
1024#endif
1025 for (; len; len--)
1026 {
1027 *ptr++ = *base++;
1028#ifdef DUMPDOC
1029 putchar (*(base - 1));
1030#endif
1031 }
1032#ifdef DUMPDOC
1033 printf ("]]");
1034#endif
1035 }
1036 break;
1037 }
1038 }
1039 else
1040 {
1041 /* copy over the matching prefix */
1042 r = (*t >> 4);
1043 while (r--)
1044#ifndef DUMPDOC
1045 *ptr++ = *b++;
1046#else
1047 {
1048 *ptr = *b++;
1049 putchar (*ptr);
1050 ptr++;
1051 }
1052#endif
1053
1054 /* and the stored suffix */
1055 r = ((*t) & 0xf);
1056 while (r--)
1057#ifndef DUMPDOC
1058 *ptr++ = *++t;
1059#else
1060 {
1061 *ptr = *++t;
1062 putchar (*ptr);
1063 ptr++;
1064 }
1065#endif
1066 }
1067 which = !which;
1068 }
1069
1070 DECODE_DONE
1071
1072 * l_out = ptr - s_out;
1073 num += *l_out + 1;
1074
1075 if (cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
1076 for (which = 0; which <= 1; which++)
1077 for (novels_used[which]--; novels_used[which] >= 0; novels_used[which]--)
1078 {
1079 int a = novels_used[which];
1080 int b = swaps[which][novels_used[which]];
1081 u_char *temp;
1082 temp = ad->words[which][a];
1083 ad->words[which][a] = ad->words[which][b];
1084 ad->words[which][b] = temp;
1085 }
1086 return (COMPALLOK);
1087}
Note: See TracBrowser for help on using the repository browser.