source: trunk/gsdl/packages/mg-1.3d/src/text/text_get.c@ 13

Last change on this file since 13 was 13, checked in by rjmcnab, 25 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.3 KB
Line 
1/**************************************************************************
2 *
3 * text_get.c -- Function for reading documents from the compressed text
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: text_get.c 13 1998-11-17 09:36:00Z rjmcnab $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "filestats.h"
28#include "timing.h"
29#include "messages.h"
30#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
31
32#include "huffman.h"
33#include "bitio_m_mem.h"
34#include "bitio_m.h"
35#include "bitio_stdio.h"
36#include "huffman_stdio.h"
37
38#include "mg.h"
39#include "invf.h"
40#include "text.h"
41#include "lists.h"
42#include "backend.h"
43#include "text_get.h"
44#include "locallib.h"
45#include "words.h"
46#include "mg_errors.h"
47#include "local_strings.h"
48
49
50/*
51 $Log$
52 Revision 1.1 1998/11/17 09:35:48 rjmcnab
53 *** empty log message ***
54
55 * Revision 1.3 1994/10/20 03:57:11 tes
56 * I have rewritten the boolean query optimiser and abstracted out the
57 * components of the boolean query.
58 *
59 * Revision 1.2 1994/09/20 04:42:15 tes
60 * For version 1.1
61 *
62 */
63
/* Revision-control identification string for this translation unit.
 * It points at a string literal, so it is declared const. */
static const char *RCSID = "$Id: text_get.c 13 1998-11-17 09:36:00Z rjmcnab $";
65
66
67
68
69
70/* FetchDocStart ()
71 * Reads into DocEnt the starting position of the document in the *.text file
72 * Where the first document is document number 1
73 * It returns the true weight of the document.
74 */
75
76
77
78
79static double
80FetchDocStartLev1 (text_data * td, u_long DN,
81 u_long * seek_pos, u_long * len)
82{
83 unsigned long data[2];
84 /* [TS:Sep/94] Fixed up the seek call to give the correct offset */
85 Fseek (td->TextIdxFile,
86 sizeof (unsigned long) * (DN - 1) + /* the doc offsets */
87 sizeof (unsigned long) + /* the magic number */
88 sizeof (compressed_text_header), /* the header */
89 0);
90 Fread ((char *) &data, sizeof (data), 1, td->TextIdxFile);
91
92 /* [RPAP - Jan 97: Endian Ordering] */
93 NTOHUL(data[0]);
94 NTOHUL(data[1]);
95
96 *seek_pos = data[0];
97 *len = data[1] - data[0];
98 return (1.0);
99}
100
101#define MG_PAGE_SIZE 2048
102
103static int
104LoadIdx (text_data * td, unsigned long DN)
105{
106 if (!td->idx_data)
107 {
108 td->idx_data = Xmalloc (sizeof (*(td->idx_data)) * MG_PAGE_SIZE);
109 if (!td->idx_data)
110 FatalError (1, "Out of memory in FDSL2");
111 }
112 if (td->current_pos == -1 || DN >= td->current_pos + MG_PAGE_SIZE - 1 ||
113 DN < td->current_pos)
114 {
115 int i, num; /* [RPAP - Jan 97: Endian Ordering] */
116
117 long rn = (long) DN - (MG_PAGE_SIZE >> 1);
118 if (rn < 1)
119 rn = 1;
120 Fseek (td->TextIdxWgtFile, (sizeof (unsigned long) + sizeof (float)) *
121 (rn - 1) + sizeof (unsigned long), 0);
122 num = Fread ((char *) td->idx_data, sizeof (*(td->idx_data)), MG_PAGE_SIZE, /* [RPAP - Jan 97: Endian Ordering] */
123 td->TextIdxWgtFile);
124
125 /* [RPAP - Jan 97: Endian Ordering] */
126 for (i = 0; i < num; i++)
127 {
128 NTOHUL(td->idx_data[i].Start);
129 NTOHF(td->idx_data[i].Weight);
130 }
131
132 td->current_pos = rn;
133 }
134 return DN - td->current_pos;
135}
136
137static double
138FDSL2 (text_data * td, unsigned long DN, unsigned long *Pos)
139{
140 unsigned long pos = LoadIdx (td, DN);
141 *Pos = td->idx_data[pos].Start;
142 return (td->idx_data[pos].Weight);
143}
144
145
146static double
147FetchDocStartLev2 (text_data * td, u_long DN,
148 u_long * seek_pos, u_long * len)
149{
150 double Weight;
151 unsigned long s1, s2;
152 Weight = FDSL2 (td, DN, &s1);
153 do
154 {
155 DN++;
156 FDSL2 (td, DN, &s2);
157 }
158 while (s2 == s1);
159 *seek_pos = s1;
160 *len = s2 - s1;
161 return (Weight);
162}
163
164
165
166
167double
168FetchDocStart (query_data * qd, u_long DN, u_long * seek_pos, u_long * len)
169{
170 qd->text_idx_lookups++;
171 if (qd->td->TextIdxWgtFile)
172 return FetchDocStartLev2 (qd->td, DN, seek_pos, len);
173 else
174 return FetchDocStartLev1 (qd->td, DN, seek_pos, len);
175}
176
177unsigned long
178FetchInitialParagraph (text_data * td, unsigned long ParaNum)
179{
180 if (td->TextIdxWgtFile)
181 {
182 unsigned long pos;
183 unsigned long start;
184 int PN = ParaNum - 1;
185 pos = LoadIdx (td, ParaNum);
186 start = td->idx_data[pos].Start;
187 while (PN > 0)
188 {
189 pos = LoadIdx (td, PN);
190 if (td->idx_data[pos].Start != start)
191 return PN + 1;
192 PN--;
193 }
194 return PN + 1;
195 }
196 else
197 return ParaNum;
198}
199
200
201
202/* FetchCompressed ()
203 * Reads into buffer DocBuff the compressed form of document DocNum.
204 * Where the first document is document number 1
205 */
206int
207FetchCompressed (query_data * qd, char **DocBuff, DocEntry * DocEnt)
208{
209 if (!DocEnt->SeekPos)
210 FetchDocStart (qd, DocEnt->DocNum, &DocEnt->SeekPos, &DocEnt->Len);
211 if (!(*DocBuff = Xmalloc (DocEnt->Len)))
212 return (-1);
213
214 if (Fseek (qd->td->TextFile, DocEnt->SeekPos, 0) == -1)
215 FatalError (1, "Error when seeking into text file");
216#if 0
217 printf ("Loading compressed text %d %d\n", DocEnt->SeekPos, DocEnt->Len);
218#endif
219 if (Fread (*DocBuff, 1, DocEnt->Len, qd->td->TextFile) != DocEnt->Len)
220 FatalError (1, "Error when reading data");
221
222 return (DocEnt->Len);
223
224}
225
226
227text_data *
228LoadTextData (File * text, File * text_idx_wgt, File * text_idx)
229{
230 text_data *td;
231
232 if (!(td = Xmalloc (sizeof (text_data))))
233 {
234 mg_errno = MG_NOMEM;
235 return (NULL);
236 }
237
238 td->TextFile = text;
239 td->TextIdxWgtFile = text_idx_wgt;
240 td->TextIdxFile = text_idx;
241 td->current_pos = -1;
242 td->idx_data = NULL;
243 Fread (&td->cth, sizeof (td->cth), 1, text);
244
245 /* [RPAP - Jan 97: Endian Ordering] */
246 NTOHUL(td->cth.num_of_docs);
247 NTOHD(td->cth.num_of_bytes); /* [RJM 07/97: 4G limit] */
248 NTOHUL(td->cth.num_of_words);
249 NTOHUL(td->cth.length_of_longest_doc);
250 NTOHD(td->cth.ratio);
251
252 return (td);
253}
254
255
256void
257FreeTextData (text_data * td)
258{
259 if (td)
260 {
261 if (td->idx_data)
262 Xfree (td->idx_data);
263 Xfree (td);
264 }
265}
266
267
268static int
269pts_comp (const void *A, const void *B)
270{
271 const DocEntry *const *a = A;
272 const DocEntry *const *b = B;
273 return (*a)->DocNum - (*b)->DocNum;
274}
275
276
277
278
279int
280GetPosLens (query_data * qd, DocEntry * Docs, int num)
281{
282 DocEntry **pts;
283 int i, j;
284 if (!(pts = Xmalloc (num * sizeof (DocEntry *))))
285 {
286 mg_errno = MG_NOMEM;
287 return (-1);
288 }
289 for (i = j = 0; i < num; i++, Docs++)
290 if (!Docs->SeekPos)
291 pts[j++] = Docs;
292
293 if (j)
294 {
295 qsort (pts, j, sizeof (DocEntry *), pts_comp);
296 for (i = 0; i < j; i++)
297 FetchDocStart (qd, pts[i]->DocNum, &pts[i]->SeekPos, &pts[i]->Len);
298 }
299
300 Xfree (pts);
301 return (0);
302}
303
304
305
306
307
308int
309LoadBuffers (query_data * qd, DocEntry * Docs, int max_mem, int num)
310{
311 DocEntry **pts;
312 int i, j;
313 int mem;
314
315 if (!num)
316 return (0);
317 if (!(pts = Xmalloc (num * sizeof (DocEntry *))))
318 {
319 mg_errno = MG_NOMEM;
320 return (-1);
321 }
322
323 mem = i = 0;
324 do
325 {
326 pts[i] = Docs;
327 mem += Docs->Len;
328 i++;
329 Docs++;
330 }
331 while (i < num && mem < max_mem);
332 if (i > 1)
333 qsort (pts, i, sizeof (DocEntry *), pts_comp);
334 for (j = 0; j < i; j++)
335 {
336 if (FetchCompressed (qd, &pts[j]->CompTextBuffer, pts[j]) == -1)
337 return (-1);
338 ChangeMemInUse (qd, pts[j]->Len);
339 }
340
341 Xfree (pts);
342
343 return (i);
344}
345
346
347
348
349
350void
351FreeBuffers (query_data * qd, DocEntry * Docs, int num)
352{
353 int i;
354 for (i = 0; i < num; i++, Docs++)
355 if (Docs->CompTextBuffer)
356 {
357 Xfree (Docs->CompTextBuffer);
358 Docs->CompTextBuffer = NULL;
359 ChangeMemInUse (qd, -Docs->Len);
360 }
361}
362
363
364
365/****************************************************************************/
366
367static void
368FreeAuxDict (auxiliary_dict * ad)
369{
370 if (!ad)
371 return;
372 if (ad->word_data[0])
373 Xfree (ad->word_data[0]);
374 if (ad->word_data[1])
375 Xfree (ad->word_data[1]);
376 if (ad->words[0])
377 Xfree (ad->words[0]);
378 if (ad->words[1])
379 Xfree (ad->words[1]);
380 Xfree (ad);
381}
382
383static auxiliary_dict *
384LoadAuxDict (compression_dict * cd, File * text_aux_dict)
385{
386 auxiliary_dict *ad;
387 int i;
388
389 if (!(ad = Xmalloc (sizeof (auxiliary_dict))))
390 {
391 mg_errno = MG_NOMEM;
392 return (NULL);
393 }
394
395 bzero ((char *) ad, sizeof (*ad));
396
397 for (i = 0; i <= 1; i++)
398 {
399 int j;
400 u_char *pos;
401
402 Fread (&ad->afh[i], sizeof (aux_frags_header), 1, text_aux_dict);
403
404 /* [RPAP - Jan 97: Endian Ordering] */
405 NTOHUL(ad->afh[i].num_frags);
406 NTOHUL(ad->afh[i].mem_for_frags);
407
408 if (!(ad->word_data[i] = Xmalloc (ad->afh[i].mem_for_frags)))
409 {
410 mg_errno = MG_NOMEM;
411 FreeAuxDict (ad);
412 return (NULL);
413 }
414 if (!(ad->words[i] = Xmalloc (ad->afh[i].num_frags * sizeof (u_char *))))
415 {
416 mg_errno = MG_NOMEM;
417 FreeAuxDict (ad);
418 return (NULL);
419 }
420
421 Fread (ad->word_data[i], ad->afh[i].mem_for_frags, sizeof (u_char),
422 text_aux_dict);
423
424 pos = ad->word_data[i];
425 for (j = 0; j < ad->afh[i].num_frags; j++)
426 {
427 ad->words[i][j] = pos;
428 pos += *pos + 1;
429 }
430 if (cd->cdh.novel_method == MG_NOVEL_HYBRID ||
431 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
432 {
433 int num;
434 num = 1;
435 ad->blk_start[i][0] = 0;
436 ad->blk_end[i][0] = cd->cdh.num_words[i] - 1;
437 while (num < 33)
438 {
439 ad->blk_start[i][num] = ad->blk_end[i][num - 1] + 1;
440 ad->blk_end[i][num] = ad->blk_start[i][num] +
441 (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
442 num++;
443 }
444 }
445 }
446 return (ad);
447}
448
449
450
451
452
453
454static u_char ***
455ReadInWords (File * dict, compression_dict * cd,
456 comp_frags_header * cfh, u_char ** escape)
457{
458 int i, lookback;
459 int ptrs_reqd = 0;
460 int mem_reqd = 0;
461 int num_set[MAX_HUFFCODE_LEN + 1];
462 u_char *next_word[MAX_HUFFCODE_LEN + 1];
463 u_char **vals;
464 u_char ***values;
465 u_char word[MAXWORDLEN + 1];
466 u_char last_word[MAX_HUFFCODE_LEN + 1][MAXWORDLEN + 1];
467
468 lookback = cd->cdh.lookback;
469
470 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
471 {
472 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
473 mem_reqd += cfh->huff_words_size[i];
474 }
475
476 if (!(vals = Xmalloc (ptrs_reqd * sizeof (*vals))))
477 return (NULL);
478
479 if (!(values = Xmalloc ((MAX_HUFFCODE_LEN + 1) * sizeof (u_char **))))
480 return (NULL);
481
482 if (!(next_word[0] = Xmalloc (mem_reqd))) return (NULL);
483
484 cd->MemForCompDict += ptrs_reqd * sizeof (*vals) +
485 (MAX_HUFFCODE_LEN + 1) * sizeof (u_char **) +
486 mem_reqd;
487
488 values[0] = vals;
489 values[0][0] = next_word[0];
490 for (i = 1; i <= cfh->hd.maxcodelen; i++)
491 {
492 int next_start = (values[i - 1] - vals) +
493 ((cfh->hd.lencount[i - 1] + ((1 << lookback) - 1)) >> lookback);
494 values[i] = &vals[next_start];
495 next_word[i] = next_word[i - 1] + cfh->huff_words_size[i - 1];
496 values[i][0] = next_word[i];
497 }
498
499 bzero ((char *) num_set, sizeof (num_set));
500
501 for (i = 0; i < cfh->hd.num_codes; i++)
502 {
503 register int val, copy;
504 register int len = cfh->hd.clens[i];
505 val = Getc (dict);
506 copy = (val >> 4) & 0xf;
507 val &= 0xf;
508
509 Fread (word + copy + 1, sizeof (u_char), val, dict);
510 *word = val + copy;
511
512 if ((num_set[len] & ((1 << lookback) - 1)) == 0)
513 {
514 values[len][num_set[len] >> lookback] = next_word[len];
515 memcpy (next_word[len], word, *word + 1);
516 if (escape && i == cfh->hd.num_codes - 1)
517 *escape = next_word[len];
518 next_word[len] += *word + 1;
519 }
520 else
521 {
522 copy = prefixlen (last_word[len], word);
523 memcpy (next_word[len] + 1, word + copy + 1, *word - copy);
524 *next_word[len] = (copy << 4) + (*word - copy);
525 if (escape && i == cfh->hd.num_codes - 1)
526 *escape = next_word[len];
527 next_word[len] += (*word - copy) + 1;
528 }
529 memcpy (last_word[len], word, *word + 1);
530 num_set[len]++;
531 }
532 if (cfh->hd.clens)
533 Xfree (cfh->hd.clens);
534 cfh->hd.clens = NULL;
535 return values;
536}
537
538
539static compression_dict *
540Load_Comp_Dict (File * dict, File * aux_dict)
541{
542 int which;
543 compression_dict *cd;
544
545 if (!(cd = Xmalloc (sizeof (compression_dict))))
546 {
547 mg_errno = MG_NOMEM;
548 return (NULL);
549 }
550
551 bzero ((char *) cd, sizeof (compression_dict));
552
553 cd->MemForCompDict = sizeof (compression_dict);
554
555 if (F_Read_cdh (dict, &cd->cdh, &cd->MemForCompDict, NULL) == -1)
556 return NULL;
557
558 for (which = 0; which < 2; which++)
559 switch (cd->cdh.dict_type)
560 {
561 case MG_COMPLETE_DICTIONARY:
562 {
563 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
564 return NULL;
565 cd->MemForCompDict += sizeof (*cd->cfh[which]);
566 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
567 return NULL;
568
569 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
570 NULL)))
571 return NULL;
572 cd->escape[which] = NULL;
573
574 }
575 break;
576 case MG_PARTIAL_DICTIONARY:
577 {
578 huff_data *hd;
579 u_long **vals;
580 if (cd->cdh.num_words[which])
581 {
582 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
583 return NULL;
584 cd->MemForCompDict += sizeof (*cd->cfh[which]);
585 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
586 return NULL;
587
588 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
589 &cd->escape[which])))
590 return NULL;
591 }
592 if (!(hd = Xmalloc (sizeof (huff_data))))
593 return NULL;
594 cd->MemForCompDict += sizeof (huff_data);
595 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
596 return NULL;
597 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
598 return NULL;
599 if (hd->clens)
600 Xfree (hd->clens);
601 hd->clens = NULL;
602 cd->chars_huff[which] = hd;
603 cd->chars_vals[which] = vals;
604 if (!(hd = Xmalloc (sizeof (huff_data))))
605 return NULL;
606 cd->MemForCompDict += sizeof (huff_data);
607 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict, NULL) == -1)
608 return NULL;
609 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
610 return NULL;
611 cd->lens_huff[which] = hd;
612 cd->lens_vals[which] = vals;
613 if (hd->clens)
614 Xfree (hd->clens);
615 hd->clens = NULL;
616 }
617 break;
618 case MG_SEED_DICTIONARY:
619 {
620 huff_data *hd;
621 u_long **vals;
622 if (cd->cdh.num_words[which])
623 {
624 if (!(cd->cfh[which] = Xmalloc (sizeof (*cd->cfh[which]))))
625 return NULL;
626 cd->MemForCompDict += sizeof (*cd->cfh[which]);
627 if (F_Read_cfh (dict, cd->cfh[which], &cd->MemForCompDict, NULL) == -1)
628 return NULL;
629
630 if (!(cd->values[which] = ReadInWords (dict, cd, cd->cfh[which],
631 &cd->escape[which])))
632 return NULL;
633 }
634 switch (cd->cdh.novel_method)
635 {
636 case MG_NOVEL_HUFFMAN_CHARS:
637 if (!(hd = Xmalloc (sizeof (huff_data))))
638 return NULL;
639 cd->chars_huff[which] = hd;
640 cd->MemForCompDict += sizeof (huff_data);
641 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict,
642 NULL) == -1)
643 return NULL;
644 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
645 return NULL;
646 cd->chars_vals[which] = vals;
647 if (hd->clens)
648 Xfree (hd->clens);
649 hd->clens = NULL;
650 if (!(hd = Xmalloc (sizeof (huff_data))))
651 return NULL;
652 cd->MemForCompDict += sizeof (huff_data);
653 cd->lens_huff[which] = hd;
654 if (F_Read_Huffman_Data (dict, hd, &cd->MemForCompDict
655 ,NULL) == -1)
656 return NULL;
657 if (!(vals = Generate_Huffman_Vals (hd, &cd->MemForCompDict)))
658 return NULL;
659 cd->lens_vals[which] = vals;
660 if (hd->clens)
661 Xfree (hd->clens);
662 hd->clens = NULL;
663 break;
664 case MG_NOVEL_BINARY:
665 break;
666 case MG_NOVEL_DELTA:
667 break;
668 case MG_NOVEL_HYBRID:
669 break;
670 case MG_NOVEL_HYBRID_MTF:
671 break;
672 }
673 break;
674 }
675 }
676
677 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
678 cd->cdh.novel_method == MG_NOVEL_DELTA ||
679 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
680 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
681 {
682 if (!aux_dict)
683 {
684 mg_errno = MG_NOFILE;
685 FreeCompDict (cd);
686 return (NULL);
687 }
688
689 if (!(cd->ad = LoadAuxDict (cd, aux_dict)))
690 {
691 FreeCompDict (cd);
692 return (NULL);
693 }
694 }
695
696
697 mg_errno = MG_NOERROR;
698
699 cd->fast_loaded = 0;
700 return (cd);
701}
702
703#define WORDNO(p, base) ((((char*)(p))-((char*)(base)))/sizeof(u_char*))
704
705#define IS_FIXUP(p) ((fixup[WORDNO(p,cd)/8] & (1<<(WORDNO(p, cd) & 7))) != 0)
706
707
708static compression_dict *
709Load_Fast_Comp_Dict (File * text_fast_comp_dict)
710{
711 compression_dict *cd;
712 u_long *p, *end;
713 u_char *fixup;
714 u_long mem;
715 u_long fixup_mem;
716 int i; /* [RPAP - Jan 97: Endian Ordering] */
717
718 Fread (&mem, sizeof (mem), 1, text_fast_comp_dict);
719 NTOHUL(mem); /* [RPAP - Jan 97: Endian Ordering] */
720 Fread (&fixup_mem, sizeof (fixup_mem), 1, text_fast_comp_dict);
721 NTOHUL(fixup_mem); /* [RPAP - Jan 97: Endian Ordering] */
722 if (!(cd = Xmalloc (mem)))
723 {
724 mg_errno = MG_NOMEM;
725 return (NULL);
726 }
727
728 end = (u_long *) (((u_char *) cd) + mem);
729 Fread (cd, sizeof (u_char), mem, text_fast_comp_dict);
730
731 if (!(fixup = Xmalloc (fixup_mem)))
732 {
733 mg_errno = MG_NOMEM;
734 return (NULL);
735 }
736
737 Fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
738
739 for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
740 if (IS_FIXUP (p))
741 {
742 NTOHUL(*p); /* [RPAP - Jan 97: Endian Ordering] */
743 *p = *p + (u_long) cd;
744 }
745
746 /* [RPAP - Jan 97: Endian Ordering] */
747 /* cdh */
748 NTOHUL(cd->cdh.dict_type);
749 NTOHUL(cd->cdh.novel_method);
750 for (i = 0; i < TEXT_PARAMS; i++)
751 NTOHUL(cd->cdh.params[i]);
752 NTOHUL(cd->cdh.num_words[0]);
753 NTOHUL(cd->cdh.num_words[1]);
754 NTOHUL(cd->cdh.num_word_chars[0]);
755 NTOHUL(cd->cdh.num_word_chars[1]);
756 NTOHUL(cd->cdh.lookback);
757 /* cfh */
758 for (i = 0; i <= 1; i++)
759 {
760 int j;
761
762 NTOHSI(cd->cfh[i]->hd.num_codes);
763 NTOHSI(cd->cfh[i]->hd.mincodelen);
764 NTOHSI(cd->cfh[i]->hd.maxcodelen);
765 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
766 {
767 NTOHSI(cd->cfh[i]->hd.lencount[j]);
768 NTOHUL(cd->cfh[i]->hd.min_code[j]);
769 }
770 NTOHUL(cd->cfh[i]->uncompressed_size);
771 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
772 NTOHUL(cd->cfh[i]->huff_words_size[j]);
773 }
774 NTOHUL(cd->MemForCompDict);
775 /* ad */
776 if (cd->cdh.novel_method == MG_NOVEL_BINARY ||
777 cd->cdh.novel_method == MG_NOVEL_DELTA ||
778 cd->cdh.novel_method == MG_NOVEL_HYBRID ||
779 cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
780 for (i = 0; i <= 1; i++)
781 {
782 int j;
783
784 NTOHUL(cd->ad->afh[i].num_frags);
785 NTOHUL(cd->ad->afh[i].mem_for_frags);
786 for (j = 0; j < 33; j++)
787 {
788 NTOHSI(cd->ad->blk_start[i][j]);
789 NTOHSI(cd->ad->blk_end[i][j]);
790 }
791 }
792 NTOHSI(cd->fast_loaded);
793
794 Xfree (fixup);
795 return (cd);
796}
797
798
799
800
801compression_dict *
802LoadCompDict (File * text_comp_dict,
803 File * text_aux_dict,
804 File * text_fast_comp_dict)
805{
806 return text_fast_comp_dict ?
807 Load_Fast_Comp_Dict (text_fast_comp_dict) :
808 Load_Comp_Dict (text_comp_dict, text_aux_dict);
809}
810
811
812
813
814void
815FreeCompDict (compression_dict * cd)
816{
817 int which;
818 if (cd->fast_loaded)
819 {
820 Xfree (cd);
821 return;
822 }
823 for (which = 0; which < 2; which++)
824 {
825 if (cd->cfh[which])
826 Xfree (cd->cfh[which]);
827 if (cd->chars_huff[which])
828 Xfree (cd->chars_huff[which]);
829 if (cd->lens_huff[which])
830 Xfree (cd->lens_huff[which]);
831 if (cd->values[which])
832 {
833 Xfree (cd->values[which][0][0]); /* [RJM 07/98: Memory Leak] */
834 Xfree (cd->values[which][0]);
835 Xfree (cd->values[which]);
836 }
837 if (cd->chars_vals[which])
838 {
839 Xfree (cd->chars_vals[which][0]);
840 Xfree (cd->chars_vals[which]);
841 }
842 if (cd->lens_vals[which])
843 {
844 Xfree (cd->lens_vals[which][0]);
845 Xfree (cd->lens_vals[which]);
846 }
847 }
848 if (cd->ad)
849 FreeAuxDict (cd->ad);
850 Xfree (cd);
851}
852
853
854
855
856
/* MY_HUFF_DECODE (len, code, mcodes)
 * Decodes one Huffman code from the current bit stream.
 *
 * DECODE_ADD is applied to the running code repeatedly; after the k-th
 * application the code is compared with mcodes[k], and decoding stops
 * as soon as the code is no longer smaller than that entry.  `len`
 * receives k and `code` receives the running code minus mcodes[k].
 * mcodes[0] is never examined.
 *
 * The expansion is a single statement without a trailing semicolon, so
 * the caller writes the `;` and the macro is safe in an unbraced
 * if/else.
 */
#define MY_HUFF_DECODE(len, code, mcodes)                               \
  do {                                                                  \
    register unsigned long *huff_base_ = (mcodes);                      \
    register unsigned long *huff_cur_ = huff_base_;                     \
    register unsigned long huff_code_ = 0;                              \
    do                                                                  \
      {                                                                 \
        DECODE_ADD(huff_code_);                                         \
      }                                                                 \
    while (huff_code_ < *++huff_cur_);                                  \
    (len) = huff_cur_ - huff_base_;                                     \
    (code) = huff_code_ - *huff_cur_;                                   \
  } while(0)
870
871
872/*#define DUMPDOC */
873
874#define MAX_SWAPS 10000
875
876int
877DecodeText (compression_dict * cd,
878 u_char * s_in, int l_in, u_char * s_out, int *l_out)
879{
880 auxiliary_dict *ad = cd->ad;
881 int which;
882 u_long num_bits, bits;
883 u_char *ptr = s_out;
884 static int num = 0;
885 u_long binary_start[2];
886 int novels_used[2];
887 int swaps[2][MAX_SWAPS];
888 novels_used[0] = novels_used[1] = 0;
889
890 {
891 unsigned char bf = s_in[l_in - 1];
892 num_bits = 1;
893 while ((bf & 1) != 1)
894 {
895 num_bits++;
896 bf >>= 1;
897 }
898 num_bits = l_in * 8 - num_bits;
899 }
900
901 DECODE_START (s_in, l_in)
902
903 which = DECODE_BIT;
904 bits = 1;
905
906 if (cd->cdh.novel_method == MG_NOVEL_BINARY)
907 {
908 DELTA_DECODE_L (binary_start[0], bits);
909 DELTA_DECODE_L (binary_start[1], bits);
910 }
911
912
913 while (bits < num_bits)
914 {
915 register unsigned code, len;
916 register int r;
917 register u_char *t, *b = NULL;
918 u_char word[MAXWORDLEN + 1];
919
920#ifdef DUMPDOC
921 printf ("\n%d %d ", bits, num_bits);
922#endif
923 if (cd->cfh[which])
924 {
925 MY_HUFF_DECODE (len, code, cd->cfh[which]->hd.min_code);
926 bits += len;
927
928 r = code & ((1 << cd->cdh.lookback) - 1);
929 t = cd->values[which][len][code >> cd->cdh.lookback];
930
931 /* step through from base pointer */
932 b = word + 1;
933 while (r--)
934 {
935 register int copy = *t >> 4;
936 memcpy (word + copy + 1, t + 1, *t & 0xf);
937 word[0] = copy + (*t & 0xf);
938 t += ((*t) & 0xf) + 1;
939 }
940 }
941 else
942 t = NULL;
943 if (t == cd->escape[which])
944 {
945 switch (cd->cdh.novel_method)
946 {
947 case MG_NOVEL_HUFFMAN_CHARS:
948 {
949 int len, i;
950 int c;
951 HUFF_DECODE_L (len, cd->lens_huff[which]->min_code,
952 cd->lens_vals[which], bits);
953 for (i = 0; i < len; i++)
954 {
955 HUFF_DECODE_L (c, cd->chars_huff[which]->min_code,
956 cd->chars_vals[which], bits);
957 *ptr++ = c;
958 }
959 }
960 break;
961 case MG_NOVEL_BINARY:
962 case MG_NOVEL_DELTA:
963 case MG_NOVEL_HYBRID:
964 case MG_NOVEL_HYBRID_MTF:
965 {
966 int idx = 0, len;
967 u_char *base;
968 switch (cd->cdh.novel_method)
969 {
970 case MG_NOVEL_BINARY:
971 {
972 BINARY_DECODE_L (idx, binary_start[which], bits);
973 if (idx == binary_start[which])
974 binary_start[which]++;
975 idx--;
976 }
977 break;
978 case MG_NOVEL_DELTA:
979 {
980 DELTA_DECODE_L (idx, bits);
981 idx--;
982 }
983 break;
984 case MG_NOVEL_HYBRID:
985 {
986 int k;
987 GAMMA_DECODE_L (k, bits);
988 k--;
989 BINARY_DECODE_L (idx,
990 ad->blk_end[which][k] -
991 ad->blk_start[which][k] + 1, bits);
992 idx += ad->blk_start[which][k] - 1;
993 }
994 break;
995 case MG_NOVEL_HYBRID_MTF:
996 {
997 int k;
998 GAMMA_DECODE_L (k, bits);
999 k--;
1000 BINARY_DECODE_L (idx,
1001 ad->blk_end[which][k] -
1002 ad->blk_start[which][k] + 1, bits);
1003 idx += ad->blk_start[which][k] - 1;
1004 if (idx >= novels_used[which])
1005 {
1006 u_char *temp;
1007 temp = ad->words[which][idx];
1008 ad->words[which][idx] =
1009 ad->words[which][novels_used[which]];
1010 ad->words[which][novels_used[which]] = temp;
1011 swaps[which][novels_used[which]] = idx;
1012 idx = novels_used[which]++;
1013 }
1014 }
1015 break;
1016 }
1017 base = ad->words[which][idx];
1018 len = *base++;
1019#ifdef DUMPDOC
1020 printf ("[[");
1021#endif
1022 for (; len; len--)
1023 {
1024 *ptr++ = *base++;
1025#ifdef DUMPDOC
1026 putchar (*(base - 1));
1027#endif
1028 }
1029#ifdef DUMPDOC
1030 printf ("]]");
1031#endif
1032 }
1033 break;
1034 }
1035 }
1036 else
1037 {
1038 /* copy over the matching prefix */
1039 r = (*t >> 4);
1040 while (r--)
1041#ifndef DUMPDOC
1042 *ptr++ = *b++;
1043#else
1044 {
1045 *ptr = *b++;
1046 putchar (*ptr);
1047 ptr++;
1048 }
1049#endif
1050
1051 /* and the stored suffix */
1052 r = ((*t) & 0xf);
1053 while (r--)
1054#ifndef DUMPDOC
1055 *ptr++ = *++t;
1056#else
1057 {
1058 *ptr = *++t;
1059 putchar (*ptr);
1060 ptr++;
1061 }
1062#endif
1063 }
1064 which = !which;
1065 }
1066
1067 DECODE_DONE
1068
1069 * l_out = ptr - s_out;
1070 num += *l_out + 1;
1071
1072 if (cd->cdh.novel_method == MG_NOVEL_HYBRID_MTF)
1073 for (which = 0; which <= 1; which++)
1074 for (novels_used[which]--; novels_used[which] >= 0; novels_used[which]--)
1075 {
1076 int a = novels_used[which];
1077 int b = swaps[which][novels_used[which]];
1078 u_char *temp;
1079 temp = ad->words[which][a];
1080 ad->words[which][a] = ad->words[which][b];
1081 ad->words[which][b] = temp;
1082 }
1083 return (COMPALLOK);
1084}
Note: See TracBrowser for help on using the repository browser.