#include "sysfuncs.h"

#include "messages.h"
#include "memlib.h"
#include "local_strings.h"
#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */

#include "mg_files.h"
#include "mg.h"
#include "invf.h"
#include "words.h"
#include "backend.h"
#include "StemIdx.h"

#include <new> /* std::nothrow */
|
---|
14 |
|
---|
/*
 * abort(x): record failure stage `x` in the enclosing function's local
 * `abortvar` and jump to that function's `Recovery:` label.  The enclosing
 * function must declare both.
 *
 * Wrapped in do { ... } while (0) so that `abort(n);` is a single statement
 * and stays correct inside an unbraced if/else; the argument is
 * parenthesized so any expression may be passed.
 *
 * NOTE(review): this macro hides the standard library's abort() for the
 * rest of the translation unit.
 */
#define abort(x) do { abortvar = (x); goto Recovery; } while (0)
|
---|
16 |
|
---|
17 | stemmed_idx *StemIdx_header_read(void *f, int notefile, abstractReader reader, abstractGetc creader)
|
---|
18 | {
|
---|
19 | stemmed_idx *si;
|
---|
20 | unsigned long i;
|
---|
21 | u_char *buffer;
|
---|
22 | int abortvar;
|
---|
23 |
|
---|
24 | if (!(si = new stemmed_idx))
|
---|
25 | {
|
---|
26 | abort(0);
|
---|
27 | }
|
---|
28 |
|
---|
29 | si->MemForStemIdx = 0;
|
---|
30 | if (notefile)
|
---|
31 | {
|
---|
32 | si->stem_idx_file = (File *) f;
|
---|
33 | }
|
---|
34 |
|
---|
35 | reader (&(si->sih), sizeof (si->sih), 1, f);
|
---|
36 |
|
---|
37 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
38 | NTOHUL(si->sih.lookback);
|
---|
39 | NTOHUL(si->sih.block_size);
|
---|
40 | NTOHUL(si->sih.num_blocks);
|
---|
41 | NTOHUL(si->sih.blocks_start);
|
---|
42 | NTOHUL(si->sih.index_chars);
|
---|
43 | NTOHUL(si->sih.num_of_words);
|
---|
44 |
|
---|
45 | if (!(buffer = new u_char[si->sih.index_chars]))
|
---|
46 | {
|
---|
47 | abort(1);
|
---|
48 | }
|
---|
49 | si->MemForStemIdx += si->sih.index_chars;
|
---|
50 |
|
---|
51 | if (!(si->index = new (u_char *)[si->sih.num_blocks]))
|
---|
52 | {
|
---|
53 | abort(2);
|
---|
54 | }
|
---|
55 | si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->index);
|
---|
56 |
|
---|
57 | if (!(si->pos = new (unsigned long)[si->sih.num_blocks]))
|
---|
58 | {
|
---|
59 | abort(3);
|
---|
60 | }
|
---|
61 | si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->pos);
|
---|
62 |
|
---|
63 | if (!(si->buffer = new u_char[si->sih.block_size]))
|
---|
64 | {
|
---|
65 | abort(4);
|
---|
66 | }
|
---|
67 | si->MemForStemIdx += si->sih.block_size * sizeof (*si->buffer);
|
---|
68 |
|
---|
69 | si->active = -1;
|
---|
70 |
|
---|
71 | for (i = 0; i < si->sih.num_blocks; i++)
|
---|
72 | {
|
---|
73 | register u_char len;
|
---|
74 | si->index[i] = buffer;
|
---|
75 | len = creader (f);
|
---|
76 | *buffer++ = len;
|
---|
77 | reader (buffer, sizeof (u_char), len, f);
|
---|
78 | buffer += len;
|
---|
79 | reader (&si->pos[i], sizeof (*si->pos), 1, f);
|
---|
80 | NTOHUL(si->pos[i]); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
81 | }
|
---|
82 | return si;
|
---|
83 |
|
---|
84 | Recovery:
|
---|
85 | switch (abortvar)
|
---|
86 | {
|
---|
87 | case 4:
|
---|
88 | delete si->pos;
|
---|
89 | case 3:
|
---|
90 | delete si->index;
|
---|
91 | case 2:
|
---|
92 | delete buffer;
|
---|
93 | case 1:
|
---|
94 | delete si;
|
---|
95 | break;
|
---|
96 | }
|
---|
97 | return NULL;
|
---|
98 | }
|
---|
99 |
|
---|
100 | void StemIdx_block_readNext(void *file, stemmed_idx *si, int block,
|
---|
101 | unsigned long **first_word, unsigned short **num_words,
|
---|
102 | unsigned short **index, int *num_indexes, abstractSeeker seeker,
|
---|
103 | abstractReader reader)
|
---|
104 | {
|
---|
105 | int i;
|
---|
106 |
|
---|
107 | seeker (file, si->pos[block] + si->sih.blocks_start, 0);
|
---|
108 | reader (si->buffer, si->sih.block_size, sizeof (u_char), file);
|
---|
109 | si->active = si->pos[block];
|
---|
110 |
|
---|
111 | *first_word = (unsigned long *) (si->buffer);
|
---|
112 | NTOHUL(**first_word); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
113 | *num_words = (unsigned short *) ((*first_word) + 1);
|
---|
114 | NTOHUS(**num_words); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
115 | *index = (*num_words) + 1;
|
---|
116 | *num_indexes = (((**num_words) - 1) / si->sih.lookback) + 1;
|
---|
117 |
|
---|
118 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
119 | for (i = 0; i < *num_indexes; i++)
|
---|
120 | NTOHUS((*index)[i]);
|
---|
121 | }
|
---|
122 |
|
---|
123 | void StemIdx_ReadWordString(u_char **base, u_char *prev)
|
---|
124 | {
|
---|
125 | mgString::readString(base, prev);
|
---|
126 | /*
|
---|
127 | unsigned copy, suff;
|
---|
128 |
|
---|
129 | copy = **base;
|
---|
130 | *base = *base + 1;
|
---|
131 | suff = **base;
|
---|
132 | *base = *base + 1;
|
---|
133 |
|
---|
134 | bcopy ((char *) *base, (char *) (prev + copy + 1), suff);
|
---|
135 | *prev = copy + suff;
|
---|
136 | *base += suff;
|
---|
137 | */
|
---|
138 | }
|
---|
139 |
|
---|
140 | void StemIdx_ReadPosEntry(u_char **base, unsigned int *num_cases, unsigned int *blk,
|
---|
141 | unsigned short *blk_index, unsigned short *offset)
|
---|
142 | {
|
---|
143 | bcopy ((char *) *base, (char *) num_cases, sizeof (*num_cases));
|
---|
144 | NTOHUI(*num_cases); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
145 | *base += sizeof (*num_cases);
|
---|
146 | bcopy ((char *) *base, (char *) blk, sizeof (*blk));
|
---|
147 | NTOHUI(*blk); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
148 | *base += sizeof (*blk);
|
---|
149 | bcopy ((char *) *base, (char *) blk_index, sizeof (*blk_index));
|
---|
150 | NTOHUS(*blk_index); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
151 | *base += sizeof (*blk_index);
|
---|
152 | bcopy ((char *) *base, (char *) offset, sizeof (*offset));
|
---|
153 | NTOHUS(*offset); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
154 | *base += sizeof (*offset);
|
---|
155 | }
|
---|