source: tags/greenstone-3_01-distribution/indexers/mgpp/lib/huffman.cpp@ 10896

Last change on this file since 10896 was 10896, checked in by (none), 18 years ago

This commit was manufactured by cvs2svn to create tag
'greenstone-3_01-distribution'.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/**************************************************************************
2 *
3 * huffman.c -- Huffman coding functions
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "sysfuncs.h"
23#include "memlib.h"
24#include "huffman.h"
25#include "messages.h"
26#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
27
28/* ->data on success,
29 NULL on error
30 */
31huff_data *
32Generate_Huffman_Data (int num, long *freqs, huff_data * data,
33 u_long * mem)
34{
35 int HNum, i, count;
36 unsigned long *heap;
37 huff_data *hd = data;
38 if (!hd)
39 {
40 if (!(hd = new huff_data))
41 goto error0;
42 }
43 if (!(heap = new unsigned long[num * 2]))
44 goto error1;
45 if (!(hd->clens = new char[num]))
46 goto error2;
47 hd->num_codes = num;
48 if (mem)
49 {
50 if (hd != data)
51 *mem += sizeof (huff_data);
52 *mem += num * sizeof (char);
53 }
54 memcpy (&heap[num], freqs, num * sizeof (*heap));
55 memset (heap, '\0', num * sizeof (*heap));
56
57 /* Initialise the pointers to the leaves */
58 for (count = i = 0; i < num; ++i)
59 if (heap[num + i])
60 heap[count++] = num + i;
61
62 /* Reorganise the pointers so that it is a heap */
63 HNum = count;
64 for (i = HNum / 2; i > 0; --i)
65 {
66 register int curr, child;
67 curr = i;
68 child = curr * 2;
69 while (child <= HNum)
70 {
71 if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
72 ++child;
73 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
74 {
75 register u_long temp;
76 temp = heap[child - 1];
77 heap[child - 1] = heap[curr - 1];
78 heap[curr - 1] = temp;
79 curr = child;
80 child = 2 * curr;
81 }
82 else
83 break;
84 }
85 }
86
87
88 /* Form the huffman tree */
89 while (HNum > 1)
90 {
91 int pos[2];
92
93 for (i = 0; i < 2; ++i)
94 {
95 register int curr, child;
96 pos[i] = heap[0];
97 heap[0] = heap[--HNum];
98 curr = 1;
99 child = 2;
100 while (child <= HNum)
101 {
102 if (child < HNum &&
103 heap[heap[child]] < heap[heap[child - 1]])
104 ++child;
105 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
106 {
107 register int temp;
108 temp = heap[child - 1];
109 heap[child - 1] = heap[curr - 1];
110 heap[curr - 1] = temp;
111 curr = child;
112 child = 2 * curr;
113 }
114 else
115 break;
116 }
117 }
118
119 heap[HNum + 1] = heap[pos[0]] + heap[pos[1]];
120 heap[pos[0]] = heap[pos[1]] = HNum + 1;
121
122 heap[HNum] = HNum + 1;
123
124 {
125 register int parent, curr;
126 ++HNum;
127 curr = HNum;
128 parent = curr >> 1;
129 while (parent && heap[heap[parent - 1]] > heap[heap[curr - 1]])
130 {
131 register int temp;
132 temp = heap[parent - 1];
133 heap[parent - 1] = heap[curr - 1];
134 heap[curr - 1] = temp;
135 curr = parent;
136 parent = curr >> 1;
137 }
138 }
139 }
140
141
142 /* Calculate the code lens */
143 heap[0] = -1UL;
144 heap[1] = 0;
145 for (i = 2; i < num * 2; ++i)
146 heap[i] = heap[heap[i]] + 1;
147
148 /* zero the lencount's array */
149 memset (hd->lencount, '\0', sizeof (hd->lencount));
150 hd->maxcodelen = 0;
151 hd->mincodelen = 100;
152
153 /* Set the code length of each leaf in the huffman tree */
154 for (i = 0; i < num; ++i)
155 {
156 register u_long codelen = heap[i + num];
157 hd->clens[i] = (char) codelen;
158 if (!codelen)
159 continue;
160 if (codelen > hd->maxcodelen)
161 hd->maxcodelen = codelen;
162 if (codelen < hd->mincodelen)
163 hd->mincodelen = codelen;
164 ++hd->lencount[codelen];
165 }
166
167
168 if (hd->maxcodelen == 0 && hd->mincodelen == 100)
169 {
170 hd->num_codes = 0;
171 }
172 else
173 {
174 /* Calculate the current codes for each different code length */
175 hd->min_code[hd->maxcodelen] = 0;
176 for (i = hd->maxcodelen - 1; i>=0; --i)
177 hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
178 }
179 delete []heap;
180 return (hd);
181
182error2:
183 delete []heap;
184error1:
185 if (!data)
186 delete hd;
187error0:
188 return (NULL);
189
190}
191
192unsigned long *
193Generate_Huffman_Codes (huff_data * data, u_long * mem)
194{
195 int i;
196 unsigned long *codes;
197 unsigned long mc[MAX_HUFFCODE_LEN + 1];
198
199 if (!data)
200 return (NULL);
201 if (!(codes = new unsigned long[data->num_codes]))
202 return (NULL);
203 if (mem)
204 *mem += data->num_codes * sizeof (*codes);
205 memcpy (mc, data->min_code, sizeof (mc));
206 for (i = 0; i < data->num_codes; ++i)
207 if (data->clens[i])
208 codes[i] = mc[(int) (data->clens[i])]++;
209
210 return (codes);
211}
212
213
214unsigned long **
215Generate_Huffman_Vals (huff_data * data, u_long * mem)
216{
217 int i;
218 unsigned long *fcode[MAX_HUFFCODE_LEN + 1];
219 unsigned long **values;
220 unsigned long *vals;
221
222 if (!data)
223 return (NULL);
224 if (!(vals = new unsigned long[data->num_codes]))
225 return (NULL);
226 if (!(values = new unsigned long *[MAX_HUFFCODE_LEN + 1]))
227 {
228 delete []vals;
229 return (NULL);
230 }
231
232 memset (values, '\0', (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *));
233
234 if (mem)
235 *mem += data->num_codes * sizeof (*vals) +
236 (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
237
238 fcode[0] = values[0] = &vals[0];
239 for (i = 1; i <= data->maxcodelen; ++i)
240 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
241
242 for (i = 0; i < data->num_codes; ++i)
243 if (data->clens[i])
244 *fcode[(int) (data->clens[i])]++ = i;
245 return (values);
246}
247
248
249
250double
251Calculate_Huffman_Size (int num, long *freqs, long *counts)
252{
253 double size = 0;
254 huff_data hd;
255 int i;
256 if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
257 return -1;
258 for (i = 0; i < num; ++i)
259 size += counts[i] * hd.clens[i];
260 delete []hd.clens;
261 return size;
262}
263
264
265
266int
267Write_Huffman_Data (FILE * f, huff_data * hd)
268{
269 /* [RPAP - Jan 97: Endian Ordering] */
270 HTONSI(hd->num_codes);
271 HTONSI(hd->mincodelen);
272 HTONSI(hd->maxcodelen);
273
274 if (fwrite (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
275 return -1;
276
277 if (fwrite (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
278 return -1;
279
280 if (fwrite (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
281 return -1;
282
283 /* [RPAP - Jan 97: Endian Ordering] */
284 NTOHSI(hd->num_codes);
285 NTOHSI(hd->mincodelen);
286 NTOHSI(hd->maxcodelen);
287
288 if (hd->num_codes)
289 {
290 /* [RPAP - Jan 97: Endian Ordering] */
291 int i;
292 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
293 HTONSI(hd->lencount[i]);
294 for (i = 0; i < hd->maxcodelen + 1; ++i)
295 HTONUL(hd->min_code[i]);
296
297 if (fwrite (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
298 hd->maxcodelen - hd->mincodelen + 1, f) !=
299 hd->maxcodelen - hd->mincodelen + 1)
300 return -1;
301
302 if (fwrite (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
303 hd->maxcodelen + 1)
304 return -1;
305
306 if (fwrite (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
307 return -1;
308
309 /* [RPAP - Jan 97: Endian Ordering] */
310 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
311 NTOHSI(hd->lencount[i]);
312 for (i = 0; i < hd->maxcodelen + 1; ++i)
313 NTOHUL(hd->min_code[i]);
314 }
315 return 0;
316}
317
318
319
320int Read_Huffman_Data (FILE * f, huff_data * hd, u_long * mem, u_long * disk) {
321 if (fread (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
322 return -1;
323
324 if (fread (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
325 return -1;
326
327 if (fread (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
328 return -1;
329
330 /* [RPAP - Jan 97: Endian Ordering] */
331 NTOHSI(hd->num_codes);
332 NTOHSI(hd->mincodelen);
333 NTOHSI(hd->maxcodelen);
334
335 if (disk)
336 (*disk) += sizeof (hd->num_codes) + sizeof (hd->mincodelen) +
337 sizeof (hd->maxcodelen);
338
339 hd->clens = NULL;
340
341 if (hd->num_codes)
342 {
343 int i; /* [RPAP - Jan 97: Endian Ordering] */
344
345 memset (hd->lencount, '\0', sizeof (hd->lencount));
346 if (fread (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
347 hd->maxcodelen - hd->mincodelen + 1, f) !=
348 hd->maxcodelen - hd->mincodelen + 1)
349 return -1;
350
351 /* [RPAP - Jan 97: Endian Ordering] */
352 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
353 NTOHSI(hd->lencount[i]);
354
355 if (disk)
356 (*disk) += sizeof (*hd->lencount) * (hd->maxcodelen - hd->mincodelen + 1);
357
358 memset (hd->min_code, '\0', sizeof (hd->min_code));
359
360 if (fread (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
361 hd->maxcodelen + 1)
362 return -1;
363
364 /* [RPAP - Jan 97: Endian Ordering] */
365 for (i = 0; i < hd->maxcodelen + 1; ++i)
366 NTOHUL(hd->min_code[i]);
367
368 if (disk)
369 (*disk) += sizeof (*hd->min_code) * (hd->maxcodelen + 1);
370
371 if (!(hd->clens = new char[hd->num_codes]))
372 return -1;
373
374
375 if (fread (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
376 return -1;
377
378 if (disk)
379 (*disk) += sizeof (*hd->clens) * hd->num_codes;
380 }
381
382 if (mem)
383 *mem += sizeof (*hd->clens) * hd->num_codes;
384 return 0;
385}
Note: See TracBrowser for help on using the repository browser.