source: main/trunk/greenstone2/common-src/indexers/mg/lib/huffman.c@ 25147

Last change on this file since 25147 was 25147, checked in by kjdon, 12 years ago

merged 64_bit_Greenstone branch into trunk, rev 25139

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1/**************************************************************************
2 *
3 * huffman.c -- Huffman coding functions
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "sysfuncs.h"
23#include "memlib.h"
24#include "huffman.h"
25#include "messages.h"
26#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
27
28/* ->data on success,
29 NULL on error
30 */
31huff_data *
32Generate_Huffman_Data (int num, mg_s_long *freqs, huff_data * data,
33 mg_u_long * mem)
34{
35 int HNum, i, count;
36 mg_u_long *heap;
37 huff_data *hd = data;
38 if (!hd)
39 {
40 if (!(hd = Xmalloc (sizeof (huff_data))))
41 goto error0;
42 }
43 if (!(heap = Xmalloc (num * 2 * sizeof (*heap))))
44 goto error1;
45 if (!(hd->clens = Xmalloc (num * sizeof (char))))
46 goto error2;
47 hd->num_codes = num;
48 if (mem)
49 {
50 if (hd != data)
51 *mem += sizeof (huff_data);
52 *mem += num * sizeof (char);
53 }
54 bcopy ((char *) freqs, (char *) &heap[num], num * sizeof (*heap));
55 bzero ((char *) heap, num * sizeof (*heap));
56
57 /* Initialise the pointers to the leaves */
58 for (count = i = 0; i < num; i++)
59 if (heap[num + i])
60 heap[count++] = num + i;
61
62 /* Reorganise the pointers so that it is a heap */
63 HNum = count;
64 for (i = HNum / 2; i > 0; i--)
65 {
66 register int curr, child;
67 curr = i;
68 child = curr * 2;
69 while (child <= HNum)
70 {
71 if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
72 child++;
73 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
74 {
75 register mg_u_long temp;
76 temp = heap[child - 1];
77 heap[child - 1] = heap[curr - 1];
78 heap[curr - 1] = temp;
79 curr = child;
80 child = 2 * curr;
81 }
82 else
83 break;
84 }
85 }
86
87
88 /* Form the huffman tree */
89 while (HNum > 1)
90 {
91 int pos[2];
92
93 for (i = 0; i < 2; i++)
94 {
95 register int curr, child;
96 pos[i] = heap[0];
97 heap[0] = heap[--HNum];
98 curr = 1;
99 child = 2;
100 while (child <= HNum)
101 {
102 if (child < HNum &&
103 heap[heap[child]] < heap[heap[child - 1]])
104 child++;
105 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
106 {
107 register int temp;
108 temp = heap[child - 1];
109 heap[child - 1] = heap[curr - 1];
110 heap[curr - 1] = temp;
111 curr = child;
112 child = 2 * curr;
113 }
114 else
115 break;
116 }
117 }
118
119 heap[HNum + 1] = heap[pos[0]] + heap[pos[1]];
120 heap[pos[0]] = heap[pos[1]] = HNum + 1;
121
122 heap[HNum] = HNum + 1;
123
124 {
125 register int parent, curr;
126 HNum++;
127 curr = HNum;
128 parent = curr >> 1;
129 while (parent && heap[heap[parent - 1]] > heap[heap[curr - 1]])
130 {
131 register int temp;
132 temp = heap[parent - 1];
133 heap[parent - 1] = heap[curr - 1];
134 heap[curr - 1] = temp;
135 curr = parent;
136 parent = curr >> 1;
137 }
138 }
139 }
140
141
142 /* Calculate the code lens */
143 heap[0] = -1;
144 heap[1] = 0;
145 for (i = 2; i < num * 2; i++)
146 heap[i] = heap[heap[i]] + 1;
147
148 /* zero the lencount's array */
149 bzero ((char *) hd->lencount, sizeof (hd->lencount));
150 hd->maxcodelen = 0;
151 hd->mincodelen = 100;
152
153 /* Set the code length of each leaf in the huffman tree */
154 for (i = 0; i < num; i++)
155 {
156 register mg_u_long codelen = heap[i + num];
157 hd->clens[i] = (char) codelen;
158 if (!codelen)
159 continue;
160 if (codelen > hd->maxcodelen)
161 hd->maxcodelen = codelen;
162 if (codelen < hd->mincodelen)
163 hd->mincodelen = codelen;
164 hd->lencount[codelen]++;
165 }
166
167
168 if (hd->maxcodelen == 0 && hd->mincodelen == 100)
169 {
170 hd->num_codes = 0;
171 }
172 else
173 {
174 /* Calculate the current codes for each different code length */
175 hd->min_code[hd->maxcodelen] = 0;
176 for (i = hd->maxcodelen - 1; i>=0; i--)
177 hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
178 }
179 free (heap);
180 return (hd);
181
182error2:
183 free (heap);
184error1:
185 if (!data)
186 free (hd);
187error0:
188 return (NULL);
189
190}
191
192mg_u_long *
193Generate_Huffman_Codes (huff_data * data, mg_u_long * mem)
194{
195 int i;
196 mg_u_long *codes;
197 mg_u_long mc[MAX_HUFFCODE_LEN + 1];
198
199 if (!data)
200 return (NULL);
201 if (!(codes = Xmalloc (data->num_codes * sizeof (*codes))))
202 return (NULL);
203 if (mem)
204 *mem += data->num_codes * sizeof (*codes);
205 bcopy ((char *) data->min_code, (char *) mc, sizeof (mc));
206 for (i = 0; i < data->num_codes; i++)
207 if (data->clens[i])
208 codes[i] = mc[(int) (data->clens[i])]++;
209
210 return (codes);
211}
212
213
214mg_u_long **
215Generate_Huffman_Vals (huff_data * data, mg_u_long * mem)
216{
217 int i;
218 mg_u_long *fcode[MAX_HUFFCODE_LEN + 1];
219 mg_u_long **values;
220 mg_u_long *vals;
221
222 if (!data)
223 return (NULL);
224 if (!(vals = Xmalloc (data->num_codes * sizeof (*vals))))
225 return (NULL);
226 if (!(values = Xmalloc ((MAX_HUFFCODE_LEN + 1) * sizeof (mg_u_long *))))
227 {
228 free (vals);
229 return (NULL);
230 }
231
232 bzero ((char *) values, (MAX_HUFFCODE_LEN + 1) * sizeof (mg_u_long *));
233
234 if (mem)
235 *mem += data->num_codes * sizeof (*vals) +
236 (MAX_HUFFCODE_LEN + 1) * sizeof (mg_u_long *);
237
238 fcode[0] = values[0] = &vals[0];
239 for (i = 1; i <= data->maxcodelen; i++)
240 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
241
242 for (i = 0; i < data->num_codes; i++)
243 if (data->clens[i])
244 *fcode[(int) (data->clens[i])]++ = i;
245 return (values);
246}
247
248
249
250double
251Calculate_Huffman_Size (int num, mg_s_long *freqs, mg_s_long *counts)
252{
253 double size = 0;
254 huff_data hd;
255 int i;
256 if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
257 return -1;
258 for (i = 0; i < num; i++)
259 size += counts[i] * hd.clens[i];
260 Xfree (hd.clens);
261 return size;
262}
263
264
265
266int
267Write_Huffman_Data (FILE * f, huff_data * hd)
268{
269 /* [RPAP - Jan 97: Endian Ordering] */
270 HTONSI(hd->num_codes);
271 HTONSI(hd->mincodelen);
272 HTONSI(hd->maxcodelen);
273
274 if (fwrite (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
275 return -1;
276
277 if (fwrite (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
278 return -1;
279
280 if (fwrite (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
281 return -1;
282
283 /* [RPAP - Jan 97: Endian Ordering] */
284 NTOHSI(hd->num_codes);
285 NTOHSI(hd->mincodelen);
286 NTOHSI(hd->maxcodelen);
287
288 if (hd->num_codes)
289 {
290 /* [RPAP - Jan 97: Endian Ordering] */
291 int i;
292 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
293 HTONSI(hd->lencount[i]);
294 for (i = 0; i < hd->maxcodelen + 1; i++)
295 HTONUL(hd->min_code[i]);
296
297 if (fwrite (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
298 hd->maxcodelen - hd->mincodelen + 1, f) !=
299 hd->maxcodelen - hd->mincodelen + 1)
300 return -1;
301
302 if (fwrite (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
303 hd->maxcodelen + 1)
304 return -1;
305
306 if (fwrite (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
307 return -1;
308
309 /* [RPAP - Jan 97: Endian Ordering] */
310 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
311 NTOHSI(hd->lencount[i]);
312 for (i = 0; i < hd->maxcodelen + 1; i++)
313 NTOHUL(hd->min_code[i]);
314 }
315 return 0;
316}
317
318
319
320
321static int
322General_Read_Huffman_Data (size_t (*rd) (), void *f,
323 huff_data * hd, mg_u_long * mem, mg_u_long * disk)
324{
325 if (rd (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
326 return -1;
327
328 if (rd (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
329 return -1;
330
331 if (rd (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
332 return -1;
333
334 /* [RPAP - Jan 97: Endian Ordering] */
335 NTOHSI(hd->num_codes);
336 NTOHSI(hd->mincodelen);
337 NTOHSI(hd->maxcodelen);
338
339 if (disk)
340 (*disk) += sizeof (hd->num_codes) + sizeof (hd->mincodelen) +
341 sizeof (hd->maxcodelen);
342
343 hd->clens = NULL;
344
345 if (hd->num_codes)
346 {
347 int i; /* [RPAP - Jan 97: Endian Ordering] */
348
349 bzero ((char *) (hd->lencount), sizeof (hd->lencount));
350 if (rd (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
351 hd->maxcodelen - hd->mincodelen + 1, f) !=
352 hd->maxcodelen - hd->mincodelen + 1)
353 return -1;
354
355 /* [RPAP - Jan 97: Endian Ordering] */
356 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
357 NTOHSI(hd->lencount[i]);
358
359 if (disk)
360 (*disk) += sizeof (*hd->lencount) * (hd->maxcodelen - hd->mincodelen + 1);
361
362 bzero ((char *) (hd->min_code), sizeof (hd->min_code));
363
364 if (rd (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
365 hd->maxcodelen + 1)
366 return -1;
367
368 /* [RPAP - Jan 97: Endian Ordering] */
369 for (i = 0; i < hd->maxcodelen + 1; i++)
370 NTOHUL(hd->min_code[i]);
371
372 if (disk)
373 (*disk) += sizeof (*hd->min_code) * (hd->maxcodelen + 1);
374
375 if (!(hd->clens = Xmalloc (sizeof (*hd->clens) * hd->num_codes)))
376 return -1;
377
378
379 if (rd (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
380 return -1;
381
382 if (disk)
383 (*disk) += sizeof (*hd->clens) * hd->num_codes;
384 }
385
386 if (mem)
387 *mem += sizeof (*hd->clens) * hd->num_codes;
388 return 0;
389}
390
391
392
393int
394Read_Huffman_Data (FILE * f, huff_data * hd, mg_u_long * mem, mg_u_long * disk)
395{
396 return General_Read_Huffman_Data (fread, f, hd, mem, disk);
397}
398
399
400
401
402int
403F_Read_Huffman_Data (File * f, huff_data * hd, mg_u_long * mem, mg_u_long * disk)
404{
405 return General_Read_Huffman_Data (Fread, f, hd, mem, disk);
406}
Note: See TracBrowser for help on using the repository browser.