source: trunk/indexers/mg/lib/huffman.c@ 3745

Last change on this file since 3745 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.0 KB
Line 
1/**************************************************************************
2 *
3 * huffman.c -- Huffman coding functions
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: huffman.c 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 2003/02/20 21:14:16 mdewsnip
27 Addition of MG package for search and retrieval
28
29 Revision 1.1 1999/08/10 21:16:51 sjboddie
30 renamed mg-1.3d directory mg
31
32 Revision 1.1 1998/11/17 09:32:03 rjmcnab
33 *** empty log message ***
34
35 * Revision 1.1 1994/08/22 00:24:44 tes
36 * Initial placement under CVS.
37 *
38 */
39
40static char *RCSID = "$Id: huffman.c 3745 2003-02-20 21:20:24Z mdewsnip $";
41
42#include "sysfuncs.h"
43#include "memlib.h"
44#include "huffman.h"
45#include "messages.h"
46#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
47
48/* ->data on success,
49 NULL on error
50 */
51huff_data *
52Generate_Huffman_Data (int num, long *freqs, huff_data * data,
53 u_long * mem)
54{
55 int HNum, i, count;
56 unsigned long *heap;
57 huff_data *hd = data;
58 if (!hd)
59 {
60 if (!(hd = Xmalloc (sizeof (huff_data))))
61 goto error0;
62 }
63 if (!(heap = Xmalloc (num * 2 * sizeof (*heap))))
64 goto error1;
65 if (!(hd->clens = Xmalloc (num * sizeof (char))))
66 goto error2;
67 hd->num_codes = num;
68 if (mem)
69 {
70 if (hd != data)
71 *mem += sizeof (huff_data);
72 *mem += num * sizeof (char);
73 }
74 bcopy ((char *) freqs, (char *) &heap[num], num * sizeof (*heap));
75 bzero ((char *) heap, num * sizeof (*heap));
76
77 /* Initialise the pointers to the leaves */
78 for (count = i = 0; i < num; i++)
79 if (heap[num + i])
80 heap[count++] = num + i;
81
82 /* Reorganise the pointers so that it is a heap */
83 HNum = count;
84 for (i = HNum / 2; i > 0; i--)
85 {
86 register int curr, child;
87 curr = i;
88 child = curr * 2;
89 while (child <= HNum)
90 {
91 if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
92 child++;
93 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
94 {
95 register u_long temp;
96 temp = heap[child - 1];
97 heap[child - 1] = heap[curr - 1];
98 heap[curr - 1] = temp;
99 curr = child;
100 child = 2 * curr;
101 }
102 else
103 break;
104 }
105 }
106
107
108 /* Form the huffman tree */
109 while (HNum > 1)
110 {
111 int pos[2];
112
113 for (i = 0; i < 2; i++)
114 {
115 register int curr, child;
116 pos[i] = heap[0];
117 heap[0] = heap[--HNum];
118 curr = 1;
119 child = 2;
120 while (child <= HNum)
121 {
122 if (child < HNum &&
123 heap[heap[child]] < heap[heap[child - 1]])
124 child++;
125 if (heap[heap[curr - 1]] > heap[heap[child - 1]])
126 {
127 register int temp;
128 temp = heap[child - 1];
129 heap[child - 1] = heap[curr - 1];
130 heap[curr - 1] = temp;
131 curr = child;
132 child = 2 * curr;
133 }
134 else
135 break;
136 }
137 }
138
139 heap[HNum + 1] = heap[pos[0]] + heap[pos[1]];
140 heap[pos[0]] = heap[pos[1]] = HNum + 1;
141
142 heap[HNum] = HNum + 1;
143
144 {
145 register int parent, curr;
146 HNum++;
147 curr = HNum;
148 parent = curr >> 1;
149 while (parent && heap[heap[parent - 1]] > heap[heap[curr - 1]])
150 {
151 register int temp;
152 temp = heap[parent - 1];
153 heap[parent - 1] = heap[curr - 1];
154 heap[curr - 1] = temp;
155 curr = parent;
156 parent = curr >> 1;
157 }
158 }
159 }
160
161
162 /* Calculate the code lens */
163 heap[0] = -1UL;
164 heap[1] = 0;
165 for (i = 2; i < num * 2; i++)
166 heap[i] = heap[heap[i]] + 1;
167
168 /* zero the lencount's array */
169 bzero ((char *) hd->lencount, sizeof (hd->lencount));
170 hd->maxcodelen = 0;
171 hd->mincodelen = 100;
172
173 /* Set the code length of each leaf in the huffman tree */
174 for (i = 0; i < num; i++)
175 {
176 register u_long codelen = heap[i + num];
177 hd->clens[i] = (char) codelen;
178 if (!codelen)
179 continue;
180 if (codelen > hd->maxcodelen)
181 hd->maxcodelen = codelen;
182 if (codelen < hd->mincodelen)
183 hd->mincodelen = codelen;
184 hd->lencount[codelen]++;
185 }
186
187
188 if (hd->maxcodelen == 0 && hd->mincodelen == 100)
189 {
190 hd->num_codes = 0;
191 }
192 else
193 {
194 /* Calculate the current codes for each different code length */
195 hd->min_code[hd->maxcodelen] = 0;
196 for (i = hd->maxcodelen - 1; i>=0; i--)
197 hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
198 }
199 free (heap);
200 return (hd);
201
202error2:
203 free (heap);
204error1:
205 if (!data)
206 free (hd);
207error0:
208 return (NULL);
209
210}
211
212unsigned long *
213Generate_Huffman_Codes (huff_data * data, u_long * mem)
214{
215 int i;
216 unsigned long *codes;
217 unsigned long mc[MAX_HUFFCODE_LEN + 1];
218
219 if (!data)
220 return (NULL);
221 if (!(codes = Xmalloc (data->num_codes * sizeof (*codes))))
222 return (NULL);
223 if (mem)
224 *mem += data->num_codes * sizeof (*codes);
225 bcopy ((char *) data->min_code, (char *) mc, sizeof (mc));
226 for (i = 0; i < data->num_codes; i++)
227 if (data->clens[i])
228 codes[i] = mc[(int) (data->clens[i])]++;
229
230 return (codes);
231}
232
233
234unsigned long **
235Generate_Huffman_Vals (huff_data * data, u_long * mem)
236{
237 int i;
238 unsigned long *fcode[MAX_HUFFCODE_LEN + 1];
239 unsigned long **values;
240 unsigned long *vals;
241
242 if (!data)
243 return (NULL);
244 if (!(vals = Xmalloc (data->num_codes * sizeof (*vals))))
245 return (NULL);
246 if (!(values = Xmalloc ((MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *))))
247 {
248 free (vals);
249 return (NULL);
250 }
251
252 bzero ((char *) values, (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *));
253
254 if (mem)
255 *mem += data->num_codes * sizeof (*vals) +
256 (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
257
258 fcode[0] = values[0] = &vals[0];
259 for (i = 1; i <= data->maxcodelen; i++)
260 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
261
262 for (i = 0; i < data->num_codes; i++)
263 if (data->clens[i])
264 *fcode[(int) (data->clens[i])]++ = i;
265 return (values);
266}
267
268
269
270double
271Calculate_Huffman_Size (int num, long *freqs, long *counts)
272{
273 double size = 0;
274 huff_data hd;
275 int i;
276 if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
277 return -1;
278 for (i = 0; i < num; i++)
279 size += counts[i] * hd.clens[i];
280 Xfree (hd.clens);
281 return size;
282}
283
284
285
286int
287Write_Huffman_Data (FILE * f, huff_data * hd)
288{
289 /* [RPAP - Jan 97: Endian Ordering] */
290 HTONSI(hd->num_codes);
291 HTONSI(hd->mincodelen);
292 HTONSI(hd->maxcodelen);
293
294 if (fwrite (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
295 return -1;
296
297 if (fwrite (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
298 return -1;
299
300 if (fwrite (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
301 return -1;
302
303 /* [RPAP - Jan 97: Endian Ordering] */
304 NTOHSI(hd->num_codes);
305 NTOHSI(hd->mincodelen);
306 NTOHSI(hd->maxcodelen);
307
308 if (hd->num_codes)
309 {
310 /* [RPAP - Jan 97: Endian Ordering] */
311 int i;
312 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
313 HTONSI(hd->lencount[i]);
314 for (i = 0; i < hd->maxcodelen + 1; i++)
315 HTONUL(hd->min_code[i]);
316
317 if (fwrite (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
318 hd->maxcodelen - hd->mincodelen + 1, f) !=
319 hd->maxcodelen - hd->mincodelen + 1)
320 return -1;
321
322 if (fwrite (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
323 hd->maxcodelen + 1)
324 return -1;
325
326 if (fwrite (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
327 return -1;
328
329 /* [RPAP - Jan 97: Endian Ordering] */
330 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
331 NTOHSI(hd->lencount[i]);
332 for (i = 0; i < hd->maxcodelen + 1; i++)
333 NTOHUL(hd->min_code[i]);
334 }
335 return 0;
336}
337
338
339
340
341static int
342General_Read_Huffman_Data (size_t (*rd) (), void *f,
343 huff_data * hd, u_long * mem, u_long * disk)
344{
345 if (rd (&hd->num_codes, sizeof (hd->num_codes), 1, f) != 1)
346 return -1;
347
348 if (rd (&hd->mincodelen, sizeof (hd->mincodelen), 1, f) != 1)
349 return -1;
350
351 if (rd (&hd->maxcodelen, sizeof (hd->maxcodelen), 1, f) != 1)
352 return -1;
353
354 /* [RPAP - Jan 97: Endian Ordering] */
355 NTOHSI(hd->num_codes);
356 NTOHSI(hd->mincodelen);
357 NTOHSI(hd->maxcodelen);
358
359 if (disk)
360 (*disk) += sizeof (hd->num_codes) + sizeof (hd->mincodelen) +
361 sizeof (hd->maxcodelen);
362
363 hd->clens = NULL;
364
365 if (hd->num_codes)
366 {
367 int i; /* [RPAP - Jan 97: Endian Ordering] */
368
369 bzero ((char *) (hd->lencount), sizeof (hd->lencount));
370 if (rd (hd->lencount + hd->mincodelen, sizeof (*hd->lencount),
371 hd->maxcodelen - hd->mincodelen + 1, f) !=
372 hd->maxcodelen - hd->mincodelen + 1)
373 return -1;
374
375 /* [RPAP - Jan 97: Endian Ordering] */
376 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
377 NTOHSI(hd->lencount[i]);
378
379 if (disk)
380 (*disk) += sizeof (*hd->lencount) * (hd->maxcodelen - hd->mincodelen + 1);
381
382 bzero ((char *) (hd->min_code), sizeof (hd->min_code));
383
384 if (rd (hd->min_code, sizeof (*hd->min_code), hd->maxcodelen + 1, f) !=
385 hd->maxcodelen + 1)
386 return -1;
387
388 /* [RPAP - Jan 97: Endian Ordering] */
389 for (i = 0; i < hd->maxcodelen + 1; i++)
390 NTOHUL(hd->min_code[i]);
391
392 if (disk)
393 (*disk) += sizeof (*hd->min_code) * (hd->maxcodelen + 1);
394
395 if (!(hd->clens = Xmalloc (sizeof (*hd->clens) * hd->num_codes)))
396 return -1;
397
398
399 if (rd (hd->clens, sizeof (*hd->clens), hd->num_codes, f) != hd->num_codes)
400 return -1;
401
402 if (disk)
403 (*disk) += sizeof (*hd->clens) * hd->num_codes;
404 }
405
406 if (mem)
407 *mem += sizeof (*hd->clens) * hd->num_codes;
408 return 0;
409}
410
411
412
413int
414Read_Huffman_Data (FILE * f, huff_data * hd, u_long * mem, u_long * disk)
415{
416 return General_Read_Huffman_Data (fread, f, hd, mem, disk);
417}
418
419
420
421
422int
423F_Read_Huffman_Data (File * f, huff_data * hd, u_long * mem, u_long * disk)
424{
425 return General_Read_Huffman_Data (Fread, f, hd, mem, disk);
426}
Note: See TracBrowser for help on using the repository browser.