source: main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mg/src/text/locallib.c@ 23508

Last change on this file since 23508 was 23508, checked in by sjm84, 13 years ago

Committing 64 bit changes into the branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
Line 
1/**************************************************************************
2 *
3 * locallib.c -- Misc functions
4 * Copyright (C) 1994 Gary Eddy, Alistair Moffat and Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: locallib.c 23508 2010-12-17 01:04:10Z sjm84 $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "bitio_gen.h"
28#include "huffman.h"
29#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
30
31#include "locallib.h"
32#include "text.h"
33
34/*
35 $Log$
36 Revision 1.1 2003/02/20 21:18:24 mdewsnip
37 Addition of MG package for search and retrieval
38
39 Revision 1.1 1999/08/10 21:18:02 sjboddie
40 renamed mg-1.3d directory mg
41
42 Revision 1.1 1998/11/17 09:34:49 rjmcnab
43 *** empty log message ***
44
45 * Revision 1.3 1994/10/20 03:56:51 tes
46 * I have rewritten the boolean query optimiser and abstracted out the
47 * components of the boolean query.
48 *
49 * Revision 1.2 1994/09/20 04:41:38 tes
50 * For version 1.1
51 *
52 */
53
54static char *RCSID = "$Id: locallib.c 23508 2010-12-17 01:04:10Z sjm84 $";
55
56
57int
58vecentropy (int *A, int n)
59{
60 int total = 0, i;
61 double bits = 0.0;
62
63 for (i = 0; i < n; i++)
64 total += A[i];
65 for (i = 0; i < n; i++)
66 if (A[i])
67 bits += A[i] * log2 (1.0 * total / A[i]);
68 return ((int) (bits + 0.9999));
69
70}
71
72static void
73siftdown (int *A, int i, int n)
74{
75 int ms, t;
76
77 while (2 * i + 1 <= n - 1)
78 {
79 if (2 * i + 1 == n - 1 || A[2 * i + 1] < A[2 * i + 2])
80 ms = 2 * i + 1;
81 else
82 ms = 2 * i + 2;
83 if (A[i] > A[ms])
84 {
85 t = A[i];
86 A[i] = A[ms];
87 A[ms] = t;
88 }
89 i = ms;
90 }
91}
92
93static void
94buildheap (int *A, int n)
95{
96 int i;
97 for (i = n / 2 - 1; i >= 0; i--)
98 siftdown (A, i, n);
99}
100
101int
102huffcodebits (mg_u_long *A, int n)
103{
104 int i, j, tot = 0, bits = 0, v1, v2, *B;
105
106 B = (int *) Xmalloc (n * sizeof (int));
107 j = 0;
108 for (i = 0; i < n; i++)
109 {
110 if (A[i])
111 {
112 tot += (B[j++] = A[i]);
113 }
114 }
115 n = j;
116
117 if (n == 0 || tot == 0)
118 return (0);
119
120 buildheap (B, n);
121 while (n > 1)
122 {
123 v1 = B[0];
124 B[0] = B[n - 1];
125 n--;
126 siftdown (B, 0, n);
127 v2 = B[0];
128 B[0] = v1 + v2;
129 siftdown (B, 0, n);
130 bits += v1 + v2;
131 }
132 Xfree (B);
133 return (bits);
134}
135
136
137int
138modelbits (mg_u_long *A, int n)
139{
140 int i, bits = 0, last, N = 0;
141
142 last = -1;
143 for (i = 0; i < n; i++)
144 {
145 if (A[i])
146 {
147 bits += BIO_Gamma_Length (i - last) + BIO_Gamma_Length (A[i]);
148 last = i;
149 N++;
150 }
151 }
152 if (N == 0)
153 return (0);
154 bits += BIO_Gamma_Length (N);
155 return (bits);
156}
157
158
159
160
161
162/* Find the next prime number larger than p */
163int
164prime (int p)
165{
166 int limit;
167 p += (p & 1) + 1;
168 limit = (int) sqrt ((double) p) + 1;
169 do
170 {
171 int j;
172 while (limit * limit < p)
173 limit++;
174 for (j = 3; j <= limit && p % j; j += 2)
175 ; /* NULLBODY */
176 if (j > limit)
177 break;
178 p += 2;
179 }
180 while (1);
181 return (p);
182}
183
184
185
186int
187Read_cdh (FILE * f, compression_dict_header * cdh, mg_u_long * mem,
188 mg_u_long * disk)
189{
190 if (disk)
191 (*disk) += sizeof (*cdh);
192
193 /* [RPAP - Jan 97: Endian Ordering] */
194 if (fread (cdh, sizeof (*cdh), 1, f) == 1)
195 {
196 int i;
197 NTOHUL(cdh->dict_type);
198 NTOHUL(cdh->novel_method);
199 for (i = 0; i < TEXT_PARAMS; i++)
200 NTOHUL(cdh->params[i]);
201 NTOHUL(cdh->num_words[0]);
202 NTOHUL(cdh->num_words[1]);
203 NTOHUL(cdh->num_word_chars[0]);
204 NTOHUL(cdh->num_word_chars[1]);
205 NTOHUL(cdh->lookback);
206 return 0;
207 }
208 else
209 return -1;
210}
211
212
213int
214F_Read_cdh (File * f, compression_dict_header * cdh, mg_u_long * mem,
215 mg_u_long * disk)
216{
217 if (disk)
218 (*disk) += sizeof (*cdh);
219
220 /* [RPAP - Jan 97: Endian Ordering] */
221 if (Fread (cdh, sizeof (*cdh), 1, f) == 1)
222 {
223 int i;
224 NTOHUL(cdh->dict_type);
225 NTOHUL(cdh->novel_method);
226 for (i = 0; i < TEXT_PARAMS; i++)
227 NTOHUL(cdh->params[i]);
228 NTOHUL(cdh->num_words[0]);
229 NTOHUL(cdh->num_words[1]);
230 NTOHUL(cdh->num_word_chars[0]);
231 NTOHUL(cdh->num_word_chars[1]);
232 NTOHUL(cdh->lookback);
233 return 0;
234 }
235 else
236 return -1;
237}
238
239
240int
241Read_cfh (FILE * f, comp_frags_header * cfh, mg_u_long * mem, mg_u_long * disk)
242{
243 int i; /* [RPAP - Jan 97: Endian Ordering] */
244
245 if (Read_Huffman_Data (f, &cfh->hd, mem, disk) == -1)
246 return -1;
247 if (fread (&cfh->uncompressed_size,
248 sizeof (cfh->uncompressed_size), 1, f) != 1)
249 return -1;
250 NTOHUL(cfh->uncompressed_size); /* [RPAP - Jan 97: Endian Ordering] */
251
252 if (disk)
253 (*disk) += sizeof (cfh->uncompressed_size);
254
255 bzero ((char *) cfh->huff_words_size, sizeof (cfh->huff_words_size));
256
257 if (fread (cfh->huff_words_size + cfh->hd.mincodelen,
258 sizeof (*cfh->huff_words_size),
259 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1, f) !=
260 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1)
261 return -1;
262
263 /* [RPAP - Jan 97: Endian Ordering] */
264 for (i = cfh->hd.mincodelen; i < cfh->hd.maxcodelen + 1; i++)
265 NTOHUL(cfh->huff_words_size[i]);
266
267 if (disk)
268 (*disk) += sizeof (*cfh->huff_words_size) *
269 (cfh->hd.maxcodelen - cfh->hd.mincodelen + 1);
270 return 0;
271}
272
273
274
275int
276F_Read_cfh (File * f, comp_frags_header * cfh, mg_u_long * mem, mg_u_long * disk)
277{
278 int i; /* [RPAP - Jan 97: Endian Ordering] */
279
280 if (F_Read_Huffman_Data (f, &cfh->hd, mem, disk) == -1)
281 return -1;
282 if (Fread (&cfh->uncompressed_size,
283 sizeof (cfh->uncompressed_size), 1, f) != 1)
284 return -1;
285 NTOHUL(cfh->uncompressed_size); /* [RPAP - Jan 97: Endian Ordering] */
286
287 if (disk)
288 (*disk) += sizeof (cfh->uncompressed_size);
289
290 bzero ((char *) cfh->huff_words_size, sizeof (cfh->huff_words_size));
291
292 if (Fread (cfh->huff_words_size + cfh->hd.mincodelen,
293 sizeof (*cfh->huff_words_size),
294 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1, f) !=
295 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1)
296 return -1;
297
298 /* [RPAP - Jan 97: Endian Ordering] */
299 for (i = cfh->hd.mincodelen; i < cfh->hd.maxcodelen + 1; i++)
300 NTOHUL(cfh->huff_words_size[i]);
301
302 if (disk)
303 (*disk) += sizeof (*cfh->huff_words_size) *
304 (cfh->hd.maxcodelen - cfh->hd.mincodelen + 1);
305
306 return 0;
307}
Note: See TracBrowser for help on using the repository browser.