source: trunk/indexers/mgpp/text/locallib.cpp@ 9613

Last change on this file since 9613 was 9613, checked in by kjdon, 19 years ago

added in x++ -> ++x changes submitted by Emanuel Dejanu

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**************************************************************************
2 *
3 * locallib.cpp -- Misc functions
4 * Copyright (C) 1994 Gary Eddy, Alistair Moffat and Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "sysfuncs.h"
23
24#include "memlib.h"
25#include "bitio_gen.h"
26#include "huffman.h"
27#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
28
29#include "locallib.h"
30#include "text.h"
31
32int
33vecentropy (int *A, int n)
34{
35 int total = 0, i;
36 double bits = 0.0;
37
38 for (i = 0; i < n; ++i)
39 total += A[i];
40 for (i = 0; i < n; ++i)
41 if (A[i])
42 bits += A[i] * log2 (1.0 * total / A[i]);
43 return ((int) (bits + 0.9999));
44
45}
46
47static void
48siftdown (int *A, int i, int n)
49{
50 int ms, t;
51
52 while (2 * i + 1 <= n - 1)
53 {
54 if (2 * i + 1 == n - 1 || A[2 * i + 1] < A[2 * i + 2])
55 ms = 2 * i + 1;
56 else
57 ms = 2 * i + 2;
58 if (A[i] > A[ms])
59 {
60 t = A[i];
61 A[i] = A[ms];
62 A[ms] = t;
63 }
64 i = ms;
65 }
66}
67
68static void
69buildheap (int *A, int n)
70{
71 int i;
72 for (i = n / 2 - 1; i >= 0; --i)
73 siftdown (A, i, n);
74}
75
76int
77huffcodebits (unsigned long *A, int n)
78{
79 int i, j, tot = 0, bits = 0, v1, v2, *B;
80
81 B = (int *) Xmalloc (n * sizeof (int));
82 j = 0;
83 for (i = 0; i < n; ++i)
84 {
85 if (A[i])
86 {
87 tot += (B[j++] = A[i]);
88 }
89 }
90 n = j;
91
92 if (n == 0 || tot == 0)
93 return (0);
94
95 buildheap (B, n);
96 while (n > 1)
97 {
98 v1 = B[0];
99 B[0] = B[n - 1];
100 --n;
101 siftdown (B, 0, n);
102 v2 = B[0];
103 B[0] = v1 + v2;
104 siftdown (B, 0, n);
105 bits += v1 + v2;
106 }
107 Xfree (B);
108 return (bits);
109}
110
111
112int
113modelbits (unsigned long *A, int n)
114{
115 int i, bits = 0, last, N = 0;
116
117 last = -1;
118 for (i = 0; i < n; ++i)
119 {
120 if (A[i])
121 {
122 bits += BIO_Gamma_Length (i - last) + BIO_Gamma_Length (A[i]);
123 last = i;
124 ++N;
125 }
126 }
127 if (N == 0)
128 return (0);
129 bits += BIO_Gamma_Length (N);
130 return (bits);
131}
132
133
134
135
136
137/* Find the next prime number larger than p */
138int
139prime (int p)
140{
141 int limit;
142 p += (p & 1) + 1;
143 limit = (int) sqrt ((double) p) + 1;
144 do
145 {
146 int j;
147 while (limit * limit < p)
148 ++limit;
149 for (j = 3; j <= limit && p % j; j += 2)
150 ; /* NULLBODY */
151 if (j > limit)
152 break;
153 p += 2;
154 }
155 while (1);
156 return (p);
157}
158
159
160
161int Read_cdh (FILE * f, compression_dict_header * cdh, u_long * /*mem*/,
162 u_long * disk) {
163 if (disk)
164 (*disk) += sizeof (*cdh);
165
166 /* [RPAP - Jan 97: Endian Ordering] */
167 if (fread (cdh, sizeof (*cdh), 1, f) == 1)
168 {
169 int i;
170 NTOHUL(cdh->dict_type);
171 NTOHUL(cdh->novel_method);
172 for (i = 0; i < TEXT_PARAMS; ++i)
173 NTOHUL(cdh->params[i]);
174 NTOHUL(cdh->num_words[0]);
175 NTOHUL(cdh->num_words[1]);
176 NTOHUL(cdh->num_word_chars[0]);
177 NTOHUL(cdh->num_word_chars[1]);
178 NTOHUL(cdh->lookback);
179 return 0;
180 }
181 else
182 return -1;
183}
184
185
186int
187Read_cfh (FILE * f, comp_frags_header * cfh, u_long * mem, u_long * disk)
188{
189 int i; /* [RPAP - Jan 97: Endian Ordering] */
190
191 if (Read_Huffman_Data (f, &cfh->hd, mem, disk) == -1)
192 return -1;
193 if (fread (&cfh->uncompressed_size,
194 sizeof (cfh->uncompressed_size), 1, f) != 1)
195 return -1;
196 NTOHUL(cfh->uncompressed_size); /* [RPAP - Jan 97: Endian Ordering] */
197
198 if (disk)
199 (*disk) += sizeof (cfh->uncompressed_size);
200
201 memset (cfh->huff_words_size, '\0', sizeof (cfh->huff_words_size));
202
203 if ((int)fread (cfh->huff_words_size + cfh->hd.mincodelen,
204 sizeof (*cfh->huff_words_size),
205 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1, f) !=
206 cfh->hd.maxcodelen - cfh->hd.mincodelen + 1)
207 return -1;
208
209 /* [RPAP - Jan 97: Endian Ordering] */
210 for (i = cfh->hd.mincodelen; i < cfh->hd.maxcodelen + 1; ++i)
211 NTOHUL(cfh->huff_words_size[i]);
212
213 if (disk)
214 (*disk) += sizeof (*cfh->huff_words_size) *
215 (cfh->hd.maxcodelen - cfh->hd.mincodelen + 1);
216 return 0;
217}
218
Note: See TracBrowser for help on using the repository browser.