source: gsdl/trunk/trunk/mg/src/text/comp_dict.c@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago

Undoing change committed in r16582

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.1 KB
Line 
1/**************************************************************************
2 *
3 * comp_dict.c -- Functions for loading the compression dictionary
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: comp_dict.c 16583 2008-07-29 10:20:36Z davidb $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "huffman.h"
27#include "local_strings.h"
28#include "memlib.h"
29#include "messages.h"
30
31#include "mg.h"
32#include "hash.h"
33#include "text.h"
34#include "comp_dict.h"
35#include "locallib.h"
36#include "mg_files.h"
37
38/*
39 $Log$
40 Revision 1.1 2003/02/20 21:18:23 mdewsnip
41 Addition of MG package for search and retrieval
42
43 Revision 1.1 1999/08/10 21:17:48 sjboddie
44 renamed mg-1.3d directory mg
45
46 Revision 1.1 1998/11/17 09:34:34 rjmcnab
47 *** empty log message ***
48
49 * Revision 1.3 1994/10/20 03:56:41 tes
50 * I have rewritten the boolean query optimiser and abstracted out the
51 * components of the boolean query.
52 *
53 * Revision 1.2 1994/09/20 04:41:24 tes
54 * For version 1.1
55 *
56 */
57
/* Version-control identification string carrying the expanded $Id$ keyword. */
58static char *RCSID = "$Id: comp_dict.c 16583 2008-07-29 10:20:36Z davidb $";
59
/* Compression dictionary header; filled in by Read_cdh() from
 * LoadCompressionDictionary(). Its dict_type, novel_method and num_words[]
 * fields select what else is read from the dictionary file. */
60compression_dict_header cdh;
/* Not referenced by the code visible in this file.
 * NOTE(review): presumably shared with other translation units -- confirm. */
61compressed_text_header cth;
/* Fragment headers, one per table index (`which' = 0 or 1); filled in by
 * Read_cfh() before the matching word list is read. */
62comp_frags_header cfh[2];
63
/* Word hash tables built by ReadInWords(), one per table index. An entry is
 * set to NULL by LoadCompressionDictionary() when cdh.num_words[which] is
 * zero for a partial or seed dictionary. */
64dict_hash_table *ht[2];
65
/* Huffman data read by Read_Huffman_Data(), one per table index. */
66huff_data char_huff[2];
67huff_data lens_huff[2];
/* Code tables produced by Generate_Huffman_Codes() from char_huff[] and
 * lens_huff[] respectively. */
68u_long *char_codes[2], *lens_codes[2];
/* Running count handed to Read_cdh()/Read_cfh() and advanced by
 * ReadInWords() for every word entry consumed; reset to sizeof (u_long) at
 * the start of LoadCompressionDictionary(). */
69u_long Words_disk = 0;
/* Running count handed to Read_Huffman_Data(); the visible code never
 * resets it. */
70u_long Chars_disk = 0;
71
72
73static dict_hash_table *
74ReadInWords (FILE * dict, comp_frags_header * cfh,
75 int esc)
76{
77 int i;
78 u_char *allwords, *prev = NULL;
79 dict_hash_table *ht;
80 u_char **words;
81 u_long ht_size;
82
83 ht_size = prime (cfh->hd.num_codes * HASH_RATIO);
84 if (!(ht = Xmalloc (sizeof (dict_hash_table) +
85 (ht_size - 1) * sizeof (ht->table[0]))))
86 {
87 Message ("no memory for hash_table\n");
88 return NULL;
89 }
90
91 ht->size = ht_size;
92 ht->hd = &cfh->hd;
93
94 if (!(ht->codes = Generate_Huffman_Codes (&cfh->hd, NULL)))
95 {
96 Message ("no memory for huffman codes\n");
97 return NULL;
98 }
99
100 if (!(ht->words = Xmalloc (sizeof (u_char *) * cfh->hd.num_codes)))
101 {
102 Message ("no memory for word pointers\n");
103 return NULL;
104 }
105 words = ht->words;
106
107 bzero ((char *) ht->table, ht_size * sizeof (ht->table[0]));
108
109 if (!(allwords = Xmalloc (sizeof (u_char) * cfh->uncompressed_size)))
110 {
111 Message ("no memory for words\n");
112 return NULL;
113 }
114
115 for (i = 0; i < cfh->hd.num_codes; i++, words++)
116 {
117 register int val, copy;
118 val = fgetc (dict);
119 copy = (val >> 4) & 0xf;
120 val &= 0xf;
121
122 *words = allwords;
123
124 memcpy (allwords + 1, prev + 1, copy);
125 fread (allwords + copy + 1, sizeof (u_char), val, dict);
126 *allwords = val + copy;
127
128 Words_disk += val + 1;
129
130 /* insert into the hash table */
131 if (i < cfh->hd.num_codes - esc)
132 {
133 register u_char **wptr;
134 register int tsize = ht->size;
135 register unsigned long hashval, step;
136
137 HASH (hashval, step, allwords, tsize);
138
139 wptr = ht->table[hashval];
140 for (; wptr;)
141 {
142 hashval += step;
143 if (hashval >= tsize)
144 hashval -= tsize;
145 wptr = ht->table[hashval];
146 }
147 ht->table[hashval] = words;
148 }
149 prev = allwords;
150 allwords += *allwords + 1;
151 }
152 return ht;
153}
154
155
156
157int
158LoadCompressionDictionary (char *dict_file_name)
159{
160 FILE *dict;
161 int which;
162 if (!(dict = open_named_file (dict_file_name, "rb", MAGIC_DICT, MG_MESSAGE))) /* [RPAP - Feb 97: WIN32 Port] */
163 return COMPERROR;
164
165 Words_disk = sizeof (u_long);
166
167 if (Read_cdh (dict, &cdh, NULL, &Words_disk) == -1)
168 goto error;
169
170 for (which = 0; which < 2; which++)
171 switch (cdh.dict_type)
172 {
173 case MG_COMPLETE_DICTIONARY:
174 {
175 if (Read_cfh (dict, &cfh[which], NULL, &Words_disk) == -1)
176 goto error;
177
178 if (!(ht[which] = ReadInWords (dict, &cfh[which], 0)))
179 goto error;
180
181 }
182 break;
183 case MG_PARTIAL_DICTIONARY:
184 {
185 if (cdh.num_words[which])
186 {
187 if (Read_cfh (dict, &cfh[which], NULL, &Words_disk) == -1)
188 goto error;
189 if (!(ht[which] = ReadInWords (dict, &cfh[which], 1)))
190 goto error;
191 }
192 else
193 ht[which] = NULL;
194 if (Read_Huffman_Data (dict, &char_huff[which], NULL,
195 &Chars_disk) == -1)
196 goto error;
197 if (!(char_codes[which] =
198 Generate_Huffman_Codes (&char_huff[which], NULL)))
199 goto error;
200 if (Read_Huffman_Data (dict, &lens_huff[which], NULL,
201 &Chars_disk) == -1)
202 goto error;
203 if (!(lens_codes[which] =
204 Generate_Huffman_Codes (&lens_huff[which], NULL)))
205 goto error;
206 }
207 break;
208 case MG_SEED_DICTIONARY:
209 {
210 if (cdh.num_words[which])
211 {
212 if (Read_cfh (dict, &cfh[which], NULL, &Words_disk) == -1)
213 goto error;
214 if (!(ht[which] = ReadInWords (dict, &cfh[which], 1)))
215 goto error;
216 }
217 else
218 ht[which] = NULL;
219 switch (cdh.novel_method)
220 {
221 case MG_NOVEL_HUFFMAN_CHARS:
222 if (Read_Huffman_Data (dict, &char_huff[which], NULL,
223 &Chars_disk) == -1)
224 goto error;
225 if (!(char_codes[which] =
226 Generate_Huffman_Codes (&char_huff[which], NULL)))
227 goto error;
228 if (Read_Huffman_Data (dict, &lens_huff[which], NULL,
229 &Chars_disk) == -1)
230 goto error;
231 if (!(lens_codes[which] =
232 Generate_Huffman_Codes (&lens_huff[which], NULL)))
233 goto error;
234 break;
235 case MG_NOVEL_BINARY:
236 break;
237 case MG_NOVEL_DELTA:
238 break;
239 case MG_NOVEL_HYBRID:
240 break;
241 case MG_NOVEL_HYBRID_MTF:
242 break;
243 default:
244 FatalError (1, "Bad novel method");
245 }
246 }
247 break;
248 default:
249 FatalError (1, "Bad dictionary kind\n");
250 }
251
252 return (COMPALLOK);
253
254
255error:
256 fclose (dict);
257 return (COMPERROR);
258}
Note: See TracBrowser for help on using the repository browser.