source: gsdl/trunk/common-src/packages/gdbm/gdbm-1.8.3/bucket.c@ 18044

Last change on this file since 18044 was 18044, checked in by mdewsnip, 15 years ago

Fixed bug in customised code where not all of the hash table buckets had their key values unswapped.

File size: 11.9 KB
/* bucket.c - The routines for playing with hash buckets. */

/* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
   Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.

   GDBM is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GDBM is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GDBM; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

   You may contact the author by:
       e-mail:  [email protected]
      us-mail:  Philip A. Nelson
                Computer Science Department
                Western Washington University
                Bellingham, WA 98226

*************************************************************************/


/* include system configuration before all else. */
#include "autoconf.h"

#include "gdbmdefs.h"

#include "gsdlmods.h"

/* Initializing a new hash bucket sets all bucket entries to a -1 hash value. */
void
_gdbm_new_bucket (dbf, bucket, bits)
     gdbm_file_info *dbf;
     hash_bucket *bucket;
     int bits;
{
  int index;

  /* Initialize the avail block. */
  bucket->av_count = 0;

  /* Set the information fields first. */
  bucket->bucket_bits = bits;
  bucket->count = 0;

  /* Initialize all bucket elements. */
  for (index = 0; index < dbf->header->bucket_elems; index++)
    bucket->h_table[index].hash_value = -1;
}


/* Find a bucket for DBF that is pointed to by the bucket directory from
   location DIR_INDEX.  The bucket cache is first checked to see if it
   is already in memory.  If not, a bucket may be tossed to read the new
   bucket.  In any case, the requested bucket is made the "current" bucket
   and dbf->bucket points to the correct bucket. */

void
_gdbm_get_bucket (dbf, dir_index)
     gdbm_file_info *dbf;
     int dir_index;
{
  off_t bucket_adr;     /* The address of the correct hash bucket. */
  int   num_bytes;      /* The number of bytes read. */
  off_t file_pos;       /* The return address for lseek. */
  register int index;   /* Loop index. */

  /* Initial set up. */
  dbf->bucket_dir = dir_index;
  bucket_adr = dbf->dir [dir_index];

  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  /* If that one is not already current, we must find it. */
  if (dbf->cache_entry->ca_adr != bucket_adr)
    {
      /* Look in the cache. */
      for (index = 0; index < dbf->cache_size; index++)
        {
          if (dbf->bucket_cache[index].ca_adr == bucket_adr)
            {
              dbf->bucket = dbf->bucket_cache[index].ca_bucket;
              dbf->cache_entry = &dbf->bucket_cache[index];
              return;
            }
        }

      /* It is not in the cache, read it from the disk. */
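      /* Choose the next cache slot in round-robin order; if it holds unwritten changes, flush it to disk before reusing it. */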
      dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
      if (dbf->bucket_cache[dbf->last_read].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
      dbf->cache_entry->ca_data.elem_loc = -1;
      dbf->cache_entry->ca_changed = FALSE;

      /* Read the bucket. */
      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
      if (file_pos != bucket_adr)
        _gdbm_fatal (dbf, "lseek error");

      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
      if (num_bytes != dbf->header->bucket_size)
        _gdbm_fatal (dbf, "read error");

      // GREENSTONE CUSTOMISATION: Swap each value in the bucket if the GDBM file is the wrong endianness
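      // (wrong_endianness and the endian_swap* helpers are assumed to be provided by the Greenstone customisations in gsdlmods.h, included above)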
      if (wrong_endianness)
        {
          endian_swap_array((int*) dbf->bucket, dbf->header->bucket_size);

          // There is a 4-byte string in each hash table entry that we need to leave alone (because character
          // strings are not affected by endianness), so convert these back to how they were originally
          int b;
          for (b = 0; b < dbf->header->bucket_elems; b++)
            {
              int* key_start = (int*) dbf->bucket->h_table[b].key_start;
              *key_start = endian_swap (*key_start);
            }
        }
    }

  return;
}


/* Split the current bucket.  This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory. */
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     int next_insert;
{
  hash_bucket *bucket[2];   /* Pointers to the new buckets. */

  int        new_bits;      /* The number of bits for the new buckets. */
  int        cache_0;       /* Location in the cache for the buckets. */
  int        cache_1;
  off_t      adr_0;         /* File address of the new bucket 0. */
  off_t      adr_1;         /* File address of the new bucket 1. */
  avail_elem old_bucket;    /* Avail struct for the old bucket. */

  off_t      dir_start0;    /* Used in updating the directory. */
  off_t      dir_start1;
  off_t      dir_end;

  off_t     *new_dir;       /* Pointer to the new directory. */
  off_t      dir_adr;       /* Address of the new directory. */
  int        dir_size;      /* Size of the new directory. */
  off_t      old_adr[31];   /* Address of the old directories. */
  int        old_size[31];  /* Size of the old directories. */
  int        old_count;     /* Number of old directories. */

  int        index;         /* Used in array indexing. */
  int        index1;        /* Used in array indexing. */
  int        elem_loc;      /* Location in new bucket to put element. */
  bucket_element *old_el;   /* Pointer into the old bucket. */
  int        select;        /* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache. */
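      /* Claim two cache slots for the new buckets, skipping the slot that holds the bucket being split and flushing any dirty victim first. */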
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_0 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_1 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      new_bits = dbf->bucket->bucket_bits+1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;



      /* Double the directory size if necessary. */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
        {
          dir_size = dbf->header->dir_size * 2;
          dir_adr  = _gdbm_alloc (dbf, dir_size);
          new_dir  = (off_t *) malloc (dir_size);
          if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
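          /* Duplicate each old directory entry into two adjacent entries of the doubled directory; both still point at the old buckets until the split below reassigns them. */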
          for (index = 0;
               index < dbf->header->dir_size/sizeof (off_t); index++)
            {
              new_dir[2*index]   = dbf->dir[index];
              new_dir[2*index+1] = dbf->dir[index];
            }

          /* Update header. */
          old_adr[old_count] = dbf->header->dir;
          dbf->header->dir = dir_adr;
          old_size[old_count] = dbf->header->dir_size;
          dbf->header->dir_size = dir_size;
          dbf->header->dir_bits = new_bits;
          old_count++;

          /* Now update dbf. */
          dbf->header_changed = TRUE;
          dbf->bucket_dir *= 2;
          free (dbf->dir);
          dbf->dir = new_dir;
        }

      /* Copy all elements in dbf->bucket into the new buckets. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
        {
          old_el = & (dbf->bucket->h_table[index]);
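          /* The newly significant hash bit selects which of the two new buckets receives this element; collisions within the target bucket are resolved by linear probing. */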
          select = (old_el->hash_value >> (31-new_bits)) & 1;
          elem_loc = old_el->hash_value % dbf->header->bucket_elems;
          while (bucket[select]->h_table[elem_loc].hash_value != -1)
            elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
          bucket[select]->h_table[elem_loc] = *old_el;
          bucket[select]->count += 1;
        }

      /* Allocate avail space for the bucket[1]. */
      bucket[1]->bucket_avail[0].av_adr
        = _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
        {
          /* The avail is full, move the first one to bucket[1]. */
          _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
          index = 1;
          bucket[0]->av_count--;
        }
      for (; index < dbf->bucket->av_count; index++)
        {
          bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
        }

      /* Update the directory.  We have new file addresses for both buckets. */
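      /* All directory entries that pointed at the full bucket are split into a lower range (bucket 0) and an upper range (bucket 1). */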
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
        dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
        dbf->dir[index] = adr_1;


      /* Set changed flags. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache! */
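      /* The directory index for the pending insert is the top dir_bits of its hash value. */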
      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);

      /* Invalidate old cache entry. */
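      /* Remember the old bucket's file space so it can be returned to the avail list of whichever new bucket does not become current. */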
      old_bucket.av_adr  = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
        {
          dbf->bucket = bucket[0];
          dbf->cache_entry = &dbf->bucket_cache[cache_0];
          _gdbm_put_av_elem (old_bucket,
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
        }
      else
        {
          dbf->bucket = bucket[1];
          dbf->cache_entry = &dbf->bucket_cache[cache_1];
          _gdbm_put_av_elem (old_bucket,
                             bucket[0]->bucket_avail,
                             &bucket[0]->av_count, FALSE);
        }

    }

  /* Get rid of old directories. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}


/* The only place where a bucket is written.  CA_ENTRY is the
   cache entry containing the bucket to be written. */

void
_gdbm_write_bucket (dbf, ca_entry)
     gdbm_file_info *dbf;
     cache_elem *ca_entry;
{
  int   num_bytes;   /* The return value for write. */
  off_t file_pos;    /* The return value for lseek. */

  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
  if (file_pos != ca_entry->ca_adr)
    _gdbm_fatal (dbf, "lseek error");
  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
  if (num_bytes != dbf->header->bucket_size)
    _gdbm_fatal (dbf, "write error");
  ca_entry->ca_changed = FALSE;
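  /* Clear this slot's cached element information. */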
  ca_entry->ca_data.hash_val = -1;
  ca_entry->ca_data.elem_loc = -1;
}