source: main/tags/2.26/gsdl/packages/wingdbm/bucket.c

Last change on this file was 520, checked in by rjmcnab, 25 years ago

fixed small compiler warning

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
1/* bucket.c - The routines for playing with hash buckets. */
2
3/* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
4 Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
5
6 GDBM is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GDBM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GDBM; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
19
20 You may contact the author by:
21 e-mail: [email protected]
22 us-mail: Philip A. Nelson
23 Computer Science Department
24 Western Washington University
25 Bellingham, WA 98226
26
27*************************************************************************/
28
29
30/* include system configuration before all else. */
31#include "autoconf.h"
32#include "gdbmdefs.h"
33
34
35/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
36void
37_gdbm_new_bucket (dbf, bucket, bits)
38 gdbm_file_info *dbf;
39 hash_bucket *bucket;
40 int bits;
41{
42 int index;
43
44 /* Initialize the avail block. */
45 bucket->av_count = 0;
46
47 /* Set the information fields first. */
48 bucket->bucket_bits = bits;
49 bucket->count = 0;
50
51 /* Initialize all bucket elements. */
52 for (index = 0; index < dbf->header->bucket_elems; index++)
53 bucket->h_table[index].hash_value = -1;
54}
55
56
57
/* Find the bucket for DBF that is pointed to by the bucket directory at
   location DIR_INDEX.  The bucket cache is first checked to see if it
   is already in memory.  If not, a cached bucket may be tossed (written
   back first if dirty) to make room to read the new bucket.  In any
   case, the requested bucket is made the "current" bucket and
   dbf->bucket points to the correct bucket.  Any seek/read failure
   aborts via _gdbm_fatal.  */

void
_gdbm_get_bucket (dbf, dir_index)
     gdbm_file_info *dbf;
     word_t dir_index;
{
  off_t bucket_adr;	/* The address of the correct hash bucket.  */
  int   num_bytes;	/* The number of bytes read.  */
  off_t file_pos;	/* The return address for lseek.  */
  register int index;	/* Loop index.  */

  /* Initial set up: remember which directory slot is current and fetch
     the file address of the bucket it points at.  */
  dbf->bucket_dir = dir_index;
  bucket_adr = dbf->dir [dir_index];

  /* The cache is created lazily on first use.  */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
	_gdbm_fatal(dbf, "couldn't init cache");
    }

  /* If that one is not already current, we must find it.  */
  if (dbf->cache_entry->ca_adr != bucket_adr)
    {
      /* Look in the cache; a hit means no disk access is needed.  */
      for (index = 0; index < dbf->cache_size; index++)
	{
	  if (dbf->bucket_cache[index].ca_adr == bucket_adr)
	    {
	      dbf->bucket = dbf->bucket_cache[index].ca_bucket;
	      dbf->cache_entry = &dbf->bucket_cache[index];
	      return;
	    }
	}

      /* It is not in the cache, read it from the disk.  Evict the next
	 slot in round-robin order; flush it first if it holds unwritten
	 changes (the write-back must happen before ca_adr is
	 overwritten below).  */
      dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
      if (dbf->bucket_cache[dbf->last_read].ca_changed)
	_gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
      /* Invalidate the cached element lookup; the bucket contents are
	 about to be replaced.  */
      dbf->cache_entry->ca_data.elem_loc = -1;
      dbf->cache_entry->ca_changed = FALSE;

      /* Read the bucket.  A short seek or short read is fatal.  */
      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
      if (file_pos != bucket_adr)
	_gdbm_fatal (dbf, "lseek error");

      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
      if (num_bytes != dbf->header->bucket_size)
	_gdbm_fatal (dbf, "read error");
    }

  return;
}
120
121
/* Split the current bucket.  This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory.  The loop repeats until the
   bucket that NEXT_INSERT maps to has a free slot.  All state changes are
   in-memory; dirty flags are set so callers/epilogues write them out.  */
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     word_t next_insert;
{
  hash_bucket *bucket[2]; 	/* Pointers to the new buckets. */

  int          new_bits;	/* The number of bits for the new buckets. */
  int	       cache_0;		/* Location in the cache for the buckets. */
  int	       cache_1;
  off_t        adr_0;		/* File address of the new bucket 0. */
  off_t        adr_1;		/* File address of the new bucket 1. */
  avail_elem   old_bucket;	/* Avail Struct for the old bucket. */

  off_t        dir_start0;	/* Used in updating the directory. */
  off_t        dir_start1;
  off_t        dir_end;

  off_t       *new_dir;		/* Pointer to the new directory. */
  off_t        dir_adr; 	/* Address of the new directory. */
  int          dir_size;	/* Size of the new directory. */
  off_t        old_adr[31]; 	/* Address of the old directories.  31 slots,
				   presumably one per possible directory
				   doubling with a 31-bit hash — TODO
				   confirm against gdbmdefs.h.  */
  int          old_size[31]; 	/* Size of the old directories. */
  int	       old_count;	/* Number of old directories. */

  int          index;		/* Used in array indexing. */
  int          index1;		/* Used in array indexing. */
  int          elem_loc;	/* Location in new bucket to put element. */
  bucket_element *old_el;	/* Pointer into the old bucket. */
  int          select;		/* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

  /* The cache is created lazily on first use. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
	_gdbm_fatal(dbf, "couldn't init cache");
    }

  /* Keep splitting while the target bucket is completely full. */
  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache.  Advance the
	 round-robin pointer, skipping any slot that holds the bucket
	 being split, so we never clobber our own source data. */
      do
	{
	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	  cache_0 = dbf->last_read;
	}
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      /* Same again for the second victim slot. */
      do
	{
	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	  cache_1 = dbf->last_read;
	}
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      /* Each new bucket covers one more directory bit than its parent. */
      new_bits = dbf->bucket->bucket_bits+1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      /* Allocate file space for both new buckets. */
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;

      /* Double the directory size if necessary: this happens when the
	 bucket being split already owns exactly one directory slot. */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
	{
	  dir_size = dbf->header->dir_size * 2;
	  dir_adr = _gdbm_alloc (dbf, dir_size);
	  new_dir = (off_t *) malloc (dir_size);
	  if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
	  /* Each old slot fans out to two adjacent slots pointing at
	     the same bucket address. */
	  for (index = 0;
	       (unsigned int)index < dbf->header->dir_size/sizeof (off_t); index++)
	    {
	      new_dir[2*index] = dbf->dir[index];
	      new_dir[2*index+1] = dbf->dir[index];
	    }

	  /* Update header.  The old on-disk directory is remembered so
	     its file space can be freed once the loop finishes. */
	  old_adr[old_count] = dbf->header->dir;
	  dbf->header->dir = dir_adr;
	  old_size[old_count] = dbf->header->dir_size;
	  dbf->header->dir_size = dir_size;
	  dbf->header->dir_bits = new_bits;
	  old_count++;

	  /* Now update dbf.  The current directory index doubles along
	     with the directory itself. */
	  dbf->header_changed = TRUE;
	  dbf->bucket_dir *= 2;
	  free (dbf->dir);
	  dbf->dir = new_dir;
	}

      /* Copy all elements in dbf->bucket into the new buckets.  The bit
	 just below the old prefix (bit 31-new_bits of the hash) decides
	 which new bucket each element lands in; collisions within a
	 bucket are resolved by linear probing. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
	{
	  old_el = & (dbf->bucket->h_table[index]);
	  select = (old_el->hash_value >> (31-new_bits)) & 1;
	  elem_loc = old_el->hash_value % dbf->header->bucket_elems;
	  while (bucket[select]->h_table[elem_loc].hash_value != -1)
	    elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
	  bucket[select]->h_table[elem_loc] = *old_el;
	  bucket[select]->count += 1;
	}

      /* Allocate avail space for the bucket[1]. */
      bucket[1]->bucket_avail[0].av_adr
	= _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
	{
	  /* The avail is full, move the first one to bucket[1]. */
	  _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
			     bucket[1]->bucket_avail,
			     &bucket[1]->av_count);
	  index = 1;
	  bucket[0]->av_count --;
	}
      for (; index < dbf->bucket->av_count; index++)
	{
	  bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
	}

      /* Update the directory.  We have new file addresses for both buckets.
	 The old bucket's run of directory slots is split in half: the
	 lower half points at bucket 0, the upper half at bucket 1. */
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
	dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
	dbf->dir[index] = adr_1;


      /* Set changed flags so all modified structures get written out. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache!  Recompute which directory slot NEXT_INSERT
	 maps to under the (possibly grown) directory. */
      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);

      /* Invalidate old cache entry.  The split bucket's file space is
	 captured as an avail element so it can be recycled below. */
      old_bucket.av_adr = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket: whichever new bucket the
	 pending insert maps to becomes current; the old bucket's space
	 is donated to the OTHER bucket's avail list. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
	{
	  dbf->bucket = bucket[0];
	  dbf->cache_entry = &dbf->bucket_cache[cache_0];
	  _gdbm_put_av_elem (old_bucket,
			     bucket[1]->bucket_avail,
			     &bucket[1]->av_count);
	}
      else
	{
	  dbf->bucket = bucket[1];
	  dbf->cache_entry = &dbf->bucket_cache[cache_1];
	  _gdbm_put_av_elem (old_bucket,
			     bucket[0]->bucket_avail,
			     &bucket[0]->av_count);
	}

    }

  /* Get rid of old directories: free the file space of every directory
     that was superseded by a doubling above. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}
314
315
316/* The only place where a bucket is written. CA_ENTRY is the
317 cache entry containing the bucket to be written. */
318
319void
320_gdbm_write_bucket (dbf, ca_entry)
321 gdbm_file_info *dbf;
322 cache_elem *ca_entry;
323{
324 int num_bytes; /* The return value for write. */
325 off_t file_pos; /* The return value for lseek. */
326
327 file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
328 if (file_pos != ca_entry->ca_adr)
329 _gdbm_fatal (dbf, "lseek error");
330 num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
331 if (num_bytes != dbf->header->bucket_size)
332 _gdbm_fatal (dbf, "write error");
333 ca_entry->ca_changed = FALSE;
334 ca_entry->ca_data.hash_val = -1;
335 ca_entry->ca_data.elem_loc = -1;
336}
Note: See TracBrowser for help on using the repository browser.