root/gsdl/trunk/common-src/packages/gdbm/gdbm-1.8.3/bucket.c @ 18044

Revision 18044, 11.9 KB (checked in by mdewsnip, 12 years ago)

Fixed bug in customised code where not all of the hash table buckets had their key values unswapped.

Line 
/* bucket.c - The routines for playing with hash buckets. */

/*  This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
    Copyright (C) 1990, 1991, 1993  Free Software Foundation, Inc.

    GDBM is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    GDBM is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with GDBM; see the file COPYING.  If not, write to
    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

    You may contact the author by:
       e-mail:  phil@cs.wwu.edu
      us-mail:  Philip A. Nelson
                Computer Science Department
                Western Washington University
                Bellingham, WA 98226

*************************************************************************/
28
29
30/* include system configuration before all else. */
31#include "autoconf.h"
32
33#include "gdbmdefs.h"
34
35#include "gsdlmods.h"
36
37
38/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
39void
40_gdbm_new_bucket (dbf, bucket, bits)
41     gdbm_file_info *dbf;
42     hash_bucket *bucket;
43     int bits;
44{
45  int index;
46 
47  /* Initialize the avail block. */
48  bucket->av_count = 0;
49
50  /* Set the information fields first. */
51  bucket->bucket_bits = bits;
52  bucket->count = 0;
53 
54  /* Initialize all bucket elements. */
55  for (index = 0; index < dbf->header->bucket_elems; index++)
56    bucket->h_table[index].hash_value = -1;
57}
58
59
60
/* Find a bucket for DBF that is pointed to by the bucket directory from
   location DIR_INDEX.   The bucket cache is first checked to see if it
   is already in memory.  If not, a bucket may be tossed to read the new
   bucket.  In any case, the requested bucket is made the "current" bucket
   and dbf->bucket points to the correct bucket. */

void
_gdbm_get_bucket (dbf, dir_index)
     gdbm_file_info *dbf;
     int dir_index;
{
  off_t bucket_adr; /* The address of the correct hash bucket.  */
  int   num_bytes;  /* The number of bytes read. */
  off_t file_pos;   /* The return address for lseek. */
  register int index;   /* Loop index. */

  /* Initial set up: remember which directory slot is current and fetch
     the file address of the bucket it points at. */
  dbf->bucket_dir = dir_index;
  bucket_adr = dbf->dir [dir_index];

  /* Lazily create the bucket cache on first use. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  /* If that one is not already current, we must find it. */
  if (dbf->cache_entry->ca_adr != bucket_adr)
    {
      /* Look in the cache; a hit avoids any disk I/O. */
      for (index = 0; index < dbf->cache_size; index++)
        {
          if (dbf->bucket_cache[index].ca_adr == bucket_adr)
            {
              dbf->bucket = dbf->bucket_cache[index].ca_bucket;
              dbf->cache_entry = &dbf->bucket_cache[index];
              return;
            }
        }

      /* It is not in the cache, read it from the disk.  Evict the next
         slot round-robin, first flushing it if it holds unwritten
         changes, then repoint the slot at the new bucket address. */
      dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
      if (dbf->bucket_cache[dbf->last_read].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
      dbf->cache_entry->ca_data.elem_loc = -1;  /* no cached find result */
      dbf->cache_entry->ca_changed = FALSE;

      /* Read the bucket. */
      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
      if (file_pos != bucket_adr)
        _gdbm_fatal (dbf, "lseek error");

      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
      if (num_bytes != dbf->header->bucket_size)
        _gdbm_fatal (dbf, "read error");

      /* GREENSTONE CUSTOMISATION: Swap each value in the bucket if the
         GDBM file is the wrong endianness. */
      if (wrong_endianness)
      {
        /* NOTE(review): the second argument is bucket_size in BYTES; confirm
           endian_swap_array expects a byte count rather than an int count. */
        endian_swap_array((int*) dbf->bucket, dbf->header->bucket_size);

        /* There is a 4-byte string in each hash table entry that we need to
           leave alone (because character strings are not affected by
           endianness), so convert these back to how they were originally. */
        int b;
        for (b = 0; b < dbf->header->bucket_elems; b++)
        {
          int* key_start = (int*) dbf->bucket->h_table[b].key_start;
          *key_start = endian_swap (*key_start);
        }
      }
    }

  return;
}
138
139
/* Split the current bucket.  This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory.  */
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     int next_insert;
{
  hash_bucket *bucket[2];   /* Pointers to the new buckets. */

  int          new_bits;    /* The number of bits for the new buckets. */
  int          cache_0;     /* Location in the cache for the buckets. */
  int          cache_1;
  off_t        adr_0;       /* File address of the new bucket 0. */
  off_t        adr_1;       /* File address of the new bucket 1. */
  avail_elem   old_bucket;  /* Avail Struct for the old bucket. */

  off_t        dir_start0;  /* Used in updating the directory. */
  off_t        dir_start1;
  off_t        dir_end;

  off_t       *new_dir;     /* Pointer to the new directory. */
  off_t        dir_adr;     /* Address of the new directory. */
  int          dir_size;    /* Size of the new directory. */
  off_t        old_adr[31];     /* Address of the old directories. */
  int          old_size[31];    /* Size of the old directories. */
  int          old_count;   /* Number of old directories. */

  int          index;       /* Used in array indexing. */
  int          index1;      /* Used in array indexing. */
  int          elem_loc;    /* Location in new bucket to put element. */
  bucket_element *old_el;   /* Pointer into the old bucket. */
  int          select;      /* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

  /* Lazily create the bucket cache on first use. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  /* Keep splitting until the current bucket has room; one split can
     leave the bucket full again if all keys land on the same side. */
  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache.  The do/while skips
         over the cache slot that currently holds dbf->bucket so we never
         clobber the bucket being split. */
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_0 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_1 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      /* Both halves are one bit deeper than the bucket they replace. */
      new_bits = dbf->bucket->bucket_bits+1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;



      /* Double the directory size if necessary (the bucket being split
         already covers as many bits as the whole directory). */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
        {
          dir_size = dbf->header->dir_size * 2;
          dir_adr  = _gdbm_alloc (dbf, dir_size);
          new_dir  = (off_t *) malloc (dir_size);
          if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
          /* Each old directory entry becomes two adjacent entries. */
          for (index = 0;
               index < dbf->header->dir_size/sizeof (off_t); index++)
            {
              new_dir[2*index]   = dbf->dir[index];
              new_dir[2*index+1] = dbf->dir[index];
            }

          /* Update header.  The old on-disk directory is only remembered
             here; it is freed after the loop, once the split succeeds. */
          old_adr[old_count] = dbf->header->dir;
          dbf->header->dir = dir_adr;
          old_size[old_count] = dbf->header->dir_size;
          dbf->header->dir_size = dir_size;
          dbf->header->dir_bits = new_bits;
          old_count++;

          /* Now update dbf.  */
          dbf->header_changed = TRUE;
          dbf->bucket_dir *= 2;
          free (dbf->dir);
          dbf->dir = new_dir;
        }

      /* Copy all elements in dbf->bucket into the new buckets.  The bit
         just below the new depth (bit 31-new_bits of the hash) decides
         which half each element goes to; collisions within a bucket are
         resolved by linear probing. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
        {
          old_el = & (dbf->bucket->h_table[index]);
          select = (old_el->hash_value >> (31-new_bits)) & 1;
          elem_loc = old_el->hash_value % dbf->header->bucket_elems;
          while (bucket[select]->h_table[elem_loc].hash_value != -1)
            elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
          bucket[select]->h_table[elem_loc] = *old_el;
          bucket[select]->count += 1;
        }

      /* Allocate avail space for the bucket[1]. */
      bucket[1]->bucket_avail[0].av_adr
        = _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
        {
          /* The avail is full, move the first one to bucket[1]. */
          _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
          index = 1;
          bucket[0]->av_count --;
        }
      for (; index < dbf->bucket->av_count; index++)
        {
          bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
        }

      /* Update the directory.  We have new file addresses for both buckets.
         [dir_start0, dir_start1) now points at bucket 0 and
         [dir_start1, dir_end) at bucket 1. */
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
        dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
        dbf->dir[index] = adr_1;


      /* Set changed flags. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache!  Recompute the directory slot for the hash value
         that is about to be inserted. */
      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);

      /* Invalidate old cache entry, and remember the split bucket's disk
         space so it can be donated to the non-current half below. */
      old_bucket.av_adr  = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket (the one next_insert hashes
         into); give the freed old-bucket space to the other half. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
        {
          dbf->bucket = bucket[0];
          dbf->cache_entry = &dbf->bucket_cache[cache_0];
          _gdbm_put_av_elem (old_bucket,
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
        }
      else
        {
          dbf->bucket = bucket[1];
          dbf->cache_entry = &dbf->bucket_cache[cache_1];
          _gdbm_put_av_elem (old_bucket,
                             bucket[0]->bucket_avail,
                             &bucket[0]->av_count, FALSE);
        }

    }

  /* Get rid of old directories. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}
332
333
334/* The only place where a bucket is written.  CA_ENTRY is the
335   cache entry containing the bucket to be written. */
336
337void
338_gdbm_write_bucket (dbf, ca_entry)
339     gdbm_file_info *dbf;
340     cache_elem *ca_entry;
341{
342  int  num_bytes;   /* The return value for write. */
343  off_t file_pos;   /* The return value for lseek. */
344 
345  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
346  if (file_pos != ca_entry->ca_adr)
347    _gdbm_fatal (dbf, "lseek error");
348  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
349  if (num_bytes != dbf->header->bucket_size)
350    _gdbm_fatal (dbf, "write error");
351  ca_entry->ca_changed = FALSE;
352  ca_entry->ca_data.hash_val = -1;
353  ca_entry->ca_data.elem_loc = -1;
354}
Note: See TracBrowser for help on using the browser.