Context Navigation

source: gsdl/trunk/common-src/packages/gdbm/gdbm-1.8.3/bucket.c@ 18039

Last change on this file since 18039 was 18039, checked in by mdewsnip, 15 years ago
Added extra code to leave the 4-byte strings in each hash table entry alone. This doesn't matter for db2txt but it matters a lot for Greenstone and gdbmget, since these access the database by a known key.
File size: 11.9 KB

Rev	Line
[18019]	1	/* bucket.c - The routines for playing with hash buckets. */
	2
	3	/* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
	4	Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
	5
	6	GDBM is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 2, or (at your option)
	9	any later version.
	10
	11	GDBM is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
	17	along with GDBM; see the file COPYING. If not, write to
	18	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	19
	20	You may contact the author by:
	21	e-mail: [email protected]
	22	us-mail: Philip A. Nelson
	23	Computer Science Department
	24	Western Washington University
	25	Bellingham, WA 98226
	26
	27	*************************************************************************/
	28
	29
	30	/* include system configuration before all else. */
	31	#include "autoconf.h"
	32
	33	#include "gdbmdefs.h"
	34
[18038]	35	#include "gsdlmods.h"
[18019]	36
[18038]	37
[18019]	38	/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
	39	void
	40	_gdbm_new_bucket (dbf, bucket, bits)
	41	gdbm_file_info *dbf;
	42	hash_bucket *bucket;
	43	int bits;
	44	{
	45	int index;
	46
	47	/* Initialize the avail block. */
	48	bucket->av_count = 0;
	49
	50	/* Set the information fields first. */
	51	bucket->bucket_bits = bits;
	52	bucket->count = 0;
	53
	54	/* Initialize all bucket elements. */
	55	for (index = 0; index < dbf->header->bucket_elems; index++)
	56	bucket->h_table[index].hash_value = -1;
	57	}
	58
	59
	60
	61	/* Find a bucket for DBF that is pointed to by the bucket directory from
	62	location DIR_INDEX. The bucket cache is first checked to see if it
	63	is already in memory. If not, a bucket may be tossed to read the new
	64	bucket. In any case, the requested bucket is make the "current" bucket
	65	and dbf->bucket points to the correct bucket. */
	66
	67	void
	68	_gdbm_get_bucket (dbf, dir_index)
	69	gdbm_file_info *dbf;
	70	int dir_index;
	71	{
	72	off_t bucket_adr; /* The address of the correct hash bucket. */
	73	int num_bytes; /* The number of bytes read. */
	74	off_t file_pos; /* The return address for lseek. */
	75	register int index; /* Loop index. */
	76
	77	/* Initial set up. */
	78	dbf->bucket_dir = dir_index;
	79	bucket_adr = dbf->dir [dir_index];
	80
	81	if (dbf->bucket_cache == NULL)
	82	{
	83	if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
	84	_gdbm_fatal(dbf, "couldn't init cache");
	85	}
	86
	87	/* Is that one is not already current, we must find it. */
	88	if (dbf->cache_entry->ca_adr != bucket_adr)
	89	{
	90	/* Look in the cache. */
	91	for (index = 0; index < dbf->cache_size; index++)
	92	{
	93	if (dbf->bucket_cache[index].ca_adr == bucket_adr)
	94	{
	95	dbf->bucket = dbf->bucket_cache[index].ca_bucket;
	96	dbf->cache_entry = &dbf->bucket_cache[index];
	97	return;
	98	}
	99	}
	100
	101	/* It is not in the cache, read it from the disk. */
	102	dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	103	if (dbf->bucket_cache[dbf->last_read].ca_changed)
	104	_gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
	105	dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
	106	dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
	107	dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
	108	dbf->cache_entry->ca_data.elem_loc = -1;
	109	dbf->cache_entry->ca_changed = FALSE;
	110
	111	/* Read the bucket. */
	112	file_pos = lseek (dbf->desc, bucket_adr, L_SET);
	113	if (file_pos != bucket_adr)
	114	_gdbm_fatal (dbf, "lseek error");
	115
	116	num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
	117	if (num_bytes != dbf->header->bucket_size)
	118	_gdbm_fatal (dbf, "read error");
[18038]	119
	120	// GREENSTONE CUSTOMISATION: Swap each value in the bucket if the GDBM file is the wrong endianness
	121	if (wrong_endianness)
	122	{
	123	endian_swap_array((int*) dbf->bucket, dbf->header->bucket_size);
[18039]	124
	125	// There is a 4-byte string in each hash table entry that we need to leave alone (because character
	126	// strings are not affected by endianness), so convert these back to how they were originally
	127	int b;
	128	for (b = 0; b < dbf->bucket->count; b++)
	129	{
	130	int* key_start = (int*) dbf->bucket->h_table[b].key_start;
	131	key_start = endian_swap (key_start);
	132	}
[18038]	133	}
[18019]	134	}
	135
	136	return;
	137	}
	138
	139
	140	/* Split the current bucket. This includes moving all items in the bucket to
	141	a new bucket. This doesn't require any disk reads because all hash values
	142	are stored in the buckets. Splitting the current bucket may require
	143	doubling the size of the hash directory. */
	144	void
	145	_gdbm_split_bucket (dbf, next_insert)
	146	gdbm_file_info *dbf;
	147	int next_insert;
	148	{
	149	hash_bucket bucket[2]; / Pointers to the new buckets. */
	150
	151	int new_bits; /* The number of bits for the new buckets. */
	152	int cache_0; /* Location in the cache for the buckets. */
	153	int cache_1;
	154	off_t adr_0; /* File address of the new bucket 0. */
	155	off_t adr_1; /* File address of the new bucket 1. */
	156	avail_elem old_bucket; /* Avail Struct for the old bucket. */
	157
	158	off_t dir_start0; /* Used in updating the directory. */
	159	off_t dir_start1;
	160	off_t dir_end;
	161
	162	off_t new_dir; / Pointer to the new directory. */
	163	off_t dir_adr; /* Address of the new directory. */
	164	int dir_size; /* Size of the new directory. */
	165	off_t old_adr[31]; /* Address of the old directories. */
	166	int old_size[31]; /* Size of the old directories. */
	167	int old_count; /* Number of old directories. */
	168
	169	int index; /* Used in array indexing. */
	170	int index1; /* Used in array indexing. */
	171	int elem_loc; /* Location in new bucket to put element. */
	172	bucket_element old_el; / Pointer into the old bucket. */
	173	int select; /* Used to index bucket during movement. */
	174
	175
	176	/* No directories are yet old. */
	177	old_count = 0;
	178
	179	if (dbf->bucket_cache == NULL)
	180	{
	181	if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
	182	_gdbm_fatal(dbf, "couldn't init cache");
	183	}
	184
	185	while (dbf->bucket->count == dbf->header->bucket_elems)
	186	{
	187	/* Initialize the "new" buckets in the cache. */
	188	do
	189	{
	190	dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	191	cache_0 = dbf->last_read;
	192	}
	193	while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
	194	bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
	195	if (dbf->bucket_cache[cache_0].ca_changed)
	196	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
	197	do
	198	{
	199	dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	200	cache_1 = dbf->last_read;
	201	}
	202	while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
	203	bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
	204	if (dbf->bucket_cache[cache_1].ca_changed)
	205	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
	206	new_bits = dbf->bucket->bucket_bits+1;
	207	_gdbm_new_bucket (dbf, bucket[0], new_bits);
	208	_gdbm_new_bucket (dbf, bucket[1], new_bits);
	209	adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
	210	dbf->bucket_cache[cache_0].ca_adr = adr_0;
	211	adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
	212	dbf->bucket_cache[cache_1].ca_adr = adr_1;
	213
	214
	215
	216	/* Double the directory size if necessary. */
	217	if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
	218	{
	219	dir_size = dbf->header->dir_size * 2;
	220	dir_adr = _gdbm_alloc (dbf, dir_size);
	221	new_dir = (off_t *) malloc (dir_size);
	222	if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
	223	for (index = 0;
	224	index < dbf->header->dir_size/sizeof (off_t); index++)
	225	{
	226	new_dir[2*index] = dbf->dir[index];
	227	new_dir[2*index+1] = dbf->dir[index];
	228	}
	229
	230	/* Update header. */
	231	old_adr[old_count] = dbf->header->dir;
	232	dbf->header->dir = dir_adr;
	233	old_size[old_count] = dbf->header->dir_size;
	234	dbf->header->dir_size = dir_size;
	235	dbf->header->dir_bits = new_bits;
	236	old_count++;
	237
	238	/* Now update dbf. */
	239	dbf->header_changed = TRUE;
	240	dbf->bucket_dir *= 2;
	241	free (dbf->dir);
	242	dbf->dir = new_dir;
	243	}
	244
	245	/* Copy all elements in dbf->bucket into the new buckets. */
	246	for (index = 0; index < dbf->header->bucket_elems; index++)
	247	{
	248	old_el = & (dbf->bucket->h_table[index]);
	249	select = (old_el->hash_value >> (31-new_bits)) & 1;
	250	elem_loc = old_el->hash_value % dbf->header->bucket_elems;
	251	while (bucket[select]->h_table[elem_loc].hash_value != -1)
	252	elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
	253	bucket[select]->h_table[elem_loc] = *old_el;
	254	bucket[select]->count += 1;
	255	}
	256
	257	/* Allocate avail space for the bucket[1]. */
	258	bucket[1]->bucket_avail[0].av_adr
	259	= _gdbm_alloc (dbf, dbf->header->block_size);
	260	bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
	261	bucket[1]->av_count = 1;
	262
	263	/* Copy the avail elements in dbf->bucket to bucket[0]. */
	264	bucket[0]->av_count = dbf->bucket->av_count;
	265	index = 0;
	266	index1 = 0;
	267	if (bucket[0]->av_count == BUCKET_AVAIL)
	268	{
	269	/* The avail is full, move the first one to bucket[1]. */
	270	_gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
	271	bucket[1]->bucket_avail,
	272	&bucket[1]->av_count, FALSE);
	273	index = 1;
	274	bucket[0]->av_count --;
	275	}
	276	for (; index < dbf->bucket->av_count; index++)
	277	{
	278	bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
	279	}
	280
	281	/* Update the directory. We have new file addresses for both buckets. */
	282	dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) \| 1;
	283	dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
	284	dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
	285	dir_start0 = dir_start1 - (dir_end - dir_start1);
	286	for (index = dir_start0; index < dir_start1; index++)
	287	dbf->dir[index] = adr_0;
	288	for (index = dir_start1; index < dir_end; index++)
	289	dbf->dir[index] = adr_1;
	290
	291
	292	/* Set changed flags. */
	293	dbf->bucket_cache[cache_0].ca_changed = TRUE;
	294	dbf->bucket_cache[cache_1].ca_changed = TRUE;
	295	dbf->bucket_changed = TRUE;
	296	dbf->directory_changed = TRUE;
	297	dbf->second_changed = TRUE;
	298
	299	/* Update the cache! */
	300	dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);
	301
	302	/* Invalidate old cache entry. */
	303	old_bucket.av_adr = dbf->cache_entry->ca_adr;
	304	old_bucket.av_size = dbf->header->bucket_size;
	305	dbf->cache_entry->ca_adr = 0;
	306	dbf->cache_entry->ca_changed = FALSE;
	307
	308	/* Set dbf->bucket to the proper bucket. */
	309	if (dbf->dir[dbf->bucket_dir] == adr_0)
	310	{
	311	dbf->bucket = bucket[0];
	312	dbf->cache_entry = &dbf->bucket_cache[cache_0];
	313	_gdbm_put_av_elem (old_bucket,
	314	bucket[1]->bucket_avail,
	315	&bucket[1]->av_count, FALSE);
	316	}
	317	else
	318	{
	319	dbf->bucket = bucket[1];
	320	dbf->cache_entry = &dbf->bucket_cache[cache_1];
	321	_gdbm_put_av_elem (old_bucket,
	322	bucket[0]->bucket_avail,
	323	&bucket[0]->av_count, FALSE);
	324	}
	325
	326	}
	327
	328	/* Get rid of old directories. */
	329	for (index = 0; index < old_count; index++)
	330	_gdbm_free (dbf, old_adr[index], old_size[index]);
	331	}
	332
	333
	334	/* The only place where a bucket is written. CA_ENTRY is the
	335	cache entry containing the bucket to be written. */
	336
	337	void
	338	_gdbm_write_bucket (dbf, ca_entry)
	339	gdbm_file_info *dbf;
	340	cache_elem *ca_entry;
	341	{
	342	int num_bytes; /* The return value for write. */
	343	off_t file_pos; /* The return value for lseek. */
	344
	345	file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
	346	if (file_pos != ca_entry->ca_adr)
	347	_gdbm_fatal (dbf, "lseek error");
	348	num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
	349	if (num_bytes != dbf->header->bucket_size)
	350	_gdbm_fatal (dbf, "write error");
	351	ca_entry->ca_changed = FALSE;
	352	ca_entry->ca_data.hash_val = -1;
	353	ca_entry->ca_data.elem_loc = -1;
	354	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: