root/gsdl/trunk/common-src/packages/gdbm/gdbm-1.8.3/bucket.c @ 18044

Revision 18044, 11.9 KB (checked in by mdewsnip, 12 years ago)

Fixed bug in customised code where not all of the hash table buckets had their key values unswapped.

Line 
/* bucket.c - The routines for playing with hash buckets. */

/*  This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
    Copyright (C) 1990, 1991, 1993  Free Software Foundation, Inc.

    GDBM is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    GDBM is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with GDBM; see the file COPYING.  If not, write to
    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

    You may contact the author by:
       e-mail:  phil@cs.wwu.edu
      us-mail:  Philip A. Nelson
                Computer Science Department
                Western Washington University
                Bellingham, WA 98226

*************************************************************************/
28
29
30/* include system configuration before all else. */
31#include "autoconf.h"
32
33#include "gdbmdefs.h"
34
35#include "gsdlmods.h"
36
37
38/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
39void
40_gdbm_new_bucket (dbf, bucket, bits)
41     gdbm_file_info *dbf;
42     hash_bucket *bucket;
43     int bits;
44{
45  int index;
46 
47  /* Initialize the avail block. */
48  bucket->av_count = 0;
49
50  /* Set the information fields first. */
51  bucket->bucket_bits = bits;
52  bucket->count = 0;
53 
54  /* Initialize all bucket elements. */
55  for (index = 0; index < dbf->header->bucket_elems; index++)
56    bucket->h_table[index].hash_value = -1;
57}
58
59
60
/* Find a bucket for DBF that is pointed to by the bucket directory from
   location DIR_INDEX.   The bucket cache is first checked to see if it
   is already in memory.  If not, a bucket may be tossed to read the new
   bucket.  In any case, the requested bucket is made the "current" bucket
   and dbf->bucket points to the correct bucket. */

void
_gdbm_get_bucket (dbf, dir_index)
     gdbm_file_info *dbf;
     int dir_index;
{
  off_t bucket_adr; /* The address of the correct hash bucket.  */
  int   num_bytes;  /* The number of bytes read. */
  off_t file_pos;   /* The return address for lseek. */
  register int index;   /* Loop index. */

  /* Initial set up: remember which directory slot is current and fetch
     the file address of the bucket it points at. */
  dbf->bucket_dir = dir_index;
  bucket_adr = dbf->dir [dir_index];

  /* Lazily create the bucket cache on first use. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  /* If that one is not already current, we must find it. */
  if (dbf->cache_entry->ca_adr != bucket_adr)
    {
      /* Look in the cache; a hit avoids any disk I/O. */
      for (index = 0; index < dbf->cache_size; index++)
        {
          if (dbf->bucket_cache[index].ca_adr == bucket_adr)
            {
              dbf->bucket = dbf->bucket_cache[index].ca_bucket;
              dbf->cache_entry = &dbf->bucket_cache[index];
              return;
            }
        }

      /* It is not in the cache, read it from the disk.  Evict the next
         slot round-robin, first flushing it if it holds unwritten
         changes, then repoint the slot at the new bucket address. */
      dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
      if (dbf->bucket_cache[dbf->last_read].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
      dbf->cache_entry->ca_data.elem_loc = -1;  /* no cached find result */
      dbf->cache_entry->ca_changed = FALSE;

      /* Read the bucket. */
      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
      if (file_pos != bucket_adr)
        _gdbm_fatal (dbf, "lseek error");

      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
      if (num_bytes != dbf->header->bucket_size)
        _gdbm_fatal (dbf, "read error");

      /* GREENSTONE CUSTOMISATION: Swap each value in the bucket if the
         GDBM file is the wrong endianness. */
      if (wrong_endianness)
      {
        /* NOTE(review): the second argument is bucket_size in BYTES; confirm
           endian_swap_array expects a byte count rather than an int count. */
        endian_swap_array((int*) dbf->bucket, dbf->header->bucket_size);

        /* There is a 4-byte string in each hash table entry that we need to
           leave alone (because character strings are not affected by
           endianness), so convert these back to how they were originally. */
        int b;
        for (b = 0; b < dbf->header->bucket_elems; b++)
        {
          int* key_start = (int*) dbf->bucket->h_table[b].key_start;
          *key_start = endian_swap (*key_start);
        }
      }
    }

  return;
}
138
139
/* Split the current bucket.  This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory.  */
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     int next_insert;
{
  hash_bucket *bucket[2];   /* Pointers to the new buckets. */

  int          new_bits;    /* The number of bits for the new buckets. */
  int          cache_0;     /* Location in the cache for the buckets. */
  int          cache_1;
  off_t        adr_0;       /* File address of the new bucket 0. */
  off_t        adr_1;       /* File address of the new bucket 1. */
  avail_elem   old_bucket;  /* Avail Struct for the old bucket. */

  off_t        dir_start0;  /* Used in updating the directory. */
  off_t        dir_start1;
  off_t        dir_end;

  off_t       *new_dir;     /* Pointer to the new directory. */
  off_t        dir_adr;     /* Address of the new directory. */
  int          dir_size;    /* Size of the new directory. */
  off_t        old_adr[31];     /* Address of the old directories. */
  int          old_size[31];    /* Size of the old directories. */
  int          old_count;   /* Number of old directories. */

  int          index;       /* Used in array indexing. */
  int          index1;      /* Used in array indexing. */
  int          elem_loc;    /* Location in new bucket to put element. */
  bucket_element *old_el;   /* Pointer into the old bucket. */
  int          select;      /* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

  /* Lazily create the bucket cache on first use. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
        _gdbm_fatal(dbf, "couldn't init cache");
    }

  /* Keep splitting until the current bucket has room; one split can
     leave the bucket full again if all keys land on the same side. */
  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache.  The do/while skips
         over the cache slot that currently holds dbf->bucket so we never
         clobber the bucket being split. */
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_0 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      do
        {
          dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
          cache_1 = dbf->last_read;
        }
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
        _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      /* Both halves are one bit deeper than the bucket they replace. */
      new_bits = dbf->bucket->bucket_bits+1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;



      /* Double the directory size if necessary (the bucket being split
         already covers as many bits as the whole directory). */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
        {
          dir_size = dbf->header->dir_size * 2;
          dir_adr  = _gdbm_alloc (dbf, dir_size);
          new_dir  = (off_t *) malloc (dir_size);
          if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
          /* Each old directory entry becomes two adjacent entries. */
          for (index = 0;
               index < dbf->header->dir_size/sizeof (off_t); index++)
            {
              new_dir[2*index]   = dbf->dir[index];
              new_dir[2*index+1] = dbf->dir[index];
            }

          /* Update header.  The old on-disk directory is only remembered
             here; it is freed after the loop, once the split succeeds. */
          old_adr[old_count] = dbf->header->dir;
          dbf->header->dir = dir_adr;
          old_size[old_count] = dbf->header->dir_size;
          dbf->header->dir_size = dir_size;
          dbf->header->dir_bits = new_bits;
          old_count++;

          /* Now update dbf.  */
          dbf->header_changed = TRUE;
          dbf->bucket_dir *= 2;
          free (dbf->dir);
          dbf->dir = new_dir;
        }

      /* Copy all elements in dbf->bucket into the new buckets.  The bit
         just below the new depth (bit 31-new_bits of the hash) decides
         which half each element goes to; collisions within a bucket are
         resolved by linear probing. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
        {
          old_el = & (dbf->bucket->h_table[index]);
          select = (old_el->hash_value >> (31-new_bits)) & 1;
          elem_loc = old_el->hash_value % dbf->header->bucket_elems;
          while (bucket[select]->h_table[elem_loc].hash_value != -1)
            elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
          bucket[select]->h_table[elem_loc] = *old_el;
          bucket[select]->count += 1;
        }

      /* Allocate avail space for the bucket[1]. */
      bucket[1]->bucket_avail[0].av_adr
        = _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
        {
          /* The avail is full, move the first one to bucket[1]. */
          _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
          index = 1;
          bucket[0]->av_count --;
        }
      for (; index < dbf->bucket->av_count; index++)
        {
          bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
        }

      /* Update the directory.  We have new file addresses for both buckets.
         [dir_start0, dir_start1) now points at bucket 0 and
         [dir_start1, dir_end) at bucket 1. */
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
        dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
        dbf->dir[index] = adr_1;


      /* Set changed flags. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache!  Recompute the directory slot for the hash value
         that is about to be inserted. */
      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);

      /* Invalidate old cache entry, and remember the split bucket's disk
         space so it can be donated to the non-current half below. */
      old_bucket.av_adr  = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket (the one next_insert hashes
         into); give the freed old-bucket space to the other half. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
        {
          dbf->bucket = bucket[0];
          dbf->cache_entry = &dbf->bucket_cache[cache_0];
          _gdbm_put_av_elem (old_bucket,
                             bucket[1]->bucket_avail,
                             &bucket[1]->av_count, FALSE);
        }
      else
        {
          dbf->bucket = bucket[1];
          dbf->cache_entry = &dbf->bucket_cache[cache_1];
          _gdbm_put_av_elem (old_bucket,
                             bucket[0]->bucket_avail,
                             &bucket[0]->av_count, FALSE);
        }

    }

  /* Get rid of old directories. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}
332
333
334/* The only place where a bucket is written.  CA_ENTRY is the
335   cache entry containing the bucket to be written. */
336
337void
338_gdbm_write_bucket (dbf, ca_entry)
339     gdbm_file_info *dbf;
340     cache_elem *ca_entry;
341{
342  int  num_bytes;   /* The return value for write. */
343  off_t file_pos;   /* The return value for lseek. */
344 
345  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
346  if (file_pos != ca_entry->ca_adr)
347    _gdbm_fatal (dbf, "lseek error");
348  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
349  if (num_bytes != dbf->header->bucket_size)
350    _gdbm_fatal (dbf, "write error");
351  ca_entry->ca_changed = FALSE;
352  ca_entry->ca_data.hash_val = -1;
353  ca_entry->ca_data.elem_loc = -1;
354}
Note: See TracBrowser for help on using the browser.