source: trunk/gsdl/packages/mg-1.3d/src/text/mg_perf_hash_build.c@ 30

Last change on this file since 30 was 13, checked in by rjmcnab, 26 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.2 KB
Line 
1/**************************************************************************
2 *
3 * mg_perf_hash_build.c -- Program to build a perfect hash function
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: mg_perf_hash_build.c 13 1998-11-17 09:36:00Z rjmcnab $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25#include "memlib.h"
26#include "messages.h"
27#include "timing.h"
28#include "local_strings.h"
29#include "perf_hash.h"
30#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
31
32#include "mg_files.h"
33#include "invf.h"
34#include "locallib.h"
35#include "words.h"
36#include "mg.h"
37
38/*
39 $Log$
40 Revision 1.1 1998/11/17 09:35:15 rjmcnab
41 *** empty log message ***
42
43 * Revision 1.3 1994/10/20 03:56:58 tes
44 * I have rewritten the boolean query optimiser and abstracted out the
45 * components of the boolean query.
46 *
47 * Revision 1.2 1994/09/20 04:41:53 tes
48 * For version 1.1
49 *
50 */
51
/* Revision-control identification string embedded in the object file. */
static char *RCSID = "$Id: mg_perf_hash_build.c 13 1998-11-17 09:36:00Z rjmcnab $";



/*
 * Size in bytes of each chunk allocated to hold dictionary words.
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. `x / POOL_SIZE`).
 */
#define POOL_SIZE (1024*1024)

static void process_files (char *filename);

/*
 * Value handed to gen_hash_func (); overridden by the -r command-line
 * option and left at -1 when that option is not given.
 */
int r = -1;
61
62void
63main (int argc, char **argv)
64{
65 ProgTime start;
66 char *file_name = "";
67 int ch;
68 msg_prefix = argv[0];
69 opterr = 0;
70 while ((ch = getopt (argc, argv, "f:d:r:h")) != -1)
71 switch (ch)
72 {
73 case 'f': /* input file */
74 file_name = optarg;
75 break;
76 case 'd':
77 set_basepath (optarg);
78 break;
79 case 'r':
80 r = atoi (optarg);
81 break;
82 case 'h':
83 case '?':
84 fprintf (stderr, "usage: %s [-f input_file]"
85 "[-d data directory] [-r random seed] [-h]\n", argv[0]);
86 exit (1);
87 }
88
89 GetTime (&start);
90 process_files (file_name);
91 Message ("%s\n", ElapsedTime (&start, NULL));
92 exit (0);
93}
94
95
96
97
98
99static void
100process_files (char *filename)
101{
102 FILE *dict, *hash;
103 unsigned long i;
104 u_char prev[MAXSTEMLEN + 1];
105 struct invf_dict_header idh;
106 perf_hash_data *phd;
107 u_char *pool;
108 int pool_left;
109 u_char **starts;
110
111
112 dict = open_file (filename, INVF_DICT_SUFFIX, "rb",
113 MAGIC_STEM_BUILD, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
114
115 fread ((char *) &idh, sizeof (idh), 1, dict);
116
117 /* [RPAP - Jan 97: Endian Ordering] */
118 NTOHUL(idh.lookback);
119 NTOHUL(idh.dict_size);
120 NTOHUL(idh.total_bytes);
121 NTOHUL(idh.index_string_bytes);
122 NTOHD(idh.input_bytes); /* [RJM 07/97: 4G limit] */
123 NTOHUL(idh.num_of_docs);
124 NTOHUL(idh.static_num_of_docs);
125 NTOHUL(idh.num_of_words);
126 NTOHUL(idh.stem_method);
127
128 hash = create_file (filename, INVF_DICT_HASH_SUFFIX, "wb",
129 MAGIC_HASH, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */
130
131 if (!(pool = Xmalloc (POOL_SIZE)))
132 FatalError (1, "Out of memory");
133 pool_left = POOL_SIZE;
134
135 if (!(starts = Xmalloc (sizeof (u_char *) * idh.dict_size)))
136 FatalError (1, "Out of memory");
137
138 for (i = 0; i < idh.dict_size; i++)
139 {
140 register unsigned long copy, suff, l;
141 unsigned long wcnt, fcnt;
142
143 /* build a new word on top of prev */
144 copy = getc (dict);
145 suff = getc (dict);
146 *prev = copy + suff;
147 fread (prev + copy + 1, sizeof (u_char), suff, dict);
148
149 /* read other data, but no need to store it */
150 fread (&fcnt, sizeof (fcnt), 1, dict);
151 fread (&wcnt, sizeof (wcnt), 1, dict);
152
153 l = *prev + 1;
154 if (pool_left < l)
155 {
156 pool = Xmalloc (POOL_SIZE);
157 pool_left = POOL_SIZE;
158 }
159 starts[i] = pool;
160 bcopy ((char *) prev, (char *) pool, l);
161 pool += l;
162 pool_left -= l;
163 }
164 if (!(phd = gen_hash_func (idh.dict_size, starts, r)))
165 FatalError (1, "Unable to generate hash function");
166 if (write_perf_hash_data (hash, phd) == -1)
167 FatalError (1, "Unable to write hash function");
168 fclose (dict);
169 fclose (hash);
170}
Note: See TracBrowser for help on using the repository browser.