source: trunk/gsdl/src/mgpp/text/mg_perf_hash_build.cpp@ 879

Last change on this file since 879 was 856, checked in by sjboddie, 24 years ago

Rodgers new C++ mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1/**************************************************************************
2 *
3 * mg_perf_hash_build.cpp -- Program to build a perfect hash function
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: mg_perf_hash_build.cpp 856 2000-01-14 02:26:25Z sjboddie $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25#include "memlib.h"
26#include "messages.h"
27#include "local_strings.h"
28#include "perf_hash.h"
29#include "netorder.h"
30
31#include "mg_files.h"
32#include "invf.h"
33#include "locallib.h"
34#include "words.h"
35#include "mg.h"
36
37/*
38 $Log$
39 Revision 1.1 2000/01/14 02:26:19 sjboddie
40 Rodgers new C++ mg
41
42 Revision 1.2 1999/10/17 23:43:27 cs025
43 Changes to eradicate Xmalloc
44
45 Revision 1.1 1999/10/11 02:58:01 cs025
46 Base install of MG-PP
47
48 Revision 1.1 1999/08/10 21:18:13 sjboddie
49 renamed mg-1.3d directory mg
50
51 Revision 1.2 1998/11/25 07:55:47 rjmcnab
52
53 Modified mg so that you can specify the stemmer you want
54 to use via a command line option. You specify it to
55 mg_passes during the build process. The number of the
56 stemmer that you used is stored within the inverted
57 dictionary header and the stemmed dictionary header so
58 the correct stemmer is used in later stages of building
59 and querying.
60
61 Revision 1.1 1998/11/17 09:35:15 rjmcnab
62 *** empty log message ***
63
64 * Revision 1.3 1994/10/20 03:56:58 tes
65 * I have rewritten the boolean query optimiser and abstracted out the
66 * components of the boolean query.
67 *
68 * Revision 1.2 1994/09/20 04:41:53 tes
69 * For version 1.1
70 *
71 */
72
73
74
/* Size in bytes of each string pool allocated by ProcessFiles().
 * The replacement list is parenthesized so that the macro expands as a
 * single value inside any surrounding expression (e.g. x / POOL_SIZE). */
#define POOL_SIZE (1024*1024)
76
77
78static void ProcessFiles (char *filename, int r) {
79 FILE *dictFile, *hashFile;
80 unsigned long i;
81 invf_dict_header idh;
82 perf_hash_data *phd;
83 u_char *pool;
84 unsigned long pool_left;
85 u_char **starts;
86
87 // read in the dictionary
88 dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
89 MAGIC_STEM_BUILD, MG_ABORT);
90 idh.Read (dictFile);
91
92 // go to the start of the word dictionary
93 fseek (dictFile, idh.word_dict_start, SEEK_SET);
94
95 if (!(pool = (u_char *) Xmalloc (POOL_SIZE)))
96 FatalError (1, "Out of memory");
97 pool_left = POOL_SIZE;
98
99 if (!(starts = (u_char **) Xmalloc (sizeof (u_char *) * idh.word_dict_size)))
100 FatalError (1, "Out of memory");
101
102 word_dict_el wordEl;
103 wordEl.SetNumLevels (idh.num_levels);
104 for (i = 0; i < idh.word_dict_size; i++) {
105 // read the next word and associated information
106 wordEl.Read (dictFile, idh.num_levels);
107
108 // push string onto pool data
109 register unsigned long l;
110 l = wordEl.el.size() + 1;
111 if (pool_left < l) {
112 pool = (u_char *) Xmalloc (POOL_SIZE);
113 pool_left = POOL_SIZE;
114 }
115 starts[i] = pool;
116
117 *pool++ = wordEl.el.size();
118 bcopy ((char *) wordEl.el.begin(), (char *) pool, wordEl.el.size());
119 pool += l;
120 pool_left -= l;
121 }
122 fclose (dictFile);
123
124 // create perfect hash file
125 hashFile = create_file (filename, INVF_DICT_HASH_SUFFIX, "wb",
126 MAGIC_HASH, MG_ABORT);
127 if (!(phd = gen_hash_func (idh.word_dict_size, starts, r)))
128 FatalError (1, "Unable to generate hash function");
129 if (write_perf_hash_data (hashFile, phd) == -1)
130 FatalError (1, "Unable to write hash function");
131 fclose (hashFile);
132}
133
134
135
136int main (int argc, char **argv) {
137 int r = -1;
138 char *filename = "";
139 int ch;
140 msg_prefix = argv[0];
141 opterr = 0;
142
143 while ((ch = getopt (argc, argv, "f:d:r:h")) != -1) {
144 switch (ch) {
145 case 'f': // input file
146 filename = optarg;
147 break;
148 case 'd':
149 set_basepath (optarg);
150 break;
151 case 'r':
152 r = atoi (optarg);
153 break;
154 case 'h':
155 case '?':
156 fprintf (stderr, "usage: %s [-f input_file]"
157 "[-d data directory] [-r random seed] [-h]\n", argv[0]);
158 exit (1);
159 }
160 }
161
162 ProcessFiles (filename, r);
163 return 0;
164}
Note: See TracBrowser for help on using the repository browser.