source: indexers/trunk/mgpp/text/mgpp_perf_hash_build.cpp@ 19822

Last change on this file since 19822 was 19822, checked in by mdewsnip, 15 years ago

Commented out all occurrences of

#define _XOPEN_SOURCE_EXTENDED 1

This was allegedly added for compilation on Solaris, but it just causes errors for me (on the NLNZ Solaris machines).

  • Property svn:keywords set to Author Date Id Revision
File size: 4.2 KB
Line 
1/**************************************************************************
2 *
3 * mgpp_perf_hash_build.cpp -- Program to build a perfect hash function
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23// This was added for Solaris, but it makes things worse on Solaris for me...
24// #define _XOPEN_SOURCE_EXTENDED 1
25
26// need this to avoid bizarre compiler problems under VC++ 6.0
27#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
28# include <iostream>
29#endif
30
31/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
32#if defined (__WIN32__) || defined (__CYGWIN__)
33# include "getopt_old.h"
34#else
35# include <unistd.h>
36#endif
37
38#include "sysfuncs.h"
39#include "memlib.h"
40#include "messages.h"
41#include "local_strings.h"
42#include "perf_hash.h"
43#include "netorder.h"
44
45#include "mg_files.h"
46#include "invf.h"
47#include "locallib.h"
48#include "words.h"
49#include "mg.h"
50
// Size handed to Xmalloc for each chunk of word-pool memory.  Parenthesised
// so the macro expands safely inside larger expressions (e.g. x / POOL_SIZE).
#define POOL_SIZE (1024*1024)
52
53static void ProcessFiles (char *filename, int r) {
54 FILE *dictFile, *hashFile;
55 unsigned long i;
56 invf_dict_header idh;
57 perf_hash_data *phd;
58 u_char *pool;
59 unsigned long pool_left;
60 u_char **starts;
61
62 // read in the dictionary
63 dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
64 MAGIC_STEM_BUILD, MG_ABORT);
65 if (dictFile==NULL) {
66 FatalError(1, "unable to open file");
67 }
68 idh.Read (dictFile);
69
70 //cerr << idh.lookback<<" "<<idh.word_dict_start<<endl;
71 // go to the start of the word dictionary
72 fseek (dictFile, idh.word_dict_start, SEEK_SET);
73
74 if (!(pool = (u_char *) Xmalloc (POOL_SIZE)))
75 FatalError (1, "Out of memory");
76 pool_left = POOL_SIZE;
77
78 if (!(starts = (u_char **) Xmalloc (sizeof (u_char *) * idh.word_dict_size)))
79 FatalError (1, "Out of memory");
80 //cerr << "size= "<< idh.word_dict_size<<endl;
81 word_dict_el wordEl;
82 wordEl.SetNumLevels (idh.num_levels);
83 for (i = 0; i < idh.word_dict_size; ++i) {
84 // read the next word and associated information
85 wordEl.Read (dictFile, idh.num_levels);
86
87 // push string onto pool data
88 register unsigned long l;
89 l = wordEl.el.size() + 1;
90 if (pool_left < l) {
91 pool = (u_char *) Xmalloc (POOL_SIZE);
92 pool_left = POOL_SIZE;
93 }
94 starts[i] = pool;
95
96 *pool++ = wordEl.el.size();
97 memcpy ((char *) pool, &(wordEl.el[0]), wordEl.el.size());
98 //cerr << pool<<" " <<starts[i]<<endl;
99 pool += wordEl.el.size();
100 pool_left -= l;
101
102 }
103 fclose (dictFile);
104 //cerr << pool<<" " <<starts[i-1]<<endl;
105 //cerr<<"starts "<<starts[113529]<<endl;
106 //cerr << starts[17][1] << " "<<starts[25][4]<<endl;
107 // create perfect hash file
108 hashFile = create_file (filename, INVF_DICT_HASH_SUFFIX, "wb",
109 MAGIC_HASH, MG_ABORT);
110 if (!(phd = gen_hash_func (idh.word_dict_size, starts, r)))
111 FatalError (1, "Unable to generate hash function");
112 if (write_perf_hash_data (hashFile, phd) == -1)
113 FatalError (1, "Unable to write hash function");
114 fclose (hashFile);
115}
116
117
118
119int main (int argc, char **argv) {
120 int r = -1;
121 char *filename = "";
122 int ch;
123 msg_prefix = argv[0];
124 opterr = 0;
125
126 while ((ch = getopt (argc, argv, "f:d:r:h")) != -1) {
127 switch (ch) {
128 case 'f': // input file
129 filename = optarg;
130 break;
131 case 'd':
132 set_basepath (optarg);
133 break;
134 case 'r':
135 r = atoi (optarg);
136 break;
137 case 'h':
138 case '?':
139 fprintf (stderr, "usage: %s [-f input_file]"
140 "[-d data directory] [-r random seed] [-h]\n", argv[0]);
141 exit (1);
142 }
143 }
144
145 ProcessFiles (filename, r);
146 return 0;
147}
Note: See TracBrowser for help on using the repository browser.