source: indexers/trunk/mgpp/text/mgpp_invf_dict_dump.cpp@ 19822

Last change on this file since 19822 was 19822, checked in by mdewsnip, 15 years ago

Commented out all occurrences of

#define _XOPEN_SOURCE_EXTENDED 1

This was allegedly added for compilation on Solaris, but it just causes errors for me (on the NLNZ Solaris machines).

  • Property svn:keywords set to Author Date Id Revision
File size: 4.1 KB
Line 
1/**************************************************************************
2 *
3 * mgpp_invf_dict_dump.cpp -- Program to print the various inverted dictionaries
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23// This was added for Solaris, but it makes things worse on Solaris for me...
24// #define _XOPEN_SOURCE_EXTENDED 1
25
26// need this to avoid bizarre compiler problems under VC++ 6.0
27#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
28# include <iostream>
29#endif
30
31/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
32#if defined (__WIN32__) || defined (__CYGWIN__)
33# include "getopt_old.h"
34#else
35# include <unistd.h>
36#endif
37
38#include "sysfuncs.h"
39#include "messages.h"
40#include "mg_files.h"
41#include "invf.h"
42
43static void process_files (char *filename,
44 bool printHeader,
45 bool printWords,
46 bool printTags) {
47 // open the dictionary
48 FILE *dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
49 MAGIC_STEM_BUILD, MG_ABORT);
50 invf_dict_header idh;
51 idh.Read (dictFile);
52
53 // print the information from the inverted dictionary file header
54 if (printHeader) {
55 cout << "lookback: " << idh.lookback << "\n";
56 cout << "word_dict_start: " << idh.word_dict_start << "\n";
57 cout << "word_dict_size: " << idh.word_dict_size << "\n";
58 cout << "tag_dict_start: " << idh.tag_dict_start << "\n";
59 cout << "tag_dict_size: " << idh.tag_dict_size << "\n";
60 cout << "num_docs: " << idh.num_docs << "\n";
61 cout << "num_frags: " << idh.num_frags << "\n";
62 cout << "num_words: " << idh.num_words << "\n";
63 cout << "total_bytes: " << idh.total_bytes << "\n";
64 cout << "index_string_bytes: " << idh.index_string_bytes << "\n";
65 cout << "num_levels: " << idh.num_levels << "\n";
66
67 cout << "\n";
68 }
69
70
71 if (printWords) {
72 fseek (dictFile, idh.word_dict_start, SEEK_SET);
73
74 unsigned long wordNum;
75 word_dict_el wordEl;
76 wordEl.SetNumLevels (idh.num_levels);
77 for (wordNum=0; wordNum<idh.word_dict_size; ++wordNum) {
78 wordEl.Read (dictFile, idh.num_levels);
79 cout << "\"" << wordEl.el << "\"\n";// (" << wordNum << ")\n";
80 }
81
82 cout << "\n";
83 }
84
85 // write out the tag part of the dictionary
86 if (printTags) {
87 fseek (dictFile, idh.tag_dict_start, SEEK_SET);
88
89 unsigned long tagNum;
90 dict_el tagEl;
91 for (tagNum=0; tagNum<idh.tag_dict_size; ++tagNum) {
92 // read in the next tag and inverted file pointer
93 tagEl.Read (dictFile);
94
95 cout << "\"" << tagEl.el << "\" (" << tagNum << ")\n";
96 }
97 }
98
99 // close open files
100 fclose (dictFile);
101}
102
103
104int main (int argc, char **argv) {
105 char *filename = "";
106 int ch;
107 msg_prefix = argv[0];
108 opterr = 0;
109
110 bool printHeader = false;
111 bool printWords = false;
112 bool printTags = false;
113
114 while ((ch = getopt (argc, argv, "f:d:rwth")) != -1) {
115 switch (ch) {
116 case 'f': // input file
117 filename = optarg;
118 break;
119 case 'd':
120 set_basepath (optarg);
121 break;
122 case 'r':
123 printHeader = true;
124 break;
125 case 'w':
126 printWords = true;
127 break;
128 case 't':
129 printTags = true;
130 break;
131 case 'h':
132 case '?':
133 fprintf (stderr, "usage: %s [-h] [-r] [-w] [-t] [-f input_file] "
134 "[-d data directory]\n(-rwt:print header, words, tags)\n",
135 argv[0]);
136 exit (1);
137 }
138 }
139
140 process_files (filename, printHeader, printWords, printTags);
141 return 0;
142}
Note: See TracBrowser for help on using the repository browser.