source: trunk/mgpp/text/mgpp_invf_dict_dump.cpp@ 3365

Last change on this file since 3365 was 3365, checked in by kjdon, 22 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1/**************************************************************************
2 *
3 * mgpp_invf_dict_dump.cpp -- Program to print the various inverted dictionaries
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23#define _XOPEN_SOURCE_EXTENDED 1
24
25// need this to avoid bizarre compiler problems under VC++ 6.0
26#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
27# include <iostream>
28#endif
29
30/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
31#if defined (__WIN32__) || defined (__CYGWIN__)
32# include "getopt_old.h"
33#else
34# include <unistd.h>
35#endif
36
37#include "sysfuncs.h"
38#include "messages.h"
39#include "mg_files.h"
40#include "invf.h"
41
42static void process_files (char *filename,
43 bool printHeader,
44 bool printWords,
45 bool printTags) {
46 // open the dictionary
47 FILE *dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
48 MAGIC_STEM_BUILD, MG_ABORT);
49 invf_dict_header idh;
50 idh.Read (dictFile);
51
52 // print the information from the inverted dictionary file header
53 if (printHeader) {
54 cout << "lookback: " << idh.lookback << "\n";
55 cout << "word_dict_start: " << idh.word_dict_start << "\n";
56 cout << "word_dict_size: " << idh.word_dict_size << "\n";
57 cout << "tag_dict_start: " << idh.tag_dict_start << "\n";
58 cout << "tag_dict_size: " << idh.tag_dict_size << "\n";
59 cout << "num_docs: " << idh.num_docs << "\n";
60 cout << "num_frags: " << idh.num_frags << "\n";
61 cout << "num_words: " << idh.num_words << "\n";
62 cout << "total_bytes: " << idh.total_bytes << "\n";
63 cout << "index_string_bytes: " << idh.index_string_bytes << "\n";
64 cout << "num_levels: " << idh.num_levels << "\n";
65
66 cout << "\n";
67 }
68
69
70 if (printWords) {
71 fseek (dictFile, idh.word_dict_start, SEEK_SET);
72
73 unsigned long wordNum;
74 word_dict_el wordEl;
75 wordEl.SetNumLevels (idh.num_levels);
76 for (wordNum=0; wordNum<idh.word_dict_size; wordNum++) {
77 wordEl.Read (dictFile, idh.num_levels);
78 cout << "\"" << wordEl.el << "\"\n";// (" << wordNum << ")\n";
79 }
80
81 cout << "\n";
82 }
83
84 // write out the tag part of the dictionary
85 if (printTags) {
86 fseek (dictFile, idh.tag_dict_start, SEEK_SET);
87
88 unsigned long tagNum;
89 dict_el tagEl;
90 for (tagNum=0; tagNum<idh.tag_dict_size; tagNum++) {
91 // read in the next tag and inverted file pointer
92 tagEl.Read (dictFile);
93
94 cout << "\"" << tagEl.el << "\" (" << tagNum << ")\n";
95 }
96 }
97
98 // close open files
99 fclose (dictFile);
100}
101
102
103int main (int argc, char **argv) {
104 char *filename = "";
105 int ch;
106 msg_prefix = argv[0];
107 opterr = 0;
108
109 bool printHeader = false;
110 bool printWords = false;
111 bool printTags = false;
112
113 while ((ch = getopt (argc, argv, "f:d:rwth")) != -1) {
114 switch (ch) {
115 case 'f': // input file
116 filename = optarg;
117 break;
118 case 'd':
119 set_basepath (optarg);
120 break;
121 case 'r':
122 printHeader = true;
123 break;
124 case 'w':
125 printWords = true;
126 break;
127 case 't':
128 printTags = true;
129 break;
130 case 'h':
131 case '?':
132 fprintf (stderr, "usage: %s [-h] [-r] [-w] [-t] [-f input_file] "
133 "[-d data directory]\n(-rwt:print header, words, tags)\n",
134 argv[0]);
135 exit (1);
136 }
137 }
138
139 process_files (filename, printHeader, printWords, printTags);
140 return 0;
141}
Note: See TracBrowser for help on using the repository browser.