source: trunk/gsdl/src/mgpp/text/mg_invf_dict_dump.cpp@ 2442

Last change on this file since 2442 was 2442, checked in by jrm21, 23 years ago

portability changes, use getopt from unistd.h (all POSIX systems)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1/**************************************************************************
2 *
3 * mg_invf_dict_dump.cpp -- Program to print the various inverted dictionaries
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: mg_invf_dict_dump.cpp 2442 2001-05-17 04:38:16Z jrm21 $
21 *
22 **************************************************************************/
23#define _XOPEN_SOURCE 1
24#define _XOPEN_SOURCE_EXTENDED 1
25#include <unistd.h>
26
27#include "sysfuncs.h"
28#include "messages.h"
29
30#include "mg_files.h"
31#include "invf.h"
32
33/*
34 $Log$
35 Revision 1.3 2001/05/17 04:38:15 jrm21
36 portability changes, use getopt from unistd.h (all POSIX systems)
37
38 Revision 1.2 2001/02/02 01:13:05 kjm18
39 changed the help message
40
41 Revision 1.1 2000/01/14 02:17:51 sjboddie
42 Rodgers new C++ mg
43
44 */
45
46
47static void process_files (char *filename,
48 bool printHeader,
49 bool printWords,
50 bool printTags) {
51 // open the dictionary
52 FILE *dictFile = open_file (filename, INVF_DICT_SUFFIX, "rb",
53 MAGIC_STEM_BUILD, MG_ABORT);
54 invf_dict_header idh;
55 idh.Read (dictFile);
56
57 // print the information from the inverted dictionary file header
58 if (printHeader) {
59 cout << "lookback: " << idh.lookback << "\n";
60 cout << "word_dict_start: " << idh.word_dict_start << "\n";
61 cout << "word_dict_size: " << idh.word_dict_size << "\n";
62 cout << "tag_dict_start: " << idh.tag_dict_start << "\n";
63 cout << "tag_dict_size: " << idh.tag_dict_size << "\n";
64 cout << "num_docs: " << idh.num_docs << "\n";
65 cout << "num_frags: " << idh.num_frags << "\n";
66 cout << "num_words: " << idh.num_words << "\n";
67 cout << "total_bytes: " << idh.total_bytes << "\n";
68 cout << "index_string_bytes: " << idh.index_string_bytes << "\n";
69 cout << "num_levels: " << idh.num_levels << "\n";
70
71 cout << "\n";
72 }
73
74
75 if (printWords) {
76 fseek (dictFile, idh.word_dict_start, SEEK_SET);
77
78 unsigned long wordNum;
79 word_dict_el wordEl;
80 wordEl.SetNumLevels (idh.num_levels);
81 for (wordNum=0; wordNum<idh.word_dict_size; wordNum++) {
82 wordEl.Read (dictFile, idh.num_levels);
83 cout << "\"" << wordEl.el << "\"\n";// (" << wordNum << ")\n";
84 }
85
86 cout << "\n";
87 }
88
89 // write out the tag part of the dictionary
90 if (printTags) {
91 fseek (dictFile, idh.tag_dict_start, SEEK_SET);
92
93 unsigned long tagNum;
94 dict_el tagEl;
95 for (tagNum=0; tagNum<idh.tag_dict_size; tagNum++) {
96 // read in the next tag and inverted file pointer
97 tagEl.Read (dictFile);
98
99 cout << "\"" << tagEl.el << "\" (" << tagNum << ")\n";
100 }
101 }
102
103 // close open files
104 fclose (dictFile);
105}
106
107
108int main (int argc, char **argv) {
109 char *filename = "";
110 int ch;
111 msg_prefix = argv[0];
112 opterr = 0;
113
114 bool printHeader = false;
115 bool printWords = false;
116 bool printTags = false;
117
118 while ((ch = getopt (argc, argv, "f:d:rwth")) != -1) {
119 switch (ch) {
120 case 'f': // input file
121 filename = optarg;
122 break;
123 case 'd':
124 set_basepath (optarg);
125 break;
126 case 'r':
127 printHeader = true;
128 break;
129 case 'w':
130 printWords = true;
131 break;
132 case 't':
133 printTags = true;
134 break;
135 case 'h':
136 case '?':
137 fprintf (stderr, "usage: %s [-h] [-r] [-w] [-t] [-f input_file] "
138 "[-d data directory]\n(-rwt:print header, words, tags)\n",
139 argv[0]);
140 exit (1);
141 }
142 }
143
144 process_files (filename, printHeader, printWords, printTags);
145 return 0;
146}
Note: See TracBrowser for help on using the repository browser.