source: trunk/gsdl/src/mgpp/text/Queryer.cpp@ 4209

Last change on this file since 4209 was 4209, checked in by kjdon, 21 years ago

parseQuery (in GSDLQueryParser) now returns NULL if there has been a syntax error. So all the auxiliary functions either return NULL if there is an error, or set an error flag to true. Queryer now tells the user that there has been invalid syntax rather than seg faulting.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1/**************************************************************************
2 *
3 * Queryer.cpp -- simple interactive query program
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23#define _XOPEN_SOURCE_EXTENDED 1
24
25/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
26#if defined (__WIN32__) || defined (__CYGWIN__)
27# include "getopt_old.h"
28#else
29# include <unistd.h>
30#endif
31
32#include "MGQuery.h"
33#include "TextGet.h"
34
35#include "messages.h"
36#include "mg_files.h"
37
38#include "GSDLQueryParser.h"
39
// Writes the list of interactive "dot" commands understood by the query
// loop to standard output, followed by a reminder that any other input is
// treated as a query.
void printHelp() {
  // One entry per help line; entries already carry their own newlines.
  static const char *const helpLines[] = {
    "commands available are:\n",
    "\t.q\t\tquit\n",
    "\t.h\t\tprint the help message\n",
    "\t.i\t\tchange the search level (enter the new level at the prompt)\n",
    "\t.l\t\tchange the result level ( \"\" \"\" )\n",
    "\t.b\t\tfull text browse (enter a word or fragment at the prompt)\n",
    "\t.r0/.r1\t\tranking off/on\n",
    "\t.t0/.t1\t\tquery type some/all\n",
    "\t.c0/.c1\t\tcasefolding off/on\n",
    "\t.s0/.s1\t\tstemming off/on\n",
    "\t.o0/.o1\t\tshort output off/on\n\n",
    "\t.p\t\tprint a document (enter the docnum at the prompt)\n",
    "otherwise just enter a query\n\n"
  };
  const unsigned int lineCount = sizeof (helpLines) / sizeof (helpLines[0]);

  for (unsigned int idx = 0; idx < lineCount; ++idx) {
    cout << helpLines[idx];
  }
}
57
58int main (int argc, char **argv) {
59 int ch;
60 char *textfilename = "";
61 char *indexfilename = "";
62 char *basePath = "";
63
64 opterr = 0;
65 msg_prefix = argv[0];
66
67 // process the command line arguments
68 while ((ch = getopt (argc, argv, "f:t:d:h")) != -1) {
69 switch (ch) {
70 case 'f': /* input file */
71 indexfilename = optarg;
72 break;
73 case 't':
74 textfilename = optarg;
75 break;
76 case 'd':
77 basePath = optarg;
78 set_basepath (basePath);
79 break;
80 case 'h':
81 case '?':
82 fprintf (stderr, "usage: %s [-h] [-d directory] -f indexname -t textname\n", argv[0]);
83 exit (1);
84 }
85 }
86
87 if (textfilename[0] == '\0' || indexfilename[0] == '\0') {
88 FatalError (1, "Index and text file names must be specified with -f and -t \n");
89 }
90
91 // init the text system
92 TextData textData;
93 if (!textData.LoadData (basePath, textfilename)) {
94 FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
95 }
96
97 // init the query system
98 IndexData indexData;
99 if (!indexData.LoadData (basePath, indexfilename)) {
100 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
101 }
102
103 // debug output
104 cerr << "num docs: "<<indexData.bdh.num_docs
105 << "\nnum frags: "<<indexData.bdh.num_frags
106 << "\nnum words: "<<indexData.bdh.num_words
107 << "\ntotal bytes: "<<indexData.bdh.total_bytes
108 << "\nindex string bytes: "<<indexData.bdh.index_string_bytes
109 << "\nnum levels: "<<indexData.bdh.num_levels<<endl;
110
111 // do querying
112 QueryInfo queryInfo;
113 SetCStr (queryInfo.docLevel, "Document");
114 queryInfo.maxDocs = 50;
115 queryInfo.sortByRank = true;
116 queryInfo.exactWeights = false;
117 queryInfo.needRankInfo = true;
118 queryInfo.needTermFreqs = true;
119
120 ExtQueryResult queryResult;
121 char query[2048];
122 UCArray queryArray;
123 QueryNode *queryTree = NULL;
124
125
126 UCArray docLevel;
127 SetCStr(docLevel, "Document");
128
129 UCArray level;
130 UCArrayClear(level);
131 //SetCStr(level, "");
132
133 int defaultStemMethod = 0; // uncasefolded, unstemmed
134 int defaultBoolCombine = 0; // OR
135 bool shortOutput = false;
136 BrowseQueryNode browseNode;
137 browseNode.startPosition = -10;
138 browseNode.numTerms = 40;
139
140 BrowseQueryResult browseResult;
141
142 while (true) {
143 cout << "> ";
144 cin.getline(query, 2048, '\n');
145 SetCStr (queryArray, query);
146
147 // check for commands
148 if (queryArray.size() >= 2 && queryArray[0] == '.') {
149 if (queryArray[1] == 'q') break; // quit
150
151 if (queryArray[1] == 'h') { // help
152 printHelp();
153 }
154 if (queryArray[1] == 'i') {
155 cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
156 cin >> query;
157 UCArrayClear(queryInfo.docLevel);
158 SetCStr(queryInfo.docLevel, query);
159 cout << "index set to " << queryInfo.docLevel <<"\n";
160 cin.getline(query, 2048, '\n');
161 }
162 if (queryArray[1] == 'l') {
163 cout << "current level="<< level << "\nchange to level:";
164 cin >> query;
165 UCArrayClear(level);
166 SetCStr(level, query);
167 cout << "level set to " << level <<"\n";
168 cin.getline(query, 2048, '\n');
169 }
170
171
172 else if (queryArray[1] == 'p') {
173 // print
174 UCArray docText;
175 unsigned long docNum = 0;
176 cin >> docNum;
177 cin.getline(query, 2048, '\n'); // eat up return
178
179 if (!GetDocText (textData, queryInfo.docLevel, docNum, docText)) {
180 FatalError (1, "Error while trying to get document %u", docNum);
181 }
182
183 cout << docText << "\n";
184 }
185 else if (queryArray[1] == 't') { // query type - all/some
186 if (queryArray[2] == '1') defaultBoolCombine = 1;
187 else if (queryArray[2] == '0') defaultBoolCombine = 0;
188 else {
189 cout << "Error: please enter .t0 (some) or .t1 (all)\n";
190 }
191 }
192 else if (queryArray[1] == 'r') { // ranking - on/off
193 if (queryArray[2] == '1') queryInfo.sortByRank = true;
194 else if (queryArray[2] == '0') queryInfo.sortByRank = false;
195 else {
196 cout << "Error: please enter .r0 (non-ranked) or .r1 (ranked)\n";
197 }
198 }
199 else if (queryArray[1] == 'c') { // casefolding - on/off
200 if (queryArray[2] == '1') defaultStemMethod |= 1;
201 else if (queryArray[2] == '0') defaultStemMethod &= 0xe;
202 else {
203 cout << "Error: please enter .c0 (case sensitive) or .c1 (casefolded)\n";
204 }
205 }
206 else if (queryArray[1] == 's') { // stemming - on/off
207 if (queryArray[2] == '1') defaultStemMethod |=2;
208 else if (queryArray[2] == '0') defaultStemMethod &=0xd;
209 else {
210 cout << "Error: please enter .s0 (unstemmed) or .s1 (stemmed)\n";
211 }
212 }
213 else if (queryArray[1] == 'o') { // output - short/long
214 if (queryArray[2] == '1') shortOutput = true;
215 else if (queryArray[2] == '0') shortOutput = false;
216 else {
217 cout << "Error: please enter .o0 (long output) or .o1 (short output)\n";
218 }
219 }
220 else if (queryArray[1] == 'b') {
221 // full text browse
222 cout<<"enter a few letters to start browsing from:";
223 cin>>query;
224 UCArrayClear(browseNode.term);
225 SetCStr(browseNode.term, query);
226 cin.getline(query, 2048, '\n'); // get rest of line
227
228 // print the query
229 PrintNode (cout, &browseNode);
230
231 MGBrowseQuery(indexData, docLevel, browseNode, browseResult);
232 cout << browseResult;
233 cout << "\n";
234
235 }
236 else { // bad option
237 cout << "bad command\n\n";
238 printHelp();
239 }
240 } // if a .x query
241 else {
242 // regular query
243 queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod);
244 if (queryTree == NULL) {
245 cout << "invalid syntax\n";
246 } else {
247 // print the query
248 PrintNode (cout, queryTree);
249
250 MGQuery (indexData, queryInfo, queryTree, queryResult, level);
251 if (shortOutput) {
252 queryResult.printShort(cout);
253 cout << "\n";
254 } else {
255 cout << queryResult;
256 cout << "\n";
257 }
258 // delete the query
259 delete queryTree;
260 queryTree = NULL;
261 }
262 }
263 }
264
265
266 // clean up, everybody clean up
267 textData.UnloadData ();
268 indexData.UnloadData ();
269
270 return (0);
271}
272
273
Note: See TracBrowser for help on using the repository browser.