source: main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mgpp/text/Queryer.cpp@ 23701

Last change on this file since 23701 was 23701, checked in by sjm84, 13 years ago

A few more fixes for 64-bit mgpp

  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/**************************************************************************
2 *
3 * Queryer.cpp -- simple interactive query program
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23// This was added for Solaris, but it makes things worse on Solaris for me...
24// #define _XOPEN_SOURCE_EXTENDED 1
25
26/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
27#if defined (__WIN32__) || defined (__CYGWIN__)
28# include "getopt_old.h"
29#else
30# include <unistd.h>
31#endif
32
33#include "MGQuery.h"
34#include "TextGet.h"
35
36#include "messages.h"
37#include "mg_files.h"
38
39#include "GSDLQueryParser.h"
40
/*
 * printHelp -- write the list of interactive commands to standard output.
 *
 * The accent-folding entry is only included when ENABLE_ACCENTFOLD is
 * defined at compile time.  The command list is printed as one contiguous
 * group, followed by a blank line and the "just enter a query" hint.
 */
void printHelp() {

  cout << "commands available are:\n"
       << "\t.q\t\tquit\n"
       << "\t.h\t\tprint the help message\n"
       << "\t.i\t\tchange the search level (enter the new level at the prompt)\n"
       << "\t.l\t\tchange the result level ( \"\" \"\" )\n"
       << "\t.b\t\tfull text browse (enter a word or fragment at the prompt)\n"
       << "\t.r0/.r1\t\tranking off/on\n"
       << "\t.t0/.t1\t\tquery type some/all\n"
       << "\t.c0/.c1\t\tcasefolding off/on\n"
       << "\t.s0/.s1\t\tstemming off/on\n"
#ifdef ENABLE_ACCENTFOLD
       << "\t.a0/.a1\t\taccentfolding off/on\n"
#endif
       << "\t.o0/.o1\t\tshort output off/on\n"
       // the blank separator line belongs after the LAST command entry (.p),
       // not between .m and .p
       << "\t.m\t\tset maxnumeric (enter the number at the prompt)\n"
       << "\t.p\t\tprint a document (enter the docnum at the prompt)\n\n"
       << "otherwise just enter a query\n\n";

}
62
63int main (int argc, char **argv) {
64 int ch;
65 char *textfilename = (char*)"";
66 char *indexfilename = (char*)"";
67 char *basePath = (char*)"";
68
69 opterr = 0;
70 msg_prefix = argv[0];
71
72 // process the command line arguments
73 while ((ch = getopt (argc, argv, "f:t:d:h")) != -1) {
74 switch (ch) {
75 case 'f': /* input file */
76 indexfilename = optarg;
77 break;
78 case 't':
79 textfilename = optarg;
80 break;
81 case 'd':
82 basePath = optarg;
83 set_basepath (basePath);
84 break;
85 case 'h':
86 case '?':
87 fprintf (stderr, "usage: %s [-h] [-d directory] -f indexname -t textname\n", argv[0]);
88 exit (1);
89 }
90 }
91
92 if (textfilename[0] == '\0' || indexfilename[0] == '\0') {
93 FatalError (1, "Index and text file names must be specified with -f and -t \n");
94 }
95
96 // init the text system
97 TextData textData;
98 if (!textData.LoadData (basePath, textfilename)) {
99 FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
100 }
101
102 // init the query system
103 IndexData indexData;
104 if (!indexData.LoadData (basePath, indexfilename)) {
105 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
106 }
107
108 int maxnumeric = 4;
109
110 // debug output
111 cerr << "num docs: "<<indexData.bdh.num_docs
112 << "\nnum frags: "<<indexData.bdh.num_frags
113 << "\nnum words: "<<indexData.bdh.num_words
114 << "\ntotal bytes: "<<indexData.bdh.total_bytes
115 << "\nindex string bytes: "<<indexData.bdh.index_string_bytes
116 << "\nnum levels: "<<indexData.bdh.num_levels<<endl;
117
118 // do querying
119 QueryInfo queryInfo;
120 SetCStr (queryInfo.docLevel, "Doc", 3);
121 queryInfo.maxDocs = 50;
122 queryInfo.sortByRank = true;
123 queryInfo.exactWeights = false;
124 queryInfo.needRankInfo = true;
125 queryInfo.needTermFreqs = true;
126
127 ExtQueryResult queryResult;
128 char query[2048];
129 UCArray queryArray;
130 QueryNode *queryTree = NULL;
131
132
133 UCArray docLevel;
134 SetCStr(docLevel, "Doc", 3);
135
136 UCArray level;
137 UCArrayClear(level);
138 //SetCStr(level, "");
139
140 int defaultStemMethod = 0; // uncasefolded, unstemmed, unaccentfolded
141 int defaultBoolCombine = 0; // OR
142 bool shortOutput = false;
143 BrowseQueryNode browseNode;
144 browseNode.startPosition = -10;
145 browseNode.numTerms = 40;
146
147 BrowseQueryResult browseResult;
148
149 while (true) {
150 cout << "> ";
151 cin.getline(query, 2048, '\n');
152 SetCStr (queryArray, query, strlen(query));
153
154 // check for commands
155 if (queryArray.size() >= 2 && queryArray[0] == '.') {
156 if (queryArray[1] == 'q') break; // quit
157
158 if (queryArray[1] == 'h') { // help
159 printHelp();
160 } else if (queryArray[1] == 'i') {
161 cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
162 cin >> query;
163 UCArrayClear(queryInfo.docLevel);
164 SetCStr(queryInfo.docLevel, query, strlen(query));
165 cout << "index set to " << queryInfo.docLevel <<"\n";
166 cin.getline(query, 2048, '\n');
167 } else if (queryArray[1] == 'l') {
168 cout << "current level="<< level << "\nchange to level:";
169 cin >> query;
170 UCArrayClear(level);
171 SetCStr(level, query, strlen(query));
172 cout << "level set to " << level <<"\n";
173 cin.getline(query, 2048, '\n');
174 }
175
176 else if (queryArray[1] == 'm') {
177 // maxnumeric
178 int m = 0;
179 cin >> m;
180 cin.getline(query, 2048, '\n'); // eat up return
181 if (m > 4 && m < 512) {
182 maxnumeric = m;
183 }
184 }
185 else if (queryArray[1] == 'p') {
186 // print
187 UCArray docText;
188 mg_u_long docNum = 0;
189 cin >> docNum;
190 cin.getline(query, 2048, '\n'); // eat up return
191
192 if (!GetDocText (textData, queryInfo.docLevel, docNum, docText)) {
193 FatalError (1, "Error while trying to get document %u", docNum);
194 }
195
196 cout << docText << "\n";
197 }
198 else if (queryArray[1] == 't') { // query type - all/some
199 if (queryArray[2] == '1') defaultBoolCombine = 1;
200 else if (queryArray[2] == '0') defaultBoolCombine = 0;
201 else {
202 cout << "Error: please enter .t0 (some) or .t1 (all)\n";
203 }
204 }
205 else if (queryArray[1] == 'r') { // ranking - on/off
206 if (queryArray[2] == '1') queryInfo.sortByRank = true;
207 else if (queryArray[2] == '0') queryInfo.sortByRank = false;
208 else {
209 cout << "Error: please enter .r0 (non-ranked) or .r1 (ranked)\n";
210 }
211 }
212 else if (queryArray[1] == 'c') { // casefolding - on/off
213 if (queryArray[2] == '1') defaultStemMethod |= STEM_CaseFolding;
214 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_CaseFolding);
215 else {
216 cout << "Error: please enter .c0 (case sensitive) or .c1 (casefolded)\n";
217 }
218 }
219 else if (queryArray[1] == 's') { // stemming - on/off
220 if (queryArray[2] == '1') defaultStemMethod |= STEM_Stemming;
221 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_Stemming);
222 else {
223 cout << "Error: please enter .s0 (unstemmed) or .s1 (stemmed)\n";
224 }
225 }
226#ifdef ENABLE_ACCENTFOLD
227 else if (queryArray[1] == 'a') { // accentfolding - on/off
228 if (queryArray[2] == '1') defaultStemMethod |= STEM_AccentFolding;
229 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_AccentFolding);
230 else {
231 cout << "Error: please enter .a0 (accent sensitive) or .a1 (accentfolded)\n";
232 }
233 }
234#endif
235 else if (queryArray[1] == 'o') { // output - short/long
236 if (queryArray[2] == '1') shortOutput = true;
237 else if (queryArray[2] == '0') shortOutput = false;
238 else {
239 cout << "Error: please enter .o0 (long output) or .o1 (short output)\n";
240 }
241 }
242 else if (queryArray[1] == 'b') {
243 // full text browse
244 cout<<"enter a few letters to start browsing from:";
245 cin>>query;
246 UCArrayClear(browseNode.term);
247 SetCStr(browseNode.term, query, strlen(query));
248 cin.getline(query, 2048, '\n'); // get rest of line
249
250 // print the query
251 PrintNode (cout, &browseNode);
252
253 MGBrowseQuery(indexData, docLevel, browseNode, browseResult);
254 cout << browseResult;
255 cout << "\n";
256
257 }
258 else { // bad option
259 cout << "bad command\n\n";
260 printHelp();
261 }
262 } // if a .x query
263 else {
264 // regular query
265 queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod, maxnumeric);
266 if (queryTree == NULL) {
267 cout << "invalid syntax\n";
268 } else {
269 // print the query
270 PrintNode (cout, queryTree);
271
272 MGQuery (indexData, queryInfo, queryTree, queryResult, level);
273 if (shortOutput) {
274 queryResult.printShort(cout);
275 cout << "\n";
276 } else {
277 cout << queryResult;
278 cout << "\n";
279 }
280 // delete the query
281 delete queryTree;
282 queryTree = NULL;
283 }
284 }
285 }
286
287
288 // clean up, everybody clean up
289 textData.UnloadData ();
290 indexData.UnloadData ();
291
292 return (0);
293}
294
295
Note: See TracBrowser for help on using the repository browser.