source: main/tags/2.80/indexers/mgpp/text/Queryer.cpp@ 24540

Last change on this file since 24540 was 13477, checked in by shaoqun, 17 years ago

added code for accentfolding

  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1/**************************************************************************
2 *
3 * Queryer.cpp -- simple interactive query program
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23#define _XOPEN_SOURCE_EXTENDED 1
24
25/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
26#if defined (__WIN32__) || defined (__CYGWIN__)
27# include "getopt_old.h"
28#else
29# include <unistd.h>
30#endif
31
32#include "MGQuery.h"
33#include "TextGet.h"
34
35#include "messages.h"
36#include "mg_files.h"
37
38#include "GSDLQueryParser.h"
39
// Writes the list of interactive "dot" commands understood by the query
// loop to standard output.  The accent folding entry is only listed when
// the program is built with ENABLE_ACCENTFOLD.
void printHelp() {
  // Adjacent string literals are concatenated by the compiler, so the whole
  // help text is a single constant that is emitted with one insertion.
  static const char helpText[] =
    "commands available are:\n"
    "\t.q\t\tquit\n"
    "\t.h\t\tprint the help message\n"
    "\t.i\t\tchange the search level (enter the new level at the prompt)\n"
    "\t.l\t\tchange the result level ( \"\" \"\" )\n"
    "\t.b\t\tfull text browse (enter a word or fragment at the prompt)\n"
    "\t.r0/.r1\t\tranking off/on\n"
    "\t.t0/.t1\t\tquery type some/all\n"
    "\t.c0/.c1\t\tcasefolding off/on\n"
    "\t.s0/.s1\t\tstemming off/on\n"
#ifdef ENABLE_ACCENTFOLD
    "\t.a0/.a1\t\taccentfolding off/on\n"
#endif
    "\t.o0/.o1\t\tshort output off/on\n"
    "\t.m\t\tset maxnumeric (enter the number at the prompt)\n\n"
    "\t.p\t\tprint a document (enter the docnum at the prompt)\n"
    "otherwise just enter a query\n\n";

  cout << helpText;
}
61
62int main (int argc, char **argv) {
63 int ch;
64 char *textfilename = "";
65 char *indexfilename = "";
66 char *basePath = "";
67
68 opterr = 0;
69 msg_prefix = argv[0];
70
71 // process the command line arguments
72 while ((ch = getopt (argc, argv, "f:t:d:h")) != -1) {
73 switch (ch) {
74 case 'f': /* input file */
75 indexfilename = optarg;
76 break;
77 case 't':
78 textfilename = optarg;
79 break;
80 case 'd':
81 basePath = optarg;
82 set_basepath (basePath);
83 break;
84 case 'h':
85 case '?':
86 fprintf (stderr, "usage: %s [-h] [-d directory] -f indexname -t textname\n", argv[0]);
87 exit (1);
88 }
89 }
90
91 if (textfilename[0] == '\0' || indexfilename[0] == '\0') {
92 FatalError (1, "Index and text file names must be specified with -f and -t \n");
93 }
94
95 // init the text system
96 TextData textData;
97 if (!textData.LoadData (basePath, textfilename)) {
98 FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
99 }
100
101 // init the query system
102 IndexData indexData;
103 if (!indexData.LoadData (basePath, indexfilename)) {
104 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
105 }
106
107 int maxnumeric = 4;
108
109 // debug output
110 cerr << "num docs: "<<indexData.bdh.num_docs
111 << "\nnum frags: "<<indexData.bdh.num_frags
112 << "\nnum words: "<<indexData.bdh.num_words
113 << "\ntotal bytes: "<<indexData.bdh.total_bytes
114 << "\nindex string bytes: "<<indexData.bdh.index_string_bytes
115 << "\nnum levels: "<<indexData.bdh.num_levels<<endl;
116
117 // do querying
118 QueryInfo queryInfo;
119 SetCStr (queryInfo.docLevel, "Doc", 3);
120 queryInfo.maxDocs = 50;
121 queryInfo.sortByRank = true;
122 queryInfo.exactWeights = false;
123 queryInfo.needRankInfo = true;
124 queryInfo.needTermFreqs = true;
125
126 ExtQueryResult queryResult;
127 char query[2048];
128 UCArray queryArray;
129 QueryNode *queryTree = NULL;
130
131
132 UCArray docLevel;
133 SetCStr(docLevel, "Doc", 3);
134
135 UCArray level;
136 UCArrayClear(level);
137 //SetCStr(level, "");
138
139 int defaultStemMethod = 0; // uncasefolded, unstemmed, unaccentfolded
140 int defaultBoolCombine = 0; // OR
141 bool shortOutput = false;
142 BrowseQueryNode browseNode;
143 browseNode.startPosition = -10;
144 browseNode.numTerms = 40;
145
146 BrowseQueryResult browseResult;
147
148 while (true) {
149 cout << "> ";
150 cin.getline(query, 2048, '\n');
151 SetCStr (queryArray, query, strlen(query));
152
153 // check for commands
154 if (queryArray.size() >= 2 && queryArray[0] == '.') {
155 if (queryArray[1] == 'q') break; // quit
156
157 if (queryArray[1] == 'h') { // help
158 printHelp();
159 } else if (queryArray[1] == 'i') {
160 cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
161 cin >> query;
162 UCArrayClear(queryInfo.docLevel);
163 SetCStr(queryInfo.docLevel, query, strlen(query));
164 cout << "index set to " << queryInfo.docLevel <<"\n";
165 cin.getline(query, 2048, '\n');
166 } else if (queryArray[1] == 'l') {
167 cout << "current level="<< level << "\nchange to level:";
168 cin >> query;
169 UCArrayClear(level);
170 SetCStr(level, query, strlen(query));
171 cout << "level set to " << level <<"\n";
172 cin.getline(query, 2048, '\n');
173 }
174
175 else if (queryArray[1] == 'm') {
176 // maxnumeric
177 int m = 0;
178 cin >> m;
179 cin.getline(query, 2048, '\n'); // eat up return
180 if (4 < m < 512) {
181 maxnumeric = m;
182 }
183 }
184 else if (queryArray[1] == 'p') {
185 // print
186 UCArray docText;
187 unsigned long docNum = 0;
188 cin >> docNum;
189 cin.getline(query, 2048, '\n'); // eat up return
190
191 if (!GetDocText (textData, queryInfo.docLevel, docNum, docText)) {
192 FatalError (1, "Error while trying to get document %u", docNum);
193 }
194
195 cout << docText << "\n";
196 }
197 else if (queryArray[1] == 't') { // query type - all/some
198 if (queryArray[2] == '1') defaultBoolCombine = 1;
199 else if (queryArray[2] == '0') defaultBoolCombine = 0;
200 else {
201 cout << "Error: please enter .t0 (some) or .t1 (all)\n";
202 }
203 }
204 else if (queryArray[1] == 'r') { // ranking - on/off
205 if (queryArray[2] == '1') queryInfo.sortByRank = true;
206 else if (queryArray[2] == '0') queryInfo.sortByRank = false;
207 else {
208 cout << "Error: please enter .r0 (non-ranked) or .r1 (ranked)\n";
209 }
210 }
211 else if (queryArray[1] == 'c') { // casefolding - on/off
212 if (queryArray[2] == '1') defaultStemMethod |= STEM_CaseFolding;
213 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_CaseFolding);
214 else {
215 cout << "Error: please enter .c0 (case sensitive) or .c1 (casefolded)\n";
216 }
217 }
218 else if (queryArray[1] == 's') { // stemming - on/off
219 if (queryArray[2] == '1') defaultStemMethod |= STEM_Stemming;
220 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_Stemming);
221 else {
222 cout << "Error: please enter .s0 (unstemmed) or .s1 (stemmed)\n";
223 }
224 }
225#ifdef ENABLE_ACCENTFOLD
226 else if (queryArray[1] == 'a') { // accentfolding - on/off
227 if (queryArray[2] == '1') defaultStemMethod |= STEM_AccentFolding;
228 else if (queryArray[2] == '0') defaultStemMethod &= (~STEM_AccentFolding);
229 else {
230 cout << "Error: please enter .a0 (accent sensitive) or .a1 (accentfolded)\n";
231 }
232 }
233#endif
234 else if (queryArray[1] == 'o') { // output - short/long
235 if (queryArray[2] == '1') shortOutput = true;
236 else if (queryArray[2] == '0') shortOutput = false;
237 else {
238 cout << "Error: please enter .o0 (long output) or .o1 (short output)\n";
239 }
240 }
241 else if (queryArray[1] == 'b') {
242 // full text browse
243 cout<<"enter a few letters to start browsing from:";
244 cin>>query;
245 UCArrayClear(browseNode.term);
246 SetCStr(browseNode.term, query, strlen(query));
247 cin.getline(query, 2048, '\n'); // get rest of line
248
249 // print the query
250 PrintNode (cout, &browseNode);
251
252 MGBrowseQuery(indexData, docLevel, browseNode, browseResult);
253 cout << browseResult;
254 cout << "\n";
255
256 }
257 else { // bad option
258 cout << "bad command\n\n";
259 printHelp();
260 }
261 } // if a .x query
262 else {
263 // regular query
264 queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod, maxnumeric);
265 if (queryTree == NULL) {
266 cout << "invalid syntax\n";
267 } else {
268 // print the query
269 PrintNode (cout, queryTree);
270
271 MGQuery (indexData, queryInfo, queryTree, queryResult, level);
272 if (shortOutput) {
273 queryResult.printShort(cout);
274 cout << "\n";
275 } else {
276 cout << queryResult;
277 cout << "\n";
278 }
279 // delete the query
280 delete queryTree;
281 queryTree = NULL;
282 }
283 }
284 }
285
286
287 // clean up, everybody clean up
288 textData.UnloadData ();
289 indexData.UnloadData ();
290
291 return (0);
292}
293
294
Note: See TracBrowser for help on using the repository browser.