source: tags/gsdl-2_38-distribution/gsdl/src/mgpp/text/Queryer.cpp@ 3129

Last change on this file since 3129 was 3015, checked in by jrm21, 22 years ago

include getopt_old.h if we are compiling on cygwin

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1/**************************************************************************
2 *
3 * Queryer.cpp -- simple interactive query program
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#define _XOPEN_SOURCE 1
23#define _XOPEN_SOURCE_EXTENDED 1
24
25/* getopt is in posix.2, so cygwin should have it in unistd, but doesn't */
26#if defined (__WIN32__) || defined (__CYGWIN__)
27# include "getopt_old.h"
28#else
29# include <unistd.h>
30#endif
31
32#include "MGQuery.h"
33#include "TextGet.h"
34
35#include "messages.h"
36#include "mg_files.h"
37
38#include "GSDLQueryParser.h"
39
/**
 * Print the list of interactive "dot" commands to standard output.
 *
 * Every command listed here is handled by the command loop in main().
 * The ".p" entry was previously missing even though main() accepts it.
 */
void printHelp() {

  cout << "commands available are:\n"
       << "\t.q\t\tquit\n"
       << "\t.h\t\tprint the help message\n"
       << "\t.i\t\tchange the search level (enter the new level at the prompt)\n"
       << "\t.l\t\tchange the result level ( \"\" \"\" )\n"
       << "\t.b\t\tfull text browse (enter a word or fragment at the prompt)\n"
       << "\t.p\t\tprint the text of a document (enter the document number on the next line)\n"
       << "\t.r0/.r1\t\tranking off/on\n"
       << "\t.t0/.t1\t\tquery type some/all\n"
       << "\t.c0/.c1\t\tcasefolding off/on\n"
       << "\t.s0/.s1\t\tstemming off/on\n"
       << "\t.o0/.o1\t\tshort output off/on\n\n";

}
55
56int main (int argc, char **argv) {
57 int ch;
58 char *textfilename = "";
59 char *indexfilename = "";
60 char *basePath = "";
61
62 opterr = 0;
63 msg_prefix = argv[0];
64
65 // process the command line arguments
66 while ((ch = getopt (argc, argv, "f:t:d:h")) != -1) {
67 switch (ch) {
68 case 'f': /* input file */
69 indexfilename = optarg;
70 break;
71 case 't':
72 textfilename = optarg;
73 break;
74 case 'd':
75 basePath = optarg;
76 set_basepath (basePath);
77 break;
78 case 'h':
79 case '?':
80 fprintf (stderr, "usage: %s [-h] [-d directory] -f indexname -t textname\n", argv[0]);
81 exit (1);
82 }
83 }
84
85 if (textfilename[0] == '\0' || indexfilename[0] == '\0') {
86 FatalError (1, "Index and text file names must be specified with -f and -t \n");
87 }
88
89 // init the text system
90 TextData textData;
91 if (!textData.LoadData (basePath, textfilename)) {
92 FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
93 }
94
95 // init the query system
96 IndexData indexData;
97 if (!indexData.LoadData (basePath, indexfilename)) {
98 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
99 }
100
101 // do querying
102 QueryInfo queryInfo;
103 SetCStr (queryInfo.docLevel, "Document");
104 queryInfo.maxDocs = 50;
105 queryInfo.sortByRank = true;
106 queryInfo.exactWeights = false;
107 queryInfo.needRankInfo = true;
108 queryInfo.needTermFreqs = true;
109
110 ExtQueryResult queryResult;
111 char query[2048];
112 UCArray queryArray;
113 QueryNode *queryTree = NULL;
114
115
116 UCArray docLevel;
117 SetCStr(docLevel, "Document");
118
119 UCArray level;
120 UCArrayClear(level);
121 //SetCStr(level, "");
122
123 int defaultStemMethod = 0; // uncasefolded, unstemmed
124 int defaultBoolCombine = 0; // OR
125 bool shortOutput = false;
126 BrowseQueryNode browseNode;
127 browseNode.startPosition = -10;
128 browseNode.numTerms = 40;
129
130 BrowseQueryResult browseResult;
131
132 while (true) {
133 cout << "> ";
134 cin.getline(query, 2048, '\n');
135 SetCStr (queryArray, query);
136
137 // check for commands
138 if (queryArray.size() >= 2 && queryArray[0] == '.') {
139 if (queryArray[1] == 'q') break; // quit
140
141 if (queryArray[1] == 'h') { // help
142 printHelp();
143 }
144 if (queryArray[1] == 'i') {
145 cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
146 cin >> query;
147 UCArrayClear(queryInfo.docLevel);
148 SetCStr(queryInfo.docLevel, query);
149 cout << "index set to " << queryInfo.docLevel <<"\n";
150 cin.getline(query, 2048, '\n');
151 }
152 if (queryArray[1] == 'l') {
153 cout << "current level="<< level << "\nchange to level:";
154 cin >> query;
155 UCArrayClear(level);
156 SetCStr(level, query);
157 cout << "level set to " << level <<"\n";
158 cin.getline(query, 2048, '\n');
159 }
160
161
162 else if (queryArray[1] == 'p') {
163 // print
164 UCArray docText;
165 unsigned long docNum = 0;
166 cin >> docNum;
167 cin.getline(query, 2048, '\n'); // eat up return
168
169 if (!GetDocText (textData, queryInfo.docLevel, docNum, docText)) {
170 FatalError (1, "Error while trying to get document %u", docNum);
171 }
172
173 cout << docText << "\n";
174 }
175 else if (queryArray[1] == 't') { // query type - all/some
176 if (queryArray[2] == '1') defaultBoolCombine = 1;
177 else if (queryArray[2] == '0') defaultBoolCombine = 0;
178 else {
179 cout << "Error: please enter .t0 (some) or .t1 (all)\n";
180 }
181 }
182 else if (queryArray[1] == 'r') { // ranking - on/off
183 if (queryArray[2] == '1') queryInfo.sortByRank = true;
184 else if (queryArray[2] == '0') queryInfo.sortByRank = false;
185 else {
186 cout << "Error: please enter .r0 (non-ranked) or .r1 (ranked)\n";
187 }
188 }
189 else if (queryArray[1] == 'c') { // casefolding - on/off
190 if (queryArray[2] == '1') defaultStemMethod |= 1;
191 else if (queryArray[2] == '0') defaultStemMethod &= 0xe;
192 else {
193 cout << "Error: please enter .c0 (case sensitive) or .c1 (casefolded)\n";
194 }
195 }
196 else if (queryArray[1] == 's') { // stemming - on/off
197 if (queryArray[2] == '1') defaultStemMethod |=2;
198 else if (queryArray[2] == '0') defaultStemMethod &=0xd;
199 else {
200 cout << "Error: please enter .s0 (unstemmed) or .s1 (stemmed)\n";
201 }
202 }
203 else if (queryArray[1] == 'o') { // output - short/long
204 if (queryArray[2] == '1') shortOutput = true;
205 else if (queryArray[2] == '0') shortOutput = false;
206 else {
207 cout << "Error: please enter .o0 (long output) or .o1 (short output)\n";
208 }
209 }
210 else if (queryArray[1] == 'b') {
211 // full text browse
212 cout<<"enter a few letters to start browsing from:";
213 cin>>query;
214 UCArrayClear(browseNode.term);
215 SetCStr(browseNode.term, query);
216 cin.getline(query, 2048, '\n'); // get rest of line
217
218 // print the query
219 PrintNode (cout, &browseNode);
220
221 MGBrowseQuery(indexData, docLevel, browseNode, browseResult);
222 cout << browseResult;
223 cout << "\n";
224
225 }
226 else { // bad option
227 cout << "bad command\n\n";
228 printHelp();
229 }
230 } // if a .x query
231 else {
232 // regular query
233 queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod);
234
235 // print the query
236 PrintNode (cout, queryTree);
237
238 MGQuery (indexData, queryInfo, queryTree, queryResult, level);
239 if (shortOutput) {
240 queryResult.printShort(cout);
241 cout << "\n";
242 }else {
243 cout << queryResult;
244 cout << "\n";
245 }
246 // delete the query
247 if (queryTree != NULL) delete queryTree;
248 queryTree = NULL;
249 }
250 }
251
252
253 // clean up, everybody clean up
254 textData.UnloadData ();
255 indexData.UnloadData ();
256
257 return (0);
258}
259
260
Note: See TracBrowser for help on using the repository browser.