source: trunk/gsdl/src/mgpp/text/Queryer.cpp@ 2442

Last change on this file since 2442 was 2442, checked in by jrm21, 23 years ago

portability changes, use getopt from unistd.h (all POSIX systems)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1/**************************************************************************
2 *
3 * Queryer.cpp -- simple interactive query program
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Queryer.cpp 2442 2001-05-17 04:38:16Z jrm21 $
21 *
22 **************************************************************************/
23#define _XOPEN_SOURCE 1
24#define _XOPEN_SOURCE_EXTENDED 1
25#include <unistd.h>
26
27#include "MGQuery.h"
28#include "TextGet.h"
29
30#include "messages.h"
31#include "mg_files.h"
32
33#include "GSDLQueryParser.h"
34
// Writes the list of interactive "dot" commands to standard output.
//
// The list mirrors the commands handled by the command loop in main(),
// including ".p", which reads a document number from the following input
// line and prints that document's text.
void printHelp() {

  cout << "commands available are:\n"
       << "\t.q\t\tquit\n"
       << "\t.h\t\tprint the help message\n"
       << "\t.i\t\tchange the search level (enter the new level at the prompt)\n"
       << "\t.l\t\tchange the result level ( \"\" \"\" )\n"
       << "\t.b\t\tfull text browse (enter a word or fragment at the prompt)\n"
       << "\t.p\t\tprint the text of a document (enter its number on the next line)\n"
       << "\t.r0/.r1\t\tranking off/on\n"
       << "\t.t0/.t1\t\tquery type some/all\n"
       << "\t.c0/.c1\t\tcasefolding off/on\n"
       << "\t.s0/.s1\t\tstemming off/on\n"
       << "\t.o0/.o1\t\tshort output off/on\n\n";

}
50
51int main (int argc, char **argv) {
52 int ch;
53 char *textfilename = "";
54 char *indexfilename = "";
55 char *basePath = "";
56
57 opterr = 0;
58 msg_prefix = argv[0];
59
60 // process the command line arguments
61 while ((ch = getopt (argc, argv, "f:t:d:h")) != -1) {
62 switch (ch) {
63 case 'f': /* input file */
64 indexfilename = optarg;
65 break;
66 case 't':
67 textfilename = optarg;
68 break;
69 case 'd':
70 basePath = optarg;
71 set_basepath (basePath);
72 break;
73 case 'h':
74 case '?':
75 fprintf (stderr, "usage: %s [-h] [-d directory] -f indexname -t textname\n", argv[0]);
76 exit (1);
77 }
78 }
79
80 if (textfilename[0] == '\0' || indexfilename[0] == '\0') {
81 FatalError (1, "Index and text file names must be specified with -f and -t \n");
82 }
83
84 // init the text system
85 TextData textData;
86 if (!textData.LoadData (basePath, textfilename)) {
87 FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
88 }
89
90 // init the query system
91 IndexData indexData;
92 if (!indexData.LoadData (basePath, indexfilename)) {
93 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
94 }
95
96 // do querying
97 QueryInfo queryInfo;
98 SetCStr (queryInfo.docLevel, "Document");
99 queryInfo.maxDocs = 50;
100 queryInfo.sortByRank = true;
101 queryInfo.exactWeights = false;
102 queryInfo.needRankInfo = true;
103 queryInfo.needTermFreqs = true;
104
105 ExtQueryResult queryResult;
106 char query[2048];
107 UCArray queryArray;
108 QueryNode *queryTree = NULL;
109
110
111 UCArray docLevel;
112 SetCStr(docLevel, "Document");
113
114 UCArray level;
115 UCArrayClear(level);
116 //SetCStr(level, "");
117
118 int defaultStemMethod = 0; // uncasefolded, unstemmed
119 int defaultBoolCombine = 0; // OR
120 bool shortOutput = false;
121 BrowseQueryNode browseNode;
122 browseNode.startPosition = -10;
123 browseNode.numTerms = 40;
124
125 BrowseQueryResult browseResult;
126
127 while (true) {
128 cout << "> ";
129 cin.getline(query, 2048, '\n');
130 SetCStr (queryArray, query);
131
132 // check for commands
133 if (queryArray.size() >= 2 && queryArray[0] == '.') {
134 if (queryArray[1] == 'q') break; // quit
135
136 if (queryArray[1] == 'h') { // help
137 printHelp();
138 }
139 if (queryArray[1] == 'i') {
140 cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
141 cin >> query;
142 UCArrayClear(queryInfo.docLevel);
143 SetCStr(queryInfo.docLevel, query);
144 cout << "index set to " << queryInfo.docLevel <<"\n";
145 cin.getline(query, 2048, '\n');
146 }
147 if (queryArray[1] == 'l') {
148 cout << "current level="<< level << "\nchange to level:";
149 cin >> query;
150 UCArrayClear(level);
151 SetCStr(level, query);
152 cout << "level set to " << level <<"\n";
153 cin.getline(query, 2048, '\n');
154 }
155
156
157 else if (queryArray[1] == 'p') {
158 // print
159 UCArray docText;
160 unsigned long docNum = 0;
161 cin >> docNum;
162 cin.getline(query, 2048, '\n'); // eat up return
163
164 if (!GetDocText (textData, queryInfo.docLevel, docNum, docText)) {
165 FatalError (1, "Error while trying to get document %u", docNum);
166 }
167
168 cout << docText << "\n";
169 }
170 else if (queryArray[1] == 't') { // query type - all/some
171 if (queryArray[2] == '1') defaultBoolCombine = 1;
172 else if (queryArray[2] == '0') defaultBoolCombine = 0;
173 else {
174 cout << "Error: please enter .t0 (some) or .t1 (all)\n";
175 }
176 }
177 else if (queryArray[1] == 'r') { // ranking - on/off
178 if (queryArray[2] == '1') queryInfo.sortByRank = true;
179 else if (queryArray[2] == '0') queryInfo.sortByRank = false;
180 else {
181 cout << "Error: please enter .r0 (non-ranked) or .r1 (ranked)\n";
182 }
183 }
184 else if (queryArray[1] == 'c') { // casefolding - on/off
185 if (queryArray[2] == '1') defaultStemMethod |= 1;
186 else if (queryArray[2] == '0') defaultStemMethod &= 0xe;
187 else {
188 cout << "Error: please enter .c0 (case sensitive) or .c1 (casefolded)\n";
189 }
190 }
191 else if (queryArray[1] == 's') { // stemming - on/off
192 if (queryArray[2] == '1') defaultStemMethod |=2;
193 else if (queryArray[2] == '0') defaultStemMethod &=0xd;
194 else {
195 cout << "Error: please enter .s0 (unstemmed) or .s1 (stemmed)\n";
196 }
197 }
198 else if (queryArray[1] == 'o') { // output - short/long
199 if (queryArray[2] == '1') shortOutput = true;
200 else if (queryArray[2] == '0') shortOutput = false;
201 else {
202 cout << "Error: please enter .o0 (long output) or .o1 (short output)\n";
203 }
204 }
205 else if (queryArray[1] == 'b') {
206 // full text browse
207 cout<<"enter a few letters to start browsing from:";
208 cin>>query;
209 UCArrayClear(browseNode.term);
210 SetCStr(browseNode.term, query);
211 cin.getline(query, 2048, '\n'); // get rest of line
212
213 // print the query
214 PrintNode (cout, &browseNode);
215
216 MGBrowseQuery(indexData, docLevel, browseNode, browseResult);
217 cout << browseResult;
218 cout << "\n";
219
220 }
221 else { // bad option
222 cout << "bad command\n\n";
223 printHelp();
224 }
225 } // if a .x query
226 else {
227 // regular query
228 queryTree = ParseQuery (queryArray, defaultBoolCombine, defaultStemMethod);
229
230 // print the query
231 PrintNode (cout, queryTree);
232
233 MGQuery (indexData, queryInfo, queryTree, queryResult, level);
234 if (shortOutput) {
235 queryResult.printShort(cout);
236 cout << "\n";
237 }else {
238 cout << queryResult;
239 cout << "\n";
240 }
241 // delete the query
242 if (queryTree != NULL) delete queryTree;
243 queryTree = NULL;
244 }
245 }
246
247
248 // clean up, everybody clean up
249 textData.UnloadData ();
250 indexData.UnloadData ();
251}
252
253
Note: See TracBrowser for help on using the repository browser.