/*
 * help_str - built-in help text for mgquery, one array element per output
 * line.
 *
 * Every element is a complete line that already ends in '\n'.  The table has
 * no NULL sentinel, so the element count must be derived with
 * sizeof help_str / sizeof help_str[0].
 *
 * The text deliberately contains literal '%' sequences (`%n', `%w', `%d',
 * `%%') that describe the separator-string syntax to the user.
 * NOTE(review): because of those sequences the lines must not be used as a
 * printf-style format string; presumably they are written with
 * fputs()/"%s" -- confirm against the code that prints this table.
 *
 * NOTE(review): the upper bound "429467295" quoted for maxdocs, maxparas and
 * max_terms looks like a mistyped 4294967295 (2^32 - 1).  It is left as
 * found because the actual range check is not visible from here; verify
 * against the parameter-parsing code and the manual page before changing it.
 */
static char *help_str[] = {
  /* ---- Introduction ---- */
  "\n",
  " HELP for mgquery\n",
  " ================================\n",
  "\n",
  "The text is a summary of the information in the \"mgquery\" manual pages.\n",
  "\n",
  "The input to 'mgquery' consists of a series of input lines. The backslash\n",
  "character (\"\\\") is used at the end of lines to indicate that input \n",
  "continues on the next line.\n",
  "\n",
  "Input lines on which the first character is a dot (\".\") are commands to\n",
  "the mgquery program. Input lines that do not start with a dot are queries.\n",
  "\n",
  "A query consists of two parts. One part is a boolean or ranked query that\n",
  "identifies documents. The second part is a post-processing pattern matching\n",
  "operation. Any text between the first speech mark (\") and the last speech\n",
  "mark is considered to be a post-processing pattern. \n",
  "\n",
  "\n",

  /* ---- Dot commands ---- */
  "The following commands are available :-\n",
  "\n",
  " .help - displays this text.\n",
  " .quit - quits the program.\n",
  " .set name value - sets parameter \"name\" to \"value\" . If the parameter\n",
  " is a boolean parameter and value is omitted the\n",
  " parameter will be inverted (i.e. if it is true it\n",
  " will change to false, if it is false it will change\n",
  " to true).\n",
  " .unset name - deletes parameter \"name\"\n",
  " .reset - sets all the parameters to their initial state.\n",
  " .display - displays the values of all the current parameters.\n",
  " .push - pushes the current parameters on to a stack.\n",
  " .pop - destroys the current parameters and pops a new set\n",
  " of parameters off the stack.\n",
  " .output arg - This is used to specify where to send the documents.\n",
  " Arg may be one of the following:\n",
  " > filename : Send output to the specified file.\n",
  " >> filename : Append output to the specified file.\n",
  " | command : The output is piped into command, \n",
  " which is executed by sh.\n",
  " .input arg - This is used to specify where input comes from.\n",
  " Arg may be one of the following:\n",
  " < filename : Get the input from the specified\n",
  " file.\n",
  " | command : The input comes from the standard\n",
  " output of command, which is executed\n",
  " by sh.\n",
  "\n",

  /* ---- Start-up file ---- */
  "On startup the mgquery program reads from the file .mgrc a sequence \n",
  "of commands (NOTE: The .mgrc file may not contain any queries). mgquery \n",
  "first looks for .mgrc in the current directory and then in the user's home \n",
  "directory. Lines starting with a '#' in the .mgrc file are considered to\n",
  "be comments and are ignored.\n",
  "\n",

  /* ---- Predefined parameters (alphabetical) ---- */
  "The following parameters (used in the .set and .unset commands) are \n",
  "predefined and have special significance :-\n",
  "\n",
  "accumulator_method = `array'\n",
  " This parameter is used during ranking, and specifies how the\n",
  " weight for each document should be accumulated. The following\n",
  " methods are available `array', `splay_tree', `hash_table', and\n",
  " `list'.\n",
  "\n",
  "briefstats = `off'\n",
  " This is a boolean parameter that determines whether the\n",
  " totals for disk, memory and time usage statistics will be \n",
  " displayed at the end of each query. \n",
  " NOTE: this takes precedence over the parameters \"diskstats\",\n",
  " \"memstats\" and \"timestats\". This parameter may take the values \n",
  " `yes', `no', `true', `false', `on' or `off'.\n",
  "\n",
  "buffer = `1048576'\n",
  " When the documents are being read in they are read into a \n",
  " buffer of this size and then displayed from this buffer. If \n",
  " the documents are larger than this buffer the buffer is\n",
  " expanded automatically. Having a large buffer gives a very\n",
  " slight performance improvement because it allows the order of \n",
  " disk operations to be optimised. The buffer size is measured\n",
  " in bytes.\n",
  "\n",
  "diskstats = `off'\n",
  " This is a boolean parameter that determines whether the disk\n",
  " usage statistics for the preceding query will be displayed\n",
  " after each query. This parameter may take the values `yes',\n",
  " `no', `true', `false', `on' or `off'.\n",
  "\n",
  "doc_sepstr = `---------------------------------- %n\\n'\n",
  " This specifies the string that will be used to separate \n",
  " documents when they are displayed for `boolean' or `docnums'\n",
  " queries. The standard C escape character sequences (see the \n",
  " man page) may be used to place special characters in the \n",
  " string. For example, a newline would be `\\n'. To include a `%'\n",
  " use the sequence `%%'. To include the MG document number use\n",
  " the sequence `%n'.\n",
  "\n",
  "expert = `false'\n",
  " If this is true then a lot of the waffle that the program \n",
  " spits out is suppressed. This parameter may take the values\n",
  " `yes', `no', `true', `false', `on' or `off'.\n",
  "\n",
  "hash_tbl_size = `1000'\n",
  " One of the options during ranking queries is to use a hash \n",
  " table to accumulate the weights for each document. The hash \n",
  " table is a simple chained type. This parameter specifies the \n",
  " size of the hash table and may take any value between 8 and\n",
  " 268435456. \n",
  "\n",
  "heads_length = `50'\n",
  " When the mode is `heads' this specifies the number of \n",
  " characters that will be output for each document.\n",
  "\n",
  "maxdocs = `all'\n",
  " The maximum number of documents to display in response to a\n",
  " query. This parameter may take on a numeric value between 1 \n",
  " and 429467295 or the word `all'.\n",
  "\n",
  "maxparas = `1000'\n",
  " The maximum number of paragraphs to identify during a ranked\n",
  " query with paragraph indexing. After the paragraphs have been\n",
  " identified the paragraphs are converted into documents, and \n",
  " because some of the paragraphs may refer to the same documents \n",
  " the final number of answers may be less than maxparas. The \n",
  " maxdocs parameter will then be applied. This parameter may \n",
  " take on a numeric value between 1 and 429467295.\n",
  "\n",
  "max_accumulators = `50000'\n",
  " This parameter limits the number of different paragraph/\n",
  " document numbers to be accumulated during ranked queries when \n",
  " the parameter `accumulator_method' is set to `splay_tree',\n",
  " `hash_table', or `list'. This parameter may take any value \n",
  " between 8 and 268435456.\n",
  "\n",
  "max_terms = `all'\n",
  " This parameter limits the number of terms that will actually\n",
  " be used during a ranked query. If more terms than the number\n",
  " specified by max_terms are entered, then the extra terms will\n",
  " be discarded. If `sorted_terms' is on then the limiting will \n",
  " be done after the terms have been sorted. This parameter may\n",
  " take any value between 1 and 429467295 or the word `all'.\n",
  "\n",
  "memstats = `off'\n",
  " This is a boolean parameter that determines whether the memory \n",
  " usage statistics for the preceding query will be displayed\n",
  " after each query. This parameter may take the values `yes', \n",
  " `no', `true', `false', `on' or `off'.\n",
  "\n",
  "mgdir = `.'\n",
  " This specifies the directory where the MG files may be found.\n",
  " If the environment variable `MGDATA' is set then `mgdir' is\n",
  " initialised to the value in `MGDATA'.\n",
  "\n",
  "mgname = `'\n",
  " This specifies the name of the MG database to process.\n",
  "\n",
  "mode = `text'\n",
  " This specifies how documents should be displayed when they\n",
  " are retrieved. It may take five different values `text', \n",
  " `docnums', `silent', `heads' or `count'. `text' displays \n",
  " the contents of the document. `docnums' displays only the\n",
  " document numbers. `Silent' retrieves all the documents but\n",
  " displays nothing except how many documents were retrieved.\n",
  " This mode is intended to be used in timing experiments. \n",
  " `Heads' is used to print out the head of each document.\n",
  " `Count' does the minimum amount of work required to determine\n",
  " how many documents would be retrieved, but does not retrieve\n",
  " them.\n",
  "\n",
  "pager = `more'\n",
  " This is the name of the program that will be used to display\n",
  " the help and the retrieved documents. If the environment \n",
  " variable \"PAGER\" is defined then `pager' takes on that value.\n",
  "\n",
  "para_sepstr = `\\n######## PARAGRAPH %n ########\\n'\n",
  " This specifies the string that will be used to separate \n",
  " paragraphs. The standard C escape character sequences (see the \n",
  " man page) may be used to place special characters in the \n",
  " string. For example, a newline would be `\\n'. To include a `%'\n",
  " use the sequence `%%'. To include the paragraph number within\n",
  " the document use the sequence `%n'.\n",
  "\n",
  "para_start = `***** Weight = %w *****\\n'\n",
  " This specifies the string that will be used at the head of \n",
  " paragraphs for a paragraph level index following a ranked query.\n",
  " The standard C escape character sequences (see the man page)\n",
  " may be used to place special characters in the string. For \n",
  " example, a newline would be `\\n'. To include a `%' use the\n",
  " sequence `%%'. To include the paragraph weight use the \n",
  " sequence `%w'.\n",
  "\n",
  "qfreq = `true'\n",
  " This determines whether the ranked queries will take into \n",
  " account the number of times each query term is specified.\n",
  " When this is `true' the number of times a term appears in\n",
  " the query is used in the ranking. When this is `false' all \n",
  " query terms are assumed to occur only once. This parameter\n",
  " may take the values `yes', `no', `true', `false', `on' or\n",
  " `off'.\n",
  "\n",
  "query = `boolean'\n",
  " This specifies the type of queries that are to be specified.\n",
  " It can take four different values `boolean', `ranked', \n",
  " `docnums' or `approx-ranked'. \n",
  "\n",
  " `boolean' is for boolean queries. \n",
  " The yacc grammar for boolean queries is as follows :-\n",
  "\n",
  " query : or;\n",
  " \n",
  " or : or '|' and\n",
  " | and ; \n",
  " \n",
  " and : and '&' not\n",
  " | and not\n",
  " | not ;\n",
  "\n",
  " not : term\n",
  " | '!' not ;\n",
  " \n",
  " term : TERM\n",
  " | '(' or ')' ;\n",
  " \n",
  "\n",
  "\n",
  "\n",
  " `ranked' and `approx-ranked' are for queries ranked by the\n",
  " cosine measure. `approx-ranked' uses only the low\n",
  " precision document lengths, and therefore only\n",
  " produces an approximation to full cosine ranking.\n",
  "\n",
  " query : TERM\n",
  " | query TERM ;\n",
  " \n",
  " `docnums' allows the entry of document numbers. Multiple \n",
  " numbers separated by spaces may be specified\n",
  " or ranges separated by hyphens.\n",
  "\n",
  "\n",
  " query : range\n",
  " | query range ;\n",
  "\n",
  "\n",
  " range : num\n",
  " | num '-' num ;\n",
  "\n",
  "\n",
  "ranked_doc_sepstr = `---------------------------------- %n %w\\n'\n",
  " This specifies the string that will be used to separate \n",
  " documents when they are displayed for `ranked' or \n",
  " `approx-ranked' queries. The standard C escape character \n",
  " sequences (see the man page) may be used to place special\n",
  " characters in the string. For example, a newline would be\n",
  " `\\n'. To include a `%' use the sequence `%%'. To include the\n",
  " MG document number use the sequence `%n'. To include the\n",
  " document weight use the sequence `%w'.\n",
  "\n",
  "sizestats = `false'\n",
  " If this is true then various numbers are output at the end\n",
  " of each query indicating what went on during the query. \n",
  " This parameter may take the values `yes', `no', `true', \n",
  " `false', `on' or `off'.\n",
  "\n",
  "skip_dump = `skips.%d'\n",
  " If this parameter is set then during ranked queries on skipped\n",
  " inverted files when `accumulator_method' is set to `splay_tree',\n",
  " `hash_table', or `list' a file will be produced in the current\n",
  " directory. The name of the file is the value of this parameter,\n",
  " a `%d' in the file name will be replaced with the process id of\n",
  " mgquery. This file will contain information about the usage of\n",
  " skips during the query processing. This option is expensive; \n",
  " use `.unset skip_dump' to obtain optimal performance.\n",
  "\n",
  "sorted_terms = `on'\n",
  " This specifies whether or not the terms should be sorted into\n",
  " decreasing occurrence in documents so that the least often\n",
  " occurring terms are processed first when ranked queries are\n",
  " being done. When this is true the terms are sorted. When this\n",
  " is false the terms are not sorted and are instead processed in\n",
  " order of occurrence. This parameter may take the values `yes',\n",
  " `no', `true', `false', `on' or `off'.\n",
  "\n",
  "\n",
  "stop_at_max_accum = `on'\n",
  " This specifies what should happen when the maximum number of\n",
  " accumulators set by `max_accumulators' is reached. When this\n",
  " is true the processing of terms is stopped at the completion\n",
  " of the current term. When this is false processing continues but\n",
  " no new accumulators are created. This parameter may take the \n",
  " values `yes', `no', `true', `false', `on' or `off'.\n",
  "\n",
  "terminator = `'\n",
  " This specifies the string that will be output after the last\n",
  " document from the previous query has been output. The standard\n",
  " C escape character sequences (see the man page) may be used to\n",
  " place special characters in the string. For example, a newline\n",
  " would be `\\n'. To include a `%' use the sequence `%%'.\n",
  " \n",
  "\n",
  "timestats = `false'\n",
  " If this is true then the time to process a query is displayed\n",
  " in both real time and CPU time. This parameter may take the\n",
  " values `yes', `no', `true', `false', `on' or `off'.\n",
  "\n",
  "verbatim = `off'\n",
  " This is a boolean parameter that determines whether the program\n",
  " should attempt to do a regular expression match on the retrieved\n",
  " text. If verbatim is `on' and a post-processing string is specified\n",
  " with the query then the post-processing string will be searched for\n",
  " in the documents just before they are displayed. If the string is\n",
  " found the document will be displayed, if not the document will not\n",
  " be displayed. If verbatim is `off' the post-processing string will\n",
  " be considered a regular expression like in `vi' or `egrep'.\n",
  " E.G. If verbatim is `on', \"and.*the\" will look for the 8 character\n",
  " sequence \"and.*the\". If verbatim is `off', \"and.*the\" will\n",
  " look for the sequence \"and\" followed somewhere later in the\n",
  " document by the sequence \"the\".\n",
  " This parameter may take the values `yes', `no', `true', `false',\n",
  " `on' or `off'.\n",
  "\n",
  "\n"};