1 | #include <string.h>
|
---|
2 | #include <stdio.h>
|
---|
3 | #include <stdlib.h>
|
---|
4 | #include <ctype.h>
|
---|
5 | #include <time.h>
|
---|
6 | #include "libinterface.h"
|
---|
7 | #include "cgiargs.h"
|
---|
8 |
|
---|
9 | #include <assert.h>
|
---|
10 |
|
---|
11 | ///////////////////////
|
---|
12 | // support functions //
|
---|
13 | ///////////////////////
|
---|
14 |
|
---|
// hexdigit returns the numeric value (0-15) of the hexadecimal digit c.
// Lower and upper case letters are both accepted. Any character that
// is not a hexadecimal digit is handed back unchanged.
unsigned short hexdigit (unsigned short c)
{
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
22 |
|
---|
23 |
|
---|
24 | void c2hex (unsigned short c, text_t &t)
|
---|
25 | {
|
---|
26 | t.clear();
|
---|
27 |
|
---|
28 | if (c >= 256)
|
---|
29 | {
|
---|
30 | t = "20"; // ' '
|
---|
31 | return;
|
---|
32 | }
|
---|
33 |
|
---|
34 | unsigned short o1, o2;
|
---|
35 |
|
---|
36 | o1 = (c/16) % 16;
|
---|
37 | o2 = c % 16;
|
---|
38 | if (o1 >= 10) o1 += 'a' - 10;
|
---|
39 | else o1 += '0';
|
---|
40 | if (o2 >= 10) o2 += 'a' - 10;
|
---|
41 | else o2 += '0';
|
---|
42 |
|
---|
43 | t.push_back(o1);
|
---|
44 | t.push_back(o2);
|
---|
45 | }
|
---|
46 |
|
---|
47 | // convert %xx and + to their appropriate equivalents
|
---|
48 | void decode (text_t &argstr)
|
---|
49 | {
|
---|
50 | text_t::iterator in = argstr.begin();
|
---|
51 | text_t::iterator out = in;
|
---|
52 | text_t::iterator end = argstr.end();
|
---|
53 |
|
---|
54 | while (in != end)
|
---|
55 | {
|
---|
56 | if (*in == '+') *out = ' ';
|
---|
57 |
|
---|
58 | else if (*in == '%')
|
---|
59 | {
|
---|
60 | unsigned short c = '%';
|
---|
61 | in++;
|
---|
62 | if (in != end)
|
---|
63 | {
|
---|
64 | c = hexdigit (*in);
|
---|
65 | in++;
|
---|
66 | }
|
---|
67 | if (in != end && c < 16) // sanity check on the previous character
|
---|
68 | {
|
---|
69 | c = c*16 + hexdigit (*in);
|
---|
70 | }
|
---|
71 |
|
---|
72 | *out = c;
|
---|
73 | }
|
---|
74 | else *out = *in;
|
---|
75 |
|
---|
76 | if (in != end) in++;
|
---|
77 | out++;
|
---|
78 | }
|
---|
79 |
|
---|
80 | // remove the excess characters
|
---|
81 | argstr.erase (out, end);
|
---|
82 | }
|
---|
83 |
|
---|
84 |
|
---|
85 | // split up the cgi arguments
|
---|
86 | void parse_cgi_args (text_t argstr, cgiargsclass &args)
|
---|
87 | {
|
---|
88 | args.clear();
|
---|
89 |
|
---|
90 | text_t::iterator here = argstr.begin();
|
---|
91 | text_t::iterator end = argstr.end();
|
---|
92 | text_t key, value;
|
---|
93 |
|
---|
94 | // extract out the key=value pairs
|
---|
95 | while (here != end)
|
---|
96 | {
|
---|
97 | // get the next key and value pair
|
---|
98 | here = getdelimitstr (here, end, '=', key);
|
---|
99 | here = getdelimitstr (here, end, '&', value);
|
---|
100 |
|
---|
101 | // convert %xx and + to their appropriate equivalents
|
---|
102 | decode (value);
|
---|
103 | // store this key=value pair
|
---|
104 | if (!key.empty()) args.setarg (key, value);
|
---|
105 | }
|
---|
106 | }
|
---|
107 |
|
---|
108 | text_t cgisafe (text_t &intext)
|
---|
109 | {
|
---|
110 | text_t outtext;
|
---|
111 |
|
---|
112 | text_t::iterator here = intext.begin ();
|
---|
113 | text_t::iterator end = intext.end ();
|
---|
114 | unsigned short c;
|
---|
115 | text_t ttmp;
|
---|
116 |
|
---|
117 | while (here != end)
|
---|
118 | {
|
---|
119 | c = *here;
|
---|
120 | if (((c >= 'a') && (c <= 'z')) ||
|
---|
121 | ((c >= 'A') && (c <= 'Z')) ||
|
---|
122 | ((c >= '0') && (c <= '9'))) {
|
---|
123 | // alphanumeric character
|
---|
124 | outtext.push_back(c);
|
---|
125 | } else {
|
---|
126 | // non-alphnumeric character
|
---|
127 | outtext.push_back('%');
|
---|
128 | c2hex(c, ttmp);
|
---|
129 | outtext += ttmp;
|
---|
130 | }
|
---|
131 |
|
---|
132 | here++;
|
---|
133 | }
|
---|
134 |
|
---|
135 | return outtext;
|
---|
136 | }
|
---|
137 |
|
---|
138 | //////////////////////////////
|
---|
139 | // methods for libinterface //
|
---|
140 | //////////////////////////////
|
---|
141 |
|
---|
142 | // constructor
|
---|
143 |
|
---|
144 | libinterface::libinterface()
|
---|
145 | {
|
---|
146 | browse = NULL;
|
---|
147 | }
|
---|
148 |
|
---|
149 | void libinterface::setgsdlhome (const text_t &thegsdlhome)
|
---|
150 | {
|
---|
151 | gsdlhome = thegsdlhome;
|
---|
152 | setmacroshome (thegsdlhome);
|
---|
153 | setgdbmhome (thegsdlhome);
|
---|
154 | setindexhome (thegsdlhome);
|
---|
155 | }
|
---|
156 |
|
---|
157 | void libinterface::setmacroshome (const text_t &themacroshome)
|
---|
158 | {
|
---|
159 | macroshome = themacroshome;
|
---|
160 | }
|
---|
161 |
|
---|
162 | void libinterface::setgdbmhome (const text_t &thegdbmhome)
|
---|
163 | {
|
---|
164 | gdbmhome = thegdbmhome;
|
---|
165 | gdbm.setgdbmhome(gdbmhome);
|
---|
166 | }
|
---|
167 |
|
---|
168 | void libinterface::setindexhome (const text_t &theindexhome)
|
---|
169 | {
|
---|
170 | indexhome = theindexhome;
|
---|
171 | search.setindexhome(theindexhome);
|
---|
172 | }
|
---|
173 |
|
---|
174 | void libinterface::sethttpprefix (const text_t &thehttpprefix)
|
---|
175 | {
|
---|
176 | httpprefix = thehttpprefix;
|
---|
177 | }
|
---|
178 |
|
---|
179 | void libinterface::setgwcgi (const text_t &thegwcgi)
|
---|
180 | {
|
---|
181 | gwcgi = thegwcgi;
|
---|
182 | }
|
---|
183 |
|
---|
184 | // init should be called after the various homes are set,
|
---|
185 | // it returns 'false' on failure and 'true' on success
|
---|
186 | bool libinterface::init ()
|
---|
187 | {
|
---|
188 | set_default_index();
|
---|
189 | text_t collection = get_collection_name();
|
---|
190 |
|
---|
191 | // load up default macro files -- these shouldn't have absolute file names
|
---|
192 | #ifdef __WIN32__
|
---|
193 | disp.loaddefaultmacros(macroshome + "\\macros\\base.dm");
|
---|
194 | disp.loaddefaultmacros(macroshome + "\\macros\\browse.dm");
|
---|
195 | disp.loaddefaultmacros(macroshome + "\\macros\\text.dm");
|
---|
196 | disp.loaddefaultmacros(macroshome + "\\macros\\query.dm");
|
---|
197 | disp.loaddefaultmacros(macroshome + "\\macros\\help.dm");
|
---|
198 | disp.loaddefaultmacros(macroshome + "\\macros\\gsdl.dm");
|
---|
199 | disp.loaddefaultmacros(macroshome + "\\macros\\pref.dm");
|
---|
200 | #else
|
---|
201 | disp.loaddefaultmacros(macroshome + "/macros/base.dm");
|
---|
202 | disp.loaddefaultmacros(macroshome + "/macros/browse.dm");
|
---|
203 | disp.loaddefaultmacros(macroshome + "/macros/text.dm");
|
---|
204 | disp.loaddefaultmacros(macroshome + "/macros/query.dm");
|
---|
205 | disp.loaddefaultmacros(macroshome + "/macros/help.dm");
|
---|
206 | disp.loaddefaultmacros(macroshome + "/macros/gsdl.dm");
|
---|
207 | disp.loaddefaultmacros(macroshome + "/macros/pref.dm");
|
---|
208 | #endif
|
---|
209 |
|
---|
210 | srand(time(NULL));
|
---|
211 |
|
---|
212 | return collection_init(collection);
|
---|
213 | }
|
---|
214 |
|
---|
215 |
|
---|
216 | // examine the cgi arguments and create the appropriate page,
|
---|
217 | // outputing the page to textout and any debug information to logout
|
---|
218 | //
|
---|
219 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
220 | int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout)
|
---|
221 | {
|
---|
222 | int err = LI_NOERROR;
|
---|
223 |
|
---|
224 | gdbm.setlogout (&logout); // make the output go where they want!
|
---|
225 |
|
---|
226 | cgiargsclass args;
|
---|
227 |
|
---|
228 | parse_cgi_args (argstr, args);
|
---|
229 | expand_compressed_args (args);
|
---|
230 | add_default_args (args);
|
---|
231 | check_args (args);
|
---|
232 |
|
---|
233 | lastcomparg = get_compressed_args (args);
|
---|
234 |
|
---|
235 | logout << args;
|
---|
236 |
|
---|
237 | text_t &arg_a = args["a"];
|
---|
238 | if (arg_a == "q") err = query_action (args, textout, logout);
|
---|
239 | else if (arg_a == "b") browse_action (args, textout, logout);
|
---|
240 | else if (arg_a == "t") document_action (args, textout, logout);
|
---|
241 | else if (arg_a == "p") page_action (args, textout, logout);
|
---|
242 | else if ((arg_a.size() == 2) && (arg_a[0] == 'a')) auxiliary_action (args, textout, logout);
|
---|
243 | else
|
---|
244 | {
|
---|
245 | // output error page
|
---|
246 | }
|
---|
247 |
|
---|
248 | return err;
|
---|
249 | }
|
---|
250 |
|
---|
251 | // the arg config string is used to do processing on the arguments
|
---|
252 | // entries take the form argname[defaultvalue]
|
---|
253 | // if the argument name is preceeded by a "+" it means that the
|
---|
254 | // value may be more than one character long
|
---|
255 | // the main state variable missed on this list is "q" the query string
|
---|
256 | void libinterface::get_arg_config (text_t &argconfigstr)
|
---|
257 | {
|
---|
258 | argconfigstr =
|
---|
259 | "+a[p]" // action: q=query, b=browse, t=targetdoc, p=page, a1=auxiliary
|
---|
260 | "t[1]" // query type: 0=boolean, 1=ranked
|
---|
261 | "i[c]" // index: c=chapter, p=paragraph, t=title, b=book
|
---|
262 | "k[1]" // casefolding: 0=off, 1=on
|
---|
263 | "s[0]" // stemming: 0=off, 1=on
|
---|
264 | "+p[home]" // page
|
---|
265 | "+c[unu]" // collection
|
---|
266 | "+r[1]" // results from
|
---|
267 | "+d[C.1]" // the target document
|
---|
268 | "+j[11]" // partial index: 11=all, 10=f&n, 01=other
|
---|
269 | "+m[100]" // maxdocs
|
---|
270 | "+o[20]" // hits per page
|
---|
271 | "v[0]" // version: 0=text+graphics, 1=text
|
---|
272 | "f[0]"; // query box size: 0=normal, 1=big
|
---|
273 |
|
---|
274 | }
|
---|
275 |
|
---|
276 | text_t::iterator libinterface::get_next_config_arg (text_t::iterator first,
|
---|
277 | text_t::iterator last,
|
---|
278 | text_t &argname,
|
---|
279 | text_t &defaultvalue,
|
---|
280 | bool &longarg)
|
---|
281 | {
|
---|
282 | first = getdelimitstr (first, last, '[', argname);
|
---|
283 | first = getdelimitstr (first, last, ']', defaultvalue);
|
---|
284 | longarg = false;
|
---|
285 |
|
---|
286 | if (!argname.empty() && (argname[0] == '+'))
|
---|
287 | {
|
---|
288 | argname.erase(argname.begin(), argname.begin()+1);
|
---|
289 | longarg = true;
|
---|
290 | }
|
---|
291 |
|
---|
292 | return first;
|
---|
293 | }
|
---|
294 |
|
---|
295 | text_t libinterface::get_compressed_args (cgiargsclass &args)
|
---|
296 | {
|
---|
297 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
298 | text_t arg_e;
|
---|
299 | text_t argname, defaultvalue;
|
---|
300 | text_t *argvalue;
|
---|
301 | bool longarg;
|
---|
302 |
|
---|
303 | text_t::iterator here = argconfigstr.begin();
|
---|
304 | text_t::iterator end = argconfigstr.end();
|
---|
305 | while (here != end)
|
---|
306 | {
|
---|
307 | here = get_next_config_arg (here, end, argname, defaultvalue, longarg);
|
---|
308 |
|
---|
309 | if (!argname.empty())
|
---|
310 | {
|
---|
311 | argvalue = args.getarg (argname);
|
---|
312 | if (argvalue == NULL) arg_e += defaultvalue;
|
---|
313 | else arg_e += *argvalue;
|
---|
314 |
|
---|
315 | if (longarg) arg_e += "-";
|
---|
316 | }
|
---|
317 | }
|
---|
318 |
|
---|
319 | return arg_e;
|
---|
320 | }
|
---|
321 |
|
---|
322 |
|
---|
323 | // the compressed options should never override explicit options
|
---|
324 | // but they should always be expanded before add_default_args is
|
---|
325 | // called
|
---|
326 | void libinterface::expand_compressed_args (cgiargsclass &args)
|
---|
327 | {
|
---|
328 | text_t *arg_e = args.getarg("e");
|
---|
329 |
|
---|
330 | // see if there is compressed options
|
---|
331 | if (arg_e != NULL)
|
---|
332 | {
|
---|
333 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
334 | text_t argname, defaultvalue, argvalue;
|
---|
335 | bool longarg;
|
---|
336 |
|
---|
337 | text_t::iterator confighere = argconfigstr.begin();
|
---|
338 | text_t::iterator configend = argconfigstr.end();
|
---|
339 |
|
---|
340 | text_t::iterator arghere = arg_e->begin();
|
---|
341 | text_t::iterator argend = arg_e->end();
|
---|
342 | while (confighere != configend && arghere != argend)
|
---|
343 | {
|
---|
344 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
345 | defaultvalue, longarg);
|
---|
346 | if (!argname.empty())
|
---|
347 | {
|
---|
348 | if (longarg)
|
---|
349 | {
|
---|
350 | arghere = getdelimitstr (arghere, argend, '-', argvalue);
|
---|
351 | if (!argvalue.empty()) args.setdefaultarg (argname, argvalue);
|
---|
352 | }
|
---|
353 | else
|
---|
354 | {
|
---|
355 | args.setdefaultcarg (argname,*arghere);
|
---|
356 | arghere++;
|
---|
357 | }
|
---|
358 | }
|
---|
359 | }
|
---|
360 | }
|
---|
361 | }
|
---|
362 |
|
---|
363 |
|
---|
364 | void libinterface::add_default_args (cgiargsclass &args)
|
---|
365 | {
|
---|
366 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
367 | text_t argname, defaultvalue;
|
---|
368 | bool longarg;
|
---|
369 |
|
---|
370 | text_t::iterator confighere = argconfigstr.begin();
|
---|
371 | text_t::iterator configend = argconfigstr.end();
|
---|
372 | while (confighere != configend)
|
---|
373 | {
|
---|
374 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
375 | defaultvalue, longarg);
|
---|
376 | if (!argname.empty()) args.setdefaultarg (argname, defaultvalue);
|
---|
377 | }
|
---|
378 |
|
---|
379 | // the query string and format string are not included in the argument configuration string
|
---|
380 | args.setdefaultarg ("q", ""); // the default query string is ""
|
---|
381 | args.setdefaultarg ("g", "00");
|
---|
382 | args.setdefaultarg ("x", "0");
|
---|
383 | }
|
---|
384 |
|
---|
385 |
|
---|
386 | // check and attempt to fix an problems encountered in the list
|
---|
387 | // of cgi arguments
|
---|
388 | void libinterface::check_args (cgiargsclass &args)
|
---|
389 | {
|
---|
390 | text_t collection = get_collection_name();
|
---|
391 | args.setarg("c", collection);
|
---|
392 | }
|
---|
393 |
|
---|
394 |
|
---|
395 | void libinterface::define_general_macros (cgiargsclass &args, ostream &logout)
|
---|
396 | {
|
---|
397 | disp.setmacro("httpprefix", "Global", httpprefix);
|
---|
398 | disp.setmacro("gwcgi", "Global", gwcgi);
|
---|
399 |
|
---|
400 | disp.setmacro("collection", "Global", cgisafe(args["c"]));
|
---|
401 | disp.setmacro("compressedoptions", "Global", get_compressed_args(args));
|
---|
402 | disp.setmacro("urlsafequerystring", "Global", cgisafe(args["q"]));
|
---|
403 |
|
---|
404 | // need to escape any quotes in querystring to prevent them upsetting the html
|
---|
405 | text_t querystring;
|
---|
406 | text_t::iterator here = args["q"].begin();
|
---|
407 | text_t::iterator end = args["q"].end();
|
---|
408 | while (here != end) {
|
---|
409 | if (*here == '"') querystring += """;
|
---|
410 | else querystring.push_back(*here);
|
---|
411 | here ++;
|
---|
412 | }
|
---|
413 | disp.setmacro("querystring", "Global", querystring);
|
---|
414 |
|
---|
415 |
|
---|
416 | if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1");
|
---|
417 | if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1");
|
---|
418 |
|
---|
419 | int i = rand();
|
---|
420 | disp.setmacro("pagedest", "Global", text_t(i));
|
---|
421 |
|
---|
422 | // define the macro for the "g" argument
|
---|
423 | disp.setmacro("g", "Global", args["g"]);
|
---|
424 | }
|
---|
425 |
|
---|
426 |
|
---|
427 | // prepare_page prepares to write out a page using the current
|
---|
428 | // page parameters and defines any general macros
|
---|
429 | void libinterface::prepare_page (cgiargsclass &args, ostream &logout)
|
---|
430 | {
|
---|
431 | // get page parameters
|
---|
432 | text_t pageparams = text_t("collection=") + args["c"];
|
---|
433 | if (args.getintarg("u") == 1) pageparams += ",style=htmlonly";
|
---|
434 | if (args.getintarg("v") == 1) pageparams += ",version=text";
|
---|
435 | if (args.getintarg("f") == 1) pageparams += ",queryversion=big";
|
---|
436 |
|
---|
437 | disp.openpage(pageparams, MACROPRECEDENCE);
|
---|
438 | define_general_macros(args, logout);
|
---|
439 | define_collection_macros(args, logout);
|
---|
440 | }
|
---|
441 |
|
---|
442 | void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams)
|
---|
443 | {
|
---|
444 | queryparams.collection = args["c"];
|
---|
445 | assemble_index (args, queryparams.search_index);
|
---|
446 | queryparams.querystring = args["q"];
|
---|
447 | format_querystring (queryparams.querystring);
|
---|
448 | queryparams.search_type = args.getintarg ("t");
|
---|
449 | queryparams.casefolding = args.getintarg ("k");
|
---|
450 | queryparams.stemming = args.getintarg ("s");
|
---|
451 | queryparams.maxdocs = args.getintarg ("m");
|
---|
452 | }
|
---|
453 |
|
---|
454 | void libinterface::format_querystring (text_t &querystring)
|
---|
455 | {
|
---|
456 | text_t formattedstring;
|
---|
457 | quotedstring.clear();
|
---|
458 |
|
---|
459 | text_t::iterator here = querystring.begin();
|
---|
460 | text_t::iterator end = querystring.end();
|
---|
461 | int foundquote = 0;
|
---|
462 |
|
---|
463 | // want to remove ()|!& from querystring so boolean queries are just
|
---|
464 | // "all the words" queries
|
---|
465 | while (here != end) {
|
---|
466 | if (*here == '(' || *here == ')' || *here == '|' ||
|
---|
467 | *here == '!' || *here == '&') {
|
---|
468 | formattedstring += " ";
|
---|
469 | } else {
|
---|
470 | if (*here == '"') {
|
---|
471 | if (foundquote) {foundquote = 0; quotedstring.push_back(*here);}
|
---|
472 | else foundquote = 1;
|
---|
473 | } else {
|
---|
474 | formattedstring.push_back(*here);
|
---|
475 | }
|
---|
476 | if (foundquote) quotedstring.push_back(*here);
|
---|
477 | }
|
---|
478 | here ++;
|
---|
479 | }
|
---|
480 | querystring = formattedstring + quotedstring;
|
---|
481 | }
|
---|
482 |
|
---|
483 | void libinterface::define_query_macros (cgiargsclass &args,
|
---|
484 | queryparamclass &queryparams,
|
---|
485 | queryresultsclass &queryresults,
|
---|
486 | ostream &logout)
|
---|
487 | {
|
---|
488 | int numdocs = queryresults.getnumdocs();
|
---|
489 | int numterms = queryresults.getnumterms();
|
---|
490 | disp.setmacro("querysize", "query", args["f"]);
|
---|
491 | disp.setmacro("haveresults", "query", numdocs);
|
---|
492 |
|
---|
493 | // set the display frequency macro
|
---|
494 | text_t freqmsg = "_textfm1_";
|
---|
495 |
|
---|
496 | int first = 1;
|
---|
497 | for (int i = 0; i < numterms; i++) {
|
---|
498 | if (first == 0) freqmsg += "; ";
|
---|
499 | first = 0;
|
---|
500 | freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq;
|
---|
501 | }
|
---|
502 | if (!quotedstring.empty()) freqmsg += "<br><i>post-processed to find " + quotedstring + "</i>\n";
|
---|
503 | disp.setmacro("freqmsg", "query", freqmsg);
|
---|
504 |
|
---|
505 | // set the result line macro
|
---|
506 |
|
---|
507 | text_t resline;
|
---|
508 |
|
---|
509 | if (numdocs >= queryparams.maxdocs)
|
---|
510 | resline.setcstr("_textmt2_");
|
---|
511 |
|
---|
512 | if (numdocs == 0) {
|
---|
513 | resline.setcstr("_textndmtq_");
|
---|
514 | } else if (numdocs == 1) {
|
---|
515 | resline += text_t(numdocs) + text_t(" _textdmtq2_.");
|
---|
516 | } else {
|
---|
517 | resline += text_t(numdocs) + text_t(" _textdmtq3_.");
|
---|
518 | }
|
---|
519 |
|
---|
520 | disp.setmacro("resultline", "query", resline);
|
---|
521 |
|
---|
522 | // define_collection_macros (args, logout);
|
---|
523 |
|
---|
524 | if (queryresults.getnumdocs() > 0) {
|
---|
525 | docLinks(args, queryresults, logout);
|
---|
526 | }
|
---|
527 | }
|
---|
528 |
|
---|
529 |
|
---|
530 | // set the _links_ macro to create the links between pages of query results
|
---|
531 | void libinterface::docLinks (cgiargsclass &args,
|
---|
532 | queryresultsclass &queryresults,
|
---|
533 | ostream &logout)
|
---|
534 | {
|
---|
535 | text_t links;
|
---|
536 | int a, b, documents, nextfirst, nextlast, prevfirst, prevlast;
|
---|
537 | int results_from = args.getintarg("r");
|
---|
538 | int hitsperpage = args.getintarg("o");
|
---|
539 |
|
---|
540 | documents = queryresults.getnumdocs();
|
---|
541 |
|
---|
542 | a = results_from;
|
---|
543 | b = a + (hitsperpage - 1);
|
---|
544 |
|
---|
545 | // make sure a and b are in range
|
---|
546 | if (a < 1) a = 1;
|
---|
547 | if (b < 1) b = 1;
|
---|
548 | if (a > documents) a = documents;
|
---|
549 | if (b > documents) b = documents;
|
---|
550 |
|
---|
551 | links.setcstr("<table cellspacing=0 cellpadding=0 border=0 width=\"100%\">\n");
|
---|
552 | links += "<tr valign=bottom>\n";
|
---|
553 | links += "<td align=left>\n";
|
---|
554 | links += "<_font_>\n";
|
---|
555 |
|
---|
556 | // previous page link
|
---|
557 | if (a > 1) {
|
---|
558 | prevlast = a - 1;
|
---|
559 | prevfirst = a - hitsperpage;
|
---|
560 |
|
---|
561 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
562 | links += prevfirst;
|
---|
563 | links += "\">_iconprev__textmatches_ ";
|
---|
564 | links += prevfirst;
|
---|
565 | links += " - ";
|
---|
566 | links += prevlast;
|
---|
567 | links += "</a>\n";
|
---|
568 | }
|
---|
569 |
|
---|
570 | links += "</font>\n";
|
---|
571 | links += "</td><td width=100></td><td align=right>\n";
|
---|
572 | links += "<_font_>\n";
|
---|
573 |
|
---|
574 | // next page link
|
---|
575 | if (b < documents) {
|
---|
576 | nextfirst = b + 1;
|
---|
577 | nextlast = b + hitsperpage;
|
---|
578 | if (nextlast > documents) nextlast = documents;
|
---|
579 |
|
---|
580 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
581 | links += nextfirst;
|
---|
582 | links += "\">_textmatches_ ";
|
---|
583 | links += nextfirst;
|
---|
584 | links += " - ";
|
---|
585 | links += nextlast ;
|
---|
586 | links += "_iconnext_</a>\n";
|
---|
587 | }
|
---|
588 |
|
---|
589 | links += "</font>\n";
|
---|
590 | links += "</td></tr></table>\n";
|
---|
591 |
|
---|
592 | disp.setmacro("links", "query", links);
|
---|
593 | }
|
---|
594 |
|
---|
595 |
|
---|
596 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
597 | int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams,
|
---|
598 | queryresultsclass &queryresults, ostream &logout)
|
---|
599 | {
|
---|
600 | set_query_params(args, queryparams);
|
---|
601 |
|
---|
602 | if (!queryparams.querystring.empty()) {
|
---|
603 | // do the query - the results are returned in queryresults
|
---|
604 | if (!search.search(queryparams, queryresults)) {
|
---|
605 | logout << "ERROR: database didn't load\n";
|
---|
606 | return LI_LOADDATABASEFAILED;
|
---|
607 | }
|
---|
608 | }
|
---|
609 | return LI_NOERROR;
|
---|
610 | }
|
---|
611 |
|
---|
612 | ////////////////////////////////////////////////////////////////////////////////////////
|
---|
613 | // query_action is called whenever a search is to be carried out (i.e. when the
|
---|
614 | // 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to
|
---|
615 | // carry out the search then displays the first page of results.
|
---|
616 | //
|
---|
617 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
618 | int libinterface::query_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
619 | {
|
---|
620 | int err = LI_NOERROR;
|
---|
621 |
|
---|
622 | queryparamclass queryparams;
|
---|
623 | queryresultsclass queryresults;
|
---|
624 |
|
---|
625 | err = do_query(args, queryparams, queryresults, logout);
|
---|
626 |
|
---|
627 | // prepare to print out the page
|
---|
628 | prepare_page(args, logout);
|
---|
629 | define_query_macros(args, queryparams, queryresults, logout);
|
---|
630 |
|
---|
631 | // print out the query page
|
---|
632 | textout << text_t2ascii << disp << "_query:header_\n";
|
---|
633 |
|
---|
634 | // output query results if there is a query string -
|
---|
635 | // otherwise output help text
|
---|
636 | if (!queryparams.querystring.empty())
|
---|
637 | {
|
---|
638 | displayresults (args, textout, logout, queryresults);
|
---|
639 | }
|
---|
640 | else
|
---|
641 | {
|
---|
642 | textout << text_t2ascii << disp << "_query:noqueryheader_\n";
|
---|
643 | }
|
---|
644 |
|
---|
645 | textout << text_t2ascii << disp << "_query:footer_\n";
|
---|
646 |
|
---|
647 | return err;
|
---|
648 | }
|
---|
649 |
|
---|
650 | void libinterface::displayresults (cgiargsclass &args, ostream &textout, ostream &logout,
|
---|
651 | queryresultsclass &queryresults)
|
---|
652 | {
|
---|
653 | textout << text_t2ascii << disp << "_query:queryheader_";
|
---|
654 |
|
---|
655 |
|
---|
656 | int startresults = args.getintarg("r") - 1;
|
---|
657 | int numresults = args.getintarg("o");
|
---|
658 |
|
---|
659 | textout << text_t2ascii << "<table cellspacing=4>\n";
|
---|
660 | for (int i=startresults; i < startresults+numresults; i++)
|
---|
661 | {
|
---|
662 | displaydocsummary (args, textout, logout, queryresults, i);
|
---|
663 | }
|
---|
664 | textout << text_t2ascii << "</table>\n\n";
|
---|
665 |
|
---|
666 | textout << text_t2ascii << disp << "_query:queryfooter_";
|
---|
667 | }
|
---|
668 |
|
---|
669 |
|
---|
670 | ///////////////////////////////////////////////////////////////////////////////////////////////
|
---|
671 | // browse_action writes out the browse pages (i.e. the top level hierarchy pages)
|
---|
672 | void libinterface::browse_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
673 | {
|
---|
674 | text_t browse_bar, locator, output;
|
---|
675 | gdbm_info info;
|
---|
676 |
|
---|
677 | prepare_page(args, logout);
|
---|
678 |
|
---|
679 | // get browse bar unless page has been detached
|
---|
680 | if (args.getintarg("x") == 0) {
|
---|
681 | browse->get_browse_bar(args["d"], browse_bar);
|
---|
682 | }
|
---|
683 |
|
---|
684 | // get top locator
|
---|
685 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
686 |
|
---|
687 | // expand and output page
|
---|
688 | // Note: we need to expand these out using package 'browse'
|
---|
689 | // so we can't use the tricky '<<' syntax
|
---|
690 | disp.expandstring("browse", "_header_", output);
|
---|
691 | textout << text_t2ascii << output;
|
---|
692 | disp.expandstring("browse", browse_bar, output);
|
---|
693 | textout << text_t2ascii << output;
|
---|
694 | disp.expandstring("browse", locator, output);
|
---|
695 | textout << text_t2ascii << output;
|
---|
696 | disp.expandstring("browse", "_footer_", output);
|
---|
697 | textout << text_t2ascii << output;
|
---|
698 | }
|
---|
699 |
|
---|
700 |
|
---|
701 |
|
---|
702 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
703 | // document_action is called to retrieve and display collection documents
|
---|
704 | // It calls the mgsearch function docTargetDocument() to retrieve
|
---|
705 | // a document.
|
---|
706 |
|
---|
707 | void libinterface::document_action (cgiargsclass &args, ostream &textout, ostream &logout) {
|
---|
708 |
|
---|
709 | text_t locator, content, links, output;
|
---|
710 | gdbm_info info;
|
---|
711 | queryparamclass queryparams;
|
---|
712 | queryresultsclass queryresults;
|
---|
713 | int oversize = 0;
|
---|
714 |
|
---|
715 | // have to redo the query to get queryterms for highlight text
|
---|
716 | do_query(args, queryparams, queryresults, logout);
|
---|
717 |
|
---|
718 | prepare_page(args, logout);
|
---|
719 |
|
---|
720 | if (args["g"][1] == '0') {
|
---|
721 | // get docnum from gdbm
|
---|
722 | text_t docref;
|
---|
723 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
724 | else docref = args["d"];
|
---|
725 | if (gdbm.getinfo(docref, queryparams.collection, info) != 0) {
|
---|
726 | logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n";
|
---|
727 | return;
|
---|
728 | }
|
---|
729 |
|
---|
730 | // get document text if there is any
|
---|
731 | if (info.c.empty()) {
|
---|
732 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
733 | info.d, content);
|
---|
734 | if (info.t != "<i>(introductory text)</i>")
|
---|
735 | content = "<h3>" + info.t + "</h3>\n" + content;
|
---|
736 | }
|
---|
737 | }
|
---|
738 |
|
---|
739 | if (args["g"][1] == '1') {
|
---|
740 | // want to get expanded out text
|
---|
741 | vector<text_t> contents_arr;
|
---|
742 | text_t booksection;
|
---|
743 | int levelcount;
|
---|
744 |
|
---|
745 | get_book (args["d"], booksection);
|
---|
746 | levelcount = count_dots(booksection);
|
---|
747 |
|
---|
748 | browse->get_contents_arr(args, gdbm, contents_arr);
|
---|
749 |
|
---|
750 |
|
---|
751 | // get text for each section of book
|
---|
752 | vector<text_t>::const_iterator thiscontent = contents_arr.begin();
|
---|
753 | vector<text_t>::const_iterator end = contents_arr.end();
|
---|
754 |
|
---|
755 | int first = 1;
|
---|
756 | int count = 1;
|
---|
757 | while (thiscontent != end) {
|
---|
758 | text_t text;
|
---|
759 |
|
---|
760 | // get docnum from gdbm
|
---|
761 | if (gdbm.getinfo(*thiscontent, queryparams.collection, info) != 0) {
|
---|
762 | logout << text_t2ascii << "info_db wasn't opened - " << args["d"] << "\n";
|
---|
763 | return;
|
---|
764 | }
|
---|
765 |
|
---|
766 | // if section has text get it, otherwise output section title
|
---|
767 | if (info.c.empty()) {
|
---|
768 |
|
---|
769 | // output <a name= > tags for all text sections currently displayed in toc (all text sections
|
---|
770 | // if contents are expanded
|
---|
771 | if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') {
|
---|
772 | content += "<a name=\"";
|
---|
773 | content += count;
|
---|
774 | content += "\"></a>\n";
|
---|
775 | count ++;
|
---|
776 | }
|
---|
777 |
|
---|
778 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
779 | info.d, text);
|
---|
780 | if (info.t != "<i>(introductory text)</i>")
|
---|
781 | content += "<h3>" + info.t + "</h3>\n";
|
---|
782 | // content += text + "<hr><br>\n";
|
---|
783 | content += text + "<p>\n"; // no longer want <hr> between sections
|
---|
784 | } else {
|
---|
785 | content += "<h3>" + info.t + "</h3>\n";
|
---|
786 | }
|
---|
787 |
|
---|
788 | if (args["n"] == 1) {
|
---|
789 | if (first) {
|
---|
790 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
791 | disp.expandstring("text", "_header_", output);
|
---|
792 | textout << text_t2ascii << output;
|
---|
793 | disp.expandstring("text", locator, output);
|
---|
794 | textout << text_t2ascii << output;
|
---|
795 | }
|
---|
796 |
|
---|
797 | disp.expandstring("text", content, output);
|
---|
798 | if (!queryparams.querystring.empty())
|
---|
799 | highlighttext(queryresults.termvariants, textout, logout, output);
|
---|
800 | else
|
---|
801 | textout << text_t2ascii << output;
|
---|
802 | first = 0;
|
---|
803 | content.clear();
|
---|
804 | }
|
---|
805 |
|
---|
806 | thiscontent ++;
|
---|
807 | if (content.size() > 200000 && args["n"] != 1) {
|
---|
808 | content.clear();
|
---|
809 | oversize = 1;
|
---|
810 | args["g"][1] = '0';
|
---|
811 | break;
|
---|
812 | }
|
---|
813 | }
|
---|
814 |
|
---|
815 | if (args["g"][1] == '0') {
|
---|
816 | // get docnum from gdbm
|
---|
817 | text_t docref;
|
---|
818 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
819 | else docref = args["d"];
|
---|
820 | if (gdbm.getinfo(docref, queryparams.collection, info) != 0) {
|
---|
821 | logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n";
|
---|
822 | return;
|
---|
823 | }
|
---|
824 |
|
---|
825 | // get document text if there is any
|
---|
826 | if (info.c.empty()) {
|
---|
827 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
828 | info.d, content);
|
---|
829 | if (info.t != "<i>(introductory text)</i>")
|
---|
830 | content = "<h3>" + info.t + "</h3>\n" + content;
|
---|
831 | }
|
---|
832 | }
|
---|
833 | }
|
---|
834 |
|
---|
835 | if (args["n"] != 1) {
|
---|
836 | // get top locator
|
---|
837 | browse->get_top_locator(args, gdbm, oversize, locator);
|
---|
838 |
|
---|
839 | // expand and output page
|
---|
840 | // Note: we need to expand these out using package 'text'
|
---|
841 | // so we can't use the tricky '<<' syntax
|
---|
842 | disp.expandstring("text", "_header_", output);
|
---|
843 | textout << text_t2ascii << output;
|
---|
844 |
|
---|
845 | disp.expandstring("text", locator, output);
|
---|
846 | textout << text_t2ascii << output;
|
---|
847 |
|
---|
848 | disp.expandstring("text", content, output);
|
---|
849 | if (!queryparams.querystring.empty())
|
---|
850 | highlighttext(queryresults.termvariants, textout, logout, output);
|
---|
851 | else
|
---|
852 | textout << text_t2ascii << output;
|
---|
853 | }
|
---|
854 |
|
---|
855 | // get links to next and previous sections unless in expand text mode
|
---|
856 | if (args["g"][1] == '0') {
|
---|
857 | browse->get_links(args, gdbm, links);
|
---|
858 | disp.expandstring("text", links, output);
|
---|
859 | textout << text_t2ascii << output;
|
---|
860 | }
|
---|
861 |
|
---|
862 | disp.expandstring("text", "_footer_", output);
|
---|
863 | textout << text_t2ascii << output;
|
---|
864 | }
|
---|
865 |
|
---|
866 |
|
---|
867 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
868 | // auxiliary_action is called to retrieve and display collection documents
|
---|
869 | // in formats other than those handled by document_action (i.e. those other
|
---|
870 | // than text). This should be overridden for collections needing to return
|
---|
871 | // images, postscript etc. You can have as many auxiliary actions as needed
|
---|
872 | // by setting arg_a to a1, a2, a3 etc. and testing arg["a"][1] within the
|
---|
873 | // auxiliary_action function.
|
---|
874 | // auxiliary_action defaults to calling document_action
|
---|
875 |
|
---|
876 | void libinterface::auxiliary_action (cgiargsclass &args, ostream &textout, ostream &logout) {
|
---|
877 | document_action (args, textout, logout);
|
---|
878 | }
|
---|
879 |
|
---|
880 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
881 | // page is called when a standard html page is to be displayed
|
---|
882 | void libinterface::page_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
883 | {
|
---|
884 |
|
---|
885 | text_t &arg_p = args["p"];
|
---|
886 |
|
---|
887 | prepare_page(args, logout);
|
---|
888 |
|
---|
889 | if (arg_p == "preferences")
|
---|
890 | define_pref_macros(args, logout);
|
---|
891 |
|
---|
892 | textout << text_t2ascii << disp << ("_" + arg_p + ":header_\n")
|
---|
893 | << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n")
|
---|
894 | << ("_" + arg_p + ":footer_\n");
|
---|
895 | }
|
---|
896 |
|
---|
897 | // highlighttext highlights query terms in text string and outputs the resulting text string
|
---|
898 | void libinterface::highlighttext(vector<text_t> &termvars, ostream &textout,
|
---|
899 | ostream &logout, text_t &text)
|
---|
900 | {
|
---|
901 | map<text_t, int, lttext_t> terms;
|
---|
902 | map<text_t, int, lttext_t>::const_iterator it;
|
---|
903 | for (unsigned int i = 0; i < termvars.size(); i++) {
|
---|
904 | terms[termvars[i]] = 1;
|
---|
905 | }
|
---|
906 |
|
---|
907 | text_t::iterator here = text.begin();
|
---|
908 | text_t::iterator end = text.end();
|
---|
909 | text_t word, buffer;
|
---|
910 | while (here != end) {
|
---|
911 | if (((*here >= 65) && (*here <= 90)) ||
|
---|
912 | ((*here >= 97) && (*here <= 122)) ||
|
---|
913 | ((*here >= '0') && (*here <= '9')) ||
|
---|
914 | ((*here >= 192) && (*here <= 214)) ||
|
---|
915 | ((*here >= 216) && (*here <= 246)) ||
|
---|
916 | ((*here >= 248) && (*here <= 255))) {
|
---|
917 | // not word boundary
|
---|
918 | word.push_back(*here);
|
---|
919 | here++;
|
---|
920 |
|
---|
921 | } else {
|
---|
922 | // found word boundary
|
---|
923 | // add last word if there was one
|
---|
924 | if (!word.empty()) {
|
---|
925 | it = terms.find(word);
|
---|
926 | if (it != terms.end()) {
|
---|
927 | word = "<b><u>" + word + "</u></b>";
|
---|
928 | }
|
---|
929 | buffer += word;
|
---|
930 | word.clear();
|
---|
931 | }
|
---|
932 |
|
---|
933 | if (*here == '<') {
|
---|
934 | // skip over rest of html tag
|
---|
935 | while ((here != end) && (*here != '>')) {
|
---|
936 | buffer.push_back(*here);
|
---|
937 | here++;
|
---|
938 | }
|
---|
939 | }
|
---|
940 |
|
---|
941 | buffer.push_back(*here);
|
---|
942 | here++;
|
---|
943 |
|
---|
944 | if (buffer.size() > 1024) {
|
---|
945 | textout << text_t2ascii << buffer;
|
---|
946 | buffer.clear();
|
---|
947 | }
|
---|
948 | }
|
---|
949 | }
|
---|
950 | textout << text_t2ascii << buffer;
|
---|
951 | }
|
---|
952 |
|
---|
953 | void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout)
|
---|
954 | {
|
---|
955 | // the caseoption macro
|
---|
956 | text_t caseoption;
|
---|
957 | int arg_k = args.getintarg("k");
|
---|
958 |
|
---|
959 | caseoption += "\n<input type=radio name=k value=1";
|
---|
960 | if (arg_k) caseoption += " checked";
|
---|
961 | caseoption += "> ignore case differences<br>\n";
|
---|
962 | caseoption += "<input type=radio name=k value=0";
|
---|
963 | if (!arg_k) caseoption += " checked";
|
---|
964 | caseoption += "> upper/lower case must match\n";
|
---|
965 |
|
---|
966 | disp.setmacro ("caseoption", "preferences", caseoption);
|
---|
967 |
|
---|
968 | // the stemoption macro
|
---|
969 | text_t stemoption;
|
---|
970 | int arg_s = args.getintarg("s");
|
---|
971 |
|
---|
972 | stemoption += "\n<input type=radio name=s value=1";
|
---|
973 | if (arg_s) stemoption += " checked";
|
---|
974 | stemoption += "> ignore word endings<br>\n";
|
---|
975 | stemoption += "<input type=radio name=s value=0";
|
---|
976 | if (!arg_s) stemoption += " checked";
|
---|
977 | stemoption += "> whole word must match\n";
|
---|
978 |
|
---|
979 | disp.setmacro ("stemoption", "preferences", stemoption);
|
---|
980 |
|
---|
981 |
|
---|
982 | // the maxdocoption
|
---|
983 | text_t maxdocoption;
|
---|
984 | int arg_m = args.getintarg("m");
|
---|
985 |
|
---|
986 | maxdocoption += "\n<select name=m>\n";
|
---|
987 | maxdocoption += " <option value=\"50\"";
|
---|
988 | if (arg_m < 100) maxdocoption += " selected";
|
---|
989 | maxdocoption += ">50\n";
|
---|
990 | maxdocoption += " <option value=\"100\"";
|
---|
991 | if (arg_m >= 100 && arg_m < 200) maxdocoption += " selected";
|
---|
992 | maxdocoption += ">100\n";
|
---|
993 | maxdocoption += " <option value=\"200\"";
|
---|
994 | if (arg_m >= 200 && arg_m < 500) maxdocoption += " selected";
|
---|
995 | maxdocoption += ">200\n";
|
---|
996 | maxdocoption += " <option value=\"500\"";
|
---|
997 | if (arg_m >= 500) maxdocoption += " selected";
|
---|
998 | maxdocoption += ">500\n";
|
---|
999 | maxdocoption += "</select>\n";
|
---|
1000 |
|
---|
1001 | disp.setmacro ("maxdocoption", "preferences", maxdocoption);
|
---|
1002 |
|
---|
1003 | // the hitsperpageoption
|
---|
1004 | text_t hitsoption;
|
---|
1005 | int arg_o = args.getintarg("o");
|
---|
1006 |
|
---|
1007 | hitsoption += "\n<select name=o>\n";
|
---|
1008 | hitsoption += " <option value=\"10\"";
|
---|
1009 | if (arg_o < 20) hitsoption += " selected";
|
---|
1010 | hitsoption += ">10\n";
|
---|
1011 | hitsoption += " <option value=\"20\"";
|
---|
1012 | if (arg_o >= 20 && arg_o < 50) hitsoption += " selected";
|
---|
1013 | hitsoption += ">20\n";
|
---|
1014 | hitsoption += " <option value=\"50\"";
|
---|
1015 | if (arg_o >= 50 && arg_o < 100) hitsoption += " selected";
|
---|
1016 | hitsoption += ">50\n";
|
---|
1017 | hitsoption += " <option value=\"100\"";
|
---|
1018 | if (arg_o >= 100 && arg_o < 500) hitsoption += " selected";
|
---|
1019 | hitsoption += ">100\n";
|
---|
1020 | hitsoption += " <option value=\"500\"";
|
---|
1021 | if (arg_o >= 500) hitsoption += " selected";
|
---|
1022 | hitsoption += ">all\n";
|
---|
1023 | hitsoption += " </select>\n";
|
---|
1024 |
|
---|
1025 | disp.setmacro ("hitsperpageoption", "preferences", hitsoption);
|
---|
1026 | }
|
---|