1 | #include <string.h>
|
---|
2 | #include <stdio.h>
|
---|
3 | #include <stdlib.h>
|
---|
4 | #include <ctype.h>
|
---|
5 | #include <time.h>
|
---|
6 | #include "libinterface.h"
|
---|
7 | #include "cgiargs.h"
|
---|
8 |
|
---|
9 | #include <assert.h>
|
---|
10 |
|
---|
11 | ///////////////////////
|
---|
12 | // support functions //
|
---|
13 | ///////////////////////
|
---|
14 |
|
---|
// Map a single hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// to its numeric value 0..15.  Any other character is returned unchanged,
// so callers must not rely on the result alone to detect invalid input.
unsigned short hexdigit (unsigned short c)
{
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
|
---|
22 |
|
---|
23 |
|
---|
24 | void c2hex (unsigned short c, text_t &t)
|
---|
25 | {
|
---|
26 | t.clear();
|
---|
27 |
|
---|
28 | if (c >= 256)
|
---|
29 | {
|
---|
30 | t = "20"; // ' '
|
---|
31 | return;
|
---|
32 | }
|
---|
33 |
|
---|
34 | unsigned short o1, o2;
|
---|
35 |
|
---|
36 | o1 = (c/16) % 16;
|
---|
37 | o2 = c % 16;
|
---|
38 | if (o1 >= 10) o1 += 'a' - 10;
|
---|
39 | else o1 += '0';
|
---|
40 | if (o2 >= 10) o2 += 'a' - 10;
|
---|
41 | else o2 += '0';
|
---|
42 |
|
---|
43 | t.push_back(o1);
|
---|
44 | t.push_back(o2);
|
---|
45 | }
|
---|
46 |
|
---|
47 | // convert %xx and + to their appropriate equivalents
|
---|
48 | void decode (text_t &argstr)
|
---|
49 | {
|
---|
50 | text_t::iterator in = argstr.begin();
|
---|
51 | text_t::iterator out = in;
|
---|
52 | text_t::iterator end = argstr.end();
|
---|
53 |
|
---|
54 | while (in != end)
|
---|
55 | {
|
---|
56 | if (*in == '+') *out = ' ';
|
---|
57 |
|
---|
58 | else if (*in == '%')
|
---|
59 | {
|
---|
60 | unsigned short c = '%';
|
---|
61 | in++;
|
---|
62 | if (in != end)
|
---|
63 | {
|
---|
64 | c = hexdigit (*in);
|
---|
65 | in++;
|
---|
66 | }
|
---|
67 | if (in != end && c < 16) // sanity check on the previous character
|
---|
68 | {
|
---|
69 | c = c*16 + hexdigit (*in);
|
---|
70 | }
|
---|
71 |
|
---|
72 | *out = c;
|
---|
73 | }
|
---|
74 | else *out = *in;
|
---|
75 |
|
---|
76 | if (in != end) in++;
|
---|
77 | out++;
|
---|
78 | }
|
---|
79 |
|
---|
80 | // remove the excess characters
|
---|
81 | argstr.erase (out, end);
|
---|
82 | }
|
---|
83 |
|
---|
84 |
|
---|
85 | // split up the cgi arguments
|
---|
86 | void parse_cgi_args (text_t argstr, cgiargsclass &args)
|
---|
87 | {
|
---|
88 | args.clear();
|
---|
89 |
|
---|
90 | text_t::iterator here = argstr.begin();
|
---|
91 | text_t::iterator end = argstr.end();
|
---|
92 | text_t key, value;
|
---|
93 |
|
---|
94 | // extract out the key=value pairs
|
---|
95 | while (here != end)
|
---|
96 | {
|
---|
97 | // get the next key and value pair
|
---|
98 | here = getdelimitstr (here, end, '=', key);
|
---|
99 | here = getdelimitstr (here, end, '&', value);
|
---|
100 |
|
---|
101 | // convert %xx and + to their appropriate equivalents
|
---|
102 | decode (value);
|
---|
103 | // store this key=value pair
|
---|
104 | if (!key.empty()) args.setarg (key, value);
|
---|
105 | }
|
---|
106 | }
|
---|
107 |
|
---|
108 | text_t cgisafe (text_t &intext)
|
---|
109 | {
|
---|
110 | text_t outtext;
|
---|
111 |
|
---|
112 | text_t::iterator here = intext.begin ();
|
---|
113 | text_t::iterator end = intext.end ();
|
---|
114 | unsigned short c;
|
---|
115 | text_t ttmp;
|
---|
116 |
|
---|
117 | while (here != end)
|
---|
118 | {
|
---|
119 | c = *here;
|
---|
120 | if (((c >= 'a') && (c <= 'z')) ||
|
---|
121 | ((c >= 'A') && (c <= 'Z')) ||
|
---|
122 | ((c >= '0') && (c <= '9'))) {
|
---|
123 | // alphanumeric character
|
---|
124 | outtext.push_back(c);
|
---|
125 | } else {
|
---|
126 | // non-alphnumeric character
|
---|
127 | outtext.push_back('%');
|
---|
128 | c2hex(c, ttmp);
|
---|
129 | outtext += ttmp;
|
---|
130 | }
|
---|
131 |
|
---|
132 | here++;
|
---|
133 | }
|
---|
134 |
|
---|
135 | return outtext;
|
---|
136 | }
|
---|
137 |
|
---|
138 | //////////////////////////////
|
---|
139 | // methods for libinterface //
|
---|
140 | //////////////////////////////
|
---|
141 |
|
---|
142 | // constructor
|
---|
143 |
|
---|
144 | libinterface::libinterface()
|
---|
145 | {
|
---|
146 | browse = NULL;
|
---|
147 | }
|
---|
148 |
|
---|
149 | void libinterface::setgsdlhome (const text_t &thegsdlhome)
|
---|
150 | {
|
---|
151 | gsdlhome = thegsdlhome;
|
---|
152 | setmacroshome (thegsdlhome);
|
---|
153 | setgdbmhome (thegsdlhome);
|
---|
154 | setindexhome (thegsdlhome);
|
---|
155 | }
|
---|
156 |
|
---|
157 | void libinterface::setmacroshome (const text_t &themacroshome)
|
---|
158 | {
|
---|
159 | macroshome = themacroshome;
|
---|
160 | }
|
---|
161 |
|
---|
162 | void libinterface::setgdbmhome (const text_t &thegdbmhome)
|
---|
163 | {
|
---|
164 | gdbmhome = thegdbmhome;
|
---|
165 | gdbm.setgdbmhome(gdbmhome);
|
---|
166 | }
|
---|
167 |
|
---|
168 | void libinterface::setindexhome (const text_t &theindexhome)
|
---|
169 | {
|
---|
170 | indexhome = theindexhome;
|
---|
171 | search.setindexhome(theindexhome);
|
---|
172 | }
|
---|
173 |
|
---|
174 | void libinterface::sethttpprefix (const text_t &thehttpprefix)
|
---|
175 | {
|
---|
176 | httpprefix = thehttpprefix;
|
---|
177 | }
|
---|
178 |
|
---|
179 | void libinterface::setgwcgi (const text_t &thegwcgi)
|
---|
180 | {
|
---|
181 | gwcgi = thegwcgi;
|
---|
182 | }
|
---|
183 |
|
---|
184 | // init should be called after the various homes are set,
|
---|
185 | // it returns 'false' on failure and 'true' on success
|
---|
186 | bool libinterface::init ()
|
---|
187 | {
|
---|
188 | set_default_index();
|
---|
189 | text_t collection = get_collection_name();
|
---|
190 |
|
---|
191 | // load up default macro files -- these shouldn't have absolute file names
|
---|
192 | #ifdef __WIN32__
|
---|
193 | disp.loaddefaultmacros(macroshome + "\\macros\\base.dm");
|
---|
194 | disp.loaddefaultmacros(macroshome + "\\macros\\browse.dm");
|
---|
195 | disp.loaddefaultmacros(macroshome + "\\macros\\text.dm");
|
---|
196 | disp.loaddefaultmacros(macroshome + "\\macros\\query.dm");
|
---|
197 | disp.loaddefaultmacros(macroshome + "\\macros\\help.dm");
|
---|
198 | disp.loaddefaultmacros(macroshome + "\\macros\\gsdl.dm");
|
---|
199 | disp.loaddefaultmacros(macroshome + "\\macros\\pref.dm");
|
---|
200 | #else
|
---|
201 | disp.loaddefaultmacros(macroshome + "/macros/base.dm");
|
---|
202 | disp.loaddefaultmacros(macroshome + "/macros/browse.dm");
|
---|
203 | disp.loaddefaultmacros(macroshome + "/macros/text.dm");
|
---|
204 | disp.loaddefaultmacros(macroshome + "/macros/query.dm");
|
---|
205 | disp.loaddefaultmacros(macroshome + "/macros/help.dm");
|
---|
206 | disp.loaddefaultmacros(macroshome + "/macros/gsdl.dm");
|
---|
207 | disp.loaddefaultmacros(macroshome + "/macros/pref.dm");
|
---|
208 | #endif
|
---|
209 |
|
---|
210 | srand(time(NULL));
|
---|
211 |
|
---|
212 | return collection_init(collection);
|
---|
213 | }
|
---|
214 |
|
---|
215 |
|
---|
216 | // examine the cgi arguments and create the appropriate page,
|
---|
217 | // outputing the page to textout and any debug information to logout
|
---|
218 | //
|
---|
219 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
220 | int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout)
|
---|
221 | {
|
---|
222 | int err = LI_NOERROR;
|
---|
223 |
|
---|
224 | gdbm.setlogout (&logout); // make the output go where they want!
|
---|
225 |
|
---|
226 | cgiargsclass args;
|
---|
227 |
|
---|
228 | parse_cgi_args (argstr, args);
|
---|
229 | expand_compressed_args (args);
|
---|
230 | add_default_args (args);
|
---|
231 | check_args (args);
|
---|
232 |
|
---|
233 | lastcomparg = get_compressed_args (args);
|
---|
234 |
|
---|
235 | logout << args;
|
---|
236 |
|
---|
237 | text_t &arg_a = args["a"];
|
---|
238 | if (arg_a == "q") err = query_action (args, textout, logout);
|
---|
239 | else if (arg_a == "b") browse_action (args, textout, logout);
|
---|
240 | else if (arg_a == "t") document_action (args, textout, logout);
|
---|
241 | else if (arg_a == "p") page_action (args, textout, logout);
|
---|
242 | else
|
---|
243 | {
|
---|
244 | // output error page
|
---|
245 | }
|
---|
246 |
|
---|
247 | return err;
|
---|
248 | }
|
---|
249 |
|
---|
250 | // the arg config string is used to do processing on the arguments
|
---|
251 | // entries take the form argname[defaultvalue]
|
---|
252 | // if the argument name is preceeded by a "+" it means that the
|
---|
253 | // value may be more than one character long
|
---|
254 | // the main state variable missed on this list is "q" the query string
|
---|
255 | void libinterface::get_arg_config (text_t &argconfigstr)
|
---|
256 | {
|
---|
257 | argconfigstr =
|
---|
258 | "a[p]" // action: q=query, b=browse, t=targetdoc, p=page
|
---|
259 | "t[1]" // query type: 0=boolean, 1=ranked
|
---|
260 | "i[c]" // index: c=chapter, p=paragraph, t=title, b=book
|
---|
261 | "k[1]" // casefolding: 0=off, 1=on
|
---|
262 | "s[0]" // stemming: 0=off, 1=on
|
---|
263 | "+p[home]" // page
|
---|
264 | "+c[unu2]" // collection
|
---|
265 | "+r[1]" // results from
|
---|
266 | "+d[C.1]" // the target document
|
---|
267 | "+j[11]" // partial index: 11=all, 10=f&n, 01=other
|
---|
268 | "+m[100]" // maxdocs
|
---|
269 | "+o[20]" // hits per page
|
---|
270 | "v[0]" // version: 0=text+graphics, 1=text
|
---|
271 | "f[0]"; // query box size: 0=normal, 1=big
|
---|
272 |
|
---|
273 | }
|
---|
274 |
|
---|
275 | text_t::iterator libinterface::get_next_config_arg (text_t::iterator first,
|
---|
276 | text_t::iterator last,
|
---|
277 | text_t &argname,
|
---|
278 | text_t &defaultvalue,
|
---|
279 | bool &longarg)
|
---|
280 | {
|
---|
281 | first = getdelimitstr (first, last, '[', argname);
|
---|
282 | first = getdelimitstr (first, last, ']', defaultvalue);
|
---|
283 | longarg = false;
|
---|
284 |
|
---|
285 | if (!argname.empty() && (argname[0] == '+'))
|
---|
286 | {
|
---|
287 | argname.erase(argname.begin(), argname.begin()+1);
|
---|
288 | longarg = true;
|
---|
289 | }
|
---|
290 |
|
---|
291 | return first;
|
---|
292 | }
|
---|
293 |
|
---|
294 | text_t libinterface::get_compressed_args (cgiargsclass &args)
|
---|
295 | {
|
---|
296 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
297 | text_t arg_e;
|
---|
298 | text_t argname, defaultvalue;
|
---|
299 | text_t *argvalue;
|
---|
300 | bool longarg;
|
---|
301 |
|
---|
302 | text_t::iterator here = argconfigstr.begin();
|
---|
303 | text_t::iterator end = argconfigstr.end();
|
---|
304 | while (here != end)
|
---|
305 | {
|
---|
306 | here = get_next_config_arg (here, end, argname, defaultvalue, longarg);
|
---|
307 |
|
---|
308 | if (!argname.empty())
|
---|
309 | {
|
---|
310 | argvalue = args.getarg (argname);
|
---|
311 | if (argvalue == NULL) arg_e += defaultvalue;
|
---|
312 | else arg_e += *argvalue;
|
---|
313 |
|
---|
314 | if (longarg) arg_e += "-";
|
---|
315 | }
|
---|
316 | }
|
---|
317 |
|
---|
318 | return arg_e;
|
---|
319 | }
|
---|
320 |
|
---|
321 |
|
---|
322 | // the compressed options should never override explicit options
|
---|
323 | // but they should always be expanded before add_default_args is
|
---|
324 | // called
|
---|
325 | void libinterface::expand_compressed_args (cgiargsclass &args)
|
---|
326 | {
|
---|
327 | text_t *arg_e = args.getarg("e");
|
---|
328 |
|
---|
329 | // see if there is compressed options
|
---|
330 | if (arg_e != NULL)
|
---|
331 | {
|
---|
332 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
333 | text_t argname, defaultvalue, argvalue;
|
---|
334 | bool longarg;
|
---|
335 |
|
---|
336 | text_t::iterator confighere = argconfigstr.begin();
|
---|
337 | text_t::iterator configend = argconfigstr.end();
|
---|
338 |
|
---|
339 | text_t::iterator arghere = arg_e->begin();
|
---|
340 | text_t::iterator argend = arg_e->end();
|
---|
341 | while (confighere != configend && arghere != argend)
|
---|
342 | {
|
---|
343 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
344 | defaultvalue, longarg);
|
---|
345 | if (!argname.empty())
|
---|
346 | {
|
---|
347 | if (longarg)
|
---|
348 | {
|
---|
349 | arghere = getdelimitstr (arghere, argend, '-', argvalue);
|
---|
350 | if (!argvalue.empty()) args.setdefaultarg (argname, argvalue);
|
---|
351 | }
|
---|
352 | else
|
---|
353 | {
|
---|
354 | args.setdefaultcarg (argname,*arghere);
|
---|
355 | arghere++;
|
---|
356 | }
|
---|
357 | }
|
---|
358 | }
|
---|
359 | }
|
---|
360 | }
|
---|
361 |
|
---|
362 |
|
---|
363 | void libinterface::add_default_args (cgiargsclass &args)
|
---|
364 | {
|
---|
365 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
366 | text_t argname, defaultvalue;
|
---|
367 | bool longarg;
|
---|
368 |
|
---|
369 | text_t::iterator confighere = argconfigstr.begin();
|
---|
370 | text_t::iterator configend = argconfigstr.end();
|
---|
371 | while (confighere != configend)
|
---|
372 | {
|
---|
373 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
374 | defaultvalue, longarg);
|
---|
375 | if (!argname.empty()) args.setdefaultarg (argname, defaultvalue);
|
---|
376 | }
|
---|
377 |
|
---|
378 | // the query string and format string are not included in the argument configuration string
|
---|
379 | args.setdefaultarg ("q", ""); // the default query string is ""
|
---|
380 | args.setdefaultarg ("g", "00");
|
---|
381 | args.setdefaultarg ("x", "0");
|
---|
382 | }
|
---|
383 |
|
---|
384 |
|
---|
385 | // check and attempt to fix an problems encountered in the list
|
---|
386 | // of cgi arguments
|
---|
387 | void libinterface::check_args (cgiargsclass &args)
|
---|
388 | {
|
---|
389 | text_t collection = get_collection_name();
|
---|
390 | args.setarg("c", collection);
|
---|
391 | }
|
---|
392 |
|
---|
393 |
|
---|
394 | void libinterface::define_general_macros (cgiargsclass &args, ostream &logout)
|
---|
395 | {
|
---|
396 | disp.setmacro("httpprefix", "Global", httpprefix);
|
---|
397 | disp.setmacro("gwcgi", "Global", gwcgi);
|
---|
398 |
|
---|
399 | disp.setmacro("collection", "Global", cgisafe(args["c"]));
|
---|
400 | disp.setmacro("compressedoptions", "Global", get_compressed_args(args));
|
---|
401 | disp.setmacro("urlsafequerystring", "Global", cgisafe(args["q"]));
|
---|
402 |
|
---|
403 | // need to escape any quotes in querystring to prevent them upsetting the html
|
---|
404 | text_t querystring;
|
---|
405 | text_t::iterator here = args["q"].begin();
|
---|
406 | text_t::iterator end = args["q"].end();
|
---|
407 | while (here != end) {
|
---|
408 | if (*here == '"') querystring += """;
|
---|
409 | else querystring.push_back(*here);
|
---|
410 | here ++;
|
---|
411 | }
|
---|
412 | disp.setmacro("querystring", "Global", querystring);
|
---|
413 |
|
---|
414 |
|
---|
415 | if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1");
|
---|
416 | if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1");
|
---|
417 |
|
---|
418 | int i = rand();
|
---|
419 | disp.setmacro("pagedest", "Global", text_t(i));
|
---|
420 |
|
---|
421 | // define the macro for the "g" argument
|
---|
422 | disp.setmacro("g", "Global", args["g"]);
|
---|
423 | }
|
---|
424 |
|
---|
425 |
|
---|
426 | // prepare_page prepares to write out a page using the current
|
---|
427 | // page parameters and defines any general macros
|
---|
428 | void libinterface::prepare_page (cgiargsclass &args, ostream &logout)
|
---|
429 | {
|
---|
430 | // get page parameters
|
---|
431 | text_t pageparams = text_t("collection=") + args["c"];
|
---|
432 | if (args.getintarg("u") == 1) pageparams += ",style=htmlonly";
|
---|
433 | if (args.getintarg("v") == 1) pageparams += ",version=text";
|
---|
434 | if (args.getintarg("f") == 1) pageparams += ",queryversion=big";
|
---|
435 |
|
---|
436 | disp.openpage(pageparams, MACROPRECEDENCE);
|
---|
437 | define_general_macros(args, logout);
|
---|
438 | define_collection_macros(args, logout);
|
---|
439 | }
|
---|
440 |
|
---|
441 | void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams)
|
---|
442 | {
|
---|
443 | queryparams.collection = args["c"];
|
---|
444 | assemble_index (args, queryparams.search_index);
|
---|
445 | queryparams.querystring = args["q"];
|
---|
446 | format_querystring (queryparams.querystring);
|
---|
447 | queryparams.search_type = args.getintarg ("t");
|
---|
448 | queryparams.casefolding = args.getintarg ("k");
|
---|
449 | queryparams.stemming = args.getintarg ("s");
|
---|
450 | queryparams.maxdocs = args.getintarg ("m");
|
---|
451 | }
|
---|
452 |
|
---|
453 | void libinterface::format_querystring (text_t &querystring)
|
---|
454 | {
|
---|
455 | text_t formattedstring;
|
---|
456 | quotedstring.clear();
|
---|
457 |
|
---|
458 | text_t::iterator here = querystring.begin();
|
---|
459 | text_t::iterator end = querystring.end();
|
---|
460 | int foundquote = 0;
|
---|
461 |
|
---|
462 | // want to remove ()|!& from querystring so boolean queries are just
|
---|
463 | // "all the words" queries
|
---|
464 | while (here != end) {
|
---|
465 | if (*here == '(' || *here == ')' || *here == '|' ||
|
---|
466 | *here == '!' || *here == '&') {
|
---|
467 | formattedstring += " ";
|
---|
468 | } else {
|
---|
469 | if (*here == '"') {
|
---|
470 | if (foundquote) {foundquote = 0; quotedstring.push_back(*here);}
|
---|
471 | else foundquote = 1;
|
---|
472 | } else {
|
---|
473 | formattedstring.push_back(*here);
|
---|
474 | }
|
---|
475 | if (foundquote) quotedstring.push_back(*here);
|
---|
476 | }
|
---|
477 | here ++;
|
---|
478 | }
|
---|
479 | querystring = formattedstring + quotedstring;
|
---|
480 | }
|
---|
481 |
|
---|
482 | void libinterface::define_query_macros (cgiargsclass &args,
|
---|
483 | queryparamclass &queryparams,
|
---|
484 | queryresultsclass &queryresults,
|
---|
485 | ostream &logout)
|
---|
486 | {
|
---|
487 | int numdocs = queryresults.getnumdocs();
|
---|
488 | int numterms = queryresults.getnumterms();
|
---|
489 | disp.setmacro("querysize", "query", args["f"]);
|
---|
490 | disp.setmacro("haveresults", "query", numdocs);
|
---|
491 |
|
---|
492 | // set the display frequency macro
|
---|
493 | text_t freqmsg = "_textfm1_";
|
---|
494 |
|
---|
495 | int first = 1;
|
---|
496 | for (int i = 0; i < numterms; i++) {
|
---|
497 | if (first == 0) freqmsg += "; ";
|
---|
498 | first = 0;
|
---|
499 | freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq;
|
---|
500 | }
|
---|
501 | if (!quotedstring.empty()) freqmsg += "<br><i>post-processed to find " + quotedstring + "</i>\n";
|
---|
502 | disp.setmacro("freqmsg", "query", freqmsg);
|
---|
503 |
|
---|
504 | // set the result line macro
|
---|
505 |
|
---|
506 | text_t resline;
|
---|
507 |
|
---|
508 | if (numdocs >= queryparams.maxdocs)
|
---|
509 | resline.setcstr("_textmt2_");
|
---|
510 |
|
---|
511 | if (numdocs == 0) {
|
---|
512 | resline.setcstr("_textndmtq_");
|
---|
513 | } else if (numdocs == 1) {
|
---|
514 | resline += text_t(numdocs) + text_t(" _textdmtq2_.");
|
---|
515 | } else {
|
---|
516 | resline += text_t(numdocs) + text_t(" _textdmtq3_.");
|
---|
517 | }
|
---|
518 |
|
---|
519 | disp.setmacro("resultline", "query", resline);
|
---|
520 |
|
---|
521 | // define_collection_macros (args, logout);
|
---|
522 |
|
---|
523 | if (queryresults.getnumdocs() > 0) {
|
---|
524 | docLinks(args, queryresults, logout);
|
---|
525 | }
|
---|
526 | }
|
---|
527 |
|
---|
528 |
|
---|
529 | // set the _links_ macro to create the links between pages of query results
|
---|
530 | void libinterface::docLinks (cgiargsclass &args,
|
---|
531 | queryresultsclass &queryresults,
|
---|
532 | ostream &logout)
|
---|
533 | {
|
---|
534 | text_t links;
|
---|
535 | int a, b, documents, nextfirst, nextlast, prevfirst, prevlast;
|
---|
536 | int results_from = args.getintarg("r");
|
---|
537 | int hitsperpage = args.getintarg("o");
|
---|
538 |
|
---|
539 | documents = queryresults.getnumdocs();
|
---|
540 |
|
---|
541 | a = results_from;
|
---|
542 | b = a + (hitsperpage - 1);
|
---|
543 |
|
---|
544 | // make sure a and b are in range
|
---|
545 | if (a < 1) a = 1;
|
---|
546 | if (b < 1) b = 1;
|
---|
547 | if (a > documents) a = documents;
|
---|
548 | if (b > documents) b = documents;
|
---|
549 |
|
---|
550 | links.setcstr("<table cellspacing=0 cellpadding=0 border=0 width=\"100%\">\n");
|
---|
551 | links += "<tr valign=bottom>\n";
|
---|
552 | links += "<td align=left>\n";
|
---|
553 | links += "<_font_>\n";
|
---|
554 |
|
---|
555 | // previous page link
|
---|
556 | if (a > 1) {
|
---|
557 | prevlast = a - 1;
|
---|
558 | prevfirst = a - hitsperpage;
|
---|
559 |
|
---|
560 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
561 | links += prevfirst;
|
---|
562 | links += "\">_iconprev__textmatches_ ";
|
---|
563 | links += prevfirst;
|
---|
564 | links += " - ";
|
---|
565 | links += prevlast;
|
---|
566 | links += "</a>\n";
|
---|
567 | }
|
---|
568 |
|
---|
569 | links += "</font>\n";
|
---|
570 | links += "</td><td width=100></td><td align=right>\n";
|
---|
571 | links += "<_font_>\n";
|
---|
572 |
|
---|
573 | // next page link
|
---|
574 | if (b < documents) {
|
---|
575 | nextfirst = b + 1;
|
---|
576 | nextlast = b + hitsperpage;
|
---|
577 | if (nextlast > documents) nextlast = documents;
|
---|
578 |
|
---|
579 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
580 | links += nextfirst;
|
---|
581 | links += "\">_textmatches_ ";
|
---|
582 | links += nextfirst;
|
---|
583 | links += " - ";
|
---|
584 | links += nextlast ;
|
---|
585 | links += "_iconnext_</a>\n";
|
---|
586 | }
|
---|
587 |
|
---|
588 | links += "</font>\n";
|
---|
589 | links += "</td></tr></table>\n";
|
---|
590 |
|
---|
591 | disp.setmacro("links", "query", links);
|
---|
592 | }
|
---|
593 |
|
---|
594 |
|
---|
595 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
596 | int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams,
|
---|
597 | queryresultsclass &queryresults, ostream &logout)
|
---|
598 | {
|
---|
599 | set_query_params(args, queryparams);
|
---|
600 |
|
---|
601 | if (!queryparams.querystring.empty()) {
|
---|
602 | // do the query - the results are returned in queryresults
|
---|
603 | if (!search.search(queryparams, queryresults)) {
|
---|
604 | logout << "ERROR: database didn't load\n";
|
---|
605 | return LI_LOADDATABASEFAILED;
|
---|
606 | }
|
---|
607 | }
|
---|
608 | return LI_NOERROR;
|
---|
609 | }
|
---|
610 |
|
---|
611 | ////////////////////////////////////////////////////////////////////////////////////////
|
---|
612 | // query_action is called whenever a search is to be carried out (i.e. when the
|
---|
613 | // 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to
|
---|
614 | // carry out the search then displays the first page of results.
|
---|
615 | //
|
---|
616 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
617 | int libinterface::query_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
618 | {
|
---|
619 | int err = LI_NOERROR;
|
---|
620 |
|
---|
621 | queryparamclass queryparams;
|
---|
622 | queryresultsclass queryresults;
|
---|
623 |
|
---|
624 | err = do_query(args, queryparams, queryresults, logout);
|
---|
625 |
|
---|
626 | // prepare to print out the page
|
---|
627 | prepare_page(args, logout);
|
---|
628 | define_query_macros(args, queryparams, queryresults, logout);
|
---|
629 |
|
---|
630 | // print out the query page
|
---|
631 | textout << text_t2ascii << disp << "_query:header_\n";
|
---|
632 |
|
---|
633 | // output query results if there is a query string -
|
---|
634 | // otherwise output help text
|
---|
635 | if (!queryparams.querystring.empty())
|
---|
636 | {
|
---|
637 | displayresults (args, textout, logout, queryresults);
|
---|
638 | }
|
---|
639 | else
|
---|
640 | {
|
---|
641 | textout << text_t2ascii << disp << "_query:noqueryheader_\n";
|
---|
642 | }
|
---|
643 |
|
---|
644 | textout << text_t2ascii << disp << "_query:footer_\n";
|
---|
645 |
|
---|
646 | return err;
|
---|
647 | }
|
---|
648 |
|
---|
649 | void libinterface::displayresults (cgiargsclass &args, ostream &textout, ostream &logout,
|
---|
650 | queryresultsclass &queryresults)
|
---|
651 | {
|
---|
652 | textout << text_t2ascii << disp << "_query:queryheader_";
|
---|
653 |
|
---|
654 |
|
---|
655 | int startresults = args.getintarg("r") - 1;
|
---|
656 | int numresults = args.getintarg("o");
|
---|
657 |
|
---|
658 | textout << text_t2ascii << "<table cellspacing=4>\n";
|
---|
659 | for (int i=startresults; i < startresults+numresults; i++)
|
---|
660 | {
|
---|
661 | displaydocsummary (args, textout, logout, queryresults, i);
|
---|
662 | }
|
---|
663 | textout << text_t2ascii << "</table>\n\n";
|
---|
664 |
|
---|
665 | textout << text_t2ascii << disp << "_query:queryfooter_";
|
---|
666 | }
|
---|
667 |
|
---|
668 |
|
---|
669 | ///////////////////////////////////////////////////////////////////////////////////////////////
|
---|
670 | // browse_action writes out the browse pages (i.e. the top level hierarchy pages)
|
---|
671 | void libinterface::browse_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
672 | {
|
---|
673 | text_t browse_bar, locator, output;
|
---|
674 | gdbm_info info;
|
---|
675 |
|
---|
676 | prepare_page(args, logout);
|
---|
677 |
|
---|
678 | // get browse bar unless page has been detached
|
---|
679 | if (args.getintarg("x") == 0) {
|
---|
680 | browse->get_browse_bar(args["d"], browse_bar);
|
---|
681 | }
|
---|
682 |
|
---|
683 | // get top locator
|
---|
684 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
685 |
|
---|
686 | // expand and output page
|
---|
687 | // Note: we need to expand these out using package 'browse'
|
---|
688 | // so we can't use the tricky '<<' syntax
|
---|
689 | disp.expandstring("browse", "_header_", output);
|
---|
690 | textout << text_t2ascii << output;
|
---|
691 | disp.expandstring("browse", browse_bar, output);
|
---|
692 | textout << text_t2ascii << output;
|
---|
693 | disp.expandstring("browse", locator, output);
|
---|
694 | textout << text_t2ascii << output;
|
---|
695 | disp.expandstring("browse", "_footer_", output);
|
---|
696 | textout << text_t2ascii << output;
|
---|
697 | }
|
---|
698 |
|
---|
699 |
|
---|
700 |
|
---|
701 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
702 | // document_action is called to retrieve and display collection documents
|
---|
703 | // It calls the mgsearch function docTargetDocument() to retrieve
|
---|
704 | // a document.
|
---|
705 |
|
---|
706 | void libinterface::document_action (cgiargsclass &args, ostream &textout, ostream &logout) {
|
---|
707 |
|
---|
708 | text_t locator, content, links, output;
|
---|
709 | gdbm_info info;
|
---|
710 | queryparamclass queryparams;
|
---|
711 | queryresultsclass queryresults;
|
---|
712 | int oversize = 0;
|
---|
713 |
|
---|
714 | // have to redo the query to get queryterms for highlight text
|
---|
715 | do_query(args, queryparams, queryresults, logout);
|
---|
716 |
|
---|
717 | prepare_page(args, logout);
|
---|
718 |
|
---|
719 | if (args["g"][1] == '0') {
|
---|
720 | // get docnum from gdbm
|
---|
721 | text_t docref;
|
---|
722 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
723 | else docref = args["d"];
|
---|
724 | if (gdbm.getinfo(docref, queryparams.collection, info) != 0) {
|
---|
725 | logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n";
|
---|
726 | return;
|
---|
727 | }
|
---|
728 |
|
---|
729 | // get document text if there is any
|
---|
730 | if (info.c.empty()) {
|
---|
731 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
732 | info.d, content);
|
---|
733 | if (info.t != "<i>(introductory text)</i>")
|
---|
734 | content = "<h3>" + info.t + "</h3>\n" + content;
|
---|
735 | }
|
---|
736 | }
|
---|
737 |
|
---|
738 | if (args["g"][1] == '1') {
|
---|
739 | // want to get expanded out text
|
---|
740 | vector<text_t> contents_arr;
|
---|
741 | text_t booksection;
|
---|
742 | int levelcount;
|
---|
743 |
|
---|
744 | get_book (args["d"], booksection);
|
---|
745 | levelcount = count_dots(booksection);
|
---|
746 |
|
---|
747 | browse->get_contents_arr(args, gdbm, contents_arr);
|
---|
748 |
|
---|
749 |
|
---|
750 | // get text for each section of book
|
---|
751 | vector<text_t>::const_iterator thiscontent = contents_arr.begin();
|
---|
752 | vector<text_t>::const_iterator end = contents_arr.end();
|
---|
753 |
|
---|
754 | int first = 1;
|
---|
755 | int count = 1;
|
---|
756 | while (thiscontent != end) {
|
---|
757 | text_t text;
|
---|
758 |
|
---|
759 | // get docnum from gdbm
|
---|
760 | if (gdbm.getinfo(*thiscontent, queryparams.collection, info) != 0) {
|
---|
761 | logout << text_t2ascii << "info_db wasn't opened - " << args["d"] << "\n";
|
---|
762 | return;
|
---|
763 | }
|
---|
764 |
|
---|
765 | // if section has text get it, otherwise output section title
|
---|
766 | if (info.c.empty()) {
|
---|
767 |
|
---|
768 | // output <a name= > tags for all text sections currently displayed in toc (all text sections
|
---|
769 | // if contents are expanded
|
---|
770 | if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') {
|
---|
771 | content += "<a name=\"";
|
---|
772 | content += count;
|
---|
773 | content += "\"></a>\n";
|
---|
774 | count ++;
|
---|
775 | }
|
---|
776 |
|
---|
777 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
778 | info.d, text);
|
---|
779 | if (info.t != "<i>(introductory text)</i>")
|
---|
780 | content += "<h3>" + info.t + "</h3>\n";
|
---|
781 | // content += text + "<hr><br>\n";
|
---|
782 | content += text + "<p>\n"; // no longer want <hr> between sections
|
---|
783 | } else {
|
---|
784 | content += "<h3>" + info.t + "</h3>\n";
|
---|
785 | }
|
---|
786 |
|
---|
787 | if (args["n"] == 1) {
|
---|
788 | if (first) {
|
---|
789 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
790 | disp.expandstring("text", "_header_", output);
|
---|
791 | textout << text_t2ascii << output;
|
---|
792 | disp.expandstring("text", locator, output);
|
---|
793 | textout << text_t2ascii << output;
|
---|
794 | }
|
---|
795 |
|
---|
796 | disp.expandstring("text", content, output);
|
---|
797 | if (!queryparams.querystring.empty())
|
---|
798 | highlighttext(queryresults.termvariants, textout, logout, output);
|
---|
799 | else
|
---|
800 | textout << text_t2ascii << output;
|
---|
801 | first = 0;
|
---|
802 | content.clear();
|
---|
803 | }
|
---|
804 |
|
---|
805 | thiscontent ++;
|
---|
806 | if (content.size() > 200000 && args["n"] != 1) {
|
---|
807 | content.clear();
|
---|
808 | oversize = 1;
|
---|
809 | args["g"][1] = '0';
|
---|
810 | break;
|
---|
811 | }
|
---|
812 | }
|
---|
813 |
|
---|
814 | if (args["g"][1] == '0') {
|
---|
815 | // get docnum from gdbm
|
---|
816 | text_t docref;
|
---|
817 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
818 | else docref = args["d"];
|
---|
819 | if (gdbm.getinfo(docref, queryparams.collection, info) != 0) {
|
---|
820 | logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n";
|
---|
821 | return;
|
---|
822 | }
|
---|
823 |
|
---|
824 | // get document text if there is any
|
---|
825 | if (info.c.empty()) {
|
---|
826 | search.docTargetDocument(default_index, queryparams.collection,
|
---|
827 | info.d, content);
|
---|
828 | if (info.t != "<i>(introductory text)</i>")
|
---|
829 | content = "<h3>" + info.t + "</h3>\n" + content;
|
---|
830 | }
|
---|
831 | }
|
---|
832 | }
|
---|
833 |
|
---|
834 | if (args["n"] != 1) {
|
---|
835 | // get top locator
|
---|
836 | browse->get_top_locator(args, gdbm, oversize, locator);
|
---|
837 |
|
---|
838 | // expand and output page
|
---|
839 | // Note: we need to expand these out using package 'text'
|
---|
840 | // so we can't use the tricky '<<' syntax
|
---|
841 | disp.expandstring("text", "_header_", output);
|
---|
842 | textout << text_t2ascii << output;
|
---|
843 |
|
---|
844 | disp.expandstring("text", locator, output);
|
---|
845 | textout << text_t2ascii << output;
|
---|
846 |
|
---|
847 | disp.expandstring("text", content, output);
|
---|
848 | if (!queryparams.querystring.empty())
|
---|
849 | highlighttext(queryresults.termvariants, textout, logout, output);
|
---|
850 | else
|
---|
851 | textout << text_t2ascii << output;
|
---|
852 | }
|
---|
853 |
|
---|
854 | // get links to next and previous sections unless in expand text mode
|
---|
855 | if (args["g"][1] == '0') {
|
---|
856 | browse->get_links(args, gdbm, links);
|
---|
857 | disp.expandstring("text", links, output);
|
---|
858 | textout << text_t2ascii << output;
|
---|
859 | }
|
---|
860 |
|
---|
861 | disp.expandstring("text", "_footer_", output);
|
---|
862 | textout << text_t2ascii << output;
|
---|
863 | }
|
---|
864 |
|
---|
865 |
|
---|
866 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
867 | // page is called when a standard html page is to be displayed
|
---|
868 | void libinterface::page_action (cgiargsclass &args, ostream &textout, ostream &logout)
|
---|
869 | {
|
---|
870 |
|
---|
871 | text_t &arg_p = args["p"];
|
---|
872 |
|
---|
873 | prepare_page(args, logout);
|
---|
874 |
|
---|
875 | if (arg_p == "preferences")
|
---|
876 | define_pref_macros(args, logout);
|
---|
877 |
|
---|
878 | textout << text_t2ascii << disp << ("_" + arg_p + ":header_\n")
|
---|
879 | << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n")
|
---|
880 | << ("_" + arg_p + ":footer_\n");
|
---|
881 | }
|
---|
882 |
|
---|
883 | // highlighttext highlights query terms in text string and outputs the resulting text string
|
---|
884 | void libinterface::highlighttext(vector<text_t> &termvars, ostream &textout,
|
---|
885 | ostream &logout, text_t &text)
|
---|
886 | {
|
---|
887 | map<text_t, int, lttext_t> terms;
|
---|
888 | map<text_t, int, lttext_t>::const_iterator it;
|
---|
889 | for (unsigned int i = 0; i < termvars.size(); i++) {
|
---|
890 | terms[termvars[i]] = 1;
|
---|
891 | }
|
---|
892 |
|
---|
893 | text_t::iterator here = text.begin();
|
---|
894 | text_t::iterator end = text.end();
|
---|
895 | text_t word, buffer;
|
---|
896 | while (here != end) {
|
---|
897 | if (((*here >= 65) && (*here <= 90)) ||
|
---|
898 | ((*here >= 97) && (*here <= 122)) ||
|
---|
899 | ((*here >= '0') && (*here <= '9')) ||
|
---|
900 | ((*here >= 192) && (*here <= 214)) ||
|
---|
901 | ((*here >= 216) && (*here <= 246)) ||
|
---|
902 | ((*here >= 248) && (*here <= 255))) {
|
---|
903 | // not word boundary
|
---|
904 | word.push_back(*here);
|
---|
905 | here++;
|
---|
906 |
|
---|
907 | } else {
|
---|
908 | // found word boundary
|
---|
909 | // add last word if there was one
|
---|
910 | if (!word.empty()) {
|
---|
911 | it = terms.find(word);
|
---|
912 | if (it != terms.end()) {
|
---|
913 | word = "<b><u>" + word + "</u></b>";
|
---|
914 | }
|
---|
915 | buffer += word;
|
---|
916 | word.clear();
|
---|
917 | }
|
---|
918 |
|
---|
919 | if (*here == '<') {
|
---|
920 | // skip over rest of html tag
|
---|
921 | while ((here != end) && (*here != '>')) {
|
---|
922 | buffer.push_back(*here);
|
---|
923 | here++;
|
---|
924 | }
|
---|
925 | }
|
---|
926 |
|
---|
927 | buffer.push_back(*here);
|
---|
928 | here++;
|
---|
929 |
|
---|
930 | if (buffer.size() > 1024) {
|
---|
931 | textout << text_t2ascii << buffer;
|
---|
932 | buffer.clear();
|
---|
933 | }
|
---|
934 | }
|
---|
935 | }
|
---|
936 | textout << text_t2ascii << buffer;
|
---|
937 | }
|
---|
938 |
|
---|
939 | void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout)
|
---|
940 | {
|
---|
941 | // the caseoption macro
|
---|
942 | text_t caseoption;
|
---|
943 | int arg_k = args.getintarg("k");
|
---|
944 |
|
---|
945 | caseoption += "\n<input type=radio name=k value=1";
|
---|
946 | if (arg_k) caseoption += " checked";
|
---|
947 | caseoption += "> ignore case differences<br>\n";
|
---|
948 | caseoption += "<input type=radio name=k value=0";
|
---|
949 | if (!arg_k) caseoption += " checked";
|
---|
950 | caseoption += "> upper/lower case must match\n";
|
---|
951 |
|
---|
952 | disp.setmacro ("caseoption", "preferences", caseoption);
|
---|
953 |
|
---|
954 | // the stemoption macro
|
---|
955 | text_t stemoption;
|
---|
956 | int arg_s = args.getintarg("s");
|
---|
957 |
|
---|
958 | stemoption += "\n<input type=radio name=s value=1";
|
---|
959 | if (arg_s) stemoption += " checked";
|
---|
960 | stemoption += "> ignore word endings<br>\n";
|
---|
961 | stemoption += "<input type=radio name=s value=0";
|
---|
962 | if (!arg_s) stemoption += " checked";
|
---|
963 | stemoption += "> whole word must match\n";
|
---|
964 |
|
---|
965 | disp.setmacro ("stemoption", "preferences", stemoption);
|
---|
966 |
|
---|
967 |
|
---|
968 | // the maxdocoption
|
---|
969 | text_t maxdocoption;
|
---|
970 | int arg_m = args.getintarg("m");
|
---|
971 |
|
---|
972 | maxdocoption += "\n<select name=m>\n";
|
---|
973 | maxdocoption += " <option value=\"50\"";
|
---|
974 | if (arg_m < 100) maxdocoption += " selected";
|
---|
975 | maxdocoption += ">50\n";
|
---|
976 | maxdocoption += " <option value=\"100\"";
|
---|
977 | if (arg_m >= 100 && arg_m < 200) maxdocoption += " selected";
|
---|
978 | maxdocoption += ">100\n";
|
---|
979 | maxdocoption += " <option value=\"200\"";
|
---|
980 | if (arg_m >= 200 && arg_m < 500) maxdocoption += " selected";
|
---|
981 | maxdocoption += ">200\n";
|
---|
982 | maxdocoption += " <option value=\"500\"";
|
---|
983 | if (arg_m >= 500) maxdocoption += " selected";
|
---|
984 | maxdocoption += ">500\n";
|
---|
985 | maxdocoption += "</select>\n";
|
---|
986 |
|
---|
987 | disp.setmacro ("maxdocoption", "preferences", maxdocoption);
|
---|
988 |
|
---|
989 | // the hitsperpageoption
|
---|
990 | text_t hitsoption;
|
---|
991 | int arg_o = args.getintarg("o");
|
---|
992 |
|
---|
993 | hitsoption += "\n<select name=o>\n";
|
---|
994 | hitsoption += " <option value=\"10\"";
|
---|
995 | if (arg_o < 20) hitsoption += " selected";
|
---|
996 | hitsoption += ">10\n";
|
---|
997 | hitsoption += " <option value=\"20\"";
|
---|
998 | if (arg_o >= 20 && arg_o < 50) hitsoption += " selected";
|
---|
999 | hitsoption += ">20\n";
|
---|
1000 | hitsoption += " <option value=\"50\"";
|
---|
1001 | if (arg_o >= 50 && arg_o < 100) hitsoption += " selected";
|
---|
1002 | hitsoption += ">50\n";
|
---|
1003 | hitsoption += " <option value=\"100\"";
|
---|
1004 | if (arg_o >= 100 && arg_o < 500) hitsoption += " selected";
|
---|
1005 | hitsoption += ">100\n";
|
---|
1006 | hitsoption += " <option value=\"500\"";
|
---|
1007 | if (arg_o >= 500) hitsoption += " selected";
|
---|
1008 | hitsoption += ">all\n";
|
---|
1009 | hitsoption += " </select>\n";
|
---|
1010 |
|
---|
1011 | disp.setmacro ("hitsperpageoption", "preferences", hitsoption);
|
---|
1012 | }
|
---|