source: trunk/gsdl/src/library/libinterface.cpp@ 99

Last change on this file since 99 was 99, checked in by rjmcnab, 25 years ago

Enabled mg and the library software to read in more than one index
at a time.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 36.0 KB
Line 
1#include <string.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include <ctype.h>
5#include <time.h>
6#include "libinterface.h"
7#include "cgiargs.h"
8#include "fileutil.h"
9#include "cfgread.h"
10#include "gsdlunicode.h"
11#include "unitool.h"
12
13#include <assert.h>
14
15
16
17
18///////////////////////
19// support functions //
20///////////////////////
21
// Convert one hexadecimal digit character ('0'-'9', 'a'-'f' or
// 'A'-'F') to its value 0-15. Any other character is handed back
// unchanged, so a result of 16 or more always means "not a hex digit".
unsigned short hexdigit (unsigned short c)
{
  unsigned short value = c;  // non-hex characters fall through untouched

  if ('0' <= c && c <= '9')      value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
29
30
31void c2hex (unsigned short c, text_t &t)
32{
33 t.clear();
34
35 if (c >= 256)
36 {
37 t = "20"; // ' '
38 return;
39 }
40
41 unsigned short o1, o2;
42
43 o1 = (c/16) % 16;
44 o2 = c % 16;
45 if (o1 >= 10) o1 += 'a' - 10;
46 else o1 += '0';
47 if (o2 >= 10) o2 += 'a' - 10;
48 else o2 += '0';
49
50 t.push_back(o1);
51 t.push_back(o2);
52}
53
54// convert %xx and + to their appropriate equivalents
55void decode (text_t &argstr)
56{
57 text_t::iterator in = argstr.begin();
58 text_t::iterator out = in;
59 text_t::iterator end = argstr.end();
60
61 while (in != end)
62 {
63 if (*in == '+') *out = ' ';
64
65 else if (*in == '%')
66 {
67 unsigned short c = '%';
68 in++;
69 if (in != end)
70 {
71 c = hexdigit (*in);
72 in++;
73 }
74 if (in != end && c < 16) // sanity check on the previous character
75 {
76 c = c*16 + hexdigit (*in);
77 }
78
79 *out = c;
80 }
81 else *out = *in;
82
83 if (in != end) in++;
84 out++;
85 }
86
87 // remove the excess characters
88 argstr.erase (out, end);
89}
90
91
92// split up the cgi arguments
93void parse_cgi_args (text_t argstr, cgiargsclass &args)
94{
95 args.clear();
96
97 text_t::iterator here = argstr.begin();
98 text_t::iterator end = argstr.end();
99 text_t key, value;
100
101 // extract out the key=value pairs
102 while (here != end)
103 {
104 // get the next key and value pair
105 here = getdelimitstr (here, end, '=', key);
106 here = getdelimitstr (here, end, '&', value);
107
108 // convert %xx and + to their appropriate equivalents
109 decode (value);
110 value.setencoding(1); // other encoding
111 // store this key=value pair
112 if (!key.empty()) args.setarg (key, value);
113 }
114}
115
116text_t cgisafe (const text_t &intext)
117{
118 text_t outtext;
119
120 text_t::const_iterator here = intext.begin ();
121 text_t::const_iterator end = intext.end ();
122 unsigned short c;
123 text_t ttmp;
124
125 while (here != end)
126 {
127 c = *here;
128 if (((c >= 'a') && (c <= 'z')) ||
129 ((c >= 'A') && (c <= 'Z')) ||
130 ((c >= '0') && (c <= '9'))) {
131 // alphanumeric character
132 outtext.push_back(c);
133 } else {
134 // non-alphnumeric character
135 outtext.push_back('%');
136 c2hex(c, ttmp);
137 outtext += ttmp;
138 }
139
140 here++;
141 }
142
143 return outtext;
144}
145
146//////////////////////////////
147// methods for libinterface //
148//////////////////////////////
149
150// constructor
151
152libinterface::libinterface() {
153 browse = NULL;
154}
155
156void libinterface::setgsdlhome (const text_t &thegsdlhome) {
157 text_t thecollectdir, thegdbmdir, filename;
158
159 gsdlhome = thegsdlhome;
160
161 // search for etc/collect.cfg
162 thecollectdir = filename_cat (gsdlhome, "collect");
163 thecollectdir = filename_cat (thecollectdir, get_collection_name());
164 filename = filename_cat (thecollectdir, "etc");
165 filename = filename_cat (filename, "collect.cfg");
166
167 if (!file_exists(filename)) thecollectdir = gsdlhome;
168
169 thegdbmdir = filename_cat (thecollectdir, "index");
170 thegdbmdir = filename_cat (thegdbmdir, "text");
171
172 setcollectdir (thecollectdir);
173 setgdbmdir (thegdbmdir);
174}
175
176void libinterface::setcollectdir (const text_t &thecollectdir) {
177 collectdir = thecollectdir;
178
179 search.setcollectdir (collectdir);
180}
181
182void libinterface::setgdbmdir (const text_t &thegdbmdir) {
183 gdbmdir = thegdbmdir;
184}
185
186void libinterface::sethttpprefix (const text_t &thehttpprefix) {
187 httpprefix = thehttpprefix;
188}
189
190void libinterface::setgwcgi (const text_t &thegwcgi) {
191 gwcgi = thegwcgi;
192}
193
194
195int libinterface::cfg_read (const text_t &filename) {
196 text_t key;
197 text_tarray cfgline;
198 char *cstr = filename.getcstr();
199 ifstream confin (cstr);
200 delete cstr;
201
202 if (confin) {
203 while (read_cfg_line(confin, cfgline) >= 0) {
204 if (cfgline.size () >= 2) {
205 key = cfgline[0];
206 cfgline.erase(cfgline.begin());
207 if (key == "maintainer") cfg_info.maintainer = cfgline[0];
208 else if (key == "indexes") cfg_info.indexes = cfgline;
209 else if (key == "defaultindex") cfg_info.defaultindex = cfgline[0];
210 else if (key == "macrofiles") cfg_info.macrofiles = cfgline;
211 else if (key == "builddate") cfg_info.builddate = cfgline[0];
212 else if (key == "indexmap") cfg_info.indexmap = cfgline;
213 else if (key == "numbytes") cfg_info.numbytes = (double)cfgline[0].getint();
214 else if (key == "numdocs") cfg_info.numdocs = (double)cfgline[0].getint();
215 else if (key == "defaultencoding")
216 cfg_info.defaultencoding = cfgline[0];
217 }
218 }
219 confin.close ();
220 return 1;
221 }
222 return 0;
223}
224
225
226// init should be called after the various homes are set,
227// it returns 'false' on failure and 'true' on success
228bool libinterface::init (ostream &logout) {
229 text_t collection = get_collection_name();
230
231 // redirect the error output to logout
232 disp.setlogout (&logout);
233 gdbm.setlogout (&logout);
234
235 // open the gdbm file
236 text_t filename = filename_cat (gdbmdir, get_collection_name ());
237#ifdef _LITTLE_ENDIAN
238 filename += ".ldb"; // little endian version of the gdbm database
239#else
240 filename += ".bdb"; // big endian version on the gdbm database
241#endif
242 gdbm.opendatabase (filename);
243
244
245 // set default values for the configuration file
246 cfg_info.defaultencoding = "w";
247
248 // read in the configuration files etc/collect.cfg and index/build.cfg
249 // entries in build.cfg should override those in collect.cfg
250 filename = filename_cat (collectdir, "etc");
251 filename = filename_cat (filename, "collect.cfg");
252 cfg_read(filename);
253 filename = filename_cat (collectdir, "index");
254 filename = filename_cat (filename, "build.cfg");
255 cfg_read(filename);
256
257 // logout << logconvert << "defaultindex: " << cfg_info.defaultindex << "\n";
258
259 // set the default index
260 if (cfg_info.indexmap.empty()) {
261 // ?? no indexes built ??
262 logout << "warning: no indexes have been built\n";
263 default_index.clear();
264 } else if (cfg_info.defaultindex.empty() ||
265 !isrealindex (cfg_info.indexmap, cfg_info.defaultindex)) {
266 logout << "warning: the default index has been reset to the first index\n";
267 getrealdirindex (cfg_info.indexmap[0], cfg_info.defaultindex, default_index);
268 } else {
269 default_index = real2dirindex (cfg_info.indexmap, cfg_info.defaultindex);
270 }
271
272 // set the text default index (the default index to use when
273 // retrieving documents).
274 text_default_index = default_index;
275 if (!isdoclevelindex (cfg_info.defaultindex)) {
276 text_default_index = real2dirindex (cfg_info.indexmap,
277 getdoclevelindex (cfg_info.indexmap));
278 }
279
280 // load up the default macro files, the collection directory
281 // is searched first for the file and then the main directory
282 text_t colmacrodir = filename_cat (collectdir, "macros");
283 text_t gsdlmacrodir = filename_cat (gsdlhome, "macros");
284 text_tarray::iterator arrhere = cfg_info.macrofiles.begin();
285 text_tarray::iterator arrend = cfg_info.macrofiles.end();
286 while (arrhere != arrend) {
287 filename = filename_cat (colmacrodir, *arrhere);
288 if (!file_exists (filename)) {
289 filename = filename_cat (gsdlmacrodir, *arrhere);
290 }
291 disp.loaddefaultmacros(filename);
292 arrhere++;
293 }
294
295 srand(time(NULL));
296
297 utf8outconvert.set_rzws(1);
298 gboutconvert.set_rzws(1);
299
300 return collection_init(collection);
301}
302
303
304// examine the cgi arguments and create the appropriate page,
305// outputing the page to textout and any debug information to logout
306//
307// returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
308int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout) {
309 int err = LI_NOERROR;
310
311 // make the output go where they want!
312 gdbm.setlogout (&logout);
313 disp.setlogout (&logout);
314
315 cgiargsclass args;
316
317 parse_cgi_args (argstr, args);
318 expand_compressed_args (args);
319 add_default_args (args);
320 check_args (args);
321
322 // get the input encoding
323 text_t &arg_w = args["w"];
324 inconvertclass *inconvert = NULL;
325 if (arg_w == "8") {
326 inconvert = &utf8inconvert;
327 } else if (arg_w == "g") {
328 // The map files will only be loaded the first time they are
329 // needed. The loading is done here to reduce the memory load
330 // for collections which don't need to convert to GB.
331 gbinconvert.loadmapfile (gsdlhome, "gbku", 0x25a1);
332 inconvert = &gbinconvert;
333 } else {
334 inconvert = &asciiinconvert; // default
335 }
336
337 // see if the next page will have a different encoding
338 if (args.getarg("nw") != NULL) args["w"] = args["nw"];
339
340 // convert arguments which aren't in unicode to unicode
341 args_tounicode (args, *inconvert);
342
343 // remember the state of the compressed arguments
344 lastcomparg = get_compressed_args (args);
345 logout << args;
346
347 // get the output encoding
348 text_t &arg_nw = args["w"];
349 outconvertclass *outconvert = NULL;
350 if (arg_nw == "8") {
351 outconvert = &utf8outconvert;
352 } else if (arg_nw == "g") {
353 gboutconvert.loadmapfile (gsdlhome, "ugbk", 0xa1f5);
354 outconvert = &gboutconvert;
355 } else {
356 outconvert = &asciioutconvert; // default
357 }
358
359
360 // dispatch the request
361 text_t &arg_a = args["a"];
362 if (arg_a == "q") err = query_action (args, *outconvert, textout, logout);
363 else if (arg_a == "b") browse_action (args, *outconvert, textout, logout);
364 else if (arg_a == "t") document_action (args, *outconvert, textout, logout);
365 else if (arg_a == "p") page_action (args, *outconvert, textout, logout);
366 else if ((arg_a.size() == 2) && (arg_a[0] == 'a'))
367 auxiliary_action (args, *outconvert, textout, logout);
368 else
369 {
370 // output error page
371 }
372
373 return err;
374}
375
376// the arg config string is used to do processing on the arguments
377// entries take the form argname[defaultvalue]
378// if the argument name is preceeded by a "+" it means that the
379// value may be more than one character long
380// the main state variable missed on this list is "q" the query string
381void libinterface::get_arg_config (text_t &argconfigstr)
382{
383 argconfigstr =
384 "+a[p]" // action: q=query, b=browse, t=targetdoc, p=page, a1=auxiliary
385 "w[]" // encoding: w=western, 8=utf8, 7=utf7, g=GB2312, k=GBK
386 "t[1]" // query type: 0=boolean, 1=ranked
387 "+i[]" // index: collection dependant
388 "k[1]" // casefolding: 0=off, 1=on
389 "s[0]" // stemming: 0=off, 1=on
390 "+p[home]" // page
391 "+c[]" // collection (collection dependant)
392 "+r[1]" // results from
393 "+d[C.1]" // the target document
394 "+j[11]" // partial index: 11=all, 10=f&n, 01=other
395 "+m[100]" // maxdocs
396 "+o[20]" // hits per page
397 "v[0]" // version: 0=text+graphics, 1=text
398 "f[0]" // query box size: 0=normal, 1=big
399 "l[e]"; // language: e=english, m=maori
400
401}
402
403text_t::iterator libinterface::get_next_config_arg (text_t::iterator first,
404 text_t::iterator last,
405 text_t &argname,
406 text_t &defaultvalue,
407 bool &longarg)
408{
409 first = getdelimitstr (first, last, '[', argname);
410 first = getdelimitstr (first, last, ']', defaultvalue);
411 longarg = false;
412
413 if (!argname.empty() && (argname[0] == '+'))
414 {
415 argname.erase(argname.begin(), argname.begin()+1);
416 longarg = true;
417 }
418
419 return first;
420}
421
422text_t libinterface::get_compressed_args (cgiargsclass &args)
423{
424 text_t argconfigstr; get_arg_config (argconfigstr);
425 text_t arg_e;
426 text_t argname, defaultvalue;
427 text_t *argvalue;
428 bool longarg;
429
430 text_t::iterator here = argconfigstr.begin();
431 text_t::iterator end = argconfigstr.end();
432 while (here != end)
433 {
434 here = get_next_config_arg (here, end, argname, defaultvalue, longarg);
435
436 if (!argname.empty())
437 {
438 argvalue = args.getarg (argname);
439 if (argvalue == NULL) arg_e += defaultvalue;
440 else arg_e += *argvalue;
441
442 if (longarg) arg_e += "-";
443 }
444 }
445
446 return arg_e;
447}
448
449
450// the compressed options should never override explicit options
451// but they should always be expanded before add_default_args is
452// called
453void libinterface::expand_compressed_args (cgiargsclass &args)
454{
455 text_t *arg_e = args.getarg("e");
456
457 // see if there is compressed options
458 if (arg_e != NULL)
459 {
460 text_t argconfigstr; get_arg_config (argconfigstr);
461 text_t argname, defaultvalue, argvalue;
462 bool longarg;
463
464 text_t::iterator confighere = argconfigstr.begin();
465 text_t::iterator configend = argconfigstr.end();
466
467 text_t::iterator arghere = arg_e->begin();
468 text_t::iterator argend = arg_e->end();
469 while (confighere != configend && arghere != argend)
470 {
471 confighere = get_next_config_arg (confighere, configend, argname,
472 defaultvalue, longarg);
473 if (!argname.empty())
474 {
475 if (longarg)
476 {
477 arghere = getdelimitstr (arghere, argend, '-', argvalue);
478 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue);
479 }
480 else
481 {
482 args.setdefaultcarg (argname,*arghere);
483 arghere++;
484 }
485 }
486 }
487 }
488}
489
490
491void libinterface::add_default_args (cgiargsclass &args)
492{
493 text_t argconfigstr; get_arg_config (argconfigstr);
494 text_t argname, defaultvalue;
495 bool longarg;
496
497 text_t::iterator confighere = argconfigstr.begin();
498 text_t::iterator configend = argconfigstr.end();
499 while (confighere != configend)
500 {
501 confighere = get_next_config_arg (confighere, configend, argname,
502 defaultvalue, longarg);
503 if (!argname.empty()) args.setdefaultarg (argname, defaultvalue);
504 }
505
506 // the query string and format string are not included in the argument configuration string
507 args.setdefaultarg ("q", ""); // the default query string is ""
508 args.setdefaultarg ("g", "00");
509 args.setdefaultarg ("x", "0");
510}
511
512
513// check and attempt to fix an problems encountered in the list
514// of cgi arguments
515void libinterface::check_args (cgiargsclass &args)
516{
517 args.setarg("c", get_collection_name());
518 if (args["w"].empty()) args.setarg("w", cfg_info.defaultencoding);
519 if (args["i"].empty()) args.setarg("i", default_index);
520}
521
522void libinterface::args_tounicode (cgiargsclass &args,
523 inconvertclass &inconvert) {
524 utf8outconvertclass text_t2utf8;
525 cgiargsclass::iterator here = args.begin();
526 cgiargsclass::iterator end = args.end();
527
528 while (here != end) {
529 if (here->second.getencoding() > 0) {
530 here->second = inconvert.convert(here->second);
531 }
532
533 here++;
534 }
535}
536
537
538void libinterface::define_general_macros (cgiargsclass &args, outconvertclass &outconvert,
539 ostream &logout) {
540 disp.setmacro("httpprefix", "Global", httpprefix);
541 disp.setmacro("gwcgi", "Global", gwcgi);
542
543 disp.setmacro("numdocs", "Global", (int)cfg_info.numdocs);
544
545 disp.setmacro("collection", "Global", cgisafe(outconvert.convert(args["c"])));
546 disp.setmacro("compressedoptions", "Global", get_compressed_args(args));
547 disp.setmacro("urlsafequerystring", "Global", cgisafe(outconvert.convert(args["q"])));
548
549 // need to escape any special characters in querystring to prevent
550 // them upsetting the html
551 text_t querystring;
552 text_t::iterator here = args["q"].begin();
553 text_t::iterator end = args["q"].end();
554 while (here != end) {
555 if (*here == '"') querystring += "&quot;";
556 else if (*here == '&') querystring += "&amp;";
557 else if (*here == '<') querystring += "&lt;";
558 else if (*here == '>') querystring += "&gt;";
559 else querystring.push_back(*here);
560 here ++;
561 }
562 disp.setmacro("querystring", "Global", querystring);
563
564 if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1");
565 if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1");
566
567 int i = rand();
568 disp.setmacro("pagedest", "Global", text_t(i));
569
570 // define the macro for the "g" argument
571 disp.setmacro("g", "Global", args["g"]);
572
573 // set the selection macros
574
575 text_t indexselect;
576 text_t maprealindex, mapdirindex;
577 if (cfg_info.indexmap.size() == 1) {
578 getrealdirindex (cfg_info.indexmap[0], maprealindex, mapdirindex);
579 indexselect += "<input type=hidden name=\"i\" value=\"";
580 indexselect += mapdirindex;
581 indexselect += "\">_query:";
582 indexselect += real2macroindex (maprealindex);
583 indexselect += "_\n";
584
585 } else {
586 text_t &arg_i = args["i"];
587 text_tarray::const_iterator maphere = cfg_info.indexmap.begin();
588 text_tarray::const_iterator mapend = cfg_info.indexmap.end();
589
590 indexselect += "<select name=\"i\">\n";
591 while (maphere != mapend) {
592 getrealdirindex (*maphere, maprealindex, mapdirindex);
593 indexselect += "<option value=\"";
594 indexselect += mapdirindex;
595 indexselect += "\"";
596 if (arg_i == mapdirindex) indexselect += " selected";
597 indexselect += ">_query:";
598 indexselect += real2macroindex (maprealindex);
599 indexselect += "_\n";
600
601 maphere++;
602 }
603 indexselect += "</select>\n";
604 }
605
606 disp.setmacro("indexselection", "query", indexselect);
607
608 text_t qtselect;
609 text_t &arg_t = args["t"];
610
611 qtselect += "<select name=\"t\">\n";
612 qtselect += "<option value=\"1\"";
613 if (arg_t == "1") qtselect += " selected";
614 qtselect += ">_query:textsome_\n";
615 qtselect += "<option value=\"0\"";
616 if (arg_t == "0") qtselect += " selected";
617 qtselect += ">_query:textall_\n";
618 qtselect += "</select>\n";
619
620 disp.setmacro("querytypeselection", "query", qtselect);
621}
622
623
624// prepare_page prepares to write out a page using the current
625// page parameters and defines any general macros
626void libinterface::prepare_page (cgiargsclass &args, outconvertclass &outconvert,
627 ostream &logout) {
628 // get page parameters
629 text_t pageparams = text_t("collection=") + args["c"];
630 if (args.getintarg("u") == 1) pageparams += ",style=htmlonly";
631 if (args.getintarg("v") == 1) pageparams += ",version=text";
632 if (args.getintarg("f") == 1) pageparams += ",queryversion=big";
633 if (args["l"] != 'e') pageparams += ",language=" + args["l"];
634
635 disp.openpage(pageparams, MACROPRECEDENCE);
636 define_general_macros(args, outconvert, logout);
637 define_collection_macros(args, logout);
638}
639
640void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams)
641{
642 queryparams.collection = args["c"];
643 // assemble_index (args, queryparams.search_index);
644 queryparams.search_index = args["i"];
645 queryparams.querystring = args["q"];
646 format_querystring (queryparams.querystring);
647 queryparams.search_type = args.getintarg ("t");
648 queryparams.casefolding = args.getintarg ("k");
649 queryparams.stemming = args.getintarg ("s");
650 queryparams.maxdocs = args.getintarg ("m");
651}
652
653void libinterface::format_querystring (text_t &querystring)
654{
655 text_t formattedstring;
656 quotedstring.clear();
657
658 text_t::iterator here = querystring.begin();
659 text_t::iterator end = querystring.end();
660 int foundquote = 0;
661
662 // want to remove ()|!& from querystring so boolean queries are just
663 // "all the words" queries
664 while (here != end) {
665 if (*here == '(' || *here == ')' || *here == '|' ||
666 *here == '!' || *here == '&') {
667 formattedstring += " ";
668 } else {
669 if (*here == '"') {
670 if (foundquote) {foundquote = 0; quotedstring.push_back(*here);}
671 else foundquote = 1;
672 } else {
673 formattedstring.push_back(*here);
674 }
675 if (foundquote) quotedstring.push_back(*here);
676 }
677 here ++;
678 }
679 querystring = formattedstring + quotedstring;
680}
681
682void libinterface::define_query_macros (cgiargsclass &args,
683 queryparamclass &queryparams,
684 queryresultsclass &queryresults,
685 ostream &logout)
686{
687 int numdocs = queryresults.getnumdocs();
688 int numterms = queryresults.getnumterms();
689 disp.setmacro("querysize", "query", args["f"]);
690 disp.setmacro("haveresults", "query", numdocs);
691
692 // set the display frequency macro
693 text_t freqmsg = "_textfm1_";
694
695 int first = 1;
696 for (int i = 0; i < numterms; i++) {
697 if (first == 0) freqmsg += "; ";
698 first = 0;
699 freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq;
700 }
701 if (!quotedstring.empty()) freqmsg += "<br><i>post-processed to find " + quotedstring + "</i>\n";
702 disp.setmacro("freqmsg", "query", freqmsg);
703
704 // set the result line macro
705
706 text_t resline;
707
708 if (numdocs >= queryparams.maxdocs)
709 resline.setcstr("_textmt2_");
710
711 if (numdocs == 0) {
712 resline.setcstr("_textndmtq_");
713 } else if (numdocs == 1) {
714 resline += text_t(numdocs) + text_t(" _textdmtq2_.");
715 } else {
716 resline += text_t(numdocs) + text_t(" _textdmtq3_.");
717 }
718
719 disp.setmacro("resultline", "query", resline);
720
721 // define_collection_macros (args, logout);
722
723 if (queryresults.getnumdocs() > 0) {
724 docLinks(args, queryresults, logout);
725 }
726}
727
728
729// set the _links_ macro to create the links between pages of query results
730void libinterface::docLinks (cgiargsclass &args,
731 queryresultsclass &queryresults,
732 ostream &logout)
733{
734 text_t links;
735 int a, b, documents, nextfirst, nextlast, prevfirst, prevlast;
736 int results_from = args.getintarg("r");
737 int hitsperpage = args.getintarg("o");
738
739 documents = queryresults.getnumdocs();
740
741 a = results_from;
742 b = a + (hitsperpage - 1);
743
744 // make sure a and b are in range
745 if (a < 1) a = 1;
746 if (b < 1) b = 1;
747 if (a > documents) a = documents;
748 if (b > documents) b = documents;
749
750 links.setcstr("<table cellspacing=0 cellpadding=0 border=0 width=\"100%\">\n");
751 links += "<tr valign=bottom>\n";
752 links += "<td align=left>\n";
753 links += "<_font_>\n";
754
755 // previous page link
756 if (a > 1) {
757 prevlast = a - 1;
758 prevfirst = a - hitsperpage;
759
760 links += "<p><a href=\"_httpqueryresults_&r=";
761 links += prevfirst;
762 links += "\">_iconprev__textmatches_ ";
763 links += prevfirst;
764 links += " - ";
765 links += prevlast;
766 links += "</a>\n";
767 }
768
769 links += "</font>\n";
770 links += "</td><td width=100></td><td align=right>\n";
771 links += "<_font_>\n";
772
773 // next page link
774 if (b < documents) {
775 nextfirst = b + 1;
776 nextlast = b + hitsperpage;
777 if (nextlast > documents) nextlast = documents;
778
779 links += "<p><a href=\"_httpqueryresults_&r=";
780 links += nextfirst;
781 links += "\">_textmatches_ ";
782 links += nextfirst;
783 links += " - ";
784 links += nextlast ;
785 links += "_iconnext_</a>\n";
786 }
787
788 links += "</font>\n";
789 links += "</td></tr></table>\n";
790
791 disp.setmacro("links", "query", links);
792}
793
794
795// returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
796int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams,
797 queryresultsclass &queryresults, ostream &logout)
798{
799 set_query_params(args, queryparams);
800
801 if (!queryparams.querystring.empty()) {
802 // do the query - the results are returned in queryresults
803 if (!search.search(queryparams, queryresults)) {
804 logout << "ERROR: database didn't load\n";
805 return LI_LOADDATABASEFAILED;
806 }
807 }
808 return LI_NOERROR;
809}
810
811////////////////////////////////////////////////////////////////////////////////////////
812// query_action is called whenever a search is to be carried out (i.e. when the
813// 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to
814// carry out the search then displays the first page of results.
815//
816// returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
817int libinterface::query_action (cgiargsclass &args, outconvertclass &outconvert,
818 ostream &textout, ostream &logout) {
819 int err = LI_NOERROR;
820
821 queryparamclass queryparams;
822 queryresultsclass queryresults;
823
824 err = do_query(args, queryparams, queryresults, logout);
825
826 // prepare to print out the page
827 prepare_page(args, outconvert, logout);
828 define_query_macros(args, queryparams, queryresults, logout);
829
830 // print out the query page
831 textout << outconvert << disp << "_query:header_\n";
832
833 // output query results if there is a query string -
834 // otherwise output help text
835 if (!queryparams.querystring.empty())
836 {
837 displayresults (args, outconvert, textout, logout, queryresults);
838 }
839 else
840 {
841 textout << outconvert << disp << "_query:noqueryheader_\n";
842 }
843
844 textout << outconvert << disp << "_query:footer_\n";
845
846 return err;
847}
848
849void libinterface::displayresults (cgiargsclass &args, outconvertclass &outconvert,
850 ostream &textout, ostream &logout,
851 queryresultsclass &queryresults) {
852 textout << outconvert << disp << "_query:queryheader_";
853
854 int startresults = args.getintarg("r") - 1;
855 int numresults = args.getintarg("o");
856
857 textout << outconvert << "<table cellspacing=4>\n";
858 for (int i=startresults; i < startresults+numresults; i++) {
859 displaydocsummary (args, outconvert, textout, logout, queryresults, i);
860 }
861 textout << outconvert << "</table>\n\n";
862
863 textout << outconvert << disp << "_query:queryfooter_";
864}
865
866
867///////////////////////////////////////////////////////////////////////////////////////////////
868// browse_action writes out the browse pages (i.e. the top level hierarchy pages)
869void libinterface::browse_action (cgiargsclass &args, outconvertclass &outconvert,
870 ostream &textout, ostream &logout) {
871 text_t browse_bar, locator, output;
872 gdbm_info info;
873
874 prepare_page(args, outconvert, logout);
875
876 // get browse bar unless page has been detached
877 if (args.getintarg("x") == 0) {
878 browse->get_browse_bar(args["d"], browse_bar);
879 }
880
881 // get top locator
882 browse->get_top_locator(args, gdbm, 0, locator);
883
884 // expand and output page
885 // Note: we need to expand these out using package 'browse'
886 // so we can't use the tricky '<<' syntax
887 disp.expandstring("browse", "_header_", output);
888 textout << outconvert << output;
889 disp.expandstring("browse", browse_bar, output);
890 textout << outconvert << output;
891 disp.expandstring("browse", locator, output);
892 textout << outconvert << output;
893 disp.expandstring("browse", "_footer_", output);
894 textout << outconvert << output;
895}
896
897
898
899/////////////////////////////////////////////////////////////////////////////////////////
900// document_action is called to retrieve and display collection documents
901// It calls the mgsearch function docTargetDocument() to retrieve
902// a document.
903
904void libinterface::document_action (cgiargsclass &args, outconvertclass &outconvert,
905 ostream &textout, ostream &logout) {
906
907 text_t locator, content, links, output;
908 gdbm_info info;
909 queryparamclass queryparams;
910 queryresultsclass queryresults;
911 int oversize = 0;
912
913 // have to redo the query to get queryterms for highlight text
914 do_query(args, queryparams, queryresults, logout);
915
916 prepare_page(args, outconvert, logout);
917
918 if (args["g"][1] == '0') {
919 // get docnum from gdbm
920 text_t docref;
921 if (args["d"][0] != 'B') get_book(args["d"], docref);
922 else docref = args["d"];
923 if (gdbm.getinfo(docref, info) != 0) {
924 logout << logconvert << "info_db wasn't opened - " << docref << "\n";
925 return;
926 }
927
928 // get document text if there is any
929 if (info.contents.empty()) {
930 search.docTargetDocument(text_default_index, queryparams.collection,
931 info.docnum, content);
932 if (info.title != "<i>(introductory text)</i>")
933 content = "<h3>" + info.title + "</h3>\n" + content;
934 }
935 }
936
937 if (args["g"][1] == '1') {
938 // want to get expanded out text
939 vector<text_t> contents_arr;
940 text_t booksection;
941 int levelcount;
942
943 get_book (args["d"], booksection);
944 levelcount = count_dots(booksection);
945
946 browse->get_contents_arr(args, gdbm, contents_arr);
947
948
949 // get text for each section of book
950 vector<text_t>::const_iterator thiscontent = contents_arr.begin();
951 vector<text_t>::const_iterator end = contents_arr.end();
952
953 int first = 1;
954 int count = 1;
955 while (thiscontent != end) {
956 text_t text;
957
958 // get docnum from gdbm
959 if (gdbm.getinfo(*thiscontent, info) != 0) {
960 logout << logconvert << "info_db wasn't opened - " << args["d"] << "\n";
961 return;
962 }
963
964 // if section has text get it, otherwise output section title
965 if (info.contents.empty()) {
966
967 // output <a name= > tags for all text sections currently displayed in toc (all text sections
968 // if contents are expanded
969 if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') {
970 content += "<a name=\"";
971 content += count;
972 content += "\"></a>\n";
973 count ++;
974 }
975
976 search.docTargetDocument(text_default_index, queryparams.collection,
977 info.docnum, text);
978 if (info.title != "<i>(introductory text)</i>")
979 content += "<h3>" + info.title + "</h3>\n";
980 // content += text + "<hr><br>\n";
981 content += text + "<p>\n"; // no longer want <hr> between sections
982 } else {
983 content += "<h3>" + info.title + "</h3>\n";
984 }
985
986 if (args["n"] == 1) {
987 if (first) {
988 browse->get_top_locator(args, gdbm, 0, locator);
989 disp.expandstring("text", "_header_", output);
990 textout << outconvert << output;
991 disp.expandstring("text", locator, output);
992 textout << outconvert << output;
993 }
994
995 disp.expandstring("text", content, output);
996 if (!queryparams.querystring.empty())
997 highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
998 else
999 textout << outconvert << output;
1000 first = 0;
1001 content.clear();
1002 }
1003
1004 thiscontent ++;
1005 if (content.size() > 200000 && args["n"] != 1) {
1006 content.clear();
1007 oversize = 1;
1008 args["g"][1] = '0';
1009 break;
1010 }
1011 }
1012
1013 if (args["g"][1] == '0') {
1014 // get docnum from gdbm
1015 text_t docref;
1016 if (args["d"][0] != 'B') get_book(args["d"], docref);
1017 else docref = args["d"];
1018 if (gdbm.getinfo(docref, info) != 0) {
1019 logout << logconvert << "info_db wasn't opened - " << docref << "\n";
1020 return;
1021 }
1022
1023 // get document text if there is any
1024 if (info.contents.empty()) {
1025 search.docTargetDocument(text_default_index, queryparams.collection,
1026 info.docnum, content);
1027 if (info.title != "<i>(introductory text)</i>")
1028 content = "<h3>" + info.title + "</h3>\n" + content;
1029 }
1030 }
1031 }
1032
1033 if (args["n"] != 1) {
1034 // get top locator
1035 browse->get_top_locator(args, gdbm, oversize, locator);
1036
1037 // expand and output page
1038 // Note: we need to expand these out using package 'text'
1039 // so we can't use the tricky '<<' syntax
1040 disp.expandstring("text", "_header_", output);
1041 textout << outconvert << output;
1042
1043 disp.expandstring("text", locator, output);
1044 textout << outconvert << output;
1045
1046 disp.expandstring("text", content, output);
1047 if (!queryparams.querystring.empty())
1048 highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
1049 else
1050 textout << outconvert << output;
1051 }
1052
1053 // get links to next and previous sections unless in expand text mode
1054 if (args["g"][1] == '0') {
1055 browse->get_links(args, gdbm, links);
1056 disp.expandstring("text", links, output);
1057 textout << outconvert << output;
1058 }
1059
1060 disp.expandstring("text", "_footer_", output);
1061 textout << outconvert << output;
1062}
1063
1064
1065/////////////////////////////////////////////////////////////////////////////////////////
1066// auxiliary_action is called to retrieve and display collection documents
1067// in formats other than those handled by document_action (i.e. those other
1068// than text). This should be overridden for collections needing to return
1069// images, postscript etc. You can have as many auxiliary actions as needed
1070// by setting arg_a to a1, a2, a3 etc. and testing arg["a"][1] within the
1071// auxiliary_action function.
1072// auxiliary_action defaults to calling document_action
1073
1074void libinterface::auxiliary_action (cgiargsclass &args, outconvertclass &outconvert,
1075 ostream &textout, ostream &logout) {
1076 document_action (args, outconvert, textout, logout);
1077}
1078
1079/////////////////////////////////////////////////////////////////////////////////////////
1080// page is called when a standard html page is to be displayed
1081void libinterface::page_action (cgiargsclass &args, outconvertclass &outconvert,
1082 ostream &textout, ostream &logout) {
1083
1084 text_t &arg_p = args["p"];
1085
1086 prepare_page(args, outconvert, logout);
1087
1088 if (arg_p == "preferences")
1089 define_pref_macros(args, logout);
1090
1091 textout << outconvert << disp << ("_" + arg_p + ":header_\n")
1092 << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n")
1093 << ("_" + arg_p + ":footer_\n");
1094}
1095
1096// highlighttext highlights query terms in text string and outputs the resulting text string
1097void libinterface::highlighttext(text_tarray &termvars, outconvertclass &outconvert,
1098 ostream &textout, ostream &logout, text_t &text) {
1099 map<text_t, int, lttext_t> terms;
1100 map<text_t, int, lttext_t>::const_iterator it;
1101 for (unsigned int i = 0; i < termvars.size(); i++) {
1102 terms[termvars[i]] = 1;
1103 }
1104
1105 text_t::iterator here = text.begin();
1106 text_t::iterator end = text.end();
1107 text_t word, buffer;
1108 while (here != end) {
1109 if (is_unicode_letdig(*here)) {
1110 // not word boundary
1111 word.push_back(*here);
1112 here++;
1113
1114 } else {
1115 // found word boundary
1116 // add last word if there was one
1117 if (!word.empty()) {
1118 it = terms.find(word);
1119 if (it != terms.end()) {
1120 word = "<b><u>" + word + "</u></b>";
1121 }
1122 buffer += word;
1123 word.clear();
1124 }
1125
1126 if (*here == '<') {
1127 // skip over rest of html tag
1128 while ((here != end) && (*here != '>')) {
1129 buffer.push_back(*here);
1130 here++;
1131 }
1132 }
1133
1134 buffer.push_back(*here);
1135 here++;
1136
1137 if (buffer.size() > 1024) {
1138 textout << outconvert << buffer;
1139 buffer.clear();
1140 }
1141 }
1142 }
1143 textout << outconvert << buffer;
1144}
1145
1146void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout)
1147{
1148 // the caseoption macro
1149 text_t caseoption;
1150 int arg_k = args.getintarg("k");
1151
1152 caseoption += "\n<input type=radio name=k value=1";
1153 if (arg_k) caseoption += " checked";
1154 caseoption += "> ignore case differences<br>\n";
1155 caseoption += "<input type=radio name=k value=0";
1156 if (!arg_k) caseoption += " checked";
1157 caseoption += "> upper/lower case must match\n";
1158
1159 disp.setmacro ("caseoption", "preferences", caseoption);
1160
1161 // the stemoption macro
1162 text_t stemoption;
1163 int arg_s = args.getintarg("s");
1164
1165 stemoption += "\n<input type=radio name=s value=1";
1166 if (arg_s) stemoption += " checked";
1167 stemoption += "> ignore word endings<br>\n";
1168 stemoption += "<input type=radio name=s value=0";
1169 if (!arg_s) stemoption += " checked";
1170 stemoption += "> whole word must match\n";
1171
1172 disp.setmacro ("stemoption", "preferences", stemoption);
1173
1174
1175 // the encodingoption
1176 text_t encodingoption;
1177 const text_t &arg_w = args["w"];
1178
1179 encodingoption += "\n<select name=\"nw\">\n";
1180 encodingoption += " <option value=\"w\"";
1181 if (arg_w == "w") encodingoption += " selected";
1182 encodingoption += ">Western (ISO-8859-1)\n";
1183 encodingoption += " <option value=\"g\"";
1184 if (arg_w == "g") encodingoption += " selected";
1185 encodingoption += ">Simplified Chinese (GB2312)\n";
1186 encodingoption += " <option value=\"8\"";
1187 if (arg_w == "8") encodingoption += " selected";
1188 encodingoption += ">Unicode (UTF-8)\n";
1189 encodingoption += "</select>\n";
1190
1191 disp.setmacro ("encodingoption", "preferences", encodingoption);
1192
1193 // the maxdocoption
1194 text_t maxdocoption;
1195 int arg_m = args.getintarg("m");
1196
1197 maxdocoption += "\n<select name=m>\n";
1198 maxdocoption += " <option value=\"50\"";
1199 if (arg_m < 100) maxdocoption += " selected";
1200 maxdocoption += ">50\n";
1201 maxdocoption += " <option value=\"100\"";
1202 if (arg_m >= 100 && arg_m < 200) maxdocoption += " selected";
1203 maxdocoption += ">100\n";
1204 maxdocoption += " <option value=\"200\"";
1205 if (arg_m >= 200 && arg_m < 500) maxdocoption += " selected";
1206 maxdocoption += ">200\n";
1207 maxdocoption += " <option value=\"500\"";
1208 if (arg_m >= 500) maxdocoption += " selected";
1209 maxdocoption += ">500\n";
1210 maxdocoption += "</select>\n";
1211
1212 disp.setmacro ("maxdocoption", "preferences", maxdocoption);
1213
1214 // the hitsperpageoption
1215 text_t hitsoption;
1216 int arg_o = args.getintarg("o");
1217
1218 hitsoption += "\n<select name=o>\n";
1219 hitsoption += " <option value=\"10\"";
1220 if (arg_o < 20) hitsoption += " selected";
1221 hitsoption += ">10\n";
1222 hitsoption += " <option value=\"20\"";
1223 if (arg_o >= 20 && arg_o < 50) hitsoption += " selected";
1224 hitsoption += ">20\n";
1225 hitsoption += " <option value=\"50\"";
1226 if (arg_o >= 50 && arg_o < 100) hitsoption += " selected";
1227 hitsoption += ">50\n";
1228 hitsoption += " <option value=\"100\"";
1229 if (arg_o >= 100 && arg_o < 500) hitsoption += " selected";
1230 hitsoption += ">100\n";
1231 hitsoption += " <option value=\"500\"";
1232 if (arg_o >= 500) hitsoption += " selected";
1233 hitsoption += ">all\n";
1234 hitsoption += " </select>\n";
1235
1236 disp.setmacro ("hitsperpageoption", "preferences", hitsoption);
1237}
Note: See TracBrowser for help on using the repository browser.