1 | #include <string.h>
|
---|
2 | #include <stdio.h>
|
---|
3 | #include <stdlib.h>
|
---|
4 | #include <ctype.h>
|
---|
5 | #include <time.h>
|
---|
6 | #include "libinterface.h"
|
---|
7 | #include "cgiargs.h"
|
---|
8 | #include "fileutil.h"
|
---|
9 | #include "cfgread.h"
|
---|
10 | #include "gsdlunicode.h"
|
---|
11 | #include "unitool.h"
|
---|
12 |
|
---|
13 | #include <assert.h>
|
---|
14 |
|
---|
15 |
|
---|
16 |
|
---|
17 |
|
---|
18 | ///////////////////////
|
---|
19 | // support functions //
|
---|
20 | ///////////////////////
|
---|
21 |
|
---|
// Map a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other input is handed back unchanged.
unsigned short hexdigit (unsigned short c)
{
  unsigned short value = c;   // anything that is not a hex digit passes through

  if ('0' <= c && c <= '9')
    value = static_cast<unsigned short>(c - '0');
  else if ('a' <= c && c <= 'f')
    value = static_cast<unsigned short>(10 + (c - 'a'));
  else if ('A' <= c && c <= 'F')
    value = static_cast<unsigned short>(10 + (c - 'A'));

  return value;
}
|
---|
29 |
|
---|
30 |
|
---|
31 | void c2hex (unsigned short c, text_t &t)
|
---|
32 | {
|
---|
33 | t.clear();
|
---|
34 |
|
---|
35 | if (c >= 256)
|
---|
36 | {
|
---|
37 | t = "20"; // ' '
|
---|
38 | return;
|
---|
39 | }
|
---|
40 |
|
---|
41 | unsigned short o1, o2;
|
---|
42 |
|
---|
43 | o1 = (c/16) % 16;
|
---|
44 | o2 = c % 16;
|
---|
45 | if (o1 >= 10) o1 += 'a' - 10;
|
---|
46 | else o1 += '0';
|
---|
47 | if (o2 >= 10) o2 += 'a' - 10;
|
---|
48 | else o2 += '0';
|
---|
49 |
|
---|
50 | t.push_back(o1);
|
---|
51 | t.push_back(o2);
|
---|
52 | }
|
---|
53 |
|
---|
54 | // convert %xx and + to their appropriate equivalents
|
---|
55 | void decode (text_t &argstr)
|
---|
56 | {
|
---|
57 | text_t::iterator in = argstr.begin();
|
---|
58 | text_t::iterator out = in;
|
---|
59 | text_t::iterator end = argstr.end();
|
---|
60 |
|
---|
61 | while (in != end)
|
---|
62 | {
|
---|
63 | if (*in == '+') *out = ' ';
|
---|
64 |
|
---|
65 | else if (*in == '%')
|
---|
66 | {
|
---|
67 | unsigned short c = '%';
|
---|
68 | in++;
|
---|
69 | if (in != end)
|
---|
70 | {
|
---|
71 | c = hexdigit (*in);
|
---|
72 | in++;
|
---|
73 | }
|
---|
74 | if (in != end && c < 16) // sanity check on the previous character
|
---|
75 | {
|
---|
76 | c = c*16 + hexdigit (*in);
|
---|
77 | }
|
---|
78 |
|
---|
79 | *out = c;
|
---|
80 | }
|
---|
81 | else *out = *in;
|
---|
82 |
|
---|
83 | if (in != end) in++;
|
---|
84 | out++;
|
---|
85 | }
|
---|
86 |
|
---|
87 | // remove the excess characters
|
---|
88 | argstr.erase (out, end);
|
---|
89 | }
|
---|
90 |
|
---|
91 |
|
---|
92 | // split up the cgi arguments
|
---|
93 | void parse_cgi_args (text_t argstr, cgiargsclass &args)
|
---|
94 | {
|
---|
95 | args.clear();
|
---|
96 |
|
---|
97 | text_t::iterator here = argstr.begin();
|
---|
98 | text_t::iterator end = argstr.end();
|
---|
99 | text_t key, value;
|
---|
100 |
|
---|
101 | // extract out the key=value pairs
|
---|
102 | while (here != end)
|
---|
103 | {
|
---|
104 | // get the next key and value pair
|
---|
105 | here = getdelimitstr (here, end, '=', key);
|
---|
106 | here = getdelimitstr (here, end, '&', value);
|
---|
107 |
|
---|
108 | // convert %xx and + to their appropriate equivalents
|
---|
109 | decode (value);
|
---|
110 | value.setencoding(1); // other encoding
|
---|
111 | // store this key=value pair
|
---|
112 | if (!key.empty()) args.setarg (key, value);
|
---|
113 | }
|
---|
114 | }
|
---|
115 |
|
---|
116 | text_t cgisafe (const text_t &intext)
|
---|
117 | {
|
---|
118 | text_t outtext;
|
---|
119 |
|
---|
120 | text_t::const_iterator here = intext.begin ();
|
---|
121 | text_t::const_iterator end = intext.end ();
|
---|
122 | unsigned short c;
|
---|
123 | text_t ttmp;
|
---|
124 |
|
---|
125 | while (here != end)
|
---|
126 | {
|
---|
127 | c = *here;
|
---|
128 | if (((c >= 'a') && (c <= 'z')) ||
|
---|
129 | ((c >= 'A') && (c <= 'Z')) ||
|
---|
130 | ((c >= '0') && (c <= '9'))) {
|
---|
131 | // alphanumeric character
|
---|
132 | outtext.push_back(c);
|
---|
133 | } else {
|
---|
134 | // non-alphnumeric character
|
---|
135 | outtext.push_back('%');
|
---|
136 | c2hex(c, ttmp);
|
---|
137 | outtext += ttmp;
|
---|
138 | }
|
---|
139 |
|
---|
140 | here++;
|
---|
141 | }
|
---|
142 |
|
---|
143 | return outtext;
|
---|
144 | }
|
---|
145 |
|
---|
146 | //////////////////////////////
|
---|
147 | // methods for libinterface //
|
---|
148 | //////////////////////////////
|
---|
149 |
|
---|
150 | // constructor
|
---|
151 |
|
---|
152 | libinterface::libinterface() {
|
---|
153 | browse = NULL;
|
---|
154 | }
|
---|
155 |
|
---|
156 | void libinterface::setgsdlhome (const text_t &thegsdlhome) {
|
---|
157 | text_t thecollectdir, thegdbmdir, filename;
|
---|
158 |
|
---|
159 | gsdlhome = thegsdlhome;
|
---|
160 |
|
---|
161 | // search for etc/collect.cfg
|
---|
162 | thecollectdir = filename_cat (gsdlhome, "collect");
|
---|
163 | thecollectdir = filename_cat (thecollectdir, get_collection_name());
|
---|
164 | filename = filename_cat (thecollectdir, "etc");
|
---|
165 | filename = filename_cat (filename, "collect.cfg");
|
---|
166 |
|
---|
167 | if (!file_exists(filename)) thecollectdir = gsdlhome;
|
---|
168 |
|
---|
169 | thegdbmdir = filename_cat (thecollectdir, "index");
|
---|
170 | thegdbmdir = filename_cat (thegdbmdir, "text");
|
---|
171 |
|
---|
172 | setcollectdir (thecollectdir);
|
---|
173 | setgdbmdir (thegdbmdir);
|
---|
174 | }
|
---|
175 |
|
---|
176 | void libinterface::setcollectdir (const text_t &thecollectdir) {
|
---|
177 | collectdir = thecollectdir;
|
---|
178 |
|
---|
179 | search.setcollectdir (collectdir);
|
---|
180 | }
|
---|
181 |
|
---|
182 | void libinterface::setgdbmdir (const text_t &thegdbmdir) {
|
---|
183 | gdbmdir = thegdbmdir;
|
---|
184 | }
|
---|
185 |
|
---|
186 | void libinterface::sethttpprefix (const text_t &thehttpprefix) {
|
---|
187 | httpprefix = thehttpprefix;
|
---|
188 | }
|
---|
189 |
|
---|
190 | void libinterface::setgwcgi (const text_t &thegwcgi) {
|
---|
191 | gwcgi = thegwcgi;
|
---|
192 | }
|
---|
193 |
|
---|
194 |
|
---|
195 | int libinterface::cfg_read (const text_t &filename) {
|
---|
196 | text_t key;
|
---|
197 | text_tarray cfgline;
|
---|
198 | char *cstr = filename.getcstr();
|
---|
199 | ifstream confin (cstr);
|
---|
200 | delete cstr;
|
---|
201 |
|
---|
202 | if (confin) {
|
---|
203 | while (read_cfg_line(confin, cfgline) >= 0) {
|
---|
204 | if (cfgline.size () >= 2) {
|
---|
205 | key = cfgline[0];
|
---|
206 | cfgline.erase(cfgline.begin());
|
---|
207 | if (key == "maintainer") cfg_info.maintainer = cfgline[0];
|
---|
208 | else if (key == "indexes") cfg_info.indexes = cfgline;
|
---|
209 | else if (key == "defaultindex") cfg_info.defaultindex = cfgline[0];
|
---|
210 | else if (key == "macrofiles") cfg_info.macrofiles = cfgline;
|
---|
211 | else if (key == "builddate") cfg_info.builddate = cfgline[0];
|
---|
212 | else if (key == "indexmap") cfg_info.indexmap = cfgline;
|
---|
213 | else if (key == "numbytes") cfg_info.numbytes = (double)cfgline[0].getint();
|
---|
214 | else if (key == "numdocs") cfg_info.numdocs = (double)cfgline[0].getint();
|
---|
215 | else if (key == "defaultencoding")
|
---|
216 | cfg_info.defaultencoding = cfgline[0];
|
---|
217 | }
|
---|
218 | }
|
---|
219 | confin.close ();
|
---|
220 | return 1;
|
---|
221 | }
|
---|
222 | return 0;
|
---|
223 | }
|
---|
224 |
|
---|
225 |
|
---|
226 | // init should be called after the various homes are set,
|
---|
227 | // it returns 'false' on failure and 'true' on success
|
---|
228 | bool libinterface::init (ostream &logout) {
|
---|
229 | text_t collection = get_collection_name();
|
---|
230 |
|
---|
231 | // redirect the error output to logout
|
---|
232 | disp.setlogout (&logout);
|
---|
233 | gdbm.setlogout (&logout);
|
---|
234 |
|
---|
235 | // open the gdbm file
|
---|
236 | text_t filename = filename_cat (gdbmdir, get_collection_name ());
|
---|
237 | #ifdef _LITTLE_ENDIAN
|
---|
238 | filename += ".ldb"; // little endian version of the gdbm database
|
---|
239 | #else
|
---|
240 | filename += ".bdb"; // big endian version on the gdbm database
|
---|
241 | #endif
|
---|
242 | gdbm.opendatabase (filename);
|
---|
243 |
|
---|
244 |
|
---|
245 | // set default values for the configuration file
|
---|
246 | cfg_info.defaultencoding = "w";
|
---|
247 |
|
---|
248 | // read in the configuration files etc/collect.cfg and index/build.cfg
|
---|
249 | // entries in build.cfg should override those in collect.cfg
|
---|
250 | filename = filename_cat (collectdir, "etc");
|
---|
251 | filename = filename_cat (filename, "collect.cfg");
|
---|
252 | cfg_read(filename);
|
---|
253 | filename = filename_cat (collectdir, "index");
|
---|
254 | filename = filename_cat (filename, "build.cfg");
|
---|
255 | cfg_read(filename);
|
---|
256 |
|
---|
257 | // logout << logconvert << "defaultindex: " << cfg_info.defaultindex << "\n";
|
---|
258 |
|
---|
259 | // set the default index
|
---|
260 | if (cfg_info.indexmap.empty()) {
|
---|
261 | // ?? no indexes built ??
|
---|
262 | logout << "warning: no indexes have been built\n";
|
---|
263 | default_index.clear();
|
---|
264 | } else if (cfg_info.defaultindex.empty() ||
|
---|
265 | !isrealindex (cfg_info.indexmap, cfg_info.defaultindex)) {
|
---|
266 | logout << "warning: the default index has been reset to the first index\n";
|
---|
267 | getrealdirindex (cfg_info.indexmap[0], cfg_info.defaultindex, default_index);
|
---|
268 | } else {
|
---|
269 | default_index = real2dirindex (cfg_info.indexmap, cfg_info.defaultindex);
|
---|
270 | }
|
---|
271 |
|
---|
272 | // set the text default index (the default index to use when
|
---|
273 | // retrieving documents).
|
---|
274 | text_default_index = default_index;
|
---|
275 | if (!isdoclevelindex (cfg_info.defaultindex)) {
|
---|
276 | text_default_index = real2dirindex (cfg_info.indexmap,
|
---|
277 | getdoclevelindex (cfg_info.indexmap));
|
---|
278 | }
|
---|
279 |
|
---|
280 | // load up the default macro files, the collection directory
|
---|
281 | // is searched first for the file and then the main directory
|
---|
282 | text_t colmacrodir = filename_cat (collectdir, "macros");
|
---|
283 | text_t gsdlmacrodir = filename_cat (gsdlhome, "macros");
|
---|
284 | text_tarray::iterator arrhere = cfg_info.macrofiles.begin();
|
---|
285 | text_tarray::iterator arrend = cfg_info.macrofiles.end();
|
---|
286 | while (arrhere != arrend) {
|
---|
287 | filename = filename_cat (colmacrodir, *arrhere);
|
---|
288 | if (!file_exists (filename)) {
|
---|
289 | filename = filename_cat (gsdlmacrodir, *arrhere);
|
---|
290 | }
|
---|
291 | disp.loaddefaultmacros(filename);
|
---|
292 | arrhere++;
|
---|
293 | }
|
---|
294 |
|
---|
295 | srand(time(NULL));
|
---|
296 |
|
---|
297 | utf8outconvert.set_rzws(1);
|
---|
298 | gboutconvert.set_rzws(1);
|
---|
299 |
|
---|
300 | return collection_init(collection);
|
---|
301 | }
|
---|
302 |
|
---|
303 |
|
---|
304 | // examine the cgi arguments and create the appropriate page,
|
---|
305 | // outputing the page to textout and any debug information to logout
|
---|
306 | //
|
---|
307 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
308 | int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout) {
|
---|
309 | int err = LI_NOERROR;
|
---|
310 |
|
---|
311 | // make the output go where they want!
|
---|
312 | gdbm.setlogout (&logout);
|
---|
313 | disp.setlogout (&logout);
|
---|
314 |
|
---|
315 | cgiargsclass args;
|
---|
316 |
|
---|
317 | parse_cgi_args (argstr, args);
|
---|
318 | expand_compressed_args (args);
|
---|
319 | add_default_args (args);
|
---|
320 | check_args (args);
|
---|
321 |
|
---|
322 | // get the input encoding
|
---|
323 | text_t &arg_w = args["w"];
|
---|
324 | inconvertclass *inconvert = NULL;
|
---|
325 | if (arg_w == "8") {
|
---|
326 | inconvert = &utf8inconvert;
|
---|
327 | } else if (arg_w == "g") {
|
---|
328 | // The map files will only be loaded the first time they are
|
---|
329 | // needed. The loading is done here to reduce the memory load
|
---|
330 | // for collections which don't need to convert to GB.
|
---|
331 | gbinconvert.loadmapfile (gsdlhome, "gbku", 0x25a1);
|
---|
332 | inconvert = &gbinconvert;
|
---|
333 | } else {
|
---|
334 | inconvert = &asciiinconvert; // default
|
---|
335 | }
|
---|
336 |
|
---|
337 | // see if the next page will have a different encoding
|
---|
338 | if (args.getarg("nw") != NULL) args["w"] = args["nw"];
|
---|
339 |
|
---|
340 | // convert arguments which aren't in unicode to unicode
|
---|
341 | args_tounicode (args, *inconvert);
|
---|
342 |
|
---|
343 | // remember the state of the compressed arguments
|
---|
344 | lastcomparg = get_compressed_args (args);
|
---|
345 | logout << args;
|
---|
346 |
|
---|
347 | // get the output encoding
|
---|
348 | text_t &arg_nw = args["w"];
|
---|
349 | outconvertclass *outconvert = NULL;
|
---|
350 | if (arg_nw == "8") {
|
---|
351 | outconvert = &utf8outconvert;
|
---|
352 | } else if (arg_nw == "g") {
|
---|
353 | gboutconvert.loadmapfile (gsdlhome, "ugbk", 0xa1f5);
|
---|
354 | outconvert = &gboutconvert;
|
---|
355 | } else {
|
---|
356 | outconvert = &asciioutconvert; // default
|
---|
357 | }
|
---|
358 |
|
---|
359 |
|
---|
360 | // dispatch the request
|
---|
361 | text_t &arg_a = args["a"];
|
---|
362 | if (arg_a == "q") err = query_action (args, *outconvert, textout, logout);
|
---|
363 | else if (arg_a == "b") browse_action (args, *outconvert, textout, logout);
|
---|
364 | else if (arg_a == "t") document_action (args, *outconvert, textout, logout);
|
---|
365 | else if (arg_a == "p") page_action (args, *outconvert, textout, logout);
|
---|
366 | else if ((arg_a.size() == 2) && (arg_a[0] == 'a'))
|
---|
367 | auxiliary_action (args, *outconvert, textout, logout);
|
---|
368 | else
|
---|
369 | {
|
---|
370 | // output error page
|
---|
371 | }
|
---|
372 |
|
---|
373 | return err;
|
---|
374 | }
|
---|
375 |
|
---|
376 | // the arg config string is used to do processing on the arguments
|
---|
377 | // entries take the form argname[defaultvalue]
|
---|
378 | // if the argument name is preceeded by a "+" it means that the
|
---|
379 | // value may be more than one character long
|
---|
380 | // the main state variable missed on this list is "q" the query string
|
---|
381 | void libinterface::get_arg_config (text_t &argconfigstr)
|
---|
382 | {
|
---|
383 | argconfigstr =
|
---|
384 | "+a[p]" // action: q=query, b=browse, t=targetdoc, p=page, a1=auxiliary
|
---|
385 | "w[]" // encoding: w=western, 8=utf8, 7=utf7, g=GB2312, k=GBK
|
---|
386 | "t[1]" // query type: 0=boolean, 1=ranked
|
---|
387 | "+i[]" // index: collection dependant
|
---|
388 | "k[1]" // casefolding: 0=off, 1=on
|
---|
389 | "s[0]" // stemming: 0=off, 1=on
|
---|
390 | "+p[home]" // page
|
---|
391 | "+c[]" // collection (collection dependant)
|
---|
392 | "+r[1]" // results from
|
---|
393 | "+d[C.1]" // the target document
|
---|
394 | "+j[11]" // partial index: 11=all, 10=f&n, 01=other
|
---|
395 | "+m[100]" // maxdocs
|
---|
396 | "+o[20]" // hits per page
|
---|
397 | "v[0]" // version: 0=text+graphics, 1=text
|
---|
398 | "f[0]" // query box size: 0=normal, 1=big
|
---|
399 | "l[e]"; // language: e=english, m=maori
|
---|
400 |
|
---|
401 | }
|
---|
402 |
|
---|
403 | text_t::iterator libinterface::get_next_config_arg (text_t::iterator first,
|
---|
404 | text_t::iterator last,
|
---|
405 | text_t &argname,
|
---|
406 | text_t &defaultvalue,
|
---|
407 | bool &longarg)
|
---|
408 | {
|
---|
409 | first = getdelimitstr (first, last, '[', argname);
|
---|
410 | first = getdelimitstr (first, last, ']', defaultvalue);
|
---|
411 | longarg = false;
|
---|
412 |
|
---|
413 | if (!argname.empty() && (argname[0] == '+'))
|
---|
414 | {
|
---|
415 | argname.erase(argname.begin(), argname.begin()+1);
|
---|
416 | longarg = true;
|
---|
417 | }
|
---|
418 |
|
---|
419 | return first;
|
---|
420 | }
|
---|
421 |
|
---|
422 | text_t libinterface::get_compressed_args (cgiargsclass &args)
|
---|
423 | {
|
---|
424 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
425 | text_t arg_e;
|
---|
426 | text_t argname, defaultvalue;
|
---|
427 | text_t *argvalue;
|
---|
428 | bool longarg;
|
---|
429 |
|
---|
430 | text_t::iterator here = argconfigstr.begin();
|
---|
431 | text_t::iterator end = argconfigstr.end();
|
---|
432 | while (here != end)
|
---|
433 | {
|
---|
434 | here = get_next_config_arg (here, end, argname, defaultvalue, longarg);
|
---|
435 |
|
---|
436 | if (!argname.empty())
|
---|
437 | {
|
---|
438 | argvalue = args.getarg (argname);
|
---|
439 | if (argvalue == NULL) arg_e += defaultvalue;
|
---|
440 | else arg_e += *argvalue;
|
---|
441 |
|
---|
442 | if (longarg) arg_e += "-";
|
---|
443 | }
|
---|
444 | }
|
---|
445 |
|
---|
446 | return arg_e;
|
---|
447 | }
|
---|
448 |
|
---|
449 |
|
---|
450 | // the compressed options should never override explicit options
|
---|
451 | // but they should always be expanded before add_default_args is
|
---|
452 | // called
|
---|
453 | void libinterface::expand_compressed_args (cgiargsclass &args)
|
---|
454 | {
|
---|
455 | text_t *arg_e = args.getarg("e");
|
---|
456 |
|
---|
457 | // see if there is compressed options
|
---|
458 | if (arg_e != NULL)
|
---|
459 | {
|
---|
460 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
461 | text_t argname, defaultvalue, argvalue;
|
---|
462 | bool longarg;
|
---|
463 |
|
---|
464 | text_t::iterator confighere = argconfigstr.begin();
|
---|
465 | text_t::iterator configend = argconfigstr.end();
|
---|
466 |
|
---|
467 | text_t::iterator arghere = arg_e->begin();
|
---|
468 | text_t::iterator argend = arg_e->end();
|
---|
469 | while (confighere != configend && arghere != argend)
|
---|
470 | {
|
---|
471 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
472 | defaultvalue, longarg);
|
---|
473 | if (!argname.empty())
|
---|
474 | {
|
---|
475 | if (longarg)
|
---|
476 | {
|
---|
477 | arghere = getdelimitstr (arghere, argend, '-', argvalue);
|
---|
478 | if (!argvalue.empty()) args.setdefaultarg (argname, argvalue);
|
---|
479 | }
|
---|
480 | else
|
---|
481 | {
|
---|
482 | args.setdefaultcarg (argname,*arghere);
|
---|
483 | arghere++;
|
---|
484 | }
|
---|
485 | }
|
---|
486 | }
|
---|
487 | }
|
---|
488 | }
|
---|
489 |
|
---|
490 |
|
---|
491 | void libinterface::add_default_args (cgiargsclass &args)
|
---|
492 | {
|
---|
493 | text_t argconfigstr; get_arg_config (argconfigstr);
|
---|
494 | text_t argname, defaultvalue;
|
---|
495 | bool longarg;
|
---|
496 |
|
---|
497 | text_t::iterator confighere = argconfigstr.begin();
|
---|
498 | text_t::iterator configend = argconfigstr.end();
|
---|
499 | while (confighere != configend)
|
---|
500 | {
|
---|
501 | confighere = get_next_config_arg (confighere, configend, argname,
|
---|
502 | defaultvalue, longarg);
|
---|
503 | if (!argname.empty()) args.setdefaultarg (argname, defaultvalue);
|
---|
504 | }
|
---|
505 |
|
---|
506 | // the query string and format string are not included in the argument configuration string
|
---|
507 | args.setdefaultarg ("q", ""); // the default query string is ""
|
---|
508 | args.setdefaultarg ("g", "00");
|
---|
509 | args.setdefaultarg ("x", "0");
|
---|
510 | }
|
---|
511 |
|
---|
512 |
|
---|
513 | // check and attempt to fix an problems encountered in the list
|
---|
514 | // of cgi arguments
|
---|
515 | void libinterface::check_args (cgiargsclass &args)
|
---|
516 | {
|
---|
517 | args.setarg("c", get_collection_name());
|
---|
518 | if (args["w"].empty()) args.setarg("w", cfg_info.defaultencoding);
|
---|
519 | if (args["i"].empty()) args.setarg("i", default_index);
|
---|
520 | }
|
---|
521 |
|
---|
522 | void libinterface::args_tounicode (cgiargsclass &args,
|
---|
523 | inconvertclass &inconvert) {
|
---|
524 | utf8outconvertclass text_t2utf8;
|
---|
525 | cgiargsclass::iterator here = args.begin();
|
---|
526 | cgiargsclass::iterator end = args.end();
|
---|
527 |
|
---|
528 | while (here != end) {
|
---|
529 | if (here->second.getencoding() > 0) {
|
---|
530 | here->second = inconvert.convert(here->second);
|
---|
531 | }
|
---|
532 |
|
---|
533 | here++;
|
---|
534 | }
|
---|
535 | }
|
---|
536 |
|
---|
537 |
|
---|
538 | void libinterface::define_general_macros (cgiargsclass &args, outconvertclass &outconvert,
|
---|
539 | ostream &logout) {
|
---|
540 | disp.setmacro("httpprefix", "Global", httpprefix);
|
---|
541 | disp.setmacro("gwcgi", "Global", gwcgi);
|
---|
542 |
|
---|
543 | disp.setmacro("numdocs", "Global", (int)cfg_info.numdocs);
|
---|
544 |
|
---|
545 | disp.setmacro("collection", "Global", cgisafe(outconvert.convert(args["c"])));
|
---|
546 | disp.setmacro("compressedoptions", "Global", get_compressed_args(args));
|
---|
547 | disp.setmacro("urlsafequerystring", "Global", cgisafe(outconvert.convert(args["q"])));
|
---|
548 |
|
---|
549 | // need to escape any special characters in querystring to prevent
|
---|
550 | // them upsetting the html
|
---|
551 | text_t querystring;
|
---|
552 | text_t::iterator here = args["q"].begin();
|
---|
553 | text_t::iterator end = args["q"].end();
|
---|
554 | while (here != end) {
|
---|
555 | if (*here == '"') querystring += """;
|
---|
556 | else if (*here == '&') querystring += "&";
|
---|
557 | else if (*here == '<') querystring += "<";
|
---|
558 | else if (*here == '>') querystring += ">";
|
---|
559 | else querystring.push_back(*here);
|
---|
560 | here ++;
|
---|
561 | }
|
---|
562 | disp.setmacro("querystring", "Global", querystring);
|
---|
563 |
|
---|
564 | if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1");
|
---|
565 | if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1");
|
---|
566 |
|
---|
567 | int i = rand();
|
---|
568 | disp.setmacro("pagedest", "Global", text_t(i));
|
---|
569 |
|
---|
570 | // define the macro for the "g" argument
|
---|
571 | disp.setmacro("g", "Global", args["g"]);
|
---|
572 |
|
---|
573 | // set the selection macros
|
---|
574 |
|
---|
575 | text_t indexselect;
|
---|
576 | text_t maprealindex, mapdirindex;
|
---|
577 | if (cfg_info.indexmap.size() == 1) {
|
---|
578 | getrealdirindex (cfg_info.indexmap[0], maprealindex, mapdirindex);
|
---|
579 | indexselect += "<input type=hidden name=\"i\" value=\"";
|
---|
580 | indexselect += mapdirindex;
|
---|
581 | indexselect += "\">_query:";
|
---|
582 | indexselect += real2macroindex (maprealindex);
|
---|
583 | indexselect += "_\n";
|
---|
584 |
|
---|
585 | } else {
|
---|
586 | text_t &arg_i = args["i"];
|
---|
587 | text_tarray::const_iterator maphere = cfg_info.indexmap.begin();
|
---|
588 | text_tarray::const_iterator mapend = cfg_info.indexmap.end();
|
---|
589 |
|
---|
590 | indexselect += "<select name=\"i\">\n";
|
---|
591 | while (maphere != mapend) {
|
---|
592 | getrealdirindex (*maphere, maprealindex, mapdirindex);
|
---|
593 | indexselect += "<option value=\"";
|
---|
594 | indexselect += mapdirindex;
|
---|
595 | indexselect += "\"";
|
---|
596 | if (arg_i == mapdirindex) indexselect += " selected";
|
---|
597 | indexselect += ">_query:";
|
---|
598 | indexselect += real2macroindex (maprealindex);
|
---|
599 | indexselect += "_\n";
|
---|
600 |
|
---|
601 | maphere++;
|
---|
602 | }
|
---|
603 | indexselect += "</select>\n";
|
---|
604 | }
|
---|
605 |
|
---|
606 | disp.setmacro("indexselection", "query", indexselect);
|
---|
607 |
|
---|
608 | text_t qtselect;
|
---|
609 | text_t &arg_t = args["t"];
|
---|
610 |
|
---|
611 | qtselect += "<select name=\"t\">\n";
|
---|
612 | qtselect += "<option value=\"1\"";
|
---|
613 | if (arg_t == "1") qtselect += " selected";
|
---|
614 | qtselect += ">_query:textsome_\n";
|
---|
615 | qtselect += "<option value=\"0\"";
|
---|
616 | if (arg_t == "0") qtselect += " selected";
|
---|
617 | qtselect += ">_query:textall_\n";
|
---|
618 | qtselect += "</select>\n";
|
---|
619 |
|
---|
620 | disp.setmacro("querytypeselection", "query", qtselect);
|
---|
621 | }
|
---|
622 |
|
---|
623 |
|
---|
624 | // prepare_page prepares to write out a page using the current
|
---|
625 | // page parameters and defines any general macros
|
---|
626 | void libinterface::prepare_page (cgiargsclass &args, outconvertclass &outconvert,
|
---|
627 | ostream &logout) {
|
---|
628 | // get page parameters
|
---|
629 | text_t pageparams = text_t("collection=") + args["c"];
|
---|
630 | if (args.getintarg("u") == 1) pageparams += ",style=htmlonly";
|
---|
631 | if (args.getintarg("v") == 1) pageparams += ",version=text";
|
---|
632 | if (args.getintarg("f") == 1) pageparams += ",queryversion=big";
|
---|
633 | if (args["l"] != 'e') pageparams += ",language=" + args["l"];
|
---|
634 |
|
---|
635 | disp.openpage(pageparams, MACROPRECEDENCE);
|
---|
636 | define_general_macros(args, outconvert, logout);
|
---|
637 | define_collection_macros(args, logout);
|
---|
638 | }
|
---|
639 |
|
---|
640 | void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams)
|
---|
641 | {
|
---|
642 | queryparams.collection = args["c"];
|
---|
643 | // assemble_index (args, queryparams.search_index);
|
---|
644 | queryparams.search_index = args["i"];
|
---|
645 | queryparams.querystring = args["q"];
|
---|
646 | format_querystring (queryparams.querystring);
|
---|
647 | queryparams.search_type = args.getintarg ("t");
|
---|
648 | queryparams.casefolding = args.getintarg ("k");
|
---|
649 | queryparams.stemming = args.getintarg ("s");
|
---|
650 | queryparams.maxdocs = args.getintarg ("m");
|
---|
651 | }
|
---|
652 |
|
---|
653 | void libinterface::format_querystring (text_t &querystring)
|
---|
654 | {
|
---|
655 | text_t formattedstring;
|
---|
656 | quotedstring.clear();
|
---|
657 |
|
---|
658 | text_t::iterator here = querystring.begin();
|
---|
659 | text_t::iterator end = querystring.end();
|
---|
660 | int foundquote = 0;
|
---|
661 |
|
---|
662 | // want to remove ()|!& from querystring so boolean queries are just
|
---|
663 | // "all the words" queries
|
---|
664 | while (here != end) {
|
---|
665 | if (*here == '(' || *here == ')' || *here == '|' ||
|
---|
666 | *here == '!' || *here == '&') {
|
---|
667 | formattedstring += " ";
|
---|
668 | } else {
|
---|
669 | if (*here == '"') {
|
---|
670 | if (foundquote) {foundquote = 0; quotedstring.push_back(*here);}
|
---|
671 | else foundquote = 1;
|
---|
672 | } else {
|
---|
673 | formattedstring.push_back(*here);
|
---|
674 | }
|
---|
675 | if (foundquote) quotedstring.push_back(*here);
|
---|
676 | }
|
---|
677 | here ++;
|
---|
678 | }
|
---|
679 | querystring = formattedstring + quotedstring;
|
---|
680 | }
|
---|
681 |
|
---|
682 | void libinterface::define_query_macros (cgiargsclass &args,
|
---|
683 | queryparamclass &queryparams,
|
---|
684 | queryresultsclass &queryresults,
|
---|
685 | ostream &logout)
|
---|
686 | {
|
---|
687 | int numdocs = queryresults.getnumdocs();
|
---|
688 | int numterms = queryresults.getnumterms();
|
---|
689 | disp.setmacro("querysize", "query", args["f"]);
|
---|
690 | disp.setmacro("haveresults", "query", numdocs);
|
---|
691 |
|
---|
692 | // set the display frequency macro
|
---|
693 | text_t freqmsg = "_textfm1_";
|
---|
694 |
|
---|
695 | int first = 1;
|
---|
696 | for (int i = 0; i < numterms; i++) {
|
---|
697 | if (first == 0) freqmsg += "; ";
|
---|
698 | first = 0;
|
---|
699 | freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq;
|
---|
700 | }
|
---|
701 | if (!quotedstring.empty()) freqmsg += "<br><i>post-processed to find " + quotedstring + "</i>\n";
|
---|
702 | disp.setmacro("freqmsg", "query", freqmsg);
|
---|
703 |
|
---|
704 | // set the result line macro
|
---|
705 |
|
---|
706 | text_t resline;
|
---|
707 |
|
---|
708 | if (numdocs >= queryparams.maxdocs)
|
---|
709 | resline.setcstr("_textmt2_");
|
---|
710 |
|
---|
711 | if (numdocs == 0) {
|
---|
712 | resline.setcstr("_textndmtq_");
|
---|
713 | } else if (numdocs == 1) {
|
---|
714 | resline += text_t(numdocs) + text_t(" _textdmtq2_.");
|
---|
715 | } else {
|
---|
716 | resline += text_t(numdocs) + text_t(" _textdmtq3_.");
|
---|
717 | }
|
---|
718 |
|
---|
719 | disp.setmacro("resultline", "query", resline);
|
---|
720 |
|
---|
721 | // define_collection_macros (args, logout);
|
---|
722 |
|
---|
723 | if (queryresults.getnumdocs() > 0) {
|
---|
724 | docLinks(args, queryresults, logout);
|
---|
725 | }
|
---|
726 | }
|
---|
727 |
|
---|
728 |
|
---|
729 | // set the _links_ macro to create the links between pages of query results
|
---|
730 | void libinterface::docLinks (cgiargsclass &args,
|
---|
731 | queryresultsclass &queryresults,
|
---|
732 | ostream &logout)
|
---|
733 | {
|
---|
734 | text_t links;
|
---|
735 | int a, b, documents, nextfirst, nextlast, prevfirst, prevlast;
|
---|
736 | int results_from = args.getintarg("r");
|
---|
737 | int hitsperpage = args.getintarg("o");
|
---|
738 |
|
---|
739 | documents = queryresults.getnumdocs();
|
---|
740 |
|
---|
741 | a = results_from;
|
---|
742 | b = a + (hitsperpage - 1);
|
---|
743 |
|
---|
744 | // make sure a and b are in range
|
---|
745 | if (a < 1) a = 1;
|
---|
746 | if (b < 1) b = 1;
|
---|
747 | if (a > documents) a = documents;
|
---|
748 | if (b > documents) b = documents;
|
---|
749 |
|
---|
750 | links.setcstr("<table cellspacing=0 cellpadding=0 border=0 width=\"100%\">\n");
|
---|
751 | links += "<tr valign=bottom>\n";
|
---|
752 | links += "<td align=left>\n";
|
---|
753 | links += "<_font_>\n";
|
---|
754 |
|
---|
755 | // previous page link
|
---|
756 | if (a > 1) {
|
---|
757 | prevlast = a - 1;
|
---|
758 | prevfirst = a - hitsperpage;
|
---|
759 |
|
---|
760 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
761 | links += prevfirst;
|
---|
762 | links += "\">_iconprev__textmatches_ ";
|
---|
763 | links += prevfirst;
|
---|
764 | links += " - ";
|
---|
765 | links += prevlast;
|
---|
766 | links += "</a>\n";
|
---|
767 | }
|
---|
768 |
|
---|
769 | links += "</font>\n";
|
---|
770 | links += "</td><td width=100></td><td align=right>\n";
|
---|
771 | links += "<_font_>\n";
|
---|
772 |
|
---|
773 | // next page link
|
---|
774 | if (b < documents) {
|
---|
775 | nextfirst = b + 1;
|
---|
776 | nextlast = b + hitsperpage;
|
---|
777 | if (nextlast > documents) nextlast = documents;
|
---|
778 |
|
---|
779 | links += "<p><a href=\"_httpqueryresults_&r=";
|
---|
780 | links += nextfirst;
|
---|
781 | links += "\">_textmatches_ ";
|
---|
782 | links += nextfirst;
|
---|
783 | links += " - ";
|
---|
784 | links += nextlast ;
|
---|
785 | links += "_iconnext_</a>\n";
|
---|
786 | }
|
---|
787 |
|
---|
788 | links += "</font>\n";
|
---|
789 | links += "</td></tr></table>\n";
|
---|
790 |
|
---|
791 | disp.setmacro("links", "query", links);
|
---|
792 | }
|
---|
793 |
|
---|
794 |
|
---|
795 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
796 | int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams,
|
---|
797 | queryresultsclass &queryresults, ostream &logout)
|
---|
798 | {
|
---|
799 | set_query_params(args, queryparams);
|
---|
800 |
|
---|
801 | if (!queryparams.querystring.empty()) {
|
---|
802 | // do the query - the results are returned in queryresults
|
---|
803 | if (!search.search(queryparams, queryresults)) {
|
---|
804 | logout << "ERROR: database didn't load\n";
|
---|
805 | return LI_LOADDATABASEFAILED;
|
---|
806 | }
|
---|
807 | }
|
---|
808 | return LI_NOERROR;
|
---|
809 | }
|
---|
810 |
|
---|
811 | ////////////////////////////////////////////////////////////////////////////////////////
|
---|
812 | // query_action is called whenever a search is to be carried out (i.e. when the
|
---|
813 | // 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to
|
---|
814 | // carry out the search then displays the first page of results.
|
---|
815 | //
|
---|
816 | // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
|
---|
817 | int libinterface::query_action (cgiargsclass &args, outconvertclass &outconvert,
|
---|
818 | ostream &textout, ostream &logout) {
|
---|
819 | int err = LI_NOERROR;
|
---|
820 |
|
---|
821 | queryparamclass queryparams;
|
---|
822 | queryresultsclass queryresults;
|
---|
823 |
|
---|
824 | err = do_query(args, queryparams, queryresults, logout);
|
---|
825 |
|
---|
826 | // prepare to print out the page
|
---|
827 | prepare_page(args, outconvert, logout);
|
---|
828 | define_query_macros(args, queryparams, queryresults, logout);
|
---|
829 |
|
---|
830 | // print out the query page
|
---|
831 | textout << outconvert << disp << "_query:header_\n";
|
---|
832 |
|
---|
833 | // output query results if there is a query string -
|
---|
834 | // otherwise output help text
|
---|
835 | if (!queryparams.querystring.empty())
|
---|
836 | {
|
---|
837 | displayresults (args, outconvert, textout, logout, queryresults);
|
---|
838 | }
|
---|
839 | else
|
---|
840 | {
|
---|
841 | textout << outconvert << disp << "_query:noqueryheader_\n";
|
---|
842 | }
|
---|
843 |
|
---|
844 | textout << outconvert << disp << "_query:footer_\n";
|
---|
845 |
|
---|
846 | return err;
|
---|
847 | }
|
---|
848 |
|
---|
849 | void libinterface::displayresults (cgiargsclass &args, outconvertclass &outconvert,
|
---|
850 | ostream &textout, ostream &logout,
|
---|
851 | queryresultsclass &queryresults) {
|
---|
852 | textout << outconvert << disp << "_query:queryheader_";
|
---|
853 |
|
---|
854 | int startresults = args.getintarg("r") - 1;
|
---|
855 | int numresults = args.getintarg("o");
|
---|
856 |
|
---|
857 | textout << outconvert << "<table cellspacing=4>\n";
|
---|
858 | for (int i=startresults; i < startresults+numresults; i++) {
|
---|
859 | displaydocsummary (args, outconvert, textout, logout, queryresults, i);
|
---|
860 | }
|
---|
861 | textout << outconvert << "</table>\n\n";
|
---|
862 |
|
---|
863 | textout << outconvert << disp << "_query:queryfooter_";
|
---|
864 | }
|
---|
865 |
|
---|
866 |
|
---|
867 | ///////////////////////////////////////////////////////////////////////////////////////////////
|
---|
868 | // browse_action writes out the browse pages (i.e. the top level hierarchy pages)
|
---|
869 | void libinterface::browse_action (cgiargsclass &args, outconvertclass &outconvert,
|
---|
870 | ostream &textout, ostream &logout) {
|
---|
871 | text_t browse_bar, locator, output;
|
---|
872 | gdbm_info info;
|
---|
873 |
|
---|
874 | prepare_page(args, outconvert, logout);
|
---|
875 |
|
---|
876 | // get browse bar unless page has been detached
|
---|
877 | if (args.getintarg("x") == 0) {
|
---|
878 | browse->get_browse_bar(args["d"], browse_bar);
|
---|
879 | }
|
---|
880 |
|
---|
881 | // get top locator
|
---|
882 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
883 |
|
---|
884 | // expand and output page
|
---|
885 | // Note: we need to expand these out using package 'browse'
|
---|
886 | // so we can't use the tricky '<<' syntax
|
---|
887 | disp.expandstring("browse", "_header_", output);
|
---|
888 | textout << outconvert << output;
|
---|
889 | disp.expandstring("browse", browse_bar, output);
|
---|
890 | textout << outconvert << output;
|
---|
891 | disp.expandstring("browse", locator, output);
|
---|
892 | textout << outconvert << output;
|
---|
893 | disp.expandstring("browse", "_footer_", output);
|
---|
894 | textout << outconvert << output;
|
---|
895 | }
|
---|
896 |
|
---|
897 |
|
---|
898 |
|
---|
899 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
900 | // document_action is called to retrieve and display collection documents
|
---|
901 | // It calls the mgsearch function docTargetDocument() to retrieve
|
---|
902 | // a document.
|
---|
903 |
|
---|
904 | void libinterface::document_action (cgiargsclass &args, outconvertclass &outconvert,
|
---|
905 | ostream &textout, ostream &logout) {
|
---|
906 |
|
---|
907 | text_t locator, content, links, output;
|
---|
908 | gdbm_info info;
|
---|
909 | queryparamclass queryparams;
|
---|
910 | queryresultsclass queryresults;
|
---|
911 | int oversize = 0;
|
---|
912 |
|
---|
913 | // have to redo the query to get queryterms for highlight text
|
---|
914 | do_query(args, queryparams, queryresults, logout);
|
---|
915 |
|
---|
916 | prepare_page(args, outconvert, logout);
|
---|
917 |
|
---|
918 | if (args["g"][1] == '0') {
|
---|
919 | // get docnum from gdbm
|
---|
920 | text_t docref;
|
---|
921 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
922 | else docref = args["d"];
|
---|
923 | if (gdbm.getinfo(docref, info) != 0) {
|
---|
924 | logout << logconvert << "info_db wasn't opened - " << docref << "\n";
|
---|
925 | return;
|
---|
926 | }
|
---|
927 |
|
---|
928 | // get document text if there is any
|
---|
929 | if (info.contents.empty()) {
|
---|
930 | search.docTargetDocument(text_default_index, queryparams.collection,
|
---|
931 | info.docnum, content);
|
---|
932 | if (info.title != "<i>(introductory text)</i>")
|
---|
933 | content = "<h3>" + info.title + "</h3>\n" + content;
|
---|
934 | }
|
---|
935 | }
|
---|
936 |
|
---|
937 | if (args["g"][1] == '1') {
|
---|
938 | // want to get expanded out text
|
---|
939 | vector<text_t> contents_arr;
|
---|
940 | text_t booksection;
|
---|
941 | int levelcount;
|
---|
942 |
|
---|
943 | get_book (args["d"], booksection);
|
---|
944 | levelcount = count_dots(booksection);
|
---|
945 |
|
---|
946 | browse->get_contents_arr(args, gdbm, contents_arr);
|
---|
947 |
|
---|
948 |
|
---|
949 | // get text for each section of book
|
---|
950 | vector<text_t>::const_iterator thiscontent = contents_arr.begin();
|
---|
951 | vector<text_t>::const_iterator end = contents_arr.end();
|
---|
952 |
|
---|
953 | int first = 1;
|
---|
954 | int count = 1;
|
---|
955 | while (thiscontent != end) {
|
---|
956 | text_t text;
|
---|
957 |
|
---|
958 | // get docnum from gdbm
|
---|
959 | if (gdbm.getinfo(*thiscontent, info) != 0) {
|
---|
960 | logout << logconvert << "info_db wasn't opened - " << args["d"] << "\n";
|
---|
961 | return;
|
---|
962 | }
|
---|
963 |
|
---|
964 | // if section has text get it, otherwise output section title
|
---|
965 | if (info.contents.empty()) {
|
---|
966 |
|
---|
967 | // output <a name= > tags for all text sections currently displayed in toc (all text sections
|
---|
968 | // if contents are expanded
|
---|
969 | if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') {
|
---|
970 | content += "<a name=\"";
|
---|
971 | content += count;
|
---|
972 | content += "\"></a>\n";
|
---|
973 | count ++;
|
---|
974 | }
|
---|
975 |
|
---|
976 | search.docTargetDocument(text_default_index, queryparams.collection,
|
---|
977 | info.docnum, text);
|
---|
978 | if (info.title != "<i>(introductory text)</i>")
|
---|
979 | content += "<h3>" + info.title + "</h3>\n";
|
---|
980 | // content += text + "<hr><br>\n";
|
---|
981 | content += text + "<p>\n"; // no longer want <hr> between sections
|
---|
982 | } else {
|
---|
983 | content += "<h3>" + info.title + "</h3>\n";
|
---|
984 | }
|
---|
985 |
|
---|
986 | if (args["n"] == 1) {
|
---|
987 | if (first) {
|
---|
988 | browse->get_top_locator(args, gdbm, 0, locator);
|
---|
989 | disp.expandstring("text", "_header_", output);
|
---|
990 | textout << outconvert << output;
|
---|
991 | disp.expandstring("text", locator, output);
|
---|
992 | textout << outconvert << output;
|
---|
993 | }
|
---|
994 |
|
---|
995 | disp.expandstring("text", content, output);
|
---|
996 | if (!queryparams.querystring.empty())
|
---|
997 | highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
|
---|
998 | else
|
---|
999 | textout << outconvert << output;
|
---|
1000 | first = 0;
|
---|
1001 | content.clear();
|
---|
1002 | }
|
---|
1003 |
|
---|
1004 | thiscontent ++;
|
---|
1005 | if (content.size() > 200000 && args["n"] != 1) {
|
---|
1006 | content.clear();
|
---|
1007 | oversize = 1;
|
---|
1008 | args["g"][1] = '0';
|
---|
1009 | break;
|
---|
1010 | }
|
---|
1011 | }
|
---|
1012 |
|
---|
1013 | if (args["g"][1] == '0') {
|
---|
1014 | // get docnum from gdbm
|
---|
1015 | text_t docref;
|
---|
1016 | if (args["d"][0] != 'B') get_book(args["d"], docref);
|
---|
1017 | else docref = args["d"];
|
---|
1018 | if (gdbm.getinfo(docref, info) != 0) {
|
---|
1019 | logout << logconvert << "info_db wasn't opened - " << docref << "\n";
|
---|
1020 | return;
|
---|
1021 | }
|
---|
1022 |
|
---|
1023 | // get document text if there is any
|
---|
1024 | if (info.contents.empty()) {
|
---|
1025 | search.docTargetDocument(text_default_index, queryparams.collection,
|
---|
1026 | info.docnum, content);
|
---|
1027 | if (info.title != "<i>(introductory text)</i>")
|
---|
1028 | content = "<h3>" + info.title + "</h3>\n" + content;
|
---|
1029 | }
|
---|
1030 | }
|
---|
1031 | }
|
---|
1032 |
|
---|
1033 | if (args["n"] != 1) {
|
---|
1034 | // get top locator
|
---|
1035 | browse->get_top_locator(args, gdbm, oversize, locator);
|
---|
1036 |
|
---|
1037 | // expand and output page
|
---|
1038 | // Note: we need to expand these out using package 'text'
|
---|
1039 | // so we can't use the tricky '<<' syntax
|
---|
1040 | disp.expandstring("text", "_header_", output);
|
---|
1041 | textout << outconvert << output;
|
---|
1042 |
|
---|
1043 | disp.expandstring("text", locator, output);
|
---|
1044 | textout << outconvert << output;
|
---|
1045 |
|
---|
1046 | disp.expandstring("text", content, output);
|
---|
1047 | if (!queryparams.querystring.empty())
|
---|
1048 | highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
|
---|
1049 | else
|
---|
1050 | textout << outconvert << output;
|
---|
1051 | }
|
---|
1052 |
|
---|
1053 | // get links to next and previous sections unless in expand text mode
|
---|
1054 | if (args["g"][1] == '0') {
|
---|
1055 | browse->get_links(args, gdbm, links);
|
---|
1056 | disp.expandstring("text", links, output);
|
---|
1057 | textout << outconvert << output;
|
---|
1058 | }
|
---|
1059 |
|
---|
1060 | disp.expandstring("text", "_footer_", output);
|
---|
1061 | textout << outconvert << output;
|
---|
1062 | }
|
---|
1063 |
|
---|
1064 |
|
---|
1065 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
1066 | // auxiliary_action is called to retrieve and display collection documents
|
---|
1067 | // in formats other than those handled by document_action (i.e. those other
|
---|
1068 | // than text). This should be overridden for collections needing to return
|
---|
1069 | // images, postscript etc. You can have as many auxiliary actions as needed
|
---|
1070 | // by setting arg_a to a1, a2, a3 etc. and testing arg["a"][1] within the
|
---|
1071 | // auxiliary_action function.
|
---|
1072 | // auxiliary_action defaults to calling document_action
|
---|
1073 |
|
---|
1074 | void libinterface::auxiliary_action (cgiargsclass &args, outconvertclass &outconvert,
|
---|
1075 | ostream &textout, ostream &logout) {
|
---|
1076 | document_action (args, outconvert, textout, logout);
|
---|
1077 | }
|
---|
1078 |
|
---|
1079 | /////////////////////////////////////////////////////////////////////////////////////////
|
---|
1080 | // page is called when a standard html page is to be displayed
|
---|
1081 | void libinterface::page_action (cgiargsclass &args, outconvertclass &outconvert,
|
---|
1082 | ostream &textout, ostream &logout) {
|
---|
1083 |
|
---|
1084 | text_t &arg_p = args["p"];
|
---|
1085 |
|
---|
1086 | prepare_page(args, outconvert, logout);
|
---|
1087 |
|
---|
1088 | if (arg_p == "preferences")
|
---|
1089 | define_pref_macros(args, logout);
|
---|
1090 |
|
---|
1091 | textout << outconvert << disp << ("_" + arg_p + ":header_\n")
|
---|
1092 | << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n")
|
---|
1093 | << ("_" + arg_p + ":footer_\n");
|
---|
1094 | }
|
---|
1095 |
|
---|
1096 | // highlighttext highlights query terms in text string and outputs the resulting text string
|
---|
1097 | void libinterface::highlighttext(text_tarray &termvars, outconvertclass &outconvert,
|
---|
1098 | ostream &textout, ostream &logout, text_t &text) {
|
---|
1099 | map<text_t, int, lttext_t> terms;
|
---|
1100 | map<text_t, int, lttext_t>::const_iterator it;
|
---|
1101 | for (unsigned int i = 0; i < termvars.size(); i++) {
|
---|
1102 | terms[termvars[i]] = 1;
|
---|
1103 | }
|
---|
1104 |
|
---|
1105 | text_t::iterator here = text.begin();
|
---|
1106 | text_t::iterator end = text.end();
|
---|
1107 | text_t word, buffer;
|
---|
1108 | while (here != end) {
|
---|
1109 | if (is_unicode_letdig(*here)) {
|
---|
1110 | // not word boundary
|
---|
1111 | word.push_back(*here);
|
---|
1112 | here++;
|
---|
1113 |
|
---|
1114 | } else {
|
---|
1115 | // found word boundary
|
---|
1116 | // add last word if there was one
|
---|
1117 | if (!word.empty()) {
|
---|
1118 | it = terms.find(word);
|
---|
1119 | if (it != terms.end()) {
|
---|
1120 | word = "<b><u>" + word + "</u></b>";
|
---|
1121 | }
|
---|
1122 | buffer += word;
|
---|
1123 | word.clear();
|
---|
1124 | }
|
---|
1125 |
|
---|
1126 | if (*here == '<') {
|
---|
1127 | // skip over rest of html tag
|
---|
1128 | while ((here != end) && (*here != '>')) {
|
---|
1129 | buffer.push_back(*here);
|
---|
1130 | here++;
|
---|
1131 | }
|
---|
1132 | }
|
---|
1133 |
|
---|
1134 | buffer.push_back(*here);
|
---|
1135 | here++;
|
---|
1136 |
|
---|
1137 | if (buffer.size() > 1024) {
|
---|
1138 | textout << outconvert << buffer;
|
---|
1139 | buffer.clear();
|
---|
1140 | }
|
---|
1141 | }
|
---|
1142 | }
|
---|
1143 | textout << outconvert << buffer;
|
---|
1144 | }
|
---|
1145 |
|
---|
1146 | void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout)
|
---|
1147 | {
|
---|
1148 | // the caseoption macro
|
---|
1149 | text_t caseoption;
|
---|
1150 | int arg_k = args.getintarg("k");
|
---|
1151 |
|
---|
1152 | caseoption += "\n<input type=radio name=k value=1";
|
---|
1153 | if (arg_k) caseoption += " checked";
|
---|
1154 | caseoption += "> ignore case differences<br>\n";
|
---|
1155 | caseoption += "<input type=radio name=k value=0";
|
---|
1156 | if (!arg_k) caseoption += " checked";
|
---|
1157 | caseoption += "> upper/lower case must match\n";
|
---|
1158 |
|
---|
1159 | disp.setmacro ("caseoption", "preferences", caseoption);
|
---|
1160 |
|
---|
1161 | // the stemoption macro
|
---|
1162 | text_t stemoption;
|
---|
1163 | int arg_s = args.getintarg("s");
|
---|
1164 |
|
---|
1165 | stemoption += "\n<input type=radio name=s value=1";
|
---|
1166 | if (arg_s) stemoption += " checked";
|
---|
1167 | stemoption += "> ignore word endings<br>\n";
|
---|
1168 | stemoption += "<input type=radio name=s value=0";
|
---|
1169 | if (!arg_s) stemoption += " checked";
|
---|
1170 | stemoption += "> whole word must match\n";
|
---|
1171 |
|
---|
1172 | disp.setmacro ("stemoption", "preferences", stemoption);
|
---|
1173 |
|
---|
1174 |
|
---|
1175 | // the encodingoption
|
---|
1176 | text_t encodingoption;
|
---|
1177 | const text_t &arg_w = args["w"];
|
---|
1178 |
|
---|
1179 | encodingoption += "\n<select name=\"nw\">\n";
|
---|
1180 | encodingoption += " <option value=\"w\"";
|
---|
1181 | if (arg_w == "w") encodingoption += " selected";
|
---|
1182 | encodingoption += ">Western (ISO-8859-1)\n";
|
---|
1183 | encodingoption += " <option value=\"g\"";
|
---|
1184 | if (arg_w == "g") encodingoption += " selected";
|
---|
1185 | encodingoption += ">Simplified Chinese (GB2312)\n";
|
---|
1186 | encodingoption += " <option value=\"8\"";
|
---|
1187 | if (arg_w == "8") encodingoption += " selected";
|
---|
1188 | encodingoption += ">Unicode (UTF-8)\n";
|
---|
1189 | encodingoption += "</select>\n";
|
---|
1190 |
|
---|
1191 | disp.setmacro ("encodingoption", "preferences", encodingoption);
|
---|
1192 |
|
---|
1193 | // the maxdocoption
|
---|
1194 | text_t maxdocoption;
|
---|
1195 | int arg_m = args.getintarg("m");
|
---|
1196 |
|
---|
1197 | maxdocoption += "\n<select name=m>\n";
|
---|
1198 | maxdocoption += " <option value=\"50\"";
|
---|
1199 | if (arg_m < 100) maxdocoption += " selected";
|
---|
1200 | maxdocoption += ">50\n";
|
---|
1201 | maxdocoption += " <option value=\"100\"";
|
---|
1202 | if (arg_m >= 100 && arg_m < 200) maxdocoption += " selected";
|
---|
1203 | maxdocoption += ">100\n";
|
---|
1204 | maxdocoption += " <option value=\"200\"";
|
---|
1205 | if (arg_m >= 200 && arg_m < 500) maxdocoption += " selected";
|
---|
1206 | maxdocoption += ">200\n";
|
---|
1207 | maxdocoption += " <option value=\"500\"";
|
---|
1208 | if (arg_m >= 500) maxdocoption += " selected";
|
---|
1209 | maxdocoption += ">500\n";
|
---|
1210 | maxdocoption += "</select>\n";
|
---|
1211 |
|
---|
1212 | disp.setmacro ("maxdocoption", "preferences", maxdocoption);
|
---|
1213 |
|
---|
1214 | // the hitsperpageoption
|
---|
1215 | text_t hitsoption;
|
---|
1216 | int arg_o = args.getintarg("o");
|
---|
1217 |
|
---|
1218 | hitsoption += "\n<select name=o>\n";
|
---|
1219 | hitsoption += " <option value=\"10\"";
|
---|
1220 | if (arg_o < 20) hitsoption += " selected";
|
---|
1221 | hitsoption += ">10\n";
|
---|
1222 | hitsoption += " <option value=\"20\"";
|
---|
1223 | if (arg_o >= 20 && arg_o < 50) hitsoption += " selected";
|
---|
1224 | hitsoption += ">20\n";
|
---|
1225 | hitsoption += " <option value=\"50\"";
|
---|
1226 | if (arg_o >= 50 && arg_o < 100) hitsoption += " selected";
|
---|
1227 | hitsoption += ">50\n";
|
---|
1228 | hitsoption += " <option value=\"100\"";
|
---|
1229 | if (arg_o >= 100 && arg_o < 500) hitsoption += " selected";
|
---|
1230 | hitsoption += ">100\n";
|
---|
1231 | hitsoption += " <option value=\"500\"";
|
---|
1232 | if (arg_o >= 500) hitsoption += " selected";
|
---|
1233 | hitsoption += ">all\n";
|
---|
1234 | hitsoption += " </select>\n";
|
---|
1235 |
|
---|
1236 | disp.setmacro ("hitsperpageoption", "preferences", hitsoption);
|
---|
1237 | }
|
---|