Ignore:
Timestamp:
1999-02-04T14:17:28+13:00 (25 years ago)
Author:
rjmcnab
Message:

Got it outputing something.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/recpt/receptionist.cpp

    r108 r145  
    1212/*
    1313   $Log$
    14    Revision 1.1  1999/01/08 08:40:59  rjmcnab
     14   Revision 1.2  1999/02/04 01:17:27  rjmcnab
    1515
    16    Moved from lib directory.
     16   Got it outputing something.
    1717
    18    Revision 1.1  1999/01/08 03:57:47  rjmcnab
    19 
    20    Initial revision
    2118
    2219 */
    2320
    24 static char *RCSID = "$Id$";
     21
     22#include "receptionist.h"
     23#include "fileutil.h"
     24#include <assert.h>
    2525
    2626
    27 #include "receptionist.h"
     27// this version of set_gsdlhome should be used if the receptionist
     28// is being run for multiple collections ("general" mode).
     29void receptionist::set_gsdlhome (const text_t &thegsdlhome) {
     30  gsdlhome = thegsdlhome;
     31  collectdir = thegsdlhome;
     32  collection = "";
     33}
    2834
    2935
    30 /*
    31 #include <string.h>
    32 #include <stdio.h>
    33 #include <stdlib.h>
    34 #include <ctype.h>
    35 #include <time.h>
    36 #include "libinterface.h"
    37 #include "cgiargs.h"
    38 #include "fileutil.h"
    39 #include "cfgread.h"
    40 #include "gsdlunicode.h"
    41 #include "unitool.h"
     36// this version of set_gsdlhome should be used if the receptionist
     37// is being run for a single collection ("collection specific" mode).
     38void receptionist::set_gsdlhome (const text_t &thegsdlhome, const text_t &thecollection) {
     39  gsdlhome = thegsdlhome;
     40  collection = thecollection;
    4241
    43 #include <assert.h>
    44 */
     42  // decide where collectdir is by searching for collect.cfg
     43  // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
     44  // then $GSDLHOME/etc/collect.cfg
     45  collectdir = filename_cat (gsdlhome, "collect");
     46  collectdir = filename_cat (collectdir, collection);
     47  text_t filename = filename_cat (collectdir, "etc");
     48  filename = filename_cat (filename, "collect.cfg");
     49
     50  if (!file_exists(filename)) collectdir = gsdlhome;
     51}
    4552
    4653
    47 
    48 //////////////////////////////
    49 // methods for libinterface //
    50 //////////////////////////////
    51 
    52 // constructor
    53 
    54 libinterface::libinterface() {
    55   browse = NULL;
     54// sets the http address of the images directory. This is used to
     55// speed up the access to the images which are a part of the general
     56// interface. If this is not set the interface will have to get the
     57// images via gwcgi which will be a lot slower (especially if the
     58// browser does not cache the images).
     59void receptionist::set_httpimg (const text_t &thehttpimg) {
     60  httpimg = thehttpimg;
    5661}
    5762
    58 void libinterface::setgsdlhome (const text_t &thegsdlhome) {
    59   text_t thecollectdir, thegdbmdir, filename;
    60 
    61   gsdlhome = thegsdlhome;
    62 
    63   // search for etc/collect.cfg
    64   thecollectdir = filename_cat (gsdlhome, "collect");
    65   thecollectdir = filename_cat (thecollectdir, get_collection_name());
    66   filename = filename_cat (thecollectdir, "etc");
    67   filename = filename_cat (filename, "collect.cfg");
    68 
    69   if (!file_exists(filename)) thecollectdir = gsdlhome;
    70 
    71   thegdbmdir = filename_cat (thecollectdir, "index");
    72   thegdbmdir = filename_cat (thegdbmdir, "text");
    73 
    74   setcollectdir (thecollectdir);
    75   setgdbmdir (thegdbmdir);
    76 }
    77 
    78 void libinterface::setcollectdir (const text_t &thecollectdir) {
    79   collectdir = thecollectdir;
    80 
    81   search.setcollectdir (collectdir);
    82 }
    83 
    84 void libinterface::setgdbmdir (const text_t &thegdbmdir) {
    85   gdbmdir = thegdbmdir;
    86 }
    87 
    88 void libinterface::sethttpprefix (const text_t &thehttpprefix) {
    89   httpprefix = thehttpprefix;
    90 }
    91 
    92 void libinterface::setgwcgi (const text_t &thegwcgi) {
     63// sets the http address of the gateway cgi program (ie. the program
     64// that contains this receptionist).
     65void receptionist::set_gwcgi (const text_t &thegwcgi) {
    9366  gwcgi = thegwcgi;
    9467}
    9568
    9669
    97 
    98 
    99 // init should be called after the various homes are set,
    100 // it returns 'false' on failure and 'true' on success
    101 bool libinterface::init (ostream &logout) {
    102   text_t collection = get_collection_name();
    103 
     70// init should be called after set_gsdlhome has been called.
     71// It returns true on success and false on failure. If false is
     72// returned getpage should not be called (without producing
     73// meaningless output), instead an error page should be
     74// produced by the calling code.
     75bool receptionist::init (ostream &logout) {
    10476  // redirect the error output to logout
    10577  disp.setlogout (&logout);
    106   gdbm.setlogout (&logout);
    107 
    108   // open the gdbm file
    109   text_t filename = filename_cat (gdbmdir, get_collection_name ());
    110 #ifdef _LITTLE_ENDIAN
    111   filename += ".ldb"; // little endian version of the gdbm database
    112 #else
    113   filename += ".bdb"; // big endian version on the gdbm database
    114 #endif
    115   gdbm.opendatabase (filename);
    116 
    11778
    11879  // set default values for the configuration file
     80  cfg_info.defaultaction = "p";
     81  cfg_info.defaultpage = "about";
    11982  cfg_info.defaultencoding = "w";
    12083
     
    12487  filename = filename_cat (filename, "collect.cfg");
    12588  cfg_read(filename);
    126   filename = filename_cat (collectdir, "index");
    127   filename = filename_cat (filename, "build.cfg");
    128   cfg_read(filename);
    129 
    130   //  logout << logconvert << "defaultindex: " << cfg_info.defaultindex << "\n";
    131 
    132   // set the default index
    133   if (cfg_info.indexmap.empty()) {
    134     // ?? no indexes built ??
    135     logout << "warning: no indexes have been built\n";
    136     default_index.clear();
    137   } else if (cfg_info.defaultindex.empty() ||
    138          !isrealindex (cfg_info.indexmap, cfg_info.defaultindex)) {
    139     logout << "warning: the default index has been reset to the first index\n";
    140     getrealdirindex (cfg_info.indexmap[0], cfg_info.defaultindex, default_index);
    141   } else {
    142     default_index = real2dirindex (cfg_info.indexmap, cfg_info.defaultindex);
    143   }
    144 
    145   // set the text default index (the default index to use when
    146   // retrieving documents).
    147   text_default_index = default_index;
    148   if (!isdoclevelindex (cfg_info.defaultindex)) {
    149     text_default_index = real2dirindex (cfg_info.indexmap,
    150                     getdoclevelindex (cfg_info.indexmap));
    151   }
    15289
    15390  // load up the default macro files, the collection directory
     
    171108  gboutconvert.set_rzws(1);
    172109
    173   return collection_init(collection);
     110  return collect_init(collection);
    174111}
    175112
    176113
    177 // examine the cgi arguments and create the appropriate page,
    178 // outputting the page to textout and any debug information to logout
    179 //
    180 // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
    181 int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout) {
    182   int err = LI_NOERROR;
    183 
    184   // make the output go where they want!
    185   gdbm.setlogout (&logout);
    186   disp.setlogout (&logout);
    187 
    188   cgiargsclass args;
    189 
    190   parse_cgi_args (argstr, args);
    191   expand_compressed_args (args);
    192   add_default_args (args);
    193   check_args (args);
    194 
    195   // get the input encoding
    196   text_t &arg_w = args["w"];
    197   inconvertclass *inconvert = NULL;
    198   if (arg_w == "8") {
    199     inconvert = &utf8inconvert;
    200   } else if (arg_w == "g") {
    201     // The map files will only be loaded the first time they are
    202     // needed. The loading is done here to reduce the memory load
    203     // for collections which don't need to convert to GB.
    204     gbinconvert.loadmapfile (gsdlhome, "gbku", 0x25a1);
    205     inconvert = &gbinconvert;
    206   } else {
    207     inconvert = &asciiinconvert; // default
    208   }
    209 
    210   // see if the next page will have a different encoding
    211   if (args.getarg("nw") != NULL) args["w"] = args["nw"];
    212 
    213   // convert arguments which aren't in unicode to unicode
    214   args_tounicode (args, *inconvert);
    215 
    216   // remember the state of the compressed arguments
    217   lastcomparg = get_compressed_args (args);
    218   logout << args;
    219 
    220   // get the output encoding
    221   text_t &arg_nw = args["w"];
    222   outconvertclass *outconvert = NULL;
    223   if (arg_nw == "8") {
    224     outconvert = &utf8outconvert;
    225   } else if (arg_nw == "g") {
    226     gboutconvert.loadmapfile (gsdlhome, "ugbk", 0xa1f5);
    227     outconvert = &gboutconvert;
    228   } else {
    229     outconvert = &asciioutconvert; // default
    230   }
    231 
    232 
    233   // dispatch the request
    234   text_t &arg_a = args["a"];
    235   if (arg_a == "q") err = query_action (args, *outconvert, textout, logout);
    236   else if (arg_a == "b") browse_action (args, *outconvert, textout, logout);
    237   else if (arg_a == "t") document_action (args, *outconvert, textout, logout);
    238   else if (arg_a == "p") page_action (args, *outconvert, textout, logout);
    239   else if ((arg_a.size() == 2) && (arg_a[0] == 'a'))
    240     auxiliary_action (args, *outconvert, textout, logout);
    241   else
    242     {
    243       // output error page
    244     }
    245 
    246   return err;
    247 }
    248 
    249 // the arg config string is used to do processing on the arguments
    250 // entries take the form argname[defaultvalue]
    251 // if the argument name is preceded by a "+" it means that the
    252 // value may be more than one character long
    253 // the main state variable missed on this list is "q" the query string
    254 void libinterface::get_arg_config (text_t &argconfigstr)
    255 {
    256   argconfigstr =
    257     "+a[p]"    // action: q=query, b=browse, t=targetdoc, p=page, a1=auxiliary
    258     "w[]"      // encoding: w=western, 8=utf8, 7=utf7, g=GB2312, k=GBK
    259     "t[1]"     // query type: 0=boolean, 1=ranked
    260     "+i[]"      // index: collection dependant
    261     "k[1]"     // casefolding: 0=off, 1=on
    262     "s[0]"     // stemming: 0=off, 1=on
    263     "+p[home]" // page
    264     "+c[]"     // collection (collection dependant)
    265     "+r[1]"    // results from
    266     "+d[C.1]"  // the target document
    267     "+j[11]"   // partial index: 11=all, 10=f&n, 01=other
    268     "+m[100]"  // maxdocs
    269     "+o[20]"   // hits per page
    270     "v[0]"     // version: 0=text+graphics, 1=text
    271     "f[0]"     // query box size: 0=normal, 1=big
    272     "l[e]";    // language: e=english, m=maori
    273  
    274 }
    275 
    276 text_t::iterator libinterface::get_next_config_arg (text_t::iterator first,
    277                             text_t::iterator last,
    278                             text_t &argname,
    279                             text_t &defaultvalue,
    280                             bool &longarg)
    281 {
    282   first = getdelimitstr (first, last, '[', argname);
    283   first = getdelimitstr (first, last, ']', defaultvalue);
    284   longarg = false;
    285 
    286   if (!argname.empty() && (argname[0] == '+'))
    287     {
    288       argname.erase(argname.begin(), argname.begin()+1);
    289       longarg = true;
    290     }
    291 
    292   return first;
    293 }
    294 
    295 text_t libinterface::get_compressed_args (cgiargsclass &args)
    296 {
    297   text_t argconfigstr; get_arg_config (argconfigstr);
    298   text_t arg_e;
    299   text_t argname, defaultvalue;
    300   text_t *argvalue;
    301   bool longarg;
    302 
    303   text_t::iterator here = argconfigstr.begin();
    304   text_t::iterator end = argconfigstr.end();
    305   while (here != end)
    306     {
    307       here = get_next_config_arg (here, end, argname, defaultvalue, longarg);
    308 
    309       if (!argname.empty())
    310     {
    311       argvalue = args.getarg (argname);
    312       if (argvalue == NULL) arg_e += defaultvalue;
    313       else arg_e += *argvalue;
    314        
    315       if (longarg) arg_e += "-";
    316     }
    317     }
    318  
    319   return arg_e;
     114// produce_cgi_page will call parse_cgi_args, get_cgihead_info and
     115// produce_content in the appropriate way to output a cgi header and
     116// the page content (if needed).
     117void receptionist::produce_cgi_page (const text_t &argstr, ostream &contentout,
     118                     ostream &logout) {
     119  contentout << "Content-type: text/plain\n\nHello\n";
     120  contentout << flush;
    320121}
    321122
    322123
    323 // the compressed options should never override explicit options
    324 // but they should always be expanded before add_default_args is
    325 // called
    326 void libinterface::expand_compressed_args (cgiargsclass &args)
    327 {
    328   text_t *arg_e = args.getarg("e");
    329 
    330   // see if there is compressed options
    331   if (arg_e != NULL)
    332     {
    333       text_t argconfigstr; get_arg_config (argconfigstr);
    334       text_t argname, defaultvalue, argvalue;
    335       bool longarg;
    336 
    337       text_t::iterator confighere = argconfigstr.begin();
    338       text_t::iterator configend = argconfigstr.end();
    339 
    340       text_t::iterator arghere = arg_e->begin();
    341       text_t::iterator argend = arg_e->end();
    342       while (confighere != configend && arghere != argend)
    343     {
    344       confighere = get_next_config_arg (confighere, configend, argname,
    345                         defaultvalue, longarg);
    346       if (!argname.empty())
    347         {
    348           if (longarg)
    349         {
    350           arghere = getdelimitstr (arghere, argend, '-', argvalue);
    351           if (!argvalue.empty()) args.setdefaultarg (argname, argvalue);
    352         }
    353           else
    354         {
    355           args.setdefaultcarg (argname,*arghere);
    356           arghere++;
    357         }
    358         }
    359     }
    360     }
     124// parse_cgi_args parses cgi arguments into an argument class.
     125// This function should be called for each page request.
     126void receptionist::parse_cgi_args (const text_t &argstr, cgiargsclass &args,
     127                   ostream &logout) {
    361128}
    362129
    363130
    364 void libinterface::add_default_args (cgiargsclass &args)
    365 {
    366   text_t argconfigstr; get_arg_config (argconfigstr);
    367   text_t argname, defaultvalue;
    368   bool longarg;
    369 
    370   text_t::iterator confighere = argconfigstr.begin();
    371   text_t::iterator configend = argconfigstr.end();
    372   while (confighere != configend)
    373     {
    374       confighere = get_next_config_arg (confighere, configend, argname,
    375                     defaultvalue, longarg);
    376       if (!argname.empty()) args.setdefaultarg (argname, defaultvalue);
    377     }
    378  
    379   // the query string and format string are not included in the argument configuration string
    380   args.setdefaultarg ("q", ""); // the default query string is ""
    381   args.setdefaultarg ("g", "00");
    382   args.setdefaultarg ("x", "0");
     131// get_cgihead_info determines the cgi header information for
     132// a set of cgi arguments. If response contains location then
     133// response_data contains the redirect address. If response
     134// contains content then response_data contains the content-type.
     135// Note that images can now be produced by the receptionist.
     136void receptionist::get_cgihead_info (const cgiargsclass &args, response_t &response,
     137                     text_t &response_data, ostream &logout) {
    383138}
    384139
    385140
    386 // check and attempt to fix any problems encountered in the list
    387 // of cgi arguments
    388 void libinterface::check_args (cgiargsclass &args)
    389 {
    390   args.setarg("c", get_collection_name());
    391   if (args["w"].empty()) args.setarg("w", cfg_info.defaultencoding);
    392   if (args["i"].empty()) args.setarg("i", default_index);
    393 }
    394 
    395 void libinterface::args_tounicode (cgiargsclass &args,
    396                    inconvertclass &inconvert) {
    397   utf8outconvertclass text_t2utf8;
    398   cgiargsclass::iterator here = args.begin();
    399   cgiargsclass::iterator end = args.end();
    400 
    401   while (here != end) {
    402     if (here->second.getencoding() > 0) {
    403       here->second = inconvert.convert(here->second);
    404     }
    405    
    406     here++;
    407   }
     141// produce the page content
     142void receptionist::produce_content (const cgiargsclass &args, ostream &contentout,
     143                    ostream &logout) {
    408144}
    409145
    410146
    411 void libinterface::define_general_macros (cgiargsclass &args, outconvertclass &outconvert,
    412                       ostream &logout) {
    413   disp.setmacro("httpprefix", "Global", httpprefix);
    414   disp.setmacro("gwcgi", "Global", gwcgi);
    415 
    416   disp.setmacro("numdocs", "Global", (int)cfg_info.numdocs);
    417 
    418   disp.setmacro("collection", "Global", cgisafe(outconvert.convert(args["c"])));
    419   disp.setmacro("compressedoptions", "Global", get_compressed_args(args));
    420   disp.setmacro("urlsafequerystring", "Global", cgisafe(outconvert.convert(args["q"])));
    421 
    422   // need to escape any special characters in querystring to prevent
    423   // them upsetting the html
    424   text_t querystring;
    425   text_t::iterator here = args["q"].begin();
    426   text_t::iterator end = args["q"].end();
    427   while (here != end) {
    428     if (*here == '"') querystring += "&quot;";
    429     else if (*here == '&') querystring += "&amp;";
    430     else if (*here == '<') querystring += "&lt;";
    431     else if (*here == '>') querystring += "&gt;";
    432     else querystring.push_back(*here);
    433     here ++;
    434   }
    435   disp.setmacro("querystring", "Global", querystring);
    436 
    437   if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1");
    438   if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1");
    439 
    440   int i = rand();
    441   disp.setmacro("pagedest", "Global", text_t(i));
    442 
    443   // define the macro for the "g" argument
    444   disp.setmacro("g", "Global", args["g"]);
    445 
    446  // set the selection macros
    447 
    448   text_t indexselect;
    449   text_t maprealindex, mapdirindex;
    450   if (cfg_info.indexmap.size() == 1) {
    451     getrealdirindex (cfg_info.indexmap[0], maprealindex, mapdirindex);
    452     indexselect += "<input type=hidden name=\"i\" value=\"";
    453     indexselect += mapdirindex;
    454     indexselect += "\">_query:";
    455     indexselect += real2macroindex (maprealindex);
    456     indexselect += "_\n";
    457 
    458   } else {
    459     text_t &arg_i = args["i"];
    460     text_tarray::const_iterator maphere = cfg_info.indexmap.begin();
    461     text_tarray::const_iterator mapend = cfg_info.indexmap.end();
    462    
    463     indexselect += "<select name=\"i\">\n";
    464     while (maphere != mapend) {
    465       getrealdirindex (*maphere, maprealindex, mapdirindex);
    466       indexselect += "<option value=\"";
    467       indexselect += mapdirindex;
    468       indexselect += "\"";
    469       if (arg_i == mapdirindex) indexselect += " selected";
    470       indexselect += ">_query:";
    471       indexselect += real2macroindex (maprealindex);
    472       indexselect += "_\n";
    473      
    474       maphere++;
    475     }
    476     indexselect += "</select>\n";
    477   }
    478 
    479   disp.setmacro("indexselection", "query", indexselect);
    480 
    481   text_t qtselect;
    482   text_t &arg_t = args["t"];
    483 
    484   qtselect += "<select name=\"t\">\n";
    485   qtselect += "<option value=\"1\"";
    486   if (arg_t == "1") qtselect += " selected";
    487   qtselect += ">_query:textsome_\n";
    488   qtselect += "<option value=\"0\"";
    489   if (arg_t == "0") qtselect += " selected";
    490   qtselect += ">_query:textall_\n";
    491   qtselect += "</select>\n";
    492 
    493   disp.setmacro("querytypeselection", "query", qtselect);
     147// returns the compressed argument ("e") corresponding to the argument
     148// list. This can be used to save preferences between sessions.
     149text_t receptionist::get_compressed_arg (const cgiargsclass &args) {
     150  return "";
    494151}
    495 
    496 
    497 // prepare_page prepares to write out a page using the current
    498 // page parameters and defines any general macros
    499 void libinterface::prepare_page (cgiargsclass &args, outconvertclass &outconvert,
    500                  ostream &logout) {
    501   // get page parameters
    502   text_t pageparams = text_t("collection=") + args["c"];
    503   if (args.getintarg("u") == 1) pageparams += ",style=htmlonly";
    504   if (args.getintarg("v") == 1) pageparams += ",version=text";
    505   if (args.getintarg("f") == 1) pageparams += ",queryversion=big";
    506   if (args["l"] != 'e') pageparams += ",language=" + args["l"];
    507 
    508   disp.openpage(pageparams, MACROPRECEDENCE);
    509   define_general_macros(args, outconvert, logout);
    510   define_collection_macros(args, logout);
    511 }
    512 
    513 void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams)
    514 {
    515   queryparams.collection = args["c"];
    516   //  assemble_index (args, queryparams.search_index);
    517   queryparams.search_index = args["i"];
    518   queryparams.querystring = args["q"];
    519   format_querystring (queryparams.querystring);
    520   queryparams.search_type = args.getintarg ("t");
    521   queryparams.casefolding = args.getintarg ("k");
    522   queryparams.stemming = args.getintarg ("s");
    523   queryparams.maxdocs = args.getintarg ("m");
    524 }
    525 
    526 void libinterface::format_querystring (text_t &querystring)
    527 {
    528   text_t formattedstring;
    529   quotedstring.clear();
    530 
    531   text_t::iterator here = querystring.begin();
    532   text_t::iterator end = querystring.end();
    533   int foundquote = 0;
    534 
    535   // want to remove ()|!& from querystring so boolean queries are just
    536   // "all the words" queries
    537   while (here != end) {
    538     if (*here == '(' || *here == ')' || *here == '|' ||
    539     *here == '!' || *here == '&') {
    540       formattedstring += " ";
    541     } else {
    542       if (*here == '"') {
    543     if (foundquote) {foundquote = 0; quotedstring.push_back(*here);}
    544     else foundquote = 1;
    545       } else {
    546     formattedstring.push_back(*here);
    547       }
    548       if (foundquote) quotedstring.push_back(*here);
    549     }
    550     here ++;
    551   }
    552   querystring = formattedstring + quotedstring;
    553 }
    554 
    555 void libinterface::define_query_macros (cgiargsclass &args,
    556                     queryparamclass &queryparams,
    557                     queryresultsclass &queryresults,
    558                     ostream &logout)
    559 {
    560   int numdocs = queryresults.getnumdocs();
    561   int numterms = queryresults.getnumterms();
    562   disp.setmacro("querysize", "query", args["f"]);
    563   disp.setmacro("haveresults", "query", numdocs);
    564  
    565   // set the display frequency macro
    566   text_t freqmsg = "_textfm1_";
    567 
    568   int first = 1;
    569   for (int i = 0; i < numterms; i++) {
    570     if (first == 0) freqmsg += "; ";
    571     first = 0;
    572     freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq;
    573   }
    574   if (!quotedstring.empty()) freqmsg += "<br><i>post-processed to find " + quotedstring + "</i>\n";
    575   disp.setmacro("freqmsg", "query", freqmsg);
    576 
    577   // set the result line macro
    578 
    579   text_t resline;
    580  
    581   if (numdocs >= queryparams.maxdocs)
    582     resline.setcstr("_textmt2_");
    583  
    584   if (numdocs == 0) {
    585     resline.setcstr("_textndmtq_");
    586   } else if (numdocs == 1) {
    587     resline += text_t(numdocs) + text_t(" _textdmtq2_.");
    588   } else {
    589     resline += text_t(numdocs) + text_t(" _textdmtq3_.");
    590   }
    591 
    592   disp.setmacro("resultline", "query", resline);
    593 
    594   //  define_collection_macros (args, logout);
    595 
    596   if (queryresults.getnumdocs() > 0) {
    597     docLinks(args, queryresults, logout);
    598   }
    599 }
    600 
    601 
    602 // set the _links_ macro to create the links between pages of query results
    603 void libinterface::docLinks (cgiargsclass &args,
    604                  queryresultsclass &queryresults,
    605                  ostream &logout)
    606 {
    607   text_t links;
    608   int a, b, documents, nextfirst, nextlast, prevfirst, prevlast;
    609   int results_from = args.getintarg("r");
    610   int hitsperpage = args.getintarg("o");
    611  
    612   documents = queryresults.getnumdocs();
    613  
    614   a = results_from;
    615   b = a + (hitsperpage - 1);
    616  
    617   // make sure a and b are in range
    618   if (a < 1) a = 1;
    619   if (b < 1) b = 1;
    620   if (a > documents) a = documents;
    621   if (b > documents) b = documents;
    622  
    623   links.setcstr("<table cellspacing=0 cellpadding=0 border=0 width=\"100%\">\n");
    624   links += "<tr valign=bottom>\n";
    625   links += "<td align=left>\n";
    626   links += "<_font_>\n";
    627  
    628   // previous page link
    629   if (a > 1) {
    630     prevlast = a - 1;
    631     prevfirst = a - hitsperpage;
    632    
    633     links += "<p><a href=\"_httpqueryresults_&r=";
    634     links += prevfirst;
    635     links += "\">_iconprev__textmatches_ ";
    636     links += prevfirst;
    637     links += " - ";
    638     links += prevlast;
    639     links += "</a>\n";
    640   }
    641  
    642   links += "</font>\n";
    643   links += "</td><td width=100></td><td align=right>\n";
    644   links += "<_font_>\n";
    645  
    646   // next page link
    647   if (b < documents) {
    648     nextfirst = b + 1;
    649     nextlast = b + hitsperpage;
    650     if (nextlast > documents) nextlast = documents;
    651    
    652     links += "<p><a href=\"_httpqueryresults_&r=";
    653     links += nextfirst;
    654     links += "\">_textmatches_ ";
    655     links += nextfirst;
    656     links += " - ";
    657     links += nextlast ;
    658     links += "_iconnext_</a>\n";
    659   }
    660  
    661   links += "</font>\n";
    662   links += "</td></tr></table>\n";
    663  
    664   disp.setmacro("links", "query", links);
    665 }
    666 
    667 
    668 // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
    669 int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams,
    670                 queryresultsclass &queryresults, ostream &logout)
    671 {
    672   set_query_params(args, queryparams);
    673 
    674   if (!queryparams.querystring.empty()) {
    675     // do the query - the results are returned in queryresults
    676     if (!search.search(queryparams, queryresults)) {
    677       logout << "ERROR: database didn't load\n";
    678       return LI_LOADDATABASEFAILED;
    679     }
    680   }
    681   return LI_NOERROR;
    682 }
    683 
    684 ////////////////////////////////////////////////////////////////////////////////////////
    685 // query_action is called whenever a search is to be carried out  (i.e. when the
    686 // 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to
    687 // carry out the search then displays the first page of results.
    688 //
    689 // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure
    690 int libinterface::query_action (cgiargsclass &args, outconvertclass &outconvert,
    691                 ostream &textout, ostream &logout) {
    692   int err = LI_NOERROR;
    693 
    694   queryparamclass queryparams;
    695   queryresultsclass queryresults;
    696 
    697   err = do_query(args, queryparams, queryresults, logout);
    698 
    699   // prepare to print out the page
    700   prepare_page(args, outconvert, logout);
    701   define_query_macros(args, queryparams, queryresults, logout);
    702 
    703   // print out the query page
    704   textout << outconvert << disp << "_query:header_\n";
    705 
    706   // output query results if there is a query string -
    707   // otherwise output help text
    708   if (!queryparams.querystring.empty())
    709     {
    710       displayresults (args, outconvert, textout, logout, queryresults);
    711     }
    712   else
    713     {
    714       textout << outconvert << disp << "_query:noqueryheader_\n";
    715     }
    716  
    717   textout << outconvert << disp << "_query:footer_\n";
    718 
    719   return err;
    720 }
    721 
    722 void libinterface::displayresults (cgiargsclass &args, outconvertclass &outconvert,
    723                    ostream &textout, ostream &logout,
    724                    queryresultsclass &queryresults) {
    725   textout << outconvert << disp << "_query:queryheader_";
    726 
    727   int startresults = args.getintarg("r") - 1;
    728   int numresults = args.getintarg("o");
    729 
    730   textout << outconvert << "<table cellspacing=4>\n";
    731   for (int i=startresults; i < startresults+numresults; i++) {
    732     displaydocsummary (args, outconvert, textout, logout, queryresults, i);
    733   }
    734   textout << outconvert << "</table>\n\n";
    735 
    736   textout << outconvert << disp << "_query:queryfooter_";
    737 }
    738 
    739 
    740 ///////////////////////////////////////////////////////////////////////////////////////////////
    741 // browse_action writes out the browse pages (i.e. the top level hierarchy pages)
    742 void libinterface::browse_action (cgiargsclass &args, outconvertclass &outconvert,
    743                   ostream &textout, ostream &logout) {
    744   text_t browse_bar, locator, output;
    745   gdbm_info info;
    746  
    747   prepare_page(args, outconvert, logout);
    748  
    749   // get browse bar unless page has been detached
    750   if (args.getintarg("x") == 0) {
    751     browse->get_browse_bar(args["d"], browse_bar);
    752   }
    753  
    754   // get top locator
    755   browse->get_top_locator(args, gdbm, 0, locator); 
    756 
    757   // expand and output page
    758   // Note: we need to expand these out using package 'browse'
    759   // so we can't use the tricky '<<' syntax
    760   disp.expandstring("browse", "_header_", output);
    761   textout << outconvert << output;
    762   disp.expandstring("browse", browse_bar, output);
    763   textout << outconvert << output;
    764   disp.expandstring("browse", locator, output);
    765   textout << outconvert << output;
    766   disp.expandstring("browse", "_footer_", output);
    767   textout << outconvert << output;
    768 }
    769 
    770 
    771 
    772 /////////////////////////////////////////////////////////////////////////////////////////
    773 // document_action is called to retrieve and display collection documents
    774 // It calls the mgsearch function docTargetDocument() to retrieve
    775 // a document.
    776 
    777 void libinterface::document_action (cgiargsclass &args, outconvertclass &outconvert,
    778                     ostream &textout, ostream &logout) {
    779  
    780   text_t locator, content, links, output;
    781   gdbm_info info;
    782   queryparamclass queryparams;
    783   queryresultsclass queryresults;
    784   int oversize = 0;
    785 
    786   // have to redo the query to get queryterms for highlight text
    787   do_query(args, queryparams, queryresults, logout);
    788 
    789   prepare_page(args, outconvert, logout);
    790  
    791   if (args["g"][1] == '0') {
    792     // get docnum from gdbm
    793     text_t docref;
    794     if (args["d"][0] != 'B') get_book(args["d"], docref);
    795     else docref = args["d"];
    796     if (gdbm.getinfo(docref, info) != 0) {
    797       logout << logconvert << "info_db wasn't opened - " << docref << "\n";
    798       return;
    799     }
    800    
    801     // get document text if there is any
    802     if (info.contents.empty()) {
    803         search.docTargetDocument(text_default_index, queryparams.collection,
    804                  info.docnum, content);
    805     if (info.title != "<i>(introductory text)</i>")
    806       content = "<h3>" + info.title + "</h3>\n" + content;
    807     }
    808   }
    809 
    810   if (args["g"][1] == '1') {
    811     // want to get expanded out text
    812     vector<text_t> contents_arr;
    813     text_t booksection;
    814     int levelcount;
    815 
    816     get_book (args["d"], booksection);
    817     levelcount = count_dots(booksection);
    818 
    819     browse->get_contents_arr(args, gdbm, contents_arr);
    820    
    821 
    822     // get text for each section of book
    823     vector<text_t>::const_iterator thiscontent = contents_arr.begin();
    824     vector<text_t>::const_iterator end = contents_arr.end();
    825 
    826     int first = 1;
    827     int count = 1;
    828     while (thiscontent != end) {
    829       text_t text;
    830 
    831       // get docnum from gdbm
    832       if (gdbm.getinfo(*thiscontent, info) != 0) {
    833     logout << logconvert << "info_db wasn't opened - " << args["d"] << "\n";
    834     return;
    835       }
    836 
    837       // if section has text get it, otherwise output section title
    838       if (info.contents.empty()) {
    839 
    840     // output <a name= > tags for all text sections currently displayed in toc (all text sections
    841     // if contents are expanded
    842     if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') {
    843       content += "<a name=\"";
    844       content += count;
    845       content += "\"></a>\n";
    846       count ++;
    847     }
    848 
    849     search.docTargetDocument(text_default_index, queryparams.collection,
    850                  info.docnum, text);                   
    851     if (info.title != "<i>(introductory text)</i>")
    852       content += "<h3>" + info.title + "</h3>\n";
    853     //  content += text + "<hr><br>\n";
    854     content += text + "<p>\n"; // no longer want <hr> between sections
    855       } else {
    856     content += "<h3>" + info.title + "</h3>\n";
    857       }
    858 
    859       if (args["n"] == 1) {
    860     if (first) {
    861       browse->get_top_locator(args, gdbm, 0, locator);
    862       disp.expandstring("text", "_header_", output);
    863       textout << outconvert << output;
    864       disp.expandstring("text", locator, output);
    865       textout << outconvert << output;
    866     }
    867    
    868     disp.expandstring("text", content, output);
    869     if (!queryparams.querystring.empty())
    870       highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
    871     else
    872       textout << outconvert << output;
    873     first = 0;
    874     content.clear();
    875       }
    876 
    877       thiscontent ++;
    878       if (content.size() > 200000 && args["n"] != 1) {
    879     content.clear();
    880     oversize = 1;
    881     args["g"][1] = '0';
    882     break;
    883       }
    884     }
    885 
    886     if (args["g"][1] == '0') {
    887       // get docnum from gdbm
    888       text_t docref;
    889       if (args["d"][0] != 'B') get_book(args["d"], docref);
    890       else docref = args["d"];
    891       if (gdbm.getinfo(docref, info) != 0) {
    892         logout << logconvert << "info_db wasn't opened - " << docref << "\n";
    893         return;
    894       }
    895    
    896       // get document text if there is any
    897       if (info.contents.empty()) {
    898         search.docTargetDocument(text_default_index, queryparams.collection,
    899                                  info.docnum, content);
    900     if (info.title != "<i>(introductory text)</i>")
    901       content = "<h3>" + info.title + "</h3>\n" + content;
    902       }
    903     }
    904   }
    905 
    906   if (args["n"] != 1) {
    907     // get top locator
    908     browse->get_top_locator(args, gdbm, oversize, locator);
    909 
    910     // expand and output page
    911     // Note: we need to expand these out using package 'text'
    912     // so we can't use the tricky '<<' syntax
    913     disp.expandstring("text", "_header_", output);
    914     textout << outconvert << output;
    915 
    916     disp.expandstring("text", locator, output);
    917     textout << outconvert << output;
    918 
    919     disp.expandstring("text", content, output);
    920     if (!queryparams.querystring.empty())
    921       highlighttext(queryresults.termvariants, outconvert, textout, logout, output);
    922     else
    923       textout << outconvert << output;
    924   }
    925 
    926   // get links to next and previous sections unless in expand text mode
    927   if (args["g"][1] == '0') {
    928     browse->get_links(args, gdbm, links);
    929     disp.expandstring("text", links, output);
    930     textout << outconvert << output;
    931   }
    932 
    933   disp.expandstring("text", "_footer_", output);
    934   textout << outconvert << output;
    935 }
    936 
    937 
    938 /////////////////////////////////////////////////////////////////////////////////////////
    939 // auxiliary_action is called to retrieve and display collection documents
    940 // in formats other than those handled by document_action (i.e. those other
    941 // than text). This should be overridden for collections needing to return
    942 // images, postscript etc. You can have as many auxiliary actions as needed
    943 // by setting arg_a to a1, a2, a3 etc. and testing arg["a"][1] within the
    944 // auxiliary_action function.
    945 // auxiliary_action defaults to calling document_action
    946 
    947 void libinterface::auxiliary_action (cgiargsclass &args, outconvertclass &outconvert,
    948                      ostream &textout, ostream &logout) {
    949   document_action (args, outconvert, textout, logout);
    950 }
    951 
    952 /////////////////////////////////////////////////////////////////////////////////////////
    953 // page is called when a standard html page is to be displayed
    954 void libinterface::page_action (cgiargsclass &args, outconvertclass &outconvert,
    955                 ostream &textout, ostream &logout) {
    956 
    957   text_t &arg_p = args["p"];
    958 
    959   prepare_page(args, outconvert, logout);
    960 
    961   if (arg_p == "preferences")
    962     define_pref_macros(args, logout);
    963 
    964   textout << outconvert << disp << ("_" + arg_p + ":header_\n")
    965       << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n")
    966       << ("_" + arg_p + ":footer_\n");
    967 }
    968 
    969 // highlighttext highlights query terms in text string and outputs the resulting text string
    970 void libinterface::highlighttext(text_tarray &termvars, outconvertclass &outconvert,
    971                  ostream &textout, ostream &logout, text_t &text) {
    972   map<text_t, int, lttext_t> terms;
    973   map<text_t, int, lttext_t>::const_iterator it;
    974   for (unsigned int i = 0; i < termvars.size(); i++) {
    975     terms[termvars[i]] = 1;
    976   }
    977 
    978   text_t::iterator here = text.begin();
    979   text_t::iterator end = text.end();
    980   text_t word, buffer;
    981   while (here != end) {
    982     if (is_unicode_letdig(*here)) {
    983       // not word boundary
    984       word.push_back(*here);
    985       here++;
    986 
    987     } else {
    988       // found word boundary
    989       // add last word if there was one
    990       if (!word.empty()) {
    991     it = terms.find(word);
    992     if (it != terms.end()) {
    993       word = "<b><u>" + word + "</u></b>";
    994     }
    995     buffer += word;
    996         word.clear();
    997       }
    998 
    999       if (*here == '<') {
    1000         // skip over rest of html tag
    1001     while ((here != end) && (*here != '>')) {
    1002       buffer.push_back(*here);
    1003       here++;
    1004     }
    1005       }
    1006 
    1007       buffer.push_back(*here);
    1008       here++;
    1009 
    1010       if (buffer.size() > 1024) {
    1011     textout << outconvert << buffer;
    1012     buffer.clear();
    1013       }
    1014     }
    1015   }
    1016   textout << outconvert << buffer;
    1017 }
    1018 
    1019 void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout)
    1020 {
    1021   // the caseoption macro
    1022   text_t caseoption;
    1023   int arg_k = args.getintarg("k");
    1024 
    1025   caseoption += "\n<input type=radio name=k value=1";
    1026   if (arg_k) caseoption += " checked";
    1027   caseoption += "> ignore case differences<br>\n";
    1028   caseoption += "<input type=radio name=k value=0";
    1029   if (!arg_k) caseoption += " checked";
    1030   caseoption += "> upper/lower case must match\n";
    1031 
    1032   disp.setmacro ("caseoption", "preferences", caseoption);
    1033 
    1034   // the stemoption macro
    1035   text_t stemoption;
    1036   int arg_s = args.getintarg("s");
    1037 
    1038   stemoption += "\n<input type=radio name=s value=1";
    1039   if (arg_s) stemoption += " checked";
    1040   stemoption += "> ignore word endings<br>\n";
    1041   stemoption += "<input type=radio name=s value=0";
    1042   if (!arg_s) stemoption += " checked";
    1043   stemoption += "> whole word must match\n";
    1044 
    1045   disp.setmacro ("stemoption", "preferences", stemoption);
    1046 
    1047 
    1048   // the encodingoption
    1049   text_t encodingoption;
    1050   const text_t &arg_w = args["w"];
    1051 
    1052   encodingoption += "\n<select name=\"nw\">\n";
    1053   encodingoption += "  <option value=\"w\"";
    1054   if (arg_w == "w") encodingoption += " selected";
    1055   encodingoption += ">Western (ISO-8859-1)\n";
    1056   encodingoption += "  <option value=\"g\"";
    1057   if (arg_w == "g") encodingoption += " selected";
    1058   encodingoption += ">Simplified Chinese (GB2312)\n";
    1059   encodingoption += "  <option value=\"8\"";
    1060   if (arg_w == "8") encodingoption += " selected";
    1061   encodingoption += ">Unicode (UTF-8)\n";
    1062   encodingoption += "</select>\n";
    1063 
    1064   disp.setmacro ("encodingoption", "preferences", encodingoption);
    1065 
    1066   // the maxdocoption
    1067   text_t maxdocoption;
    1068   int arg_m = args.getintarg("m");
    1069 
    1070   maxdocoption += "\n<select name=m>\n";
    1071   maxdocoption += "  <option value=\"50\"";
    1072   if (arg_m < 100) maxdocoption += " selected";
    1073   maxdocoption += ">50\n";
    1074   maxdocoption += "  <option value=\"100\"";
    1075   if (arg_m >= 100 && arg_m < 200) maxdocoption += " selected";
    1076   maxdocoption += ">100\n";
    1077   maxdocoption += "  <option value=\"200\"";
    1078   if (arg_m >= 200 && arg_m < 500) maxdocoption += " selected";
    1079   maxdocoption += ">200\n";
    1080   maxdocoption += "  <option value=\"500\"";
    1081   if (arg_m >= 500) maxdocoption += " selected";
    1082   maxdocoption += ">500\n";
    1083   maxdocoption += "</select>\n";
    1084 
    1085   disp.setmacro ("maxdocoption", "preferences", maxdocoption);
    1086 
    1087   // the hitsperpageoption
    1088   text_t hitsoption;
    1089   int arg_o = args.getintarg("o");
    1090 
    1091   hitsoption += "\n<select name=o>\n";
    1092   hitsoption += "  <option value=\"10\"";
    1093   if (arg_o < 20) hitsoption += " selected";
    1094   hitsoption += ">10\n";
    1095   hitsoption += "  <option value=\"20\"";
    1096   if (arg_o >= 20 && arg_o < 50) hitsoption += " selected";
    1097   hitsoption += ">20\n";
    1098   hitsoption += "  <option value=\"50\"";
    1099   if (arg_o >= 50 && arg_o < 100) hitsoption += " selected";
    1100   hitsoption += ">50\n";
    1101   hitsoption += "  <option value=\"100\"";
    1102   if (arg_o >= 100 && arg_o < 500) hitsoption += " selected";
    1103   hitsoption += ">100\n";
    1104   hitsoption += "  <option value=\"500\"";
    1105   if (arg_o >= 500) hitsoption += " selected";
    1106   hitsoption += ">all\n";
    1107   hitsoption += "  </select>\n";
    1108 
    1109   disp.setmacro ("hitsperpageoption", "preferences", hitsoption);
    1110 }
Note: See TracChangeset for help on using the changeset viewer.