#include #include #include #include #include #include "libinterface.h" #include "cgiargs.h" #include /////////////////////// // support functions // /////////////////////// unsigned short hexdigit (unsigned short c) { if (c >= '0' && c <= '9') return (c-'0'); if (c >= 'a' && c <= 'f') return (c-'a'+10); if (c >= 'A' && c <= 'F') return (c-'A'+10); return c; } void c2hex (unsigned short c, text_t &t) { t.clear(); if (c >= 256) { t = "20"; // ' ' return; } unsigned short o1, o2; o1 = (c/16) % 16; o2 = c % 16; if (o1 >= 10) o1 += 'a' - 10; else o1 += '0'; if (o2 >= 10) o2 += 'a' - 10; else o2 += '0'; t.push_back(o1); t.push_back(o2); } // convert %xx and + to their appropriate equivalents void decode (text_t &argstr) { text_t::iterator in = argstr.begin(); text_t::iterator out = in; text_t::iterator end = argstr.end(); while (in != end) { if (*in == '+') *out = ' '; else if (*in == '%') { unsigned short c = '%'; in++; if (in != end) { c = hexdigit (*in); in++; } if (in != end && c < 16) // sanity check on the previous character { c = c*16 + hexdigit (*in); } *out = c; } else *out = *in; if (in != end) in++; out++; } // remove the excess characters argstr.erase (out, end); } // split up the cgi arguments void parse_cgi_args (text_t argstr, cgiargsclass &args) { args.clear(); text_t::iterator here = argstr.begin(); text_t::iterator end = argstr.end(); text_t key, value; // extract out the key=value pairs while (here != end) { // get the next key and value pair here = getdelimitstr (here, end, '=', key); here = getdelimitstr (here, end, '&', value); // convert %xx and + to their appropriate equivalents decode (value); // store this key=value pair if (!key.empty()) args.setarg (key, value); } } text_t cgisafe (text_t &intext) { text_t outtext; text_t::iterator here = intext.begin (); text_t::iterator end = intext.end (); unsigned short c; text_t ttmp; while (here != end) { c = *here; if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || ((c >= '0') && (c <= '9'))) { // alphanumeric character outtext.push_back(c); } else { // non-alphnumeric character outtext.push_back('%'); c2hex(c, ttmp); outtext += ttmp; } here++; } return outtext; } ////////////////////////////// // methods for libinterface // ////////////////////////////// // constructor libinterface::libinterface() { browse = NULL; } void libinterface::setgsdlhome (const text_t &thegsdlhome) { gsdlhome = thegsdlhome; setmacroshome (thegsdlhome); setgdbmhome (thegsdlhome); setindexhome (thegsdlhome); } void libinterface::setmacroshome (const text_t &themacroshome) { macroshome = themacroshome; } void libinterface::setgdbmhome (const text_t &thegdbmhome) { gdbmhome = thegdbmhome; gdbm.setgdbmhome(gdbmhome); } void libinterface::setindexhome (const text_t &theindexhome) { indexhome = theindexhome; search.setindexhome(theindexhome); } void libinterface::sethttpprefix (const text_t &thehttpprefix) { httpprefix = thehttpprefix; } void libinterface::setgwcgi (const text_t &thegwcgi) { gwcgi = thegwcgi; } // init should be called after the various homes are set, // it returns 'false' on failure and 'true' on success bool libinterface::init () { set_default_index(); text_t collection = get_collection_name(); // load up default macro files -- these shouldn't have absolute file names #ifdef __WIN32__ disp.loaddefaultmacros(macroshome + "\\macros\\base.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\browse.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\text.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\query.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\help.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\gsdl.dm"); disp.loaddefaultmacros(macroshome + "\\macros\\pref.dm"); #else disp.loaddefaultmacros(macroshome + "/macros/base.dm"); disp.loaddefaultmacros(macroshome + "/macros/browse.dm"); disp.loaddefaultmacros(macroshome + "/macros/text.dm"); disp.loaddefaultmacros(macroshome + "/macros/query.dm"); disp.loaddefaultmacros(macroshome + "/macros/help.dm"); disp.loaddefaultmacros(macroshome + "/macros/gsdl.dm"); disp.loaddefaultmacros(macroshome + "/macros/pref.dm"); #endif srand(time(NULL)); return collection_init(collection); } // examine the cgi arguments and create the appropriate page, // outputing the page to textout and any debug information to logout // // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure int libinterface::getpage (const text_t &argstr, ostream &textout, ostream &logout) { int err = LI_NOERROR; gdbm.setlogout (&logout); // make the output go where they want! cgiargsclass args; parse_cgi_args (argstr, args); expand_compressed_args (args); add_default_args (args); check_args (args); lastcomparg = get_compressed_args (args); logout << args; text_t &arg_a = args["a"]; if (arg_a == "q") err = query_action (args, textout, logout); else if (arg_a == "b") browse_action (args, textout, logout); else if (arg_a == "t") document_action (args, textout, logout); else if (arg_a == "p") page_action (args, textout, logout); else { // output error page } return err; } // the arg config string is used to do processing on the arguments // entries take the form argname[defaultvalue] // if the argument name is preceeded by a "+" it means that the // value may be more than one character long // the main state variable missed on this list is "q" the query string void libinterface::get_arg_config (text_t &argconfigstr) { argconfigstr = "a[p]" // action: q=query, b=browse, t=targetdoc, p=page "t[1]" // query type: 0=boolean, 1=ranked "i[c]" // index: c=chapter, p=paragraph, t=title, b=book "k[1]" // casefolding: 0=off, 1=on "s[0]" // stemming: 0=off, 1=on "+p[home]" // page "+c[unu2]" // collection "+r[1]" // results from "+d[C.1]" // the target document "+j[11]" // partial index: 11=all, 10=f&n, 01=other "+m[100]" // maxdocs "+o[20]" // hits per page "v[0]" // version: 0=text+graphics, 1=text "f[0]"; // query box size: 0=normal, 1=big } text_t::iterator libinterface::get_next_config_arg (text_t::iterator first, text_t::iterator last, text_t &argname, text_t &defaultvalue, bool &longarg) { first = getdelimitstr (first, last, '[', argname); first = getdelimitstr (first, last, ']', defaultvalue); longarg = false; if (!argname.empty() && (argname[0] == '+')) { argname.erase(argname.begin(), argname.begin()+1); longarg = true; } return first; } text_t libinterface::get_compressed_args (cgiargsclass &args) { text_t argconfigstr; get_arg_config (argconfigstr); text_t arg_e; text_t argname, defaultvalue; text_t *argvalue; bool longarg; text_t::iterator here = argconfigstr.begin(); text_t::iterator end = argconfigstr.end(); while (here != end) { here = get_next_config_arg (here, end, argname, defaultvalue, longarg); if (!argname.empty()) { argvalue = args.getarg (argname); if (argvalue == NULL) arg_e += defaultvalue; else arg_e += *argvalue; if (longarg) arg_e += "-"; } } return arg_e; } // the compressed options should never override explicit options // but they should always be expanded before add_default_args is // called void libinterface::expand_compressed_args (cgiargsclass &args) { text_t *arg_e = args.getarg("e"); // see if there is compressed options if (arg_e != NULL) { text_t argconfigstr; get_arg_config (argconfigstr); text_t argname, defaultvalue, argvalue; bool longarg; text_t::iterator confighere = argconfigstr.begin(); text_t::iterator configend = argconfigstr.end(); text_t::iterator arghere = arg_e->begin(); text_t::iterator argend = arg_e->end(); while (confighere != configend && arghere != argend) { confighere = get_next_config_arg (confighere, configend, argname, defaultvalue, longarg); if (!argname.empty()) { if (longarg) { arghere = getdelimitstr (arghere, argend, '-', argvalue); if (!argvalue.empty()) args.setdefaultarg (argname, argvalue); } else { args.setdefaultcarg (argname,*arghere); arghere++; } } } } } void libinterface::add_default_args (cgiargsclass &args) { text_t argconfigstr; get_arg_config (argconfigstr); text_t argname, defaultvalue; bool longarg; text_t::iterator confighere = argconfigstr.begin(); text_t::iterator configend = argconfigstr.end(); while (confighere != configend) { confighere = get_next_config_arg (confighere, configend, argname, defaultvalue, longarg); if (!argname.empty()) args.setdefaultarg (argname, defaultvalue); } // the query string and format string are not included in the argument configuration string args.setdefaultarg ("q", ""); // the default query string is "" args.setdefaultarg ("g", "00"); args.setdefaultarg ("x", "0"); } // check and attempt to fix an problems encountered in the list // of cgi arguments void libinterface::check_args (cgiargsclass &args) { text_t collection = get_collection_name(); args.setarg("c", collection); } void libinterface::define_general_macros (cgiargsclass &args, ostream &logout) { disp.setmacro("httpprefix", "Global", httpprefix); disp.setmacro("gwcgi", "Global", gwcgi); disp.setmacro("collection", "Global", cgisafe(args["c"])); disp.setmacro("compressedoptions", "Global", get_compressed_args(args)); disp.setmacro("urlsafequerystring", "Global", cgisafe(args["q"])); // need to escape any quotes in querystring to prevent them upsetting the html text_t querystring; text_t::iterator here = args["q"].begin(); text_t::iterator end = args["q"].end(); while (here != end) { if (*here == '"') querystring += """; else querystring.push_back(*here); here ++; } disp.setmacro("querystring", "Global", querystring); if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1"); if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1"); int i = rand(); disp.setmacro("pagedest", "Global", text_t(i)); // define the macro for the "g" argument disp.setmacro("g", "Global", args["g"]); } // prepare_page prepares to write out a page using the current // page parameters and defines any general macros void libinterface::prepare_page (cgiargsclass &args, ostream &logout) { // get page parameters text_t pageparams = text_t("collection=") + args["c"]; if (args.getintarg("u") == 1) pageparams += ",style=htmlonly"; if (args.getintarg("v") == 1) pageparams += ",version=text"; if (args.getintarg("f") == 1) pageparams += ",queryversion=big"; disp.openpage(pageparams, MACROPRECEDENCE); define_general_macros(args, logout); define_collection_macros(args, logout); } void libinterface::set_query_params (cgiargsclass &args, queryparamclass &queryparams) { queryparams.collection = args["c"]; assemble_index (args, queryparams.search_index); queryparams.querystring = args["q"]; format_querystring (queryparams.querystring); queryparams.search_type = args.getintarg ("t"); queryparams.casefolding = args.getintarg ("k"); queryparams.stemming = args.getintarg ("s"); queryparams.maxdocs = args.getintarg ("m"); } void libinterface::format_querystring (text_t &querystring) { text_t formattedstring; quotedstring.clear(); text_t::iterator here = querystring.begin(); text_t::iterator end = querystring.end(); int foundquote = 0; // want to remove ()|!& from querystring so boolean queries are just // "all the words" queries while (here != end) { if (*here == '(' || *here == ')' || *here == '|' || *here == '!' || *here == '&') { formattedstring += " "; } else { if (*here == '"') { if (foundquote) {foundquote = 0; quotedstring.push_back(*here);} else foundquote = 1; } else { formattedstring.push_back(*here); } if (foundquote) quotedstring.push_back(*here); } here ++; } querystring = formattedstring + quotedstring; } void libinterface::define_query_macros (cgiargsclass &args, queryparamclass &queryparams, queryresultsclass &queryresults, ostream &logout) { int numdocs = queryresults.getnumdocs(); int numterms = queryresults.getnumterms(); disp.setmacro("querysize", "query", args["f"]); disp.setmacro("haveresults", "query", numdocs); // set the display frequency macro text_t freqmsg = "_textfm1_"; int first = 1; for (int i = 0; i < numterms; i++) { if (first == 0) freqmsg += "; "; first = 0; freqmsg += queryresults.terms[i].termstr + ": " + queryresults.terms[i].termfreq; } if (!quotedstring.empty()) freqmsg += "
post-processed to find " + quotedstring + "\n"; disp.setmacro("freqmsg", "query", freqmsg); // set the result line macro text_t resline; if (numdocs >= queryparams.maxdocs) resline.setcstr("_textmt2_"); if (numdocs == 0) { resline.setcstr("_textndmtq_"); } else if (numdocs == 1) { resline += text_t(numdocs) + text_t(" _textdmtq2_."); } else { resline += text_t(numdocs) + text_t(" _textdmtq3_."); } disp.setmacro("resultline", "query", resline); // define_collection_macros (args, logout); if (queryresults.getnumdocs() > 0) { docLinks(args, queryresults, logout); } } // set the _links_ macro to create the links between pages of query results void libinterface::docLinks (cgiargsclass &args, queryresultsclass &queryresults, ostream &logout) { text_t links; int a, b, documents, nextfirst, nextlast, prevfirst, prevlast; int results_from = args.getintarg("r"); int hitsperpage = args.getintarg("o"); documents = queryresults.getnumdocs(); a = results_from; b = a + (hitsperpage - 1); // make sure a and b are in range if (a < 1) a = 1; if (b < 1) b = 1; if (a > documents) a = documents; if (b > documents) b = documents; links.setcstr("\n"); links += "\n"; links += "
\n"; links += "<_font_>\n"; // previous page link if (a > 1) { prevlast = a - 1; prevfirst = a - hitsperpage; links += "

_iconprev__textmatches_ "; links += prevfirst; links += " - "; links += prevlast; links += "\n"; } links += "\n"; links += "

\n"; links += "<_font_>\n"; // next page link if (b < documents) { nextfirst = b + 1; nextlast = b + hitsperpage; if (nextlast > documents) nextlast = documents; links += "

_textmatches_ "; links += nextfirst; links += " - "; links += nextlast ; links += "_iconnext_\n"; } links += "\n"; links += "

\n"; disp.setmacro("links", "query", links); } // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure int libinterface::do_query(cgiargsclass &args, queryparamclass &queryparams, queryresultsclass &queryresults, ostream &logout) { set_query_params(args, queryparams); if (!queryparams.querystring.empty()) { // do the query - the results are returned in queryresults if (!search.search(queryparams, queryresults)) { logout << "ERROR: database didn't load\n"; return LI_LOADDATABASEFAILED; } } return LI_NOERROR; } //////////////////////////////////////////////////////////////////////////////////////// // query_action is called whenever a search is to be carried out (i.e. when the // 'a' parameter == 'q') - query calls the mgsearch search() function (via do_query()) to // carry out the search then displays the first page of results. // // returns LI_NOERROR on success, LI_LOADDATABASEFAILED on failure int libinterface::query_action (cgiargsclass &args, ostream &textout, ostream &logout) { int err = LI_NOERROR; queryparamclass queryparams; queryresultsclass queryresults; err = do_query(args, queryparams, queryresults, logout); // prepare to print out the page prepare_page(args, logout); define_query_macros(args, queryparams, queryresults, logout); // print out the query page textout << text_t2ascii << disp << "_query:header_\n"; // output query results if there is a query string - // otherwise output help text if (!queryparams.querystring.empty()) { displayresults (args, textout, logout, queryresults); } else { textout << text_t2ascii << disp << "_query:noqueryheader_\n"; } textout << text_t2ascii << disp << "_query:footer_\n"; return err; } void libinterface::displayresults (cgiargsclass &args, ostream &textout, ostream &logout, queryresultsclass &queryresults) { textout << text_t2ascii << disp << "_query:queryheader_"; int startresults = args.getintarg("r") - 1; int numresults = args.getintarg("o"); textout << text_t2ascii << "\n"; for (int i=startresults; i < startresults+numresults; i++) { displaydocsummary (args, textout, logout, queryresults, i); } textout << text_t2ascii << "
\n\n"; textout << text_t2ascii << disp << "_query:queryfooter_"; } /////////////////////////////////////////////////////////////////////////////////////////////// // browse_action writes out the browse pages (i.e. the top level hierarchy pages) void libinterface::browse_action (cgiargsclass &args, ostream &textout, ostream &logout) { text_t browse_bar, locator, output; gdbm_info info; prepare_page(args, logout); // get browse bar unless page has been detached if (args.getintarg("x") == 0) { browse->get_browse_bar(args["d"], browse_bar); } // get top locator browse->get_top_locator(args, gdbm, 0, locator); // expand and output page // Note: we need to expand these out using package 'browse' // so we can't use the tricky '<<' syntax disp.expandstring("browse", "_header_", output); textout << text_t2ascii << output; disp.expandstring("browse", browse_bar, output); textout << text_t2ascii << output; disp.expandstring("browse", locator, output); textout << text_t2ascii << output; disp.expandstring("browse", "_footer_", output); textout << text_t2ascii << output; } ///////////////////////////////////////////////////////////////////////////////////////// // document_action is called to retrieve and display collection documents // It calls the mgsearch function docTargetDocument() to retrieve // a document. void libinterface::document_action (cgiargsclass &args, ostream &textout, ostream &logout) { text_t locator, content, links, output; gdbm_info info; queryparamclass queryparams; queryresultsclass queryresults; int oversize = 0; // have to redo the query to get queryterms for highlight text do_query(args, queryparams, queryresults, logout); prepare_page(args, logout); if (args["g"][1] == '0') { // get docnum from gdbm text_t docref; if (args["d"][0] != 'B') get_book(args["d"], docref); else docref = args["d"]; if (gdbm.getinfo(docref, queryparams.collection, info) != 0) { logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n"; return; } // get document text if there is any if (info.c.empty()) { search.docTargetDocument(default_index, queryparams.collection, info.d, content); if (info.t != "(introductory text)") content = "

" + info.t + "

\n" + content; } } if (args["g"][1] == '1') { // want to get expanded out text vector contents_arr; text_t booksection; int levelcount; get_book (args["d"], booksection); levelcount = count_dots(booksection); browse->get_contents_arr(args, gdbm, contents_arr); // get text for each section of book vector::const_iterator thiscontent = contents_arr.begin(); vector::const_iterator end = contents_arr.end(); int first = 1; int count = 1; while (thiscontent != end) { text_t text; // get docnum from gdbm if (gdbm.getinfo(*thiscontent, queryparams.collection, info) != 0) { logout << text_t2ascii << "info_db wasn't opened - " << args["d"] << "\n"; return; } // if section has text get it, otherwise output section title if (info.c.empty()) { // output tags for all text sections currently displayed in toc (all text sections // if contents are expanded if (count_dots(*thiscontent) == levelcount || args["g"][0] == '1') { content += "\n"; count ++; } search.docTargetDocument(default_index, queryparams.collection, info.d, text); if (info.t != "(introductory text)") content += "

" + info.t + "

\n"; // content += text + "

\n"; content += text + "

\n"; // no longer want


between sections } else { content += "

" + info.t + "

\n"; } if (args["n"] == 1) { if (first) { browse->get_top_locator(args, gdbm, 0, locator); disp.expandstring("text", "_header_", output); textout << text_t2ascii << output; disp.expandstring("text", locator, output); textout << text_t2ascii << output; } disp.expandstring("text", content, output); if (!queryparams.querystring.empty()) highlighttext(queryresults.termvariants, textout, logout, output); else textout << text_t2ascii << output; first = 0; content.clear(); } thiscontent ++; if (content.size() > 200000 && args["n"] != 1) { content.clear(); oversize = 1; args["g"][1] = '0'; break; } } if (args["g"][1] == '0') { // get docnum from gdbm text_t docref; if (args["d"][0] != 'B') get_book(args["d"], docref); else docref = args["d"]; if (gdbm.getinfo(docref, queryparams.collection, info) != 0) { logout << text_t2ascii << "info_db wasn't opened - " << docref << "\n"; return; } // get document text if there is any if (info.c.empty()) { search.docTargetDocument(default_index, queryparams.collection, info.d, content); if (info.t != "(introductory text)") content = "

" + info.t + "

\n" + content; } } } if (args["n"] != 1) { // get top locator browse->get_top_locator(args, gdbm, oversize, locator); // expand and output page // Note: we need to expand these out using package 'text' // so we can't use the tricky '<<' syntax disp.expandstring("text", "_header_", output); textout << text_t2ascii << output; disp.expandstring("text", locator, output); textout << text_t2ascii << output; disp.expandstring("text", content, output); if (!queryparams.querystring.empty()) highlighttext(queryresults.termvariants, textout, logout, output); else textout << text_t2ascii << output; } // get links to next and previous sections unless in expand text mode if (args["g"][1] == '0') { browse->get_links(args, gdbm, links); disp.expandstring("text", links, output); textout << text_t2ascii << output; } disp.expandstring("text", "_footer_", output); textout << text_t2ascii << output; } ///////////////////////////////////////////////////////////////////////////////////////// // page is called when a standard html page is to be displayed void libinterface::page_action (cgiargsclass &args, ostream &textout, ostream &logout) { text_t &arg_p = args["p"]; prepare_page(args, logout); if (arg_p == "preferences") define_pref_macros(args, logout); textout << text_t2ascii << disp << ("_" + arg_p + ":header_\n") << ("_" + arg_p + ":imagestandardbar_\n") << ("_" + arg_p + ":content_\n") << ("_" + arg_p + ":footer_\n"); } // highlighttext highlights query terms in text string and outputs the resulting text string void libinterface::highlighttext(vector &termvars, ostream &textout, ostream &logout, text_t &text) { map terms; map::const_iterator it; for (unsigned int i = 0; i < termvars.size(); i++) { terms[termvars[i]] = 1; } text_t::iterator here = text.begin(); text_t::iterator end = text.end(); text_t word, buffer; while (here != end) { if (((*here >= 65) && (*here <= 90)) || ((*here >= 97) && (*here <= 122)) || ((*here >= '0') && (*here <= '9')) || ((*here >= 192) && (*here <= 214)) || ((*here >= 216) && (*here <= 246)) || ((*here >= 248) && (*here <= 255))) { // not word boundary word.push_back(*here); here++; } else { // found word boundary // add last word if there was one if (!word.empty()) { it = terms.find(word); if (it != terms.end()) { word = "" + word + ""; } buffer += word; word.clear(); } if (*here == '<') { // skip over rest of html tag while ((here != end) && (*here != '>')) { buffer.push_back(*here); here++; } } buffer.push_back(*here); here++; if (buffer.size() > 1024) { textout << text_t2ascii << buffer; buffer.clear(); } } } textout << text_t2ascii << buffer; } void libinterface::define_pref_macros (cgiargsclass &args, ostream &logout) { // the caseoption macro text_t caseoption; int arg_k = args.getintarg("k"); caseoption += "\n= 100 && arg_m < 200) maxdocoption += " selected"; maxdocoption += ">100\n"; maxdocoption += "