/********************************************************************** * * documentaction.cpp -- * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include #include "documentaction.h" #include "browsetools.h" #include "OIDtools.h" #include "querytools.h" #include "unitool.h" #include "gsdltools.h" documentaction::documentaction () { recpt = NULL; // this action uses cgi variables "a", "d", "cl", // "x", "gc", "gt", "gp", and "hl" cgiarginfo arg_ainfo; arg_ainfo.shortname = "a"; arg_ainfo.longname = "action"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "p"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "d"; arg_ainfo.longname = "document OID"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::none; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::can; argsinfo.addarginfo (NULL, arg_ainfo); // whether or not a document should be retrieved from the // library or the Web. arg_ainfo.shortname = "il"; arg_ainfo.longname = "internal link preference"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "l"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "cl"; arg_ainfo.longname = "classification OID"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::none; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::can; argsinfo.addarginfo (NULL, arg_ainfo); // in this action "gc" controls the expand/contract // contents function arg_ainfo.shortname = "gc"; arg_ainfo.longname = "expand contents"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::can; argsinfo.addarginfo (NULL, arg_ainfo); // in this action "gt" controls the expand/contract // text function 0 = not expanded, 1 = expand unless // there are more than 10 sections containing text, // 2 = expand all arg_ainfo.shortname = "gt"; arg_ainfo.longname = "expand text"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::can; argsinfo.addarginfo (NULL, arg_ainfo); // in this action "gp" is the "go to page" control // used by the Book type of toc arg_ainfo.shortname = "gp"; arg_ainfo.longname = "go to page"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::none; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // in this action "hl" is the "highlighting on/ // highlighting off control arg_ainfo.shortname = "hl"; arg_ainfo.longname = "highlighting on/off"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "1"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "x" is 0 normally or 1 if page // has been "detached" arg_ainfo.shortname = "x"; arg_ainfo.longname = "detached page"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // f arg is set to 1 if document is to // be displayed in a frame arg_ainfo.shortname = "f"; arg_ainfo.longname = "frame"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // fc arg is "1" if search bar is to be included (i.e. if "fc" == 1 // the httpdocument macro will include "&f=1" arg_ainfo.shortname = "fc"; arg_ainfo.longname = "include search bar"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "1"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); } documentaction::~documentaction () { } bool documentaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args, ostream &logout) { // check gc argument int arg_gc = args.getintarg("gc"); if (arg_gc != 0 && arg_gc != 1) { logout << "Warning: \"gc\" argument out of range (" << arg_gc << ")\n"; cgiarginfo *gcinfo = argsinfo.getarginfo ("gc"); if (gcinfo != NULL) args["gc"] = gcinfo->argdefault; } // check gt argument (may be either 0, 1 or 2) int arg_gt = args.getintarg("gt"); if (arg_gt != 0 && arg_gt != 1 && arg_gt != 2) { logout << "Warning: \"gt\" argument out of range (" << arg_gt << ")\n"; cgiarginfo *gtinfo = argsinfo.getarginfo ("gt"); if (gtinfo != NULL) args["gt"] = gtinfo->argdefault; } // check hl argument int arg_hl = args.getintarg("hl"); if (arg_hl != 0 && arg_hl != 1) { logout << "Warning: \"hl\" argument out of range (" << arg_hl << ")\n"; cgiarginfo *hlinfo = argsinfo.getarginfo ("hl"); if (hlinfo != NULL) args["hl"] = hlinfo->argdefault; } // check x argument int arg_x = args.getintarg("x"); if (arg_x != 0 && arg_x != 1) { logout << "Warning: \"x\" argument out of range (" << arg_x << ")\n"; cgiarginfo *xinfo = argsinfo.getarginfo ("x"); if (xinfo != NULL) args["x"] = xinfo->argdefault; } return true; } void documentaction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos, response_t &response,text_t &response_data, ostream &logout) { if ((args["il"] == "w") && (!args["d"].empty())) { recptproto* collectproto = protos->getrecptproto (args["c"], logout); if (collectproto != NULL) { text_tset metadata; FilterResponse_t filt_response; text_t top; metadata.insert ("URL"); // get metadata for parent document get_top (args["d"], top); if (get_info (top, args["c"], metadata, false, collectproto, filt_response, logout)) { text_t url = filt_response.docInfo[0].metadata["URL"].values[0]; response = location; response_data = url; return; } else { // error, no URL logout << "Error: documentaction::get_cgihead_info failed on get_info" << endl; } } } response = content; response_data = "text/html"; } // set_widthtspace calculates how wide the spaces in the nav bar should // be and sets the appropriate macro void documentaction::set_spacemacro (displayclass &disp, FilterResponse_t &response) { text_t width; int twidth, swidth, iwidth = 0; int numc = response.docInfo.size(); ResultDocInfo_tarray::iterator dochere = response.docInfo.begin(); ResultDocInfo_tarray::iterator docend = response.docInfo.end(); disp.expandstring ("Global", "_pagewidth_", width); twidth = width.getint(); disp.expandstring ("query", "_searchwidth_", width); iwidth += width.getint(); while (dochere != docend) { const text_t &title = (*dochere).metadata["Title"].values[0]; disp.expandstring ("document", "_" + title + "width_", width); if (width == ("_" + title + "width_")) disp.expandstring ("document", "_defaultwidth_", width); iwidth += width.getint(); dochere ++; } if ((twidth - iwidth) < numc) swidth = 2; else { swidth = twidth - iwidth; if (numc > 0) swidth = swidth / numc; } disp.setmacro ("widthtspace", "Global", swidth); } // set_navbarmacros sets _navigationbar_ and _httpbrowseXXX_ macros // reponse contains 1 metadata field (Title) void documentaction::set_navbarmacros (displayclass &disp, FilterResponse_t &response, cgiargsclass &args) { text_t topparent; text_t &arg_d = args["d"]; text_t navigationbar = "\n"; get_top (args["cl"], topparent); int numc = response.docInfo.size(); ResultDocInfo_tarray::iterator dochere = response.docInfo.begin(); ResultDocInfo_tarray::iterator docend = response.docInfo.end(); navigationbar += "\n"; if (args["a"] == "q") { navigationbar += "_icontabsearchgreen_"; } else { navigationbar += "_imagesearch_"; } if (numc == 0) navigationbar += "_imagespacer_"; while (dochere != docend) { text_t title = (*dochere).metadata["Title"].values[0]; bool unknown = false; // test the _XXXwidth_ macro to see if image macros are // defined for this type of classification - if not we'll // just display the text text_t tmpwidth; disp.expandstring ("document", "_" + title + "width_", tmpwidth); if (tmpwidth == ("_" + title + "width_")) unknown = true; // if we're inside a document all the classification buttons should be enabled if (arg_d.empty() && ((*dochere).OID == topparent)) { if (unknown) navigationbar += "_imagespacer_ " + title + " "; else navigationbar += "_imagespacer__icontab" + title + "green_"; } else { // set the _httpbrowseXXX_ macro for this classification if (unknown) navigationbar += "_imagespacer_ " + title + " "; else { navigationbar += "_imagespacer__image" + title + "_"; disp.setmacro ("httpbrowse" + title, "Global", "_httpdocument_&cl=" + (*dochere).OID); } } dochere ++; } navigationbar += "\n\n"; navigationbar += "\n"; disp.setmacro ("navigationbar", "Global", navigationbar); } // define all the macros which might be used by other actions // to produce pages. void documentaction::define_external_macros (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { // define_external_macros sets the following macros: // _navigationbar_ this is the navigation bar containing the search button // and any classification buttons - it goes at the top of // most pages. for now we're assuming that there'll always // be a search button - we should probably check that there // is a query action before making this assumption // _httpbrowseXXX_ the http macros for each classification (i.e. if there // are Title and Creator classifications _httpbrowseTitle_ // and _httpbrowseCreator_ will be set // _widthtspace_ the width of the spacers between buttons in navigation // bar // _httpdocument_ has '&f=1' added if displaying document inside a frame // _gsdltop_ macro to replace _top targets with // _httppagehome_ overridden home url if html collections have own homepage // must have a valid collection server to continue text_t &collection = args["c"]; if (collection.empty()) return; recptproto *collectproto = protos->getrecptproto (collection, logout); if (collectproto == NULL) return; if (recpt == NULL) { logout << "ERROR (documentaction::define_external_macros): This action does not contain\n" << " information about any receptionists. The method set_receptionist was\n" << " probably not called from the module which instantiated this action.\n"; return; } outconvertclass text_t2ascii; comerror_t err; InfoFiltersResponse_t filterinfo; FilterResponse_t response; text_tset metadata; // get info on current collection and load up formatinfo // I'd prefer not to do this here as we're getting // collection info every time (and probably also getting // it in other places some of the time) - One day I'll // fix it ... maybe - Stefan. ColInfoResponse_t cinfo; collectproto->get_collectinfo (collection, cinfo, err, logout); load_formatinfo (cinfo.format, args.getintarg("gt")); // ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout); // if (cinfo == NULL) { // logout << "ERROR (documentaction::define_external_macros): get_collectinfo_ptr returned NULL\n"; // return; // } //load_formatinfo (cinfo->format, args.getintarg("gt")); if (formatinfo.DocumentUseHTML) { // frame stuff if (args["fc"] == "1") { text_t httpdocument; disp.expandstring ("Global", "_httpdocument_", httpdocument); httpdocument += "&f=1"; disp.setmacro ("httpdocument", "Global", httpdocument); disp.setmacro ("gsdltop", "Global", "documenttop"); } text_tmap::iterator it = cinfo.format.find ("homepage"); if (it != cinfo.format.end()) { text_t httppagehome; if (get_link (args, protos, (*it).second, httppagehome, logout)) disp.setmacro ("httppagehome", "Global", httppagehome); } } // don't want navigation bar if page is 'detached' if (!args.getintarg("x")) { collectproto->get_filterinfo (collection, filterinfo, err, logout); if (err == noError) { // check that there's a browse filter if (filterinfo.filterNames.find ("BrowseFilter") != filterinfo.filterNames.end()) { metadata.insert ("Title"); bool getParents = false; get_children ("", collection, metadata, getParents, collectproto, response, logout); // calculate width of spacers and set _widthtspace_ macro if (args.getintarg("v") == 0) set_spacemacro (disp, response); // set _navigationbar_ macro set_navbarmacros (disp, response, args); } } else { logout << text_t2ascii << "Error (documentaction::define_external_macros()) in call to get_filterinfo() " << get_comerror_string (err); } } } bool documentaction::get_link (cgiargsclass &args, recptprotolistclass *protos, const text_t &inlink, text_t &outlink, ostream &logout) { FilterResponse_t response; text_tset metadata; metadata.insert ("section"); // check current collection first recptproto *collectproto = protos->getrecptproto (args["c"], logout); if (get_info (inlink, args["c"], metadata, false, collectproto, response, logout)) { if (!response.docInfo[0].metadata["section"].values[0].empty()) { outlink = "_httpdocument_&d=" + response.docInfo[0].metadata["section"].values[0]; return true; } } // check all the other enabled collections if (args["ccs"] == "1" && !args["cc"].empty()) { text_tarray collections; splitchar (args["cc"].begin(), args["cc"].end(), ',', collections); text_tarray::const_iterator col_here = collections.begin(); text_tarray::const_iterator col_end = collections.end(); while (col_here != col_end) { // don't need to check current collection again if (*col_here == args["c"]) {col_here ++; continue;} collectproto = protos->getrecptproto (*col_here, logout); if (get_info (inlink, *col_here, metadata, false, collectproto, response, logout)) { if (!response.docInfo[0].metadata["section"].values[0].empty()) { outlink = "_httpdocument_&c=" + *col_here + "&d=" + response.docInfo[0].metadata["section"].values[0]; return true; } } col_here ++; } } return false; } void documentaction::load_formatinfo (const text_tmap &colformat, int gt) { formatinfo.clear(); text_tmap::const_iterator format_here = colformat.begin(); text_tmap::const_iterator format_end = colformat.end(); while (format_here != format_end) { if (((*format_here).first == "DocumentImages") && ((*format_here).second == "true")) formatinfo.DocumentImages = true; else if (((*format_here).first == "DocumentTitles") && ((*format_here).second == "false")) formatinfo.DocumentTitles = false; else if ((*format_here).first == "DocumentHeading") formatinfo.DocumentHeading = (*format_here).second; else if (((*format_here).first == "DocumentContents") && ((*format_here).second == "false")) formatinfo.DocumentContents = false; else if (((*format_here).first == "DocumentArrowsBottom") && ((*format_here).second == "false")) formatinfo.DocumentArrowsBottom = false; else if ((*format_here).first == "DocumentButtons") splitchar ((*format_here).second.begin(), (*format_here).second.end(), '|', formatinfo.DocumentButtons); else if ((*format_here).first == "DocumentText") formatinfo.DocumentText = (*format_here).second; else if (((*format_here).first == "DocumentUseHTML") && ((*format_here).second == "true")) formatinfo.DocumentUseHTML = true; else formatinfo.formatstrings[(*format_here).first] = (*format_here).second; format_here ++; } // never want arrows when text is expanded if (gt) formatinfo.DocumentArrowsBottom = false; } // define all the macros which are related to pages generated // by this action. we also load up the formatinfo structure // here (it's used in do_action as well as here) void documentaction::define_internal_macros (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { // define_internal_macros sets the following macros: // _pagetitle_ the title to be displayed at the top of the browser window // _imagethispage_ the title image to be displayed at top right of page // _navarrows_ this may be overridden to "" when format option // DocumentArrowsBottom is false // _header_ the header macro is overridden if we're not at a top level // classification to remove the title block // _thisOID_ the OID (directory) of the current document - this corresponds // to the archivedir metadata element // must have a valid collection server to continue text_t &collection = args["c"]; if (collection.empty()) return; recptproto *collectproto = protos->getrecptproto (collection, logout); if (collectproto == NULL) return; text_tset metadata; FilterResponse_t response; text_t &arg_d = args["d"]; text_t &arg_cl = args["cl"]; if (!formatinfo.DocumentArrowsBottom) disp.setmacro("navarrows", "document", ""); metadata.insert ("Title"); bool fulltoc = false; if (args["cl"] != "search") { // see if there's a FullTOC string text_t cl_top, full_toc; get_top (arg_cl, cl_top); if (get_formatstring (cl_top, "FullTOC", formatinfo.formatstrings, full_toc)) if (full_toc == "true") fulltoc = true; } if (!arg_d.empty() && !fulltoc) { // we're at document level metadata.insert ("archivedir"); OptionValue_tarray options; // we need to know what the query was for the z3950proto if (collectproto->get_protocol_name()=="z3950proto") { OptionValue_t opt; opt.name="Query"; opt.value=args["q"]; options.push_back(opt); } // get metadata for this document and it's parents if (get_info (arg_d, collection, metadata, options, true, collectproto, response, logout)) { disp.setmacro ("header", "document", "_textheader_"); text_tarray pagetitlearray; if (!response.docInfo[0].metadata["Title"].values[0].empty()) pagetitlearray.push_back (response.docInfo[0].metadata["Title"].values[0]); if (args["gt"] != "1") { MetadataInfo_t *parenttitle = response.docInfo[0].metadata["Title"].parent; while (parenttitle != NULL) { if (!parenttitle->values[0].empty()) pagetitlearray.push_back (parenttitle->values[0]); parenttitle = parenttitle->parent; } } reverse (pagetitlearray.begin(), pagetitlearray.end()); text_t pagetitle; joinchar (pagetitlearray, ": ", pagetitle); disp.setmacro ("pagetitle", "document", pagetitle); if (is_top (arg_d)) disp.setmacro ("thisOID", "Global", dm_safe(response.docInfo[0].metadata["archivedir"].values[0])); else { MetadataInfo_t *parentad = response.docInfo[0].metadata["archivedir"].parent; text_t thisOID; while (parentad != NULL) { thisOID = parentad->values[0]; parentad = parentad->parent; } disp.setmacro ("thisOID", "Global", dm_safe(thisOID)); } } } else { if (!arg_cl.empty()) { // get metadata for top level classification text_t classtop; get_top (arg_cl, classtop); metadata.insert ("childtype"); if (get_info (classtop, collection, metadata, false, collectproto, response, logout)) { text_t &title = response.docInfo[0].metadata["Title"].values[0]; bool unknown = false; // test the _XXXwidth_ macro to see if image macros are // defined for this type of classification - if not we'll // just display the text text_t tmp; disp.expandstring ("document", "_" + title + "width_", tmp); if (tmp == ("_" + title + "width_")) unknown = true; if (unknown) { disp.setmacro ("pagetitle", "document", title); disp.setmacro ("imagethispage", "document", "

" + title + "

"); } else { disp.setmacro ("pagetitle", "document", "_text" + title + "page_"); disp.setmacro ("imagethispage", "document", "_icon" + title + "page_"); } // Add a macro to display the phind classifier (if appropriate) text_t &childtype = response.docInfo[0].metadata["childtype"].values[0]; if (childtype == "Phind") { disp.setmacro ("phindclassifier", "document", "_phindapplet_"); } } } } } bool documentaction::do_action (cgiargsclass &args, recptprotolistclass *protos, browsermapclass *browsers, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &logout) { // must have a valid collection server recptproto *collectproto = protos->getrecptproto (args["c"], logout); if (collectproto == NULL) { logout << "documentaction::do_action called with NULL collectproto\n"; textout << outconvert << disp << "_document:header_\n" << "Error: Attempt to get document without setting collection\n" << "_document:footer_\n"; } else { text_t OID = args["d"]; if (OID.empty()) OID = args["cl"]; if (OID.empty()) { textout << outconvert << disp << "Document contains no data_document:footer_\n"; return true; } if (formatinfo.DocumentUseHTML) { if (!args["d"].empty()) { if (args["f"] == "1") { textout << outconvert << disp << "\n" << "\n" << "\n" << "" << "\n" << "<p>You must have a frame enabled browser to view this.</p>\n" << "\n" << "\n" << "\n"; } else { output_document (OID, args, collectproto, disp, outconvert, textout, logout); } return true; } } textout << outconvert << disp << "_document:header_\n" << "_document:content_\n"; // output the table of contents output_toc (args, browsers, formatinfo, collectproto, disp, outconvert, textout, logout); // output the document text textout << "

\n"; output_document (OID, args, collectproto, disp, outconvert, textout, logout); textout << outconvert << disp << "_document:footer_\n"; } return true; } void documentaction::output_text (ResultDocInfo_t &docinfo, format_t *formatlistptr, const TermInfo_tarray &terminfo, const text_t &OID, bool highlight, int hastxt, int wanttext, text_t &collection, recptproto *collectproto, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &logout) { DocumentRequest_t docrequest; DocumentResponse_t docresponse; comerror_t err; if (hastxt == 1) { if (wanttext) { // get the text docrequest.OID = OID; collectproto->get_document (collection, docrequest, docresponse, err, logout); // cut down on overhead by not using formattools if we only want the text // (wanttext will equal 2 if we want text and other stuff too) if (wanttext == 1) if (highlight) highlighttext(docresponse.doc, terminfo, disp, outconvert, textout, logout); else textout << outconvert << disp << docresponse.doc; } if (wanttext != 1) { text_t doctext = get_formatted_string (collection, collectproto, docinfo, disp, formatlistptr, docresponse.doc, logout); if (highlight) highlighttext(doctext, terminfo, disp, outconvert, textout, logout); else textout << outconvert << disp << doctext; } } } void documentaction::output_document (const text_t &OID, cgiargsclass &args, recptproto *collectproto, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &logout) { FilterResponse_t inforesponse; FilterResponse_t queryresponse; text_tset metadata; bool getParents = false; bool highlight = false; int wanttext = 0; int arg_gt = args.getintarg("gt"); text_t &collection = args["c"]; // if we have a query string and highlighting is turned on we need // to redo the query to get the terms for highlighting if (!args["q"].empty() && args.getintarg("hl")) { FilterRequest_t request; comerror_t err; request.filterResultOptions = FRmatchTerms; text_t formattedstring = args["q"]; format_querystring (formattedstring, args.getintarg("b")); set_queryfilter_options (request, formattedstring, args); collectproto->filter (args["c"], request, queryresponse, err, logout); if (err != noError) { outconvertclass text_t2ascii; logout << text_t2ascii << "documentaction::output_document: call to QueryFilter failed " << "for " << args["c"] << " collection (" << get_comerror_string (err) << ")\n"; highlight = false; } else { highlight = true; } } format_t *formatlistptr = new format_t(); parse_formatstring (formatinfo.DocumentText, formatlistptr, metadata, getParents); metadata.insert ("hastxt"); metadata.insert ("haschildren"); if (formatinfo.DocumentText == "[Text]") wanttext = 1; else { char *docformat = formatinfo.DocumentText.getcstr(); if (strstr (docformat, "[Text]") != NULL) wanttext = 2; delete docformat; } if (get_info (OID, collection, metadata, getParents, collectproto, inforesponse, logout)) { int hastxt = inforesponse.docInfo[0].metadata["hastxt"].values[0].getint(); int haschildren = inforesponse.docInfo[0].metadata["haschildren"].values[0].getint(); if (arg_gt == 0) { output_text (inforesponse.docInfo[0], formatlistptr, queryresponse.termInfo, OID, highlight, hastxt, wanttext, collection, collectproto, disp, outconvert, textout, logout); } else { ResultDocInfo_t thisdocinfo = inforesponse.docInfo[0]; // text is to be expanded text_t exOID = OID; if (haschildren != 1) exOID = get_parent (OID); if (exOID.empty()) exOID = OID; // if we're not in a document (i.e. we're in a top level classification) // we need to pass "is_classify = true" to get_contents so that it // doesn't recurse all the way through each document in the classification bool is_classify = false; if (args["d"].empty()) is_classify = true; get_contents (exOID, is_classify, metadata, collection, collectproto, inforesponse, logout); ResultDocInfo_tarray::iterator sechere = inforesponse.docInfo.begin(); ResultDocInfo_tarray::iterator secend = inforesponse.docInfo.end(); if (arg_gt == 1) { // check if there are more than 10 sections containing text to be expanded - // if there are output warning message - this isn't a great way to do this // since the sections may be very large or very small - one day I'll fix it // -- Stefan. int seccount = 0; while (sechere != secend) { int shastxt = (*sechere).metadata["hastxt"].values[0].getint(); if (shastxt == 1) seccount ++; if (seccount > 10) break; sechere ++; } if (seccount > 10) { // more than 10 sections so output warning message and text // for current section only textout << outconvert << disp << "_document:textltwarning_"; output_text (thisdocinfo, formatlistptr, queryresponse.termInfo, OID, highlight, hastxt, wanttext, collection, collectproto, disp, outconvert, textout, logout); } else arg_gt = 2; } if (arg_gt == 2) { // get the text for each section sechere = inforesponse.docInfo.begin(); int count = 0; while (sechere != secend) { textout << outconvert << disp << "\n

\n"; int shastxt = (*sechere).metadata["hastxt"].values[0].getint(); output_text (*sechere, formatlistptr, queryresponse.termInfo, (*sechere).OID, highlight, shastxt, wanttext, collection, collectproto, disp, outconvert, textout, logout); count ++; sechere ++; } } } } delete formatlistptr; } // highlighttext highlights query terms in text string and outputs the resulting text string void documentaction::highlighttext(text_t &text, const TermInfo_tarray &terms, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &/*logout*/) { text_tmap allterms; text_tmap::const_iterator it; // first load all the term variations into a map TermInfo_tarray::const_iterator this_term = terms.begin(); TermInfo_tarray::const_iterator last_term = terms.end(); while (this_term != last_term) { text_tarray::const_iterator this_var = (*this_term).matchTerms.begin(); text_tarray::const_iterator last_var = (*this_term).matchTerms.end(); while (this_var != last_var) { allterms[*this_var] = 1; this_var ++; } this_term ++; } // get the text to start and end a hightlight text_t starthighlight = ""; text_t endhighlight = ""; if (disp.isdefaultmacro("Global", "starthighlight")) disp.expandstring("Global", "_starthighlight_", starthighlight); if (disp.isdefaultmacro("Global", "endhighlight")) disp.expandstring("Global", "_endhighlight_", endhighlight); text_t::iterator here = text.begin(); text_t::iterator end = text.end(); text_t word, buffer; while (here != end) { if (is_unicode_letdig(*here)) { // not word boundary word.push_back(*here); here++; } else { // found word boundary // add last word if there was one if (!word.empty()) { it = allterms.find(word); if (it != allterms.end()) { word = starthighlight + word + endhighlight; } buffer += word; word.clear(); } if (*here == '<') { // skip over rest of html tag while ((here != end) && (*here != '>')) { buffer.push_back(*here); here++; } } buffer.push_back(*here); here++; if (buffer.size() > 1024) { textout << outconvert << disp << buffer; buffer.clear(); } } } textout << outconvert << disp << buffer; }