/********************************************************************** * * formattools.cpp -- * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include "formattools.h" #include "cgiutils.h" #include "gsdltools.h" #include "recptprototools.h" #include "OIDtools.h" #include "summarise.h" #include static bool metadata_spanwrap = false; // a few function prototypes static text_t format_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, text_tmap &options, ostream& logout); static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end, format_t *formatlistptr, text_tset &metadata, bool &getParents); static text_t format_summary (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, text_tmap &options, ostream& logout); static text_t format_text (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, text_tmap &options, ostream& logout); static text_t expand_metadata(const 
text_t &metavalue, const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, text_tmap &options, ostream &logout); void metadata_t::clear() { metaname.clear(); metacommand = mNone; mqualifier.parent = pNone; mqualifier.sibling = sNone; mqualifier.child = cNone; pre_tree_traverse.clear(); parentoptions.clear(); siblingoptions.clear(); childoptions.clear(); } void decision_t::clear() { command = dMeta; meta.clear(); text.clear(); } format_t::~format_t() { if (nextptr != NULL) delete nextptr; if (ifptr != NULL) delete ifptr; if (elseptr != NULL) delete elseptr; if (orptr != NULL) delete orptr; } void format_t::clear() { command = comText; decision.clear(); text.clear(); meta.clear(); nextptr = NULL; ifptr = NULL; elseptr = NULL; orptr = NULL; } void formatinfo_t::clear() { DocumentImages = false; DocumentTitles = true; DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}
"; DocumentContents = true; DocumentArrowsBottom = true; DocumentArrowsTop = false; DocumentSearchResultLinks = false; DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end()); // DocumentButtons.push_back ("Expand Text"); // DocumentButtons.push_back ("Expand Contents"); DocumentButtons.push_back ("Detach"); DocumentButtons.push_back ("Highlight"); RelatedDocuments = ""; DocumentText = "[Text]"; formatstrings.erase (formatstrings.begin(), formatstrings.end()); DocumentUseHTML = false; AllowExtendedOptions = false; } // simply checks to see if formatstring begins with a tag bool is_table_content (const text_t &formatstring) { text_t::const_iterator here = formatstring.begin(); text_t::const_iterator end = formatstring.end(); while (here != end) { if (*here != ' ') { if ((*here == '<') && ((here+3) < end)) { if ((*(here+1) == 't' || *(here+1) == 'T') && (*(here+2) == 'd' || *(here+2) == 'D') && (*(here+3) == '>' || *(here+3) == ' ')) return true; } else return false; } ++here; } return false; } bool is_table_content (const format_t *formatlistptr) { if (formatlistptr == NULL) return false; if (formatlistptr->command == comText) return is_table_content (formatlistptr->text); return false; } // returns false if key isn't in formatstringmap bool get_formatstring (const text_t &key, const text_tmap &formatstringmap, text_t &formatstring) { formatstring.clear(); text_tmap::const_iterator it = formatstringmap.find(key); if (it == formatstringmap.end()) return false; formatstring = (*it).second; return true; } // tries to find "key1key2" then "key1" then "key2" bool get_formatstring (const text_t &key1, const text_t &key2, const text_tmap &formatstringmap, text_t &formatstring) { formatstring.clear(); text_tmap::const_iterator it = formatstringmap.find(key1 + key2); if (it != formatstringmap.end()) { formatstring = (*it).second; return true; } it = formatstringmap.find(key1); if (it != formatstringmap.end()) { formatstring = (*it).second; return true; } it = 
formatstringmap.find(key2); if (it != formatstringmap.end()) { formatstring = (*it).second; return true; } return false; } text_t remove_namespace(const text_t &meta_name) { text_t::const_iterator end = meta_name.end(); text_t::const_iterator it = findchar(meta_name.begin(), end, '.'); if (it != end) { return substr(it+1, end); } return meta_name; } // returns a date of form _format:date_(year, month, day) // input is date of type yyyy-?mm-?dd // at least the year must be present in date text_t format_date (const text_t &date) { if (date.size() < 4) return ""; text_t::const_iterator datebegin = date.begin(); text_t year = substr (datebegin, datebegin+4); int chars_seen_so_far = 4; if (chars_seen_so_far == date.size()) return "_format:date_("+year+")"; if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ; if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")"; text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2); int imonth = month.getint(); if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")"; chars_seen_so_far += 2; if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")"; if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ; if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")"; text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2); if (day[0] == '0') day = substr (day.begin()+1, day.end()); int iday = day.getint(); if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")"; return "_format:date_("+year+","+month+","+day+")"; } // converts an iso639 language code to its English equivalent // should we be checking that the macro exists?? 
// Wraps langcode in the macro name "_iso639:iso639<langcode>_";
// an empty code produces an empty string.
text_t iso639 (const text_t &langcode) {
  if (!langcode.empty()) {
    return "_iso639:iso639"+langcode+"_";
  }
  return "";
}

// Returns the text between the first and the second double quote of
// link. Without a closing quote everything after the first quote is
// returned; without any quote the result is g_EmptyText.
text_t get_href (const text_t &link) {
  const text_t::const_iterator finish = link.end();
  text_t::const_iterator cursor = findchar(link.begin(), finish, '"');
  if (cursor == finish) return g_EmptyText;

  text_t target;
  for (++cursor; cursor != finish && *cursor != '"'; ++cursor) {
    target.push_back(*cursor);
  }
  return target;
}

//this function gets the information associated with the relation
//metadata for the document associated with 'docinfo'. This relation
//metadata consists of a line of pairs containing 'collection, document OID'
//(this is the OID of the document related to the current document, and
//the collection the related document belongs to). For each of these pairs
//the title metadata is obtained and then an html link between the title
//of the related doc and the document's position is made (the link target
//is built from the related document's collection and its OID).
//A list of these html links is made for as many related documents as
//there are. This list is then returned. If there are no related documents
//available for the current document then the string
//'.. no related documents .. ' is returned.
text_t get_related_docs(const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, ostream& logout){ text_tset metadata; //insert the metadata we wish to collect metadata.insert("dc.Relation"); metadata.insert("Title"); metadata.insert("Subject"); //for emails, where title data doesn't apply FilterResponse_t response; text_t relation = ""; //string for displaying relation metadata text_t relationTitle = ""; //the related documents Title (or subject) text_t relationOID = ""; //the related documents OID //get the information associated with the metadata for current doc if (get_info (docinfo.OID, collection, "", metadata, false, collectproto, response, logout)) { //if the relation metadata exists, store for displaying if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){ relationOID += response.docInfo[0].metadata["dc.Relation"].values[0]; //split relation data into pairs of collectionname,ID number text_tarray relationpairs; splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs); text_tarray::const_iterator currDoc = relationpairs.begin(); text_tarray::const_iterator lastDoc = relationpairs.end(); //iterate through the pairs to split and display while(currDoc != lastDoc){ //split pairs into collectionname and ID text_tarray relationdata; splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata); //get first element in the array (collection) text_tarray::const_iterator doc_data = relationdata.begin(); text_t document_collection = *doc_data; ++doc_data; //increment to get next item in array (oid) text_t document_OID = *doc_data; //create html link to related document relation += "" + relationTitle + ""; relation += " (" + document_collection + ")
"; ++currDoc; } } } if(relation.empty()) //no relation data for documnet relation = ".. no related documents .. "; return relation; } static void get_parent_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > 7); if (instring.size() <= 7) return; text_t meta, com, op; bool inbraces = false; bool inquotes = false; bool foundcolon = false; text_t::const_iterator here = instring.begin()+6; text_t::const_iterator end = instring.end(); while (here != end) { if (foundcolon) meta.push_back (*here); else if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == '\'' && !inquotes) inquotes = true; else if (*here == '\'' && inquotes) inquotes = false; else if (*here == ':' && !inbraces) foundcolon = true; else if (inquotes) op.push_back (*here); else com.push_back (*here); ++here; } instring = meta; if (com.empty()) metaoption.mqualifier.parent = pImmediate; else if (com == "Top") metaoption.mqualifier.parent = pTop; else if (com == "All") { metaoption.mqualifier.parent = pAll; metaoption.parentoptions = op; } } static void get_sibling_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > 8); if (instring.size() <= 8) return; text_t meta, com, op; bool inbraces = false; bool inquotes = false; bool foundcolon = false; text_t::const_iterator here = instring.begin()+7; text_t::const_iterator end = instring.end(); while (here != end) { if (foundcolon) meta.push_back (*here); else if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == '\'' && !inquotes) inquotes = true; else if (*here == '\'' && inquotes) inquotes = false; else if (*here == ':' && !inbraces) foundcolon = true; else if (inquotes) op.push_back (*here); else com.push_back (*here); ++here; } instring = meta; metaoption.siblingoptions.clear(); if (com.empty()) { metaoption.mqualifier.sibling = sAll; metaoption.siblingoptions = " "; } else if (com == "first") { metaoption.mqualifier.sibling = 
sNum; metaoption.siblingoptions = "0"; } else if (com == "last") { metaoption.mqualifier.sibling = sNum; metaoption.siblingoptions = "-2"; // == last } else if (com.getint()>0) { metaoption.mqualifier.sibling = sNum; int pos = com.getint()-1; metaoption.siblingoptions +=pos; } else { metaoption.mqualifier.sibling = sAll; metaoption.siblingoptions = op; } } static void get_child_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > 6); if (instring.size() <= 6) return; text_t meta, com, op; bool inbraces = false; bool inquotes = false; bool foundcolon = false; text_t::const_iterator here = instring.begin()+5; text_t::const_iterator end = instring.end(); while (here != end) { if (foundcolon) meta.push_back (*here); else if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == '\'' && !inquotes) inquotes = true; else if (*here == '\'' && inquotes) inquotes = false; else if (*here == ':' && !inbraces) foundcolon = true; else if (inquotes) op.push_back (*here); else com.push_back (*here); ++here; } instring = meta; if (com.empty()) { metaoption.mqualifier.child = cAll; metaoption.childoptions = " "; } else if (com == "first") { metaoption.mqualifier.child = cNum; metaoption.childoptions = ".fc"; } else if (com == "last") { metaoption.mqualifier.child = cNum; metaoption.childoptions = ".lc"; } else if (com.getint()>0) { metaoption.mqualifier.child = cNum; metaoption.childoptions = "."+com; } else { metaoption.mqualifier.child = cAll; metaoption.childoptions = op; } } static void get_truncate_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > ((text_t) "truncate").size()); if (instring.size() <= ((text_t) "truncate").size()) return; text_t meta, com; bool inbraces = false; bool foundcolon = false; text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size(); text_t::const_iterator end = instring.end(); while (here != end) { if (foundcolon) meta.push_back (*here); 
else if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == ':' && !inbraces) foundcolon = true; else com.push_back (*here); ++here; } instring = meta; if (!com.empty()) { metaoption.siblingoptions = com; } else { // Default is 100 characters if not specified metaoption.siblingoptions = "100"; } } static void parse_meta (text_t &meta, metadata_t &metaoption, text_tset &metadata, bool &getParents) { // Look for the various format statement modifiers // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order // is irrelevant because this is not stored in metaoption.metacommand anyway bool keep_trying = true; while (keep_trying) { keep_trying = false; if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:")) { metaoption.metacommand |= mCgiSafe; meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end()); keep_trying = true; } if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:")) { metaoption.metacommand |= mSpecial; meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end()); keep_trying = true; } // New "truncate" special formatting option if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X) { metaoption.metacommand |= mTruncate; get_truncate_options (meta, metaoption); keep_trying = true; } // New "htmlsafe" special formatting option if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:")) { metaoption.metacommand |= mHTMLSafe; meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end()); keep_trying = true; } // New "xmlsafe" special formatting option if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:")) { metaoption.metacommand |= mXMLSafe; meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end()); keep_trying = true; } // New "dmsafe" special formatting option if 
(meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:")) { metaoption.metacommand |= mDMSafe; meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end()); keep_trying = true; } } bool had_parent_or_child = true; bool prev_was_parent = false; bool prev_was_child = false; while (had_parent_or_child) { if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) { // clear out sibling and child (cmd and options) metaoption.metacommand &= ~(mChild|mSibling); metaoption.childoptions.clear(); metaoption.siblingoptions.clear(); getParents = true; metaoption.metacommand |= mParent; get_parent_options (meta, metaoption); if (prev_was_parent) { metaoption.pre_tree_traverse += ".pr"; } else if (prev_was_child) { metaoption.pre_tree_traverse += ".fc"; } prev_was_parent = true; prev_was_child = false; } else if (meta.size() > 6 && (substr (meta.begin(), meta.begin()+5) == "child")) { // clear out sibling and parent (cmd and options) metaoption.metacommand &= ~(mParent|mSibling); metaoption.parentoptions.clear(); metaoption.siblingoptions.clear(); metaoption.metacommand |= mChild; get_child_options (meta, metaoption); metadata.insert("contains"); if (prev_was_parent) { metaoption.pre_tree_traverse += ".pr"; } else if (prev_was_child) { metaoption.pre_tree_traverse += ".fc"; } prev_was_child = true; prev_was_parent = false; } else { prev_was_child = false; prev_was_parent = false; had_parent_or_child = false; } } // parent/child can have sibling tacked on end also if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) { metaoption.metacommand |= mSibling; get_sibling_options (meta, metaoption); } // check for ex. 
which may occur in format statements if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) { meta = substr (meta.begin()+3, meta.end()); } metadata.insert (meta); metaoption.metaname = meta; } static void parse_coll_meta(text_t &meta, metadata_t &metaoption) { if (meta == "collection") { // no qualifiers metaoption.metaname = g_EmptyText; return; } meta = substr (meta.begin()+11, meta.end()); metaoption.metaname = meta; } static void parse_meta (text_t &meta, format_t *formatlistptr, text_tset &metadata, bool &getParents) { // check for ex. which may occur in format statements if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) { meta = substr (meta.begin()+3, meta.end()); } if (meta == "link") formatlistptr->command = comLink; else if (meta == "/link") formatlistptr->command = comEndLink; else if (meta == "srclink") { formatlistptr->command = comAssocLink; formatlistptr->meta.metaname = "srclink_file"; metadata.insert("srclink_file"); } else if (meta == "srchref") { formatlistptr->command = comAssocLink; formatlistptr->text = "href"; formatlistptr->meta.metaname = "srclink_file"; metadata.insert("srclink_file"); } else if (meta == "/srclink") { formatlistptr->command = comEndAssocLink; formatlistptr->meta.metaname = "srclink_file"; } // and weblink etc else if (meta == "href") formatlistptr->command = comHref; else if (meta == "num") formatlistptr->command = comNum; else if (meta == "icon") formatlistptr->command = comIcon; else if (meta == "Text") formatlistptr->command = comDoc; else if (meta == "RelatedDocuments") formatlistptr->command = comRel; else if (meta == "highlight") formatlistptr->command = comHighlight; else if (meta == "/highlight") formatlistptr->command = comEndHighlight; else if (meta == "metadata-spanwrap") formatlistptr->command = comMetadataSpanWrap; else if (meta == "/metadata-spanwrap") formatlistptr->command = comEndMetadataSpanWrap; else if (meta == "Summary") formatlistptr->command = comSummary; else if 
(meta == "DocImage") formatlistptr->command = comImage; else if (meta == "DocTOC") formatlistptr->command = comTOC; else if (meta == "DocumentButtonDetach") formatlistptr->command = comDocumentButtonDetach; else if (meta == "DocumentButtonHighlight") formatlistptr->command = comDocumentButtonHighlight; else if (meta == "DocumentButtonExpandContents") formatlistptr->command = comDocumentButtonExpandContents; else if (meta == "DocumentButtonExpandText") formatlistptr->command = comDocumentButtonExpandText; else if (meta == "DocOID") formatlistptr->command = comOID; else if (meta == "DocTopOID") formatlistptr->command = comTopOID; else if (meta == "DocRank") formatlistptr->command = comRank; else if (meta == "DocTermsFreqTotal") formatlistptr->command = comDocTermsFreqTotal; else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) { formatlistptr->command = comCollection; parse_coll_meta(meta, formatlistptr->meta); } else { formatlistptr->command = comMeta; parse_meta (meta, formatlistptr->meta, metadata, getParents); } } static bool parse_string (const text_t &formatstring, format_t *formatlistptr, text_tset &metadata, bool &getParents) { text_t text; text_t::const_iterator here = formatstring.begin(); text_t::const_iterator end = formatstring.end(); while (here != end) { if (*here == '\\') { ++here; if (here != end) text.push_back (*here); } else if (*here == '{') { if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; text.clear(); } if (parse_action (++here, end, formatlistptr, metadata, getParents)) { formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; if (here == end) break; } } else if (*here == '[') { if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; text.clear(); } text_t meta; 
++here; while (*here != ']') { if (here == end) return false; meta.push_back (*here); ++here; } parse_meta (meta, formatlistptr, metadata, getParents); formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; } else text.push_back (*here); if (here != end) ++here; } if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; } return true; } static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end, format_t *formatlistptr, text_tset &metadata, bool &getParents) { text_t::const_iterator it = findchar (here, end, '}'); if (it == end) return false; text_t com = substr (here, it); here = findchar (it, end, '{'); if (here == end) return false; else ++here; if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf; else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr; else return false; int commacount = 0; text_t text; while (here != end) { if (*here == '\\') { ++here; if (here != end) text.push_back(*here); } else if (*here == ',' || *here == '}' || *here == '{') { if (formatlistptr->command == comOr) { // the {Or}{this, or this, or this, or this} statement format_t *or_ptr; // find the next unused orptr if (formatlistptr->orptr == NULL) { formatlistptr->orptr = new format_t(); or_ptr = formatlistptr->orptr; } else { or_ptr = formatlistptr->orptr; while (or_ptr->nextptr != NULL) or_ptr = or_ptr->nextptr; or_ptr->nextptr = new format_t(); or_ptr = or_ptr->nextptr; } if (!text.empty()) { if (!parse_string(text, or_ptr, metadata, getParents)) { return false; } } if (*here == '{') { // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}} // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}} // The latter can always be re-written: // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}} if (!text.empty()) // already used up allocated format_t { // => 
allocate new one for detected action or_ptr->nextptr = new format_t(); or_ptr = or_ptr->nextptr; } if (!parse_action(++here, end, or_ptr, metadata, getParents)) { return false; } } else { if (*here == '}') break; } text.clear(); } // Parse an {If}{decide,do,else} statement else { // Read the decision component. if (commacount == 0) { // Decsion can be a metadata element, or a piece of text. // Originally Stefan's code, updated 25/10/2000 by Gordon. text_t::const_iterator beginbracket = text.begin(); text_t::const_iterator endbracket = (text.end() - 1); // Decision is based on a metadata element if ((*beginbracket == '[') && (*endbracket == ']')) { // Ignore the surrounding square brackets text_t meta = substr (beginbracket+1, endbracket); parse_meta (meta, formatlistptr->decision.meta, metadata, getParents); ++commacount; text.clear(); } // Decision is a piece of text (probably a macro like _cgiargmode_). else { // hunt for any metadata in string, which might be uses in // to test a condition, e.g. [Format] eq 'PDF' format_t* dummyformat = new format_t(); // update which metadata fields needed // (not interested in updatng formatlistptr) parse_string (text, dummyformat, metadata, getParents); delete dummyformat; formatlistptr->decision.command = dText; formatlistptr->decision.text = text; ++commacount; text.clear(); } } // Read the "then" and "else" components of the {If} statement. 
else { format_t** nextlistptr = NULL; if (commacount == 1) { nextlistptr = &formatlistptr->ifptr; } else if (commacount == 2 ) { nextlistptr = &formatlistptr->elseptr; } else { return false; } if (!text.empty()) { if (*nextlistptr == NULL) { *nextlistptr = new format_t(); } else { // skip to the end of any format_t statements already added while ((*nextlistptr)->nextptr != NULL) { nextlistptr = &(*nextlistptr)->nextptr; } (*nextlistptr)->nextptr = new format_t(); nextlistptr = &(*nextlistptr)->nextptr; } if (!parse_string (text, *nextlistptr, metadata, getParents)) { return false; } text.clear(); } if (*here == '{') { if (*nextlistptr == NULL) { *nextlistptr = new format_t(); } else { // skip to the end of any format_t statements already added while ((*nextlistptr)->nextptr != NULL) { nextlistptr = &(*nextlistptr)->nextptr; } (*nextlistptr)->nextptr = new format_t(); nextlistptr = &(*nextlistptr)->nextptr; } if (!parse_action(++here, end, *nextlistptr, metadata, getParents)) { return false; } } else { if (*here == '}') break; ++commacount; } } } } else text.push_back(*here); if (here != end) ++here; } return true; } static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID, const text_t metaname, int metapos=-1) { text_t tag_type = (metaname == "Text") ? "div" : "span"; text_t editable_type = (metaname == "Text") ? 
"text" : "metadata"; text_t wrapped_metatext = "<" + tag_type + " "; wrapped_metatext += "class=\"editable-" + editable_type + "\" "; wrapped_metatext += "docoid=\"" + OID + "\" "; wrapped_metatext += "metaname=\"" + metaname + "\""; if (metapos>=0) { text_t metapos_str = metapos; wrapped_metatext += " metapos=\"" + metapos_str + "\""; } wrapped_metatext += ">" + metatext + ""; return wrapped_metatext; } bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr, text_tset &metadata, bool &getParents) { formatlistptr->clear(); getParents = false; return (parse_string (formatstring, formatlistptr, metadata, getParents)); } // position -1 for all, -2 for the last, 0 for the first, or x for a particular piece // metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false) { text_t no_ns_metaname = remove_namespace(meta.metaname); text_t formatted_metatext; bool first = true; const int start_i=0; const int end_i = metainfo.values.size()-1; if (position == -1) { // all for (int i=start_i; i<=end_i; ++i) { if (!first) formatted_metatext += meta.siblingoptions; text_t fresh_metatext; if (meta.metacommand & mSpecial) { // special formatting if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]); else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]); else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")"; } else fresh_metatext = metainfo.values[i]; // New "truncate" special formatting option if (meta.metacommand & mTruncate) { int truncate_length = meta.siblingoptions.getint(); text_t truncated_value = fresh_metatext; if (truncated_value.size() > truncate_length) { truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... 
_texttruncated_"; } fresh_metatext = truncated_value; } // New "xmlsafe" special formatting option if (meta.metacommand & mXMLSafe) { // Make it XML-safe text_t text_xml_safe = ""; text_t::const_iterator text_iterator = fresh_metatext.begin(); while (text_iterator != fresh_metatext.end()) { if (*text_iterator == '&') text_xml_safe += "&"; else if (*text_iterator == '<') text_xml_safe += "<"; else if (*text_iterator == '>') text_xml_safe += ">"; else text_xml_safe.push_back(*text_iterator); text_iterator++; } fresh_metatext = text_xml_safe; } // New "htmlsafe" special formatting option if (meta.metacommand & mHTMLSafe) { // Make it HTML-safe text_t text_html_safe = ""; text_t::const_iterator text_iterator = fresh_metatext.begin(); while (text_iterator != fresh_metatext.end()) { if (*text_iterator == '&') text_html_safe += "&"; else if (*text_iterator == '<') text_html_safe += "<"; else if (*text_iterator == '>') text_html_safe += ">"; else if (*text_iterator == '"') text_html_safe += """; else text_html_safe.push_back(*text_iterator); text_iterator++; } fresh_metatext = text_html_safe; } // New "dmsafe" special formatting option if (meta.metacommand & mDMSafe) { // Make it macro-safe text_t text_dm_safe = dm_safe(fresh_metatext); fresh_metatext = text_dm_safe; } if (metadata_spanwrap) { fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i); } formatted_metatext += fresh_metatext; first = false; } } else { if (position == -2) { // end position = end_i; } else if (position < start_i || position > end_i) { return ""; } text_t fresh_metatext; if (meta.metacommand & mSpecial) { // special formatting if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]); else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]); else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")"; } else fresh_metatext = metainfo.values[position]; // New "truncate" special formatting option if 
(meta.metacommand & mTruncate) { int truncate_length = meta.siblingoptions.getint(); text_t truncated_value = fresh_metatext; if (truncated_value.size() > truncate_length) { truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_"; } fresh_metatext = truncated_value; } // New "xmlsafe" special formatting option if (meta.metacommand & mXMLSafe) { // Make it XML-safe text_t text_xml_safe = ""; text_t::const_iterator text_iterator = fresh_metatext.begin(); while (text_iterator != fresh_metatext.end()) { if (*text_iterator == '&') text_xml_safe += "&"; else if (*text_iterator == '<') text_xml_safe += "<"; else if (*text_iterator == '>') text_xml_safe += ">"; else text_xml_safe.push_back(*text_iterator); text_iterator++; } fresh_metatext = text_xml_safe; } // New "htmlsafe" special formatting option if (meta.metacommand & mHTMLSafe) { // Make it HTML-safe text_t text_html_safe = ""; text_t::const_iterator text_iterator = fresh_metatext.begin(); while (text_iterator != fresh_metatext.end()) { if (*text_iterator == '&') text_html_safe += "&"; else if (*text_iterator == '<') text_html_safe += "<"; else if (*text_iterator == '>') text_html_safe += ">"; else if (*text_iterator == '"') text_html_safe += """; else text_html_safe.push_back(*text_iterator); text_iterator++; } fresh_metatext = text_html_safe; } // New "dmsafe" special formatting option if (meta.metacommand & mDMSafe) { // Make it macro-safe text_t text_dm_safe = dm_safe(fresh_metatext); fresh_metatext = text_dm_safe; } if (metadata_spanwrap) { fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position); } formatted_metatext += fresh_metatext; } if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext); else return formatted_metatext; } static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values) { MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent; 
switch (meta.mqualifier.parent) { case pNone: return "Nothing!!"; break; case pImmediate: if (parent != NULL) { text_t parent_oid = get_parent(docinfo.OID); return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values); } break; case pTop: if (parent != NULL) { text_t parent_oid = get_parent(docinfo.OID); while (parent->parent != NULL) { parent = parent->parent; parent_oid = get_parent(parent_oid); } return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values); } break; case pAll: MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent; if (parent != NULL) { text_t parent_oid = get_parent(docinfo.OID); text_tarray tmparray; while (parent != NULL) { tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata parent = parent->parent; parent_oid = get_parent(parent_oid); } // now join them up - use teh parent separator bool first = true; text_t tmp; text_tarray::reverse_iterator here = tmparray.rbegin(); text_tarray::reverse_iterator end = tmparray.rend(); while (here != end) { if (!first) tmp += meta.parentoptions; tmp += *here; first = false; ++here; } if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp); else return tmp; } } return ""; } static text_t get_child_meta (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const metadata_t &meta, text_tmap &options, ostream& logout, int siblings_values) { if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children const text_t& pre_tree_trav = meta.pre_tree_traverse; const text_t& child_metaname = meta.metaname; const text_t& child_field = meta.childoptions; text_tset child_metadata; child_metadata.insert(child_metaname); FilterResponse_t child_response; if (meta.mqualifier.child == cNum) { // just one child //get the information associated with the metadata for child doc if (!get_info 
(docinfo.OID+pre_tree_trav+child_field, collection, "", child_metadata, false, collectproto, child_response, logout)) return ""; // invalid child number if (child_response.docInfo.empty()) return false; // no info for the child ResultDocInfo_t& child_docinfo = child_response.docInfo[0]; MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname]; text_t child_metavalue = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values); return expand_metadata(child_metavalue,collection,collectproto, child_docinfo,disp,options,logout); } if (meta.mqualifier.child != cAll) return false; // invalid qualifier if (!pre_tree_trav.empty()) { // need to get relevant "contains" metadata for new (e.g. pre tree trav) node FilterResponse_t trav_response; text_tset trav_metadata; trav_metadata.insert("contains"); if (!get_info (docinfo.OID+pre_tree_trav, collection, "", trav_metadata, false, collectproto, trav_response, logout)) return ""; // invalid pre_tree_trav if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0]; // use this for rest of routine docinfo = trav_docinfo; } // we need to get all children text_t result = ""; text_tarray children; text_t contains = docinfo.metadata["contains"].values[0]; splitchar (contains.begin(), contains.end(), ';', children); text_tarray::const_iterator here = children.begin(); text_tarray::const_iterator end = children.end(); bool first = true; while (here !=end) { text_t oid = *here; here++; if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID); //get the information associated with the metadata for child doc if (!get_info (oid, collection, "", child_metadata, false, collectproto, child_response, logout) || child_response.docInfo.empty()) { first = false; continue; } ResultDocInfo_t& child_docinfo = child_response.docInfo[0]; MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname]; text_t child_metavalue = 
get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values); if (!first) result += child_field; first = false; // need to do this here cos otherwise we are in the wrong document text_t em = expand_metadata(child_metavalue,collection,collectproto, child_docinfo,disp,options,logout); result += em; } return result; } static text_t get_meta (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const metadata_t &meta, text_tmap &options, ostream& logout) { // make sure we have the requested metadata MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname); if (it == docinfo.metadata.end()) return ""; int siblings_values = 0; // default is no siblings, just the first metadata available if (meta.metacommand & mSibling) { if (meta.mqualifier.sibling == sAll) { siblings_values = -1; //all } else if (meta.mqualifier.sibling == sNum) { siblings_values = meta.siblingoptions.getint(); } } if (meta.metacommand & mParent) { return get_parent_meta(docinfo,meta,siblings_values); } else if (meta.metacommand & mChild) { return get_child_meta(collection,collectproto,docinfo,disp,meta, options,logout, siblings_values); } else if (meta.metacommand & mSibling) { // only siblings MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname]; return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values); } else { // straightforward metadata request (nothing fancy) text_t classifier_metaname = docinfo.classifier_metadata_type; int metaname_index = (classifier_metaname == meta.metaname) ? 
docinfo.classifier_metadata_offset : 0; return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index); } return ""; } static text_t get_or (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *orptr, text_tmap &options, ostream& logout) { while (orptr != NULL) { if (metadata_spanwrap) { // need to be a bit more careful about this // => test for it *without* spanwrap, and if defined, then // got back and generate it again, this time with spanwrap on metadata_spanwrap = false; text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr, options, logout); metadata_spanwrap = true; if (!test_tmp.empty()) { return format_string (collection,collectproto,docinfo, disp, orptr, options, logout); } } else { text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr, options, logout); if (!tmp.empty()) return tmp; } orptr = orptr->nextptr; } return ""; } static bool char_is_whitespace(const char c) { return ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r')); } static int scan_over_whitespace(const text_t& outstring, const int start_pos) { int pos = start_pos; while (pos=0) { if (!char_is_whitespace(outstring[pos])) { break; } --pos; } return pos; } static int rscan_for_whitespace(const text_t& outstring, const int start_pos) { int pos = start_pos; while (pos>=0) { if (char_is_whitespace(outstring[pos])) { break; } --pos; } return pos; } static int rscan_for(const text_t& outstring, const int start_pos, const char find_c) { int pos = start_pos; while (pos>=0) { char c = outstring[pos]; if (outstring[pos] == find_c) { break; } --pos; } return pos; } text_t extract_substr(const text_t& outstring, const int start_pos, const int end_pos) { text_t extracted_str; extracted_str.clear(); for (int pos=start_pos; pos<=end_pos; ++pos) { extracted_str.push_back(outstring[pos]); } return extracted_str; } static text_t expand_potential_metadata(const text_t& 
collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const text_t& intext, text_tmap &options, ostream& logout) { text_t outtext; // decide if dealing with metadata or text text_t::const_iterator beginbracket = intext.begin(); text_t::const_iterator endbracket = (intext.end() - 1); // Decision is based on a metadata element if ((*beginbracket == '[') && (*endbracket == ']')) { // Ignore the surrounding square brackets text_t meta_text = substr (beginbracket+1, endbracket); if (meta_text == "Text") { outtext = format_text(collection, collectproto, docinfo, disp, options, logout); } else { text_tset metadata; bool getParents =false; metadata_t meta; parse_meta (meta_text, meta, metadata, getParents); outtext = get_meta (collection,collectproto,docinfo,disp,meta,options,logout); } } else { outtext = intext; } return outtext; } static bool uses_expression(const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const text_t& outstring, text_t& lhs_expr, text_t& op_expr, text_t& rhs_expr, text_tmap &options, ostream& logout) { // Note: the string may not be of the form: str1 op str2, however // to deterine this we have to process it on the assumption it is, // and if at any point an 'erroneous' value is encountered, return // false and let something else have a go at evaluating it // Starting at the end of the string and working backwards .. 
const int outstring_len = outstring.size(); // skip over white space int rhs_end = rscan_over_whitespace(outstring,outstring_len-1); if (rhs_end<=0) { // no meaningful text or (rhs_end==0) no room for operator return false; } // check for ' or " and then scan over token const char potential_quote = outstring[rhs_end]; int rhs_start=rhs_end; bool quoted = false; if ((potential_quote == '\'') || (potential_quote == '\"')) { --rhs_end; rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1; quoted = true; } else { rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1; } if ((rhs_end-rhs_start)<0) { // no meaningful rhs expression return false; } // form rhs_expr rhs_expr = extract_substr(outstring,rhs_start,rhs_end); // skip over white space const int to_whitespace = (quoted) ? 2 : 1; int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace); int op_start = rscan_for_whitespace(outstring,op_end-1)+1; if ((op_end<0) && (op_start<0)) { // no meaningful expression operator return false; } if (op_end-op_start<0) { // no meaningful expression operator return false; } op_expr = extract_substr(outstring,op_start,op_end); // check for operator if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") && (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) { // not a valid operator return false; } int lhs_end = rscan_over_whitespace(outstring,op_start-1); if (lhs_end<0) { // no meaningful lhs expression return false; } int lhs_start = scan_over_whitespace(outstring,0); // form lhs_expr from remainder of string lhs_expr = extract_substr(outstring,lhs_start,lhs_end); // Now we know we have a valid expression, look up any // metadata terms rhs_expr = expand_potential_metadata(collection,collectproto,docinfo, disp,rhs_expr,options,logout); lhs_expr = 
expand_potential_metadata(collection,collectproto,docinfo, disp,lhs_expr,options,logout); return true; } static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr, const text_t& rhs_expr, ostream& logout) { if (op_expr == "eq") return (lhs_expr == rhs_expr); else if (op_expr == "ne" ) return (lhs_expr != rhs_expr); else if (op_expr == "gt") return (lhs_expr > rhs_expr); else if (op_expr == "ge") return (lhs_expr >= rhs_expr); else if (op_expr == "lt") return (lhs_expr < rhs_expr); else if (op_expr == "le") return (lhs_expr <= rhs_expr); else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint()); else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint()); else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint()); else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint()); else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint()); else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint()); else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr)); else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr)); else { logout << "Error: '" << op_expr << "' is not a recognised operator." 
<< endl; } return false; } static text_t get_if (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const decision_t &decision, format_t *ifptr, format_t *elseptr, text_tmap &options, ostream& logout) { // If the decision component is a metadata element, then evaluate it // to see whether we output the "then" or the "else" clause if (decision.command == dMeta) { bool store_metadata_spanwrap = metadata_spanwrap; metadata_spanwrap = 0; // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not bool metadata_exists = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options, logout) != ""); metadata_spanwrap = store_metadata_spanwrap; if (metadata_exists) { if (ifptr != NULL) return get_formatted_string (collection,collectproto,docinfo, disp, ifptr, options, logout); } else { if (elseptr != NULL) return get_formatted_string (collection,collectproto,docinfo, disp, elseptr, options, logout); } } // If the decision component is text, then evaluate it (it is probably a // macro like _cgiargmode_) to decide what to output. else if (decision.command == dText) { text_t outstring; disp.expandstring (decision.text, outstring); // Check for if expression in form: str1 op str2 // (such as [x] eq "y") text_t lhs_expr, op_expr, rhs_expr; if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) { if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) { if (ifptr != NULL) { return get_formatted_string (collection, collectproto, docinfo, disp, ifptr, options, logout); } else { return ""; } } else { if (elseptr != NULL) { return get_formatted_string (collection, collectproto, docinfo, disp, elseptr, options, logout); } else { return ""; } } } // This is a tad tricky. When we expand a string like _cgiargmode_, that is // a cgi argument macro that has not been set, it evaluates to itself. 
// Therefore, were have to say that a piece of text evalautes true if // it is non-empty and if it is a cgi argument evaulating to itself. if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) { if (ifptr != NULL) return get_formatted_string (collection, collectproto, docinfo, disp, ifptr, options, logout); } else { if (elseptr != NULL) return get_formatted_string (collection, collectproto, docinfo, disp, elseptr, options, logout); } } return ""; } bool includes_metadata(const text_t& text) { text_t::const_iterator here = text.begin(); text_t::const_iterator end = text.end(); while (here != end) { if (*here == '[') return true; ++here; } return false; } static text_t expand_metadata(const text_t &metavalue, const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, text_tmap &options, ostream &logout) { if (includes_metadata(metavalue)) { // text has embedded metadata in it => expand it FilterRequest_t request; FilterResponse_t response; request.getParents = false; format_t *expanded_formatlistptr = new format_t(); parse_formatstring (metavalue, expanded_formatlistptr, request.fields, request.getParents); // retrieve metadata get_info(docinfo.OID, collection, "", request.fields, request.getParents, collectproto, response, logout); if (!response.docInfo.empty()) { text_t expanded_metavalue = get_formatted_string(collection, collectproto, response.docInfo[0], disp, expanded_formatlistptr, options, logout); return expanded_metavalue; } else { return metavalue; } } else { return metavalue; } } text_t get_collection_meta(const text_t& collection, recptproto* collectproto, displayclass &disp, text_t meta_name, ostream& logout) { ColInfoResponse_t collectinfo; comerror_t err; collectproto->get_collectinfo (collection, collectinfo,err,logout); text_t meta_value = ""; text_t lang; disp.expandstring("_cgiargl_",lang); if (lang.empty()) { lang = "en"; } if (err == noError) { meta_value = 
collectinfo.get_collectionmeta(meta_name, lang); } return meta_value; } text_t format_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, text_tmap &options, ostream& logout) { if (formatlistptr == NULL) return ""; switch (formatlistptr->command) { case comOID: return docinfo.OID; case comTopOID: { text_t top_id; get_top(docinfo.OID, top_id); return top_id; } case comRank: return text_t(docinfo.ranking); case comText: return formatlistptr->text; case comLink: return options["link"]; case comEndLink: { if (options["link"].empty()) return ""; else return ""; } case comHref: return get_href(options["link"]); case comIcon: return options["icon"]; case comNum: return docinfo.result_num; case comRel: //if [RelatedDocuments] appears in format string, collect relation data return get_related_docs(collection, collectproto, docinfo, logout); case comSummary: return format_summary(collection, collectproto, docinfo, disp, options, logout); case comAssocLink: { text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout); if (!link_filename.empty()) { text_t href= expand_metadata(options["assocfilepath"]+link_filename, collection, collectproto, docinfo, disp, options, logout); if (formatlistptr->text == "href") { return href; } return ""; } return ""; } case comEndAssocLink: { text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout); if (!link_filename.empty()) { return ""; } return ""; } case comMeta: { const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout); return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout); } case comDoc: return format_text(collection, collectproto, docinfo, disp, options, logout); case comImage: return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, 
options, logout); case comTOC: return options["DocTOC"]; case comDocumentButtonDetach: return options["DocumentButtonDetach"]; case comDocumentButtonHighlight: return options["DocumentButtonHighlight"]; case comDocumentButtonExpandContents: return options["DocumentButtonExpandContents"]; case comDocumentButtonExpandText: return options["DocumentButtonExpandText"]; case comHighlight: if (options["highlight"] == "1") return ""; break; case comEndHighlight: if (options["highlight"] == "1") return ""; break; case comMetadataSpanWrap: metadata_spanwrap=true; return ""; break; case comEndMetadataSpanWrap: metadata_spanwrap=false; return ""; break; case comIf: return get_if (collection, collectproto, docinfo, disp, formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr, options, logout); case comOr: return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr, options, logout); case comDocTermsFreqTotal: return docinfo.num_terms_matched; case comCollection: if (formatlistptr->meta.metaname == g_EmptyText) { return collection; } return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout); } return ""; } text_t get_formatted_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, text_tmap &options, ostream& logout) { text_t ft; while (formatlistptr != NULL) { ft += format_string (collection, collectproto, docinfo, disp, formatlistptr, options, logout); formatlistptr = formatlistptr->nextptr; } return ft; } // we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive? 
/* FUNCTION NAME: format_text
 * DESC: this is invoked when the document text is wanted. Uses the text
 *       preloaded in options["text"] if there is any, otherwise asks the
 *       collection server for the document.
 * RETURNS: the document text, passed through spanwrap_metatext when
 *          metadata span-wrapping is switched on
 */
text_t format_text (const text_t& collection, recptproto* collectproto,
                    ResultDocInfo_t &docinfo, displayclass &disp,
                    text_tmap &options, ostream& logout)
{
  text_t doc_text;

  if (options["text"].empty()) {
    // nothing preloaded, so get document text here
    DocumentRequest_t docrequest;
    DocumentResponse_t docresponse;
    comerror_t err;
    docrequest.OID = docinfo.OID;
    collectproto->get_document (collection, docrequest, docresponse, err, logout);
    doc_text = docresponse.doc;
  }
  else {
    doc_text = options["text"];
  }

  if (metadata_spanwrap) {
    doc_text = spanwrap_metatext(doc_text,docinfo.OID,"Text");
  }

  return doc_text;
}

/* FUNCTION NAME: format_summary
 * DESC: this is invoked when a [Summary] special metadata is processed.
 *       A Summary metadata value on the document takes precedence;
 *       failing that the document text is run through summarise() with
 *       the expansion of _cgiargq_ as the query.
 * RETURNS: a query-biased summary for the document
 */
text_t format_summary (const text_t& collection, recptproto* collectproto,
                       ResultDocInfo_t &docinfo, displayclass &disp,
                       text_tmap &options, ostream& logout)
{
  // GRB: added code here to ensure that the cstr (and other collections)
  //      uses the document metadata item Summary, rather than compressing
  //      the text of the document, processed via the methods in
  //      summarise.cpp
  text_t summary;

  MetadataInfo_tmap::iterator summary_it = docinfo.metadata.find("Summary");
  const bool has_summary_meta = (summary_it != docinfo.metadata.end()) &&
                                (summary_it->second.values.size() > 0);

  if (has_summary_meta) {
    summary = summary_it->second.values[0];
  }
  else {
    text_t textToSummarise, query;

    if (!options["text"].empty()) {
      // in practice, this would not happen, because text is only
      // loaded with the [Text] command
      textToSummarise = options["text"];
    }
    else {
      // get document text
      DocumentRequest_t docrequest;
      DocumentResponse_t docresponse;
      comerror_t err;
      docrequest.OID = docinfo.OID;
      collectproto->get_document (collection, docrequest, docresponse, err, logout);
      textToSummarise = docresponse.doc;
    }

    disp.expandstring("_cgiargq_",query);
    summary = summarise(textToSummarise,query,80);
  }

  if (metadata_spanwrap) {
    summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
  }

  return summary;
}