/**********************************************************************
 *
 * formattools.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "formattools.h"
#include "cgiutils.h"
#include "OIDtools.h"
#include "summarise.h"

// assert() is used by the option parsers further down this file
#include <assert.h>

// a few function prototypes

static text_t format_string (const text_t& collection, recptproto* collectproto,
                             ResultDocInfo_t &docinfo, displayclass &disp,
                             format_t *formatlistptr,
                             const text_t &link, const text_t &icon,
                             const text_t &text, bool highlight, ostream& logout);

static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
                          format_t *formatlistptr, text_tset &metadata, bool &getParents);

text_t format_summary (const text_t& collection, recptproto* collectproto,
                       ResultDocInfo_t &docinfo, displayclass &disp,
                       const text_t &text, bool highlight, ostream& logout);


// resets a metadata request to "no name, no command, no parent, no options"
void metadata_t::clear() {
  metaname.clear();
  metacommand = mNone;
  parentcommand = pNone;
  functionoptions.clear();
}

// resets a decision to an empty metadata-based decision
void decision_t::clear() {
  command = dMeta;
  meta.clear();
  text.clear();
}

// resets a format node to an empty text node with no links to other nodes.
// note that the linked nodes are only forgotten here, not deleted
void format_t::clear() {
  command = comText;
  decision.clear();
  text.clear();
  meta.clear();
  nextptr = NULL;
  ifptr = NULL;
  elseptr = NULL;
  orptr = NULL;
}

// restores the default document formatting options
void formatinfo_t::clear() {
  DocumentImages = false;
  DocumentTitles = true;
  // NOTE(review): in this copy of the file the literal contained a raw line
  // break after "untitled}"; it is spelt "\n" here so the literal is valid.
  // Presumably some markup was lost -- confirm against the repository.
  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}\n";
  DocumentContents = true;
  DocumentArrowsBottom = true;
  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
  //  DocumentButtons.push_back ("Expand Text");
  //  DocumentButtons.push_back ("Expand Contents");
  DocumentButtons.push_back ("Detach");
  DocumentButtons.push_back ("Highlight");
  RelatedDocuments = "";
  // NOTE(review): as above, raw line breaks surrounded [Text] in this copy;
  // presumably markup was lost -- confirm against the repository.
  DocumentText = "\n[Text]\n";
  formatstrings.erase (formatstrings.begin(), formatstrings.end());
  DocumentUseHTML = false;
}

// simply checks to see if formatstring begins with a td tag: leading
// spaces are skipped, then "<td" (any case) followed by '>' or ' ' is
// looked for
bool is_table_content (const text_t &formatstring) {
  text_t::const_iterator here = formatstring.begin();
  text_t::const_iterator end = formatstring.end();

  while (here != end) {
    if (*here != ' ') {
      if ((*here == '<') && ((here+3) < end)) {
        if ((*(here+1) == 't' || *(here+1) == 'T') &&
            (*(here+2) == 'd' || *(here+2) == 'D') &&
            (*(here+3) == '>' || *(here+3) == ' '))
          return true;
        // a '<' that does not open a td tag is stepped over and
        // scanning continues with the next character
      } else return false;
    }
    here ++;
  }
  return false;
}

// as above, but for a parsed format list: only a leading text node can
// count as table content
bool is_table_content (const format_t *formatlistptr) {
  if (formatlistptr == NULL) return false;

  if (formatlistptr->command == comText)
    return is_table_content (formatlistptr->text);

  return false;
}

// returns false if key isn't in formatstringmap
// (formatstring is emptied in that case)
bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
                       text_t &formatstring) {
  formatstring.clear();

  text_tmap::const_iterator it = formatstringmap.find(key);
  if (it == formatstringmap.end()) return false;

  formatstring = (*it).second;
  return true;
}

// tries to find "key1key2" then "key1" then "key2"
// returns false (with formatstring empty) if none of them is present
bool get_formatstring (const text_t &key1, const text_t &key2,
                       const text_tmap &formatstringmap,
                       text_t &formatstring) {
  formatstring.clear();

  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
  if (it != formatstringmap.end()) {
    formatstring = (*it).second;
    return true;
  }

  it = formatstringmap.find(key1);
  if (it != formatstringmap.end()) {
    formatstring = (*it).second;
    return true;
  }

  it = formatstringmap.find(key2);
  if (it != formatstringmap.end()) {
    formatstring = (*it).second;
    return true;
  }

  return false;
}

// returns a date of form 31 _textmonthnn_ 1999
// input is date of type 19991231
// at least the year must be present in date
// (less than four characters gives ""; a missing or out of range month
// gives just the year; a missing or out of range day gives month and year)
text_t format_date (const text_t &date) {

  if (date.size() < 4) return "";

  text_t::const_iterator datebegin = date.begin();

  text_t year = substr (datebegin, datebegin+4);

  if (date.size() < 6) return year;

  // The range check has to read the two month digits themselves. It used
  // to be applied to the "_textmonthNN_" macro name, which starts with '_'
  // (presumably getint() cannot extract NN from that -- confirm against
  // text_t), so an out of range month was never rejected.
  text_t monthdigits = substr (datebegin+4, datebegin+6);
  int imonth = monthdigits.getint();
  if (imonth < 0 || imonth > 12) return year;

  text_t month = "_textmonth" + monthdigits + "_";

  if (date.size() < 8) return month + " " + year;

  text_t day = substr (datebegin+6, datebegin+8);
  // drop a leading zero so that "05" is displayed as "5"
  if (day[0] == '0') day = substr (day.begin()+1, day.end());
  int iday = day.getint();
  if (iday < 0 || iday > 31) return month + " " + year;

  return day + " " + month + " " + year;
}

// converts an iso639 language code to its English equivalent
// (returns "" for a code that isn't listed)
// I realize that this isn't the prettiest or most efficient implementation,
// hopefully this ugly Language (and Date too) formatting won't survive to
// see gsdl-3.0
// NOTE(review): "Tegulu" (te) looks like a misspelling of "Telugu"; it is
// left untouched because the string is user-visible output.
text_t iso639 (const text_t &langcode) {

  if (langcode == "aa") return "Afar";
  if (langcode == "ab") return "Abkhazian";
  if (langcode == "af") return "Afrikaans";
  if (langcode == "am") return "Amharic";
  if (langcode == "ar") return "Arabic";
  if (langcode == "as") return "Assamese";
  if (langcode == "ay") return "Aymara";
  if (langcode == "az") return "Azerbaijani";

  if (langcode == "ba") return "Bashkir";
  if (langcode == "be") return "Byelorussian";
  if (langcode == "bg") return "Bulgarian";
  if (langcode == "bh") return "Bihari";
  if (langcode == "bi") return "Bislama";
  if (langcode == "bn") return "Bengali; Bangla";
  if (langcode == "bo") return "Tibetan";
  if (langcode == "br") return "Breton";

  if (langcode == "ca") return "Catalan";
  if (langcode == "co") return "Corsican";
  if (langcode == "cs") return "Czech";
  if (langcode == "cy") return "Welsh";

  if (langcode == "da") return "Danish";
  if (langcode == "de") return "German";
  if (langcode == "dz") return "Bhutani";

  if (langcode == "el") return "Greek";
  if (langcode == "en") return "English";
  if (langcode == "eo") return "Esperanto";
  if (langcode == "es") return "Spanish";
  if (langcode == "et") return "Estonian";
  if (langcode == "eu") return "Basque";

  if (langcode == "fa") return "Persian";
  if (langcode == "fi") return "Finnish";
  if (langcode == "fj") return "Fiji";
  if (langcode == "fo") return "Faroese";
  if (langcode == "fr") return "French";
  if (langcode == "fy") return "Frisian";

  if (langcode == "ga") return "Irish";
  if (langcode == "gd") return "Scots Gaelic";
  if (langcode == "gl") return "Galician";
  if (langcode == "gn") return "Guarani";
  if (langcode == "gu") return "Gujarati";

  if (langcode == "ha") return "Hausa";
  if (langcode == "hi") return "Hindi";
  if (langcode == "hr") return "Croatian";
  if (langcode == "hu") return "Hungarian";
  if (langcode == "hy") return "Armenian";

  if (langcode == "ia") return "Interlingua";
  if (langcode == "ie") return "Interlingue";
  if (langcode == "ik") return "Inupiak";
  if (langcode == "in") return "Indonesian";
  if (langcode == "is") return "Icelandic";
  if (langcode == "it") return "Italian";
  if (langcode == "iw") return "Hebrew";

  if (langcode == "ja") return "Japanese";
  if (langcode == "ji") return "Yiddish";
  if (langcode == "jw") return "Javanese";

  if (langcode == "ka") return "Georgian";
  if (langcode == "kk") return "Kazakh";
  if (langcode == "kl") return "Greenlandic";
  if (langcode == "km") return "Cambodian";
  if (langcode == "kn") return "Kannada";
  if (langcode == "ko") return "Korean";
  if (langcode == "ks") return "Kashmiri";
  if (langcode == "ku") return "Kurdish";
  if (langcode == "ky") return "Kirghiz";

  if (langcode == "la") return "Latin";
  if (langcode == "ln") return "Lingala";
  if (langcode == "lo") return "Laothian";
  if (langcode == "lt") return "Lithuanian";
  if (langcode == "lv") return "Latvian, Lettish";

  if (langcode == "mg") return "Malagasy";
  if (langcode == "mi") return "Maori";
  if (langcode == "mk") return "Macedonian";
  if (langcode == "ml") return "Malayalam";
  if (langcode == "mn") return "Mongolian";
  if (langcode == "mo") return "Moldavian";
  if (langcode == "mr") return "Marathi";
  if (langcode == "ms") return "Malay";
  if (langcode == "mt") return "Maltese";
  if (langcode == "my") return "Burmese";

  if (langcode == "na") return "Nauru";
  if (langcode == "ne") return "Nepali";
  if (langcode == "nl") return "Dutch";
  if (langcode == "no") return "Norwegian";

  if (langcode == "oc") return "Occitan";
  if (langcode == "om") return "(Afan) Oromo";
  if (langcode == "or") return "Oriya";

  if (langcode == "pa") return "Punjabi";
  if (langcode == "pl") return "Polish";
  if (langcode == "ps") return "Pashto, Pushto";
  if (langcode == "pt") return "Portuguese";

  if (langcode == "qu") return "Quechua";

  if (langcode == "rm") return "Rhaeto-Romance";
  if (langcode == "rn") return "Kirundi";
  if (langcode == "ro") return "Romanian";
  if (langcode == "ru") return "Russian";
  if (langcode == "rw") return "Kinyarwanda";

  if (langcode == "sa") return "Sanskrit";
  if (langcode == "sd") return "Sindhi";
  if (langcode == "sg") return "Sangro";
  if (langcode == "sh") return "Serbo-Croatian";
  if (langcode == "si") return "Singhalese";
  if (langcode == "sk") return "Slovak";
  if (langcode == "sl") return "Slovenian";
  if (langcode == "sm") return "Samoan";
  if (langcode == "sn") return "Shona";
  if (langcode == "so") return "Somali";
  if (langcode == "sq") return "Albanian";
  if (langcode == "sr") return "Serbian";
  if (langcode == "ss") return "Siswati";
  if (langcode == "st") return "Sesotho";
  if (langcode == "su") return "Sudanese";
  if (langcode == "sv") return "Swedish";
  if (langcode == "sw") return "Swahili";

  if (langcode == "ta") return "Tamil";
  if (langcode == "te") return "Tegulu";
  if (langcode == "tg") return "Tajik";
  if (langcode == "th") return "Thai";
  if (langcode == "ti") return "Tigrinya";
  if (langcode == "tk") return "Turkmen";
  if (langcode == "tl") return "Tagalog";
  if (langcode == "tn") return "Setswana";
  if (langcode == "to") return "Tonga";
  if (langcode == "tr") return "Turkish";
  if (langcode == "ts") return "Tsonga";
  if (langcode == "tt") return "Tatar";
  if (langcode == "tw") return "Twi";

  if (langcode == "uk") return "Ukrainian";
  if (langcode == "ur") return "Urdu";
  if (langcode == "uz") return "Uzbek";

  if (langcode == "vi") return "Vietnamese";
  if (langcode == "vo") return "Volapuk";

  if (langcode == "wo") return "Wolof";

  if (langcode == "xh") return "Xhosa";

  if (langcode == "yo") return "Yoruba";

  if (langcode == "zh") return "Chinese";
  if (langcode == "zu") return "Zulu";
  return "";
}

// returns whatever lies between the first pair of double quotes in link
// (everything after the first quote if there is no closing quote), or an
// empty string if link contains no double quote at all
text_t get_href (const text_t &link) {

  text_t href;

  text_t::const_iterator end = link.end();
  text_t::const_iterator here = findchar(link.begin(), end, '"');

  // no quote found: stepping past end() here would run off the string
  if (here == end) return href;

  here ++;
  while (here != end) {
    if (*here == '"') break;
    href.push_back(*here);
    here ++;
  }

  return href;
}

//this function gets the information associated with the relation
//metadata for the document associated with 'docinfo'. This relation
//metadata consists of a line of pairs containing 'collection, document OID'
//(this is the OID of the document related to the current document, and
//the collection the related document belongs to). For each of these pairs
//the title metadata is obtained and then an html link between the title
//of the related doc and the document's position is made (collection is
//the related document's collection, and OID is the related document's
//OID; the example link target given at this point of the comment is
//missing from this copy of the file). A list of these html links is made
//for as many related documents as there are. This list is then returned.
//If there are no related documents available for the current document
//then the string '.. no related documents .. ' is returned.
text_t get_related_docs(const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, ostream& logout){ text_tset metadata; //insert the metadata we wish to collect metadata.insert("relation"); metadata.insert("Title"); metadata.insert("Subject"); //for emails, where title data doesn't apply FilterResponse_t response; text_t relation = ""; //string for displaying relation metadata text_t relationTitle = ""; //the related documents Title (or subject) text_t relationOID = ""; //the related documents OID //get the information associated with the metadata for current doc if (get_info (docinfo.OID, collection, metadata, false, collectproto, response, logout)) { //if the relation metadata exists, store for displaying if(!response.docInfo[0].metadata["relation"].values.empty()){ relationOID += response.docInfo[0].metadata["relation"].values[0]; //split relation data into pairs of collectionname,ID number text_tarray relationpairs; splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs); text_tarray::const_iterator currDoc = relationpairs.begin(); text_tarray::const_iterator lastDoc = relationpairs.end(); //iterate through the pairs to split and display while(currDoc != lastDoc){ //split pairs into collectionname and ID text_tarray relationdata; splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata); //get first element in the array (collection) text_tarray::const_iterator doc_data = relationdata.begin(); text_t document_collection = *doc_data; doc_data++; //increment to get next item in array (oid) text_t document_OID = *doc_data; //create html link to related document relation += "" + relationTitle + ""; relation += " (" + document_collection + ")
"; currDoc++; } } } if(relation.empty()) //no relation data for documnet relation = ".. no related documents .. "; return relation; } static void get_parent_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > 7); if (instring.size() <= 7) return; text_t meta, com, op; bool inbraces = false; bool inquotes = false; bool foundcolon = false; text_t::const_iterator here = instring.begin()+6; text_t::const_iterator end = instring.end(); while (here != end) { if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == '\'' && !inquotes) inquotes = true; else if (*here == '\'' && inquotes) inquotes = false; else if (*here == ':' && !inbraces) foundcolon = true; else if (foundcolon) meta.push_back (*here); else if (inquotes) op.push_back (*here); else com.push_back (*here); here ++; } instring = meta; if (com.empty()) metaoption.parentcommand = pImmediate; else if (com == "Top") metaoption.parentcommand = pTop; else if (com == "All") { metaoption.parentcommand = pAll; metaoption.functionoptions = op; } } static void get_sibling_options (text_t &instring, metadata_t &metaoption) { assert (instring.size() > 8); if (instring.size() <= 8) return; text_t meta, com, op; bool inbraces = false; bool inquotes = false; bool foundcolon = false; text_t::const_iterator here = instring.begin()+7; text_t::const_iterator end = instring.end(); while (here != end) { if (*here == '(') inbraces = true; else if (*here == ')') inbraces = false; else if (*here == '\'' && !inquotes) inquotes = true; else if (*here == '\'' && inquotes) inquotes = false; else if (*here == ':' && !inbraces) foundcolon = true; else if (foundcolon) meta.push_back (*here); else if (inquotes) op.push_back (*here); else com.push_back (*here); here ++; } instring = meta; if (com.empty()) { metaoption.functionoptions = " "; } else { metaoption.functionoptions = op; } } static void parse_meta (text_t &meta, metadata_t &metaoption, text_tset &metadata, bool 
&getParents) { if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) { metaoption.metacommand |= mCgiSafe; meta = substr (meta.begin()+8, meta.end()); } if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) { metaoption.metacommand |= mParent; getParents = true; get_parent_options (meta, metaoption); } else if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) { metaoption.metacommand |= mSibling; get_sibling_options (meta, metaoption); } metadata.insert (meta); metaoption.metaname = meta; } static void parse_meta (text_t &meta, format_t *formatlistptr, text_tset &metadata, bool &getParents) { if (meta == "link") formatlistptr->command = comLink; else if (meta == "/link") formatlistptr->command = comEndLink; else if (meta == "href") formatlistptr->command = comHref; else if (meta == "num") formatlistptr->command = comNum; else if (meta == "icon") formatlistptr->command = comIcon; else if (meta == "Text") formatlistptr->command = comDoc; else if (meta == "RelatedDocuments") formatlistptr->command = comRel; else if (meta == "highlight") formatlistptr->command = comHighlight; else if (meta == "/highlight") formatlistptr->command = comEndHighlight; else if (meta == "Summary") formatlistptr->command = comSummary; else { formatlistptr->command = comMeta; parse_meta (meta, formatlistptr->meta, metadata, getParents); } } static bool parse_string (const text_t &formatstring, format_t *formatlistptr, text_tset &metadata, bool &getParents) { text_t text; text_t::const_iterator here = formatstring.begin(); text_t::const_iterator end = formatstring.end(); while (here != end) { if (*here == '\\') { here ++; if (here != end) text.push_back (*here); } else if (*here == '{') { if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; text.clear(); } if (parse_action (++here, end, formatlistptr, metadata, 
getParents)) { formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; if (here == end) break; } } else if (*here == '[') { if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; text.clear(); } text_t meta; here ++; while (*here != ']') { if (here == end) return false; meta.push_back (*here); here ++; } parse_meta (meta, formatlistptr, metadata, getParents); formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; } else text.push_back (*here); if (here != end) here ++; } if (!text.empty()) { formatlistptr->command = comText; formatlistptr->text = text; formatlistptr->nextptr = new format_t(); formatlistptr = formatlistptr->nextptr; } return true; } static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end, format_t *formatlistptr, text_tset &metadata, bool &getParents) { text_t::const_iterator it = findchar (here, end, '}'); if (it == end) return false; text_t com = substr (here, it); here = findchar (it, end, '{'); if (here == end) return false; else here ++; if (com == "If") formatlistptr->command = comIf; else if (com == "Or") formatlistptr->command = comOr; else return false; int commacount = 0; text_t text; while (here != end) { if (*here == '\\') { here++; if (here != end) text.push_back(*here); } else if (*here == ',' || *here == '}' || *here == '{') { if (formatlistptr->command == comOr) { // the {Or}{this, or this, or this, or this} statement format_t *or_ptr; // find the next unused orptr if (formatlistptr->orptr == NULL) { formatlistptr->orptr = new format_t(); or_ptr = formatlistptr->orptr; } else { or_ptr = formatlistptr->orptr; while (or_ptr->nextptr != NULL) or_ptr = or_ptr->nextptr; or_ptr->nextptr = new format_t(); or_ptr = or_ptr->nextptr; } if (!text.empty()) { if (!parse_string(text, or_ptr, metadata, getParents)) { return false; } } if (*here == '{') { // 
Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}} // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}} // The latter can always be re-written: // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}} if (!text.empty()) // already used up allocated format_t { // => allocate new one for detected action or_ptr->nextptr = new format_t(); or_ptr = or_ptr->nextptr; } if (!parse_action(++here, end, or_ptr, metadata, getParents)) { return false; } } else { if (*here == '}') break; } text.clear(); } // Parse an {If}{decide,do,else} statement else { // Read the decision component. if (commacount == 0) { // Decsion can be a metadata element, or a piece of text. // Originally Stefan's code, updated 25/10/2000 by Gordon. text_t::const_iterator beginbracket = text.begin(); text_t::const_iterator endbracket = (text.end() - 1); // Decision is based on a metadata element if ((*beginbracket == '[') && (*endbracket == ']')) { // Ignore the surrounding square brackets text_t meta = substr (beginbracket+1, endbracket); parse_meta (meta, formatlistptr->decision.meta, metadata, getParents); commacount ++; text.clear(); } // Decision is a piece of text (probably a macro like _cgiargmode_). else { formatlistptr->decision.command = dText; formatlistptr->decision.text = text; commacount ++; text.clear(); } } // Read the "then" and "else" components of the {If} statement. 
else { format_t** nextlistptr = NULL; if (commacount == 1) { nextlistptr = &formatlistptr->ifptr; } else if (commacount == 2 ) { nextlistptr = &formatlistptr->elseptr; } else { return false; } if (!text.empty()) { if (*nextlistptr == NULL) { *nextlistptr = new format_t(); } else { // skip to the end of any format_t statements already added while ((*nextlistptr)->nextptr != NULL) { nextlistptr = &(*nextlistptr)->nextptr; } (*nextlistptr)->nextptr = new format_t(); nextlistptr = &(*nextlistptr)->nextptr; } if (!parse_string (text, *nextlistptr, metadata, getParents)) { return false; } text.clear(); } if (*here == '{') { if (*nextlistptr == NULL) { *nextlistptr = new format_t(); } else { (*nextlistptr)->nextptr = new format_t(); nextlistptr = &(*nextlistptr)->nextptr; } if (!parse_action(++here, end, *nextlistptr, metadata, getParents)) { return false; } } else { if (*here == '}') break; commacount ++; } } } } else text.push_back(*here); if (here != end) here ++; } return true; } bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr, text_tset &metadata, bool &getParents) { formatlistptr->clear(); getParents = false; return (parse_string (formatstring, formatlistptr, metadata, getParents)); } // note: all the format_date stuff is assuming that all Date metadata is going to // be of the form yyyymmdd, this is of course, crap ;) static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) { // make sure we have the requested metadata MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname); if (it == docinfo.metadata.end()) return ""; MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent; switch (meta.parentcommand) { case pNone: { if (meta.metacommand & mSibling) { text_t tmp; bool first = true; MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname]; const int start_i=0; const int end_i = metaname_rec.values.size()-1; for (int i=start_i; i<=end_i; i++) { if (!first) tmp += meta.functionoptions; if 
(meta.metaname == "Date") tmp += format_date (metaname_rec.values[i]); else if (meta.metaname == "Language") tmp += iso639(metaname_rec.values[i]); else tmp += metaname_rec.values[i]; first = false; } if (meta.metacommand & mCgiSafe) return cgi_safe (tmp); else return tmp; } else { text_t classifier_metaname = docinfo.classifier_metadata_type; int metaname_index = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0; text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index]; if (meta.metaname == "Date") return format_date (metadata_item); else if (meta.metaname == "Language") return iso639(metadata_item); if (meta.metacommand & mCgiSafe) return cgi_safe (metadata_item); else return metadata_item; } } case pImmediate: if (parent != NULL) { if (meta.metaname == "Date") return format_date (parent->values[0]); if (meta.metacommand == mCgiSafe) return cgi_safe (parent->values[0]); else return parent->values[0]; } break; case pTop: if (parent != NULL) { while (parent->parent != NULL) parent = parent->parent; if (meta.metaname == "Date") return format_date (parent->values[0]); if (meta.metacommand & mCgiSafe) return cgi_safe (parent->values[0]); else return parent->values[0]; } break; case pAll: MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent; if (parent != NULL) { text_tarray tmparray; while (parent != NULL) { tmparray.push_back (parent->values[0]); parent = parent->parent; } bool first = true; text_t tmp; text_tarray::reverse_iterator here = tmparray.rbegin(); text_tarray::reverse_iterator end = tmparray.rend(); while (here != end) { if (!first) tmp += meta.functionoptions; if (meta.metaname == "Date") tmp += format_date (*here); else tmp += *here; first = false; here ++; } if (meta.metacommand & mCgiSafe) return cgi_safe (tmp); else return tmp; } } return ""; } static text_t get_or (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *orptr, const 
text_t &link, const text_t &icon, const text_t &text, bool highlight, ostream& logout) { text_t tmp; while (orptr != NULL) { tmp = format_string (collection,collectproto, docinfo, disp, orptr, link, icon, text, highlight, logout); if (!tmp.empty()) return tmp; orptr = orptr->nextptr; } return ""; } static text_t get_if (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const decision_t &decision, format_t *ifptr, format_t *elseptr, const text_t &link, const text_t &icon, const text_t &text, bool highlight, ostream& logout) { // If the decision component is a metadata element, then evaluate it // to see whether we output the "then" or the "else" clause if (decision.command == dMeta) { if (get_meta (docinfo, decision.meta) != "") { if (ifptr != NULL) return get_formatted_string (collection,collectproto, docinfo, disp, ifptr, link, icon, text, highlight, logout); } else { if (elseptr != NULL) return get_formatted_string (collection,collectproto, docinfo, disp, elseptr, link, icon, text, highlight, logout); } } // If the decision component is text, then evaluate it (it is probably a // macro like _cgiargmode_) to decide what to output. else if (decision.command == dText) { text_t outstring; disp.expandstring (decision.text, outstring); // This is a tad tricky. When we expand a string like _cgiargmode_, that is // a cgi argument macro that has not been set, it evaluates to itself. // Therefore, were have to say that a piece of text evalautes true if // it is non-empty and if it is a cgi argument evaulating to itself. 
if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) { if (ifptr != NULL) return get_formatted_string (collection, collectproto, docinfo, disp, ifptr, link, icon, text, highlight, logout); } else { if (elseptr != NULL) return get_formatted_string (collection, collectproto, docinfo, disp, elseptr, link, icon, text, highlight, logout); } } return ""; } bool includes_metadata(const text_t& text) { text_t::const_iterator here = text.begin(); text_t::const_iterator end = text.end(); while (here != end) { if (*here == '[') return true; here ++; } return false; } text_t format_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, const text_t &link, const text_t &icon, const text_t &text, bool highlight, ostream& logout) { if (formatlistptr == NULL) return ""; switch (formatlistptr->command) { case comText: return formatlistptr->text; case comLink: return link; case comEndLink: if (link.empty()) return ""; else return ""; case comHref: return get_href(link); case comIcon: return icon; case comNum: return docinfo.result_num; case comRel: //if [RelatedDocuments] appears in format string, collect relation data return get_related_docs(collection, collectproto, docinfo, logout); case comSummary: return format_summary(collection,collectproto,docinfo,disp,text,highlight,logout); case comMeta: { const text_t& metavalue = get_meta (docinfo, formatlistptr->meta); if (includes_metadata(metavalue)) { // text has embedded metadata in it => expand it FilterRequest_t request; FilterResponse_t response; request.getParents = false; format_t *expanded_formatlistptr = new format_t(); parse_formatstring (metavalue, expanded_formatlistptr, request.fields, request.getParents); // retrieve metadata get_info(docinfo.OID, collection, request.fields, request.getParents, collectproto, response, logout); if (!response.docInfo.empty()) { text_t expanded_metavalue = 
get_formatted_string(collection, collectproto, response.docInfo[0], disp, expanded_formatlistptr, link, icon, highlight, logout); return expanded_metavalue; } else { return metavalue; } } else { return metavalue; } } case comDoc: return text; case comHighlight: if (highlight) return ""; break; case comEndHighlight: if (highlight) return ""; break; case comIf: return get_if (collection, collectproto, docinfo, disp, formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr, link, icon, text, highlight, logout); case comOr: return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr, link, icon, text, highlight, logout); } return ""; } text_t get_formatted_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t& docinfo, displayclass &disp, format_t* formatlistptr, const text_t& link, const text_t& icon, const text_t& text, const bool highlight, ostream& logout) { text_t ft; while (formatlistptr != NULL) { ft += format_string (collection, collectproto, docinfo, disp, formatlistptr, link, icon, text, highlight, logout); formatlistptr = formatlistptr->nextptr; } return ft; } text_t get_formatted_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, const text_t &link, const text_t &icon, const bool highlight, ostream& logout) { text_t text = ""; return get_formatted_string(collection, collectproto, docinfo, disp, formatlistptr, link, icon, text, highlight, logout); } text_t get_formatted_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, format_t *formatlistptr, const text_t& text, ostream& logout) { text_t link = ""; text_t icon = "_icontext_"; bool highlight = false; return get_formatted_string(collection, collectproto, docinfo, disp, formatlistptr, link, icon, text, highlight, logout); } text_t get_formatted_string (const text_t& collection, recptproto* collectproto, ResultDocInfo_t 
&docinfo, displayclass &disp, format_t *formatlistptr, ostream& logout) { text_t text = ""; return get_formatted_string(collection, collectproto, docinfo, disp, formatlistptr, text, logout); } /* FUNCTION NAME: format_summary * DESC: this is invoked when a [Summary] special metadata is processed. * RETURNS: a query-biased summary for the document */ text_t format_summary (const text_t& collection, recptproto* collectproto, ResultDocInfo_t &docinfo, displayclass &disp, const text_t &text, bool highlight, ostream& logout) { // GRB: added code here to ensure that the cstr (and other collections) // uses the document metadata item Summary, rather than compressing // the text of the document, processed via the methods in // summarise.cpp if (docinfo.metadata.count("Summary") > 0 && docinfo.metadata["Summary"].values.size() > 0) { return docinfo.metadata["Summary"].values[0]; } text_t textToSummarise, query; if(text.empty()) { // get document text DocumentRequest_t docrequest; DocumentResponse_t docresponse; comerror_t err; docrequest.OID = docinfo.OID; collectproto->get_document (collection, docrequest, docresponse, err, logout); textToSummarise = docresponse.doc; } else // in practice, this would not happen, because text is only // loaded with the [Text] command textToSummarise = text; disp.expandstring("_cgiargq_",query); return summarise(textToSummarise,query,80); }