/**********************************************************************
 *
 * formattools.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: formattools.cpp 874 2000-01-26 20:10:31Z sjboddie $
 *
 *********************************************************************/

/*
   $Log$
   Revision 1.17  2000/01/26 20:10:31  sjboddie
   changed the default order of detach/expand/highlight buttons

   Revision 1.16  2000/01/25 22:33:31  sjboddie
   added DocumentUseHTML

   Revision 1.15  1999/12/13 02:45:16  davidb
   Support for more than one metavalue for the same metadata name

   Revision 1.14  1999/10/30 22:23:11  sjboddie
   moved table functions from browsetools

   Revision 1.13  1999/10/14 23:01:24  sjboddie
   changes for new browsing support

   Revision 1.12  1999/10/10 08:14:07  sjboddie
   - metadata now returns mp rather than array
   - redesigned browsing support (although it's not finished so
   won't currently work ;-)

   Revision 1.11  1999/09/28 20:38:19  rjmcnab
   fixed a couple of bugs

   Revision 1.10  1999/09/07 04:56:55  sjboddie
   added GPL notice

   Revision 1.9  1999/09/02 00:31:25  rjmcnab
   fixed small error.

   Revision 1.8  1999/08/20 00:56:38  sjboddie
   added cgisafe option - you can now do something like [cgisafe:Title] if
   you want Title to be entered safely into a url

   Revision 1.7  1999/08/10 22:38:08  sjboddie
   added some more format options

   Revision 1.6  1999/07/30 02:25:42  sjboddie
   made format_date function global

   Revision 1.5  1999/07/21 05:00:00  sjboddie
   added some date formatting

   Revision 1.4  1999/07/20 03:02:15  sjboddie
   added an [icon] option, added ability to call get_formatted_string
   with icon and link arguments set

   Revision 1.3  1999/07/09 02:44:35  sjboddie
   fixed parent(All) function so it only outputs parents and not current
   level meta

   Revision 1.2  1999/07/08 20:48:33  rjmcnab
   Added ability to print the result number

   Revision 1.1  1999/07/07 05:49:34  sjboddie
   had another crack at the format string code - created a new formattools
   module. It can now handle {If} and {Or} statements although there's a
   bug preventing nested if's and or's.

 */


#include "formattools.h"
#include "cgiutils.h"


// a few function prototypes
static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon,
                             const text_t &text, bool highlight);

static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
                          format_t *formatlistptr, text_tset &metadata, bool &getParents);


// resets a metadata request: no name, no command, no parent lookup
void metadata_t::clear() {
  metaname.clear();
  metacommand = mNone;
  parentcommand = pNone;
  parentoptions.clear();
}

// resets a decision to a metadata test on an empty metadata request
void decision_t::clear() {
  command = dMeta;
  meta.clear();
}

// resets a format node to an empty text node with no links to other nodes
// (the pointers are only set to NULL here, nothing is deleted)
void format_t::clear() {
  command = comText;
  decision.clear();
  text.clear();
  meta.clear();
  nextptr = NULL;
  ifptr = NULL;
  elseptr = NULL;
  orptr = NULL;
}

// restores the default formatting options
void formatinfo_t::clear() {
  DocumentImages = false;
  DocumentTitles = true;
  // NOTE(review): in the text under review this literal contained a raw line
  // break between '}' and the closing quote; it is written as \n here.
  // Possibly some markup was lost from the end of the string -- confirm
  // against the repository copy.
  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}\n";
  DocumentContents = true;
  DocumentArrowsBottom = true;
  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
  DocumentButtons.push_back ("Expand Text");
  DocumentButtons.push_back ("Expand Contents");
  DocumentButtons.push_back ("Detach");
  DocumentButtons.push_back ("Highlight");
  DocumentText = "[Text]";
  formatstrings.erase (formatstrings.begin(), formatstrings.end());
  DocumentUseHTML = false;
}

// simply checks to see if formatstring begins with a <td> tag:
// leading spaces are skipped, any other character that is not '<'
// gives false, and a '<' must be followed by "td" (either case) and
// then '>' or ' '. As before, a '<' that does not start such a tag
// does not stop the scan.
// The three characters after '<' are only examined when they exist
// (the old code read them unconditionally and could run off the end
// of a short string such as "<").
bool is_table_content (const text_t &formatstring) {
  text_t::const_iterator here = formatstring.begin();
  text_t::const_iterator end = formatstring.end();

  while (here != end) {
    if (*here != ' ') {
      if (*here != '<') return false;

      if ((end - here) >= 4 &&
          (*(here+1) == 't' || *(here+1) == 'T') &&
          (*(here+2) == 'd' || *(here+2) == 'D') &&
          (*(here+3) == '>' || *(here+3) == ' '))
        return true;
    }
    ++here;
  }
  return false;
}

// as above, but looks at the text of the first node of a parsed format
// list; returns false for a NULL list or a first node that isn't text
bool is_table_content (const format_t *formatlistptr) {
  if (formatlistptr == NULL) return false;

  if (formatlistptr->command == comText)
    return is_table_content (formatlistptr->text);

  return false;
}

// returns false if key isn't in formatstringmap
// (formatstring is left empty in that case)
bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
                       text_t &formatstring) {

  formatstring.clear();
  text_tmap::const_iterator it = formatstringmap.find(key);
  if (it == formatstringmap.end()) return false;
  formatstring = (*it).second;
  return true;
}

// tries to find "key1key2" then "key1" then "key2"
// returns false (with formatstring empty) if none of them is present
bool get_formatstring (const text_t &key1, const text_t &key2,
                       const text_tmap &formatstringmap,
                       text_t &formatstring) {

  formatstring.clear();

  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
  if (it == formatstringmap.end()) it = formatstringmap.find(key1);
  if (it == formatstringmap.end()) it = formatstringmap.find(key2);
  if (it == formatstringmap.end()) return false;

  formatstring = (*it).second;
  return true;
}


// returns a date of form _textmonthnn_ 31, 1999
// input is date of type 19991231
// at least the year must be present in date
//
// The result degrades with the input: fewer than 4 characters gives "",
// fewer than 6 (or a month value outside 0..12) gives just the year, and
// fewer than 8 (or a day value outside 1..31) gives "month, year".
text_t format_date (const text_t &date) {

  if (date.size() < 4) return "";

  text_t::const_iterator datebegin = date.begin();

  text_t year = substr (datebegin, datebegin+4);

  if (date.size() < 6) return year;

  // validate the two month digits themselves; the old code called
  // getint() on the whole "_textmonthnn_" macro name instead.
  // A month value of 0 is still passed through, as before.
  text_t monthdigits = substr (datebegin+4, datebegin+6);
  int imonth = monthdigits.getint();
  if (imonth < 0 || imonth > 12) return year;

  text_t month = "_textmonth" + monthdigits + "_";

  if (date.size() < 8) return month + ", " + year;

  text_t day = substr (datebegin+6, datebegin+8);
  if (day[0] == '0') day = substr (day.begin()+1, day.end());
  int iday = day.getint();
  // a day of 0 used to slip through (the check was iday < 0)
  if (iday < 1 || iday > 31) return month + ", " + year;

  return month + " " + day + ", " + year;
}

// Splits a "parent..." metadata request into its parts.
// instring must start with the 6 characters "parent" (the caller checks
// this); scanning starts after them. Characters are sorted as follows:
//   - '(' / ')' switch "in braces" on and off, "'" toggles "in quotes"
//   - the first ':' outside braces starts the metadata name
//   - before that, quoted characters go to the separator option and the
//     remaining ones to the command word
// On return instring holds just the metadata name and metaoption holds
// the parent command: none given -> pImmediate, "Top" -> pTop,
// "All" -> pAll (with the quoted text stored as parentoptions).
// Any other command word leaves metaoption untouched.
static void get_parent_options (text_t &instring, metadata_t &metaoption) {

  text_t meta, com, op;
  bool inbraces = false;
  bool inquotes = false;
  bool foundcolon = false;
  text_t::const_iterator here = instring.begin()+6;
  text_t::const_iterator end = instring.end();

  while (here != end) {
    if (*here == '(') inbraces = true;
    else if (*here == ')') inbraces = false;
    else if (*here == '\'' && !inquotes) inquotes = true;
    else if (*here == '\'' && inquotes) inquotes = false;
    else if (*here == ':' && !inbraces) foundcolon = true;
    else if (foundcolon) meta.push_back (*here);
    else if (inquotes) op.push_back (*here);
    else com.push_back (*here);
    ++here;
  }

  instring = meta;

  if (com.empty())
    metaoption.parentcommand = pImmediate;
  else if (com == "Top")
    metaoption.parentcommand = pTop;
  else if (com == "All") {
    metaoption.parentcommand = pAll;
    metaoption.parentoptions = op;
  }
}

// Interprets the contents of a [...] metadata request.
// A leading "cgisafe:" is stripped and recorded as mCgiSafe; a leading
// "parent" sets getParents and is handed to get_parent_options. What is
// left of meta is the plain metadata name: it is added to the metadata
// set and stored in metaoption.
static void parse_meta (text_t &meta, metadata_t &metaoption,
                        text_tset &metadata, bool &getParents) {

  if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
    metaoption.metacommand = mCgiSafe;
    meta = substr (meta.begin()+8, meta.end());
  }

  if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
    getParents = true;
    get_parent_options (meta, metaoption);
  }

  metadata.insert (meta);
  metaoption.metaname = meta;
}

// Fills in a format node from the contents of a [...] request.
// The reserved words link, /link, num, icon, Text, highlight and
// /highlight select the matching command; anything else is treated as
// a metadata request.
static void parse_meta (text_t &meta, format_t *formatlistptr,
                        text_tset &metadata, bool &getParents) {

  if (meta == "link")
    formatlistptr->command = comLink;
  else if (meta == "/link")
    formatlistptr->command = comEndLink;

  else if (meta == "num")
    formatlistptr->command = comNum;

  else if (meta == "icon")
    formatlistptr->command = comIcon;

  else if (meta == "Text")
    formatlistptr->command = comDoc;

  else if (meta == "highlight")
    formatlistptr->command = comHighlight;

  else if (meta == "/highlight")
    formatlistptr->command = comEndHighlight;

  else {
    formatlistptr->command = comMeta;
    parse_meta (meta, formatlistptr->meta, metadata, getParents);
  }
}

// stores the pending literal text in the current node, chains a fresh
// node after it, empties text and returns the fresh node
static format_t *append_text_node (format_t *formatlistptr, text_t &text) {
  formatlistptr->command = comText;
  formatlistptr->text = text;
  formatlistptr->nextptr = new format_t();
  text.clear();
  return formatlistptr->nextptr;
}

// Parses formatstring into a chain of format nodes starting at
// formatlistptr:
//   \x     -> x is taken literally
//   {...}  -> handed to parse_action ({If} / {Or})
//   [...]  -> handed to parse_meta
// anything else is literal text. Each completed item is followed by a
// freshly allocated node, so the chain ends in one unused node.
// Returns false if a '[' is never closed, true otherwise.
static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
                          text_tset &metadata, bool &getParents) {

  text_t text;
  text_t::const_iterator here = formatstring.begin();
  text_t::const_iterator end = formatstring.end();

  while (here != end) {

    if (*here == '\\') {
      // a backslash that is the very last character has nothing to
      // escape and is dropped (the old code dereferenced end here)
      ++here;
      if (here != end) text.push_back (*here);
    }

    else if (*here == '{') {
      if (!text.empty())
        formatlistptr = append_text_node (formatlistptr, text);

      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
        formatlistptr->nextptr = new format_t();
        formatlistptr = formatlistptr->nextptr;
        if (here == end) break;
      }
    }

    else if (*here == '[') {
      if (!text.empty())
        formatlistptr = append_text_node (formatlistptr, text);

      text_t meta;
      ++here;
      // test for the end of the string before looking at the character
      // (the old loop dereferenced first and checked afterwards)
      while (here != end && *here != ']') {
        meta.push_back (*here);
        ++here;
      }
      if (here == end) return false;

      parse_meta (meta, formatlistptr, metadata, getParents);
      formatlistptr->nextptr = new format_t();
      formatlistptr = formatlistptr->nextptr;
    }

    else
      text.push_back (*here);

    if (here != end) ++here;
  }

  if (!text.empty())
    formatlistptr = append_text_node (formatlistptr, text);

  return true;
}

// true if text starts with '[' and ends with ']'
// (always false for empty text, so callers never look before begin())
static bool is_bracketed (const text_t &text) {
  if (text.empty()) return false;
  text_t::const_iterator first = text.begin();
  text_t::const_iterator last = text.end() - 1;
  return (*first == '[') && (*last == ']');
}

// Parses "If}{decide,do,else}" or "Or}{this,that,...}"; here points just
// after the opening '{' of the command name.
// Returns false (leaving the node's command untouched) if the command
// name is not closed, no argument list follows, or the name is neither
// "If" nor "Or". Otherwise the node is filled in and true is returned
// with here on the closing '}' of the argument list, or at end if the
// list was never closed.
static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
                          format_t *formatlistptr, text_tset &metadata, bool &getParents) {

  text_t::const_iterator it = findchar (here, end, '}');
  if (it == end) return false;

  text_t com = substr (here, it);
  here = findchar (it, end, '{');
  if (here == end) return false;
  else ++here;

  if (com == "If") formatlistptr->command = comIf;
  else if (com == "Or") formatlistptr->command = comOr;
  else return false;

  int curlycount = 0;   // depth of nested {...} inside the argument list
  int commacount = 0;   // number of {If} arguments consumed so far
  text_t text;          // the argument currently being collected

  while (here != end) {

    if (*here == '\\') {
      ++here;
      if (here != end) text.push_back(*here);
    }

    else if (*here == '{') {
      curlycount ++;
      text.push_back(*here);
    }

    else if (*here == '}' && curlycount > 0) {
      curlycount --;
      text.push_back(*here);
    }

    else if ((*here == ',' || *here == '}') && curlycount <= 0) {
      // end of one argument; an empty argument is never treated as
      // [metadata] (the old code looked at text.end()-1 regardless)
      bool bracketed = is_bracketed (text);

      if (formatlistptr->command == comOr) {
        // the {Or}{this, or this, or this, or this} statement
        // or'ed statements may be either [metadata] or plain text
        format_t *or_ptr;

        // find the next unused orptr
        if (formatlistptr->orptr == NULL) {
          formatlistptr->orptr = new format_t();
          or_ptr = formatlistptr->orptr;
        } else {
          or_ptr = formatlistptr->orptr;
          while (or_ptr->nextptr != NULL)
            or_ptr = or_ptr->nextptr;
          or_ptr->nextptr = new format_t();
          or_ptr = or_ptr->nextptr;
        }

        if (bracketed) {
          // it's metadata
          text_t::const_iterator beginbracket = text.begin();
          text_t::const_iterator endbracket = (text.end() - 1);
          text_t meta = substr (beginbracket+1, endbracket);
          parse_meta (meta, or_ptr, metadata, getParents);

        } else {
          // assume it's plain text
          or_ptr->command = comText;
          or_ptr->text = text;
        }
        text.clear();

      } else {
        // the {If}{decide,do,else} statement
        if (commacount == 0) {
          // If decision only supports metadata at present

          // remove the surrounding square brackets
          // (a decision that isn't [metadata] is not consumed: text
          // keeps accumulating and commacount stays at 0, as before)
          if (bracketed) {
            text_t::const_iterator beginbracket = text.begin();
            text_t::const_iterator endbracket = (text.end() - 1);
            text_t meta = substr (beginbracket+1, endbracket);
            parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
            commacount ++;
            text.clear();
          }

        } else if (commacount == 1) {
          formatlistptr->ifptr = new format_t();
          parse_string (text, formatlistptr->ifptr, metadata, getParents);
          commacount ++;
          text.clear();

        } else if (commacount == 2) {
          formatlistptr->elseptr = new format_t();
          parse_string (text, formatlistptr->elseptr, metadata, getParents);
          commacount ++;
          text.clear();
        }
      }
      if (*here == '}') break;

    } else text.push_back(*here);

    if (here != end) ++here;
  }

  return true;
}


// Parses formatstring into the list starting at formatlistptr (which is
// cleared first). The names of all metadata requested are added to
// metadata, and getParents is set if any "parent..." request was seen.
// Returns the result of parse_string.
bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
                         text_tset &metadata, bool &getParents) {

  formatlistptr->clear();
  getParents = false;

  return (parse_string (formatstring, formatlistptr, metadata, getParents));
}


// note: all the format_date stuff is assuming that all Date metadata is going to
// be of the form yyyymmdd, this is of course, crap ;)

// Returns the value requested by meta from docinfo, or "" if docinfo
// has no entry of that name (or no parent, for the parent commands).
// "Date" values go through format_date; otherwise a cgisafe request
// goes through cgi_safe. For pAll the parents' values are joined from
// the topmost parent down, separated by meta.parentoptions.
// NOTE(review): values[...] is indexed without a bounds check
// throughout -- confirm the callers guarantee the entries exist.
static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {

  // make sure we have the requested metadata
  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
  if (it == docinfo.metadata.end()) return "";

  // reuse the entry we just found rather than looking it up again
  MetadataInfo_t *parent = (*it).second.parent;

  switch (meta.parentcommand) {
  case pNone:
    {
      // the classifier's own metadata type is read at the classifier
      // offset, everything else at index 0
      text_t classifier_metaname = docinfo.classifier_metadata_type;
      int metaname_index
        = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
      text_t metadata_item = (*it).second.values[metaname_index];

      if (meta.metaname == "Date")
        return format_date (metadata_item);
      if (meta.metacommand == mCgiSafe)
        return cgi_safe (metadata_item);
      else return metadata_item;
    }

  case pImmediate:
    if (parent != NULL) {
      if (meta.metaname == "Date")
        return format_date (parent->values[0]);
      if (meta.metacommand == mCgiSafe)
        return cgi_safe (parent->values[0]);
      else return parent->values[0];
    }
    break;

  case pTop:
    if (parent != NULL) {
      while (parent->parent != NULL) parent = parent->parent;

      if (meta.metaname == "Date")
        return format_date (parent->values[0]);
      if (meta.metacommand == mCgiSafe)
        return cgi_safe (parent->values[0]);
      else return parent->values[0];
    }
    break;

  case pAll:
    if (parent != NULL) {
      // collected nearest parent first, then emitted in reverse
      text_tarray tmparray;
      while (parent != NULL) {
        tmparray.push_back (parent->values[0]);
        parent = parent->parent;
      }

      bool first = true;
      text_t tmp;
      text_tarray::reverse_iterator here = tmparray.rbegin();
      text_tarray::reverse_iterator end = tmparray.rend();
      while (here != end) {
        if (!first) tmp += meta.parentoptions;
        if (meta.metaname == "Date") tmp += format_date (*here);
        else tmp += *here;
        first = false;
        ++here;
      }

      if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
      else return tmp;
    }
  }
  return "";
}

// returns the output of the first alternative in the {Or} list that
// formats to something non-empty, or "" if none does
static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
                      const text_t &link, const text_t &icon,
                      const text_t &text, bool highlight) {

  text_t tmp;
  while (orptr != NULL) {

    tmp = format_string (docinfo, orptr, link, icon, text, highlight);
    if (!tmp.empty()) return tmp;

    orptr = orptr->nextptr;
  }
  return "";
}

// formats the "do" list if the decision metadata is non-empty and the
// "else" list otherwise; a missing (NULL) list gives ""
static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
                      format_t *ifptr, format_t *elseptr, const text_t &link,
                      const text_t &icon, const text_t &text, bool highlight) {

  // not much of a choice yet ...
  if (decision.command == dMeta) {
    if (get_meta (docinfo, decision.meta) != "") {
      if (ifptr != NULL)
        return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
    }
    else {
      if (elseptr != NULL)
        return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
    }
  }
  return "";
}

// returns the output for a single format node (not the nodes chained
// after it); a NULL node gives ""
static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon,
                             const text_t &text, bool highlight) {

  if (formatlistptr == NULL) return "";

  switch (formatlistptr->command) {
  case comText:
    return formatlistptr->text;
  case comLink:
    return link;
  case comEndLink:
    // NOTE(review): both branches return an empty string in the text
    // under review, which makes the test pointless; the closing markup
    // for the link has presumably been lost -- confirm against the
    // repository copy before relying on this.
    if (link.empty()) return "";
    else return "";
  case comIcon:
    return icon;
  case comNum:
    return docinfo.result_num;
  case comMeta:
    return get_meta (docinfo, formatlistptr->meta);
  case comDoc:
    return text;
  case comHighlight:
    // NOTE(review): returns an empty string here and in comEndHighlight
    // below, so highlight currently has no visible effect; the markup
    // has presumably been lost -- confirm against the repository copy.
    if (highlight) return "";
    break;
  case comEndHighlight:
    if (highlight) return "";
    break;
  case comIf:
    return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
                   formatlistptr->elseptr, link, icon, text, highlight);
  case comOr:
    return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
  }
  return "";
}


// The get_formatted_string family: each overload formats every node of
// the list in turn and returns the concatenated output. The shorter
// overloads only supply defaults (link "", icon "_icontext_", empty
// document text, no highlighting) and hand over to the full version at
// the bottom, which this file already relies on being declared before
// its definition (get_if calls it).

// link and icon given; no document text, no highlighting
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon) {

  text_t text;
  return get_formatted_string (docinfo, formatlistptr, link, icon, text, false);
}

// everything defaulted
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {

  text_t link = "";
  text_t icon = "_icontext_";
  text_t text;
  return get_formatted_string (docinfo, formatlistptr, link, icon, text, false);
}

// document text given; default link and icon, no highlighting
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &text) {

  text_t link = "";
  text_t icon = "_icontext_";
  return get_formatted_string (docinfo, formatlistptr, link, icon, text, false);
}

// link, icon and document text given; no highlighting
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon, const text_t &text) {

  return get_formatted_string (docinfo, formatlistptr, link, icon, text, false);
}

// link, icon and highlight flag given; no document text
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon, bool highlight) {

  text_t text;
  return get_formatted_string (docinfo, formatlistptr, link, icon, text, highlight);
}

// the full version: walks the list via nextptr and appends the output
// of each node
text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
                             const text_t &link, const text_t &icon,
                             const text_t &text, bool highlight) {

  text_t ft;
  while (formatlistptr != NULL) {
    ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
    formatlistptr = formatlistptr->nextptr;
  }
  return ft;
}