root/main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp @ 23305

Revision 23305, 60.1 KB (checked in by davidb, 9 years ago)

htmlsafe: expanded to also protect commas and single apostrophe as these can cause problems in the HTML when embedded in format or macro statements

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
35static bool metadata_spanwrap = false;
36
37// a few function prototypes
38
39static text_t format_string (const text_t& collection, recptproto* collectproto,
40                 ResultDocInfo_t &docinfo, displayclass &disp,
41                 format_t *formatlistptr, text_tmap &options,
42                 ostream& logout);
43
44static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
45              format_t *formatlistptr, text_tset &metadata, bool &getParents);
46
47static text_t format_summary (const text_t& collection, recptproto* collectproto,
48                  ResultDocInfo_t &docinfo, displayclass &disp,
49                  text_tmap &options, ostream& logout);
50static text_t format_text (const text_t& collection, recptproto* collectproto,
51                  ResultDocInfo_t &docinfo, displayclass &disp,
52                  text_tmap &options, ostream& logout);
53
54static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
55                  recptproto* collectproto, ResultDocInfo_t &docinfo,
56                  displayclass &disp, text_tmap &options,
57                  ostream &logout);
58
59
60void metadata_t::clear() {
61  metaname.clear();
62  metacommand = mNone;
63  mqualifier.parent  = pNone;
64  mqualifier.sibling = sNone;
65  mqualifier.child   = cNone;
66  pre_tree_traverse.clear();
67  parentoptions.clear();
68  siblingoptions.clear();
69  childoptions.clear();
70}
71
72void decision_t::clear() {
73  command = dMeta;
74  meta.clear();
75  text.clear();
76}
77
78format_t::~format_t()
79{
80  if (nextptr != NULL) delete nextptr;
81  if (ifptr != NULL) delete ifptr;
82  if (elseptr != NULL) delete elseptr;
83  if (orptr != NULL) delete orptr;
84}
85
86void format_t::clear() {
87  command = comText;
88  decision.clear();
89  text.clear();
90  meta.clear();
91  nextptr = NULL;
92  ifptr = NULL;
93  elseptr = NULL;
94  orptr = NULL;
95}
96
97void formatinfo_t::clear() {
98  DocumentImages = false;
99  DocumentTitles = true;
100  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
101  DocumentContents = true;
102  DocumentArrowsBottom = true;
103  DocumentArrowsTop = false;
104  DocumentSearchResultLinks = false;
105  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
106  //  DocumentButtons.push_back ("Expand Text");
107  //  DocumentButtons.push_back ("Expand Contents");
108  DocumentButtons.push_back ("Detach");
109  DocumentButtons.push_back ("Highlight");
110  RelatedDocuments = "";
111  DocumentText = "[Text]";
112  formatstrings.erase (formatstrings.begin(), formatstrings.end());
113  DocumentUseHTML = false;
114  AllowExtendedOptions = false;
115}
116
117// simply checks to see if formatstring begins with a <td> tag
118bool is_table_content (const text_t &formatstring) {
119  text_t::const_iterator here = formatstring.begin();
120  text_t::const_iterator end = formatstring.end();
121 
122  while (here != end) {
123    if (*here != ' ') {
124      if ((*here == '<') && ((here+3) < end)) {
125    if ((*(here+1) == 't' || *(here+1) == 'T') &&
126        (*(here+2) == 'd' || *(here+2) == 'D') &&
127        (*(here+3) == '>' || *(here+3) == ' '))
128      return true;
129      } else return false;
130    }
131    ++here;
132  }
133  return false;
134}
135
136bool is_table_content (const format_t *formatlistptr) {
137
138  if (formatlistptr == NULL) return false;
139 
140  if (formatlistptr->command == comText)
141    return is_table_content (formatlistptr->text);
142   
143  return false;
144}
145
146// returns false if key isn't in formatstringmap
147bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
148               text_t &formatstring) {
149
150  formatstring.clear();
151  text_tmap::const_iterator it = formatstringmap.find(key);
152  if (it == formatstringmap.end()) return false;
153  formatstring = (*it).second;
154  return true;
155}
156
157// tries to find "key1key2" then "key1" then "key2"
158bool get_formatstring (const text_t &key1, const text_t &key2, 
159               const text_tmap &formatstringmap,
160               text_t &formatstring) {
161
162  formatstring.clear();
163  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
164  if (it != formatstringmap.end()) {
165    formatstring = (*it).second;
166    return true;
167  }
168  it = formatstringmap.find(key1);
169  if (it != formatstringmap.end()) {
170    formatstring = (*it).second;
171    return true;
172  }
173  it = formatstringmap.find(key2);
174  if (it != formatstringmap.end()) {
175    formatstring = (*it).second;
176    return true;
177  }
178  return false;
179}
180
181
182text_t remove_namespace(const text_t &meta_name) {
183  text_t::const_iterator end = meta_name.end();
184  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
185  if (it != end) {
186    return substr(it+1, end);
187  }
188
189  return meta_name;
190
191}
192// returns a date of form _format:date_(year, month, day)
193// input is date of type yyyy-?mm-?dd
194// at least the year must be present in date
195text_t format_date (const text_t &date) {
196
197  if (date.size() < 4) return "";
198
199  text_t::const_iterator datebegin = date.begin();
200
201  text_t year = substr (datebegin, datebegin+4);
202  int chars_seen_so_far = 4;
203  if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
204
205  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
206  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
207 
208  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
209  int imonth = month.getint();
210  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
211 
212  chars_seen_so_far += 2;
213  if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
214
215  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
216  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
217
218  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
219  if (day[0] == '0') day = substr (day.begin()+1, day.end());
220  int iday = day.getint();
221  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
222   
223  return "_format:date_("+year+","+month+","+day+")";
224
225
226// converts an iso639 language code to its English equivalent
227// should we be checking that the macro exists??
228text_t iso639 (const text_t &langcode) {
229  if (langcode.empty()) return "";
230  return "_iso639:iso639"+langcode+"_";
231}
232
233
234text_t get_href (const text_t &link) {
235
236  text_t href;
237
238  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
239  text_t::const_iterator end = link.end();
240  if (here == end) return g_EmptyText;
241 
242  ++here;
243  while (here != end) {
244    if (*here == '"') break;
245    href.push_back(*here);
246    ++here;
247  }
248
249  return href;
250}
251
252//this function gets the information associated with the relation
253//metadata for the document associated with 'docinfo'. This relation
254//metadata consists of a line of pairs containing 'collection, document OID'
255//(this is the OID of the document related to the current document, and
256//the collection the related document belongs to). For each of these pairs
257//the title metadata is obtained and then an html link between the title
258//of the related doc and the document's position (the document will be
259//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
260//(where collection is the related documents collection, and OID is the
261//related documents OID).  A list of these html links are made for as many
262//related documents as there are. This list is then returned. If there are
263//no related documents available for the current document then the string
264//'.. no related documents .. ' is returned.
265text_t get_related_docs(const text_t& collection, recptproto* collectproto,
266            ResultDocInfo_t &docinfo, ostream& logout){
267 
268  text_tset metadata;
269
270  //insert the metadata we wish to collect
271  metadata.insert("dc.Relation");
272  metadata.insert("Title"); 
273  metadata.insert("Subject"); //for emails, where title data doesn't apply
274 
275  FilterResponse_t response;
276  text_t relation = ""; //string for displaying relation metadata
277  text_t relationTitle = ""; //the related documents Title (or subject)
278  text_t relationOID = ""; //the related documents OID 
279
280  //get the information associated with the metadata for current doc
281  if (get_info (docinfo.OID, collection, "", metadata,
282        false, collectproto, response, logout)) {
283   
284    //if the relation metadata exists, store for displaying
285    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
286      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
287
288      //split relation data into pairs of collectionname,ID number
289      text_tarray relationpairs;
290      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
291     
292      text_tarray::const_iterator currDoc = relationpairs.begin(); 
293      text_tarray::const_iterator lastDoc = relationpairs.end();
294
295      //iterate through the pairs to split and display
296      while(currDoc != lastDoc){
297   
298    //split pairs into collectionname and ID
299    text_tarray relationdata;
300    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
301   
302    //get first element in the array (collection)
303    text_tarray::const_iterator doc_data = relationdata.begin();
304    text_t document_collection = *doc_data;
305    ++doc_data; //increment to get next item in array (oid)
306    text_t document_OID = *doc_data;
307   
308    //create html link to related document
309    relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
310    relation += "&amp;cl=search&amp;d=" + document_OID;
311       
312    //get the information associated with the metadata for related doc
313    if (get_info (document_OID, document_collection, "", metadata,
314              false, collectproto, response, logout)) {
315     
316      //if title metadata doesn't exist, collect subject metadata
317      //if that doesn't exist, just call it 'related document'
318      if (!response.docInfo[0].metadata["Title"].values[0].empty())
319        relationTitle = response.docInfo[0].metadata["Title"].values[0];
320      else if (!response.docInfo[0].metadata["Subject"].values.empty())
321        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
322      else relationTitle =  "RELATED DOCUMENT";
323     
324    }
325   
326    //link the related document's title to its page
327    relation += "\">" + relationTitle + "</a>";
328    relation += "  (" + document_collection + ")<br>";
329   
330    ++currDoc;
331      }
332    }
333   
334  }
335
336  if(relation.empty()) //no relation data for documnet
337    relation = ".. no related documents .. ";
338
339  return relation;
340}
341
342
343
344static void get_parent_options (text_t &instring, metadata_t &metaoption) {
345
346  assert (instring.size() > 7);
347  if (instring.size() <= 7) return;
348
349  text_t meta, com, op;
350  bool inbraces = false;
351  bool inquotes = false;
352  bool foundcolon = false;
353  text_t::const_iterator here = instring.begin()+6;
354  text_t::const_iterator end = instring.end();
355  while (here != end) {
356    if (foundcolon) meta.push_back (*here);
357    else if (*here == '(') inbraces = true;
358    else if (*here == ')') inbraces = false;
359    else if (*here == '\'' && !inquotes) inquotes = true;
360    else if (*here == '\'' && inquotes) inquotes = false;
361    else if (*here == ':' && !inbraces) foundcolon = true;
362    else if (inquotes) op.push_back (*here);
363    else com.push_back (*here);
364    ++here;
365  }
366
367  instring = meta;
368  if (com.empty())
369    metaoption.mqualifier.parent = pImmediate;
370  else if (com == "Top")
371    metaoption.mqualifier.parent = pTop;
372  else if (com == "All") {
373    metaoption.mqualifier.parent = pAll;
374    metaoption.parentoptions = op;
375  }
376}
377
378
379static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
380
381  assert (instring.size() > 8);
382  if (instring.size() <= 8) return;
383  text_t meta, com, op;
384  bool inbraces = false;
385  bool inquotes = false;
386  bool foundcolon = false;
387  text_t::const_iterator here = instring.begin()+7;
388  text_t::const_iterator end = instring.end();
389  while (here != end) {
390    if (foundcolon) meta.push_back (*here);
391    else if (*here == '(') inbraces = true;
392    else if (*here == ')') inbraces = false;
393    else if (*here == '\'' && !inquotes) inquotes = true;
394    else if (*here == '\'' && inquotes) inquotes = false;
395    else if (*here == ':' && !inbraces) foundcolon = true;   
396    else if (inquotes) op.push_back (*here);
397    else com.push_back (*here);
398    ++here;
399  }
400
401  instring = meta;
402  metaoption.siblingoptions.clear();
403
404  if (com.empty()) {
405    metaoption.mqualifier.sibling = sAll;
406    metaoption.siblingoptions = " ";
407  }
408  else if (com == "first") {
409    metaoption.mqualifier.sibling = sNum;
410    metaoption.siblingoptions = "0";
411  }
412  else if (com == "last") {
413    metaoption.mqualifier.sibling = sNum;
414    metaoption.siblingoptions = "-2"; // == last
415  }
416  else if (com.getint()>0) {
417    metaoption.mqualifier.sibling = sNum;
418    int pos = com.getint()-1;
419    metaoption.siblingoptions +=pos;
420  }
421  else {
422    metaoption.mqualifier.sibling = sAll;
423    metaoption.siblingoptions = op;
424  }
425}
426
427static void get_child_options (text_t &instring, metadata_t &metaoption) {
428
429  assert (instring.size() > 6);
430  if (instring.size() <= 6) return;
431  text_t meta, com, op;
432  bool inbraces = false;
433  bool inquotes = false;
434  bool foundcolon = false;
435  text_t::const_iterator here = instring.begin()+5;
436  text_t::const_iterator end = instring.end();
437  while (here != end) {
438    if (foundcolon) meta.push_back (*here);
439    else if (*here == '(') inbraces = true;
440    else if (*here == ')') inbraces = false;
441    else if (*here == '\'' && !inquotes) inquotes = true;
442    else if (*here == '\'' && inquotes) inquotes = false;
443    else if (*here == ':' && !inbraces) foundcolon = true;
444    else if (inquotes) op.push_back (*here);
445    else com.push_back (*here);
446    ++here;
447  }
448
449  instring = meta;
450  if (com.empty()) {
451    metaoption.mqualifier.child = cAll;
452    metaoption.childoptions = " ";
453  }
454  else if (com == "first") {
455    metaoption.mqualifier.child = cNum;
456    metaoption.childoptions = ".fc";
457  }
458  else if (com == "last") {
459    metaoption.mqualifier.child = cNum;
460    metaoption.childoptions = ".lc";
461  }
462  else if (com.getint()>0) {
463    metaoption.mqualifier.child = cNum;
464    metaoption.childoptions = "."+com;
465  }
466  else {
467    metaoption.mqualifier.child = cAll;
468    metaoption.childoptions = op;
469  }
470}
471
472
473static void get_truncate_options (text_t &instring, metadata_t &metaoption)
474{
475  assert (instring.size() > ((text_t) "truncate").size());
476  if (instring.size() <= ((text_t) "truncate").size()) return;
477  text_t meta, com;
478  bool inbraces = false;
479  bool foundcolon = false;
480  text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
481  text_t::const_iterator end = instring.end();
482  while (here != end) {
483    if (foundcolon) meta.push_back (*here);
484    else if (*here == '(') inbraces = true;
485    else if (*here == ')') inbraces = false;
486    else if (*here == ':' && !inbraces) foundcolon = true;   
487    else com.push_back (*here);
488    ++here;
489  }
490
491  instring = meta;
492
493  if (!com.empty())
494  {
495    metaoption.siblingoptions = com;
496  }
497  else
498  {
499    // Default is 100 characters if not specified
500    metaoption.siblingoptions = "100";
501  }
502}
503
504
505
506static void parse_meta (text_t &meta, metadata_t &metaoption,
507            text_tset &metadata, bool &getParents) {
508
509  // Look for the various format statement modifiers
510  // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
511  //   is irrelevant because this is not stored in metaoption.metacommand anyway
512  bool keep_trying = true;
513  while (keep_trying)
514  {
515    keep_trying = false;
516
517    if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
518    {
519      metaoption.metacommand |= mCgiSafe;
520      meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
521      keep_trying = true;
522    }
523    if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
524    {   
525      metaoption.metacommand |= mSpecial;
526      meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
527      keep_trying = true;
528    }
529
530    // New "truncate" special formatting option
531    if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate"))  // No colons due to truncate(X)
532    {
533      metaoption.metacommand |= mTruncate;
534      get_truncate_options (meta, metaoption);
535      keep_trying = true;
536    }
537    // New "htmlsafe" special formatting option
538    if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
539    {
540      metaoption.metacommand |= mHTMLSafe;
541      meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
542      keep_trying = true;
543    }
544    // New "xmlsafe" special formatting option
545    if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
546    {
547      metaoption.metacommand |= mXMLSafe;
548      meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
549      keep_trying = true;
550    }
551    // New "dmsafe" special formatting option
552    if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
553    {
554      metaoption.metacommand |= mDMSafe;
555      meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
556      keep_trying = true;
557    }
558  }
559
560  bool had_parent_or_child = true;
561  bool prev_was_parent = false;
562  bool prev_was_child  = false;
563
564  while (had_parent_or_child) {
565    if (meta.size() > 7
566    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
567
568      // clear out sibling and child (cmd and options)
569      metaoption.metacommand &= ~(mChild|mSibling);
570      metaoption.childoptions.clear();
571      metaoption.siblingoptions.clear();
572
573      getParents = true;
574      metaoption.metacommand |= mParent;
575      get_parent_options (meta, metaoption);
576
577      if (prev_was_parent) {
578    metaoption.pre_tree_traverse += ".pr";
579      }
580      else if (prev_was_child) {
581    metaoption.pre_tree_traverse += ".fc";
582      }
583
584      prev_was_parent = true;
585      prev_was_child  = false;
586    }
587    else if (meta.size() > 6
588         && (substr (meta.begin(), meta.begin()+5) == "child")) {
589
590      // clear out sibling and parent (cmd and options)
591      metaoption.metacommand &= ~(mParent|mSibling);
592      metaoption.parentoptions.clear();
593      metaoption.siblingoptions.clear();
594
595      metaoption.metacommand |= mChild;
596      get_child_options (meta, metaoption);
597      metadata.insert("contains");
598
599      if (prev_was_parent) {
600    metaoption.pre_tree_traverse += ".pr";
601      }
602      else if (prev_was_child) {
603    metaoption.pre_tree_traverse += ".fc";
604      }
605
606      prev_was_child  = true;
607      prev_was_parent = false;
608    }
609    else {
610      prev_was_child  = false;
611      prev_was_parent = false;
612      had_parent_or_child = false;
613    }
614  }
615
616  // parent/child can have sibling tacked on end also
617  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
618    metaoption.metacommand |= mSibling;
619    get_sibling_options (meta, metaoption);
620  }
621 
622  // check for ex. which may occur in format statements
623  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
624    meta = substr (meta.begin()+3, meta.end());
625  }
626  metadata.insert (meta);
627  metaoption.metaname = meta;
628}
629
630static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
631  if (meta == "collection") {
632    // no qualifiers
633    metaoption.metaname = g_EmptyText;
634    return;
635  }
636  meta = substr (meta.begin()+11, meta.end());
637  metaoption.metaname = meta;
638 
639}
640
641static void parse_meta (text_t &meta, format_t *formatlistptr,
642            text_tset &metadata, bool &getParents) {
643 
644  // check for ex. which may occur in format statements
645  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
646    meta = substr (meta.begin()+3, meta.end());
647  }
648  if (meta == "link")
649    formatlistptr->command = comLink;
650  else if (meta == "/link")
651    formatlistptr->command = comEndLink;
652
653  else if (meta == "srclink") {
654    formatlistptr->command = comAssocLink;
655    formatlistptr->meta.metaname = "srclink_file";
656    metadata.insert("srclink_file");
657  }
658  else if (meta == "srchref") {
659    formatlistptr->command = comAssocLink;
660    formatlistptr->text = "href";
661    formatlistptr->meta.metaname = "srclink_file";
662    metadata.insert("srclink_file");
663  }
664  else if (meta == "/srclink") {
665    formatlistptr->command = comEndAssocLink;
666    formatlistptr->meta.metaname = "srclink_file";
667  }
668  // and weblink etc
669  else if (meta == "href")
670    formatlistptr->command = comHref;
671
672  else if (meta == "num")
673    formatlistptr->command = comNum;
674
675  else if (meta == "icon")
676    formatlistptr->command = comIcon;
677
678  else if (meta == "Text")
679    formatlistptr->command = comDoc;
680 
681  else if (meta == "RelatedDocuments")
682   formatlistptr->command = comRel;
683
684  else if (meta == "highlight")
685    formatlistptr->command = comHighlight;
686
687  else if (meta == "/highlight")
688    formatlistptr->command = comEndHighlight;
689
690  else if (meta == "metadata-spanwrap")
691    formatlistptr->command = comMetadataSpanWrap;
692
693  else if (meta == "/metadata-spanwrap")
694    formatlistptr->command = comEndMetadataSpanWrap;
695
696  else if (meta == "Summary")
697    formatlistptr->command = comSummary;
698
699  else if (meta == "DocImage")
700    formatlistptr->command = comImage;
701
702  else if (meta == "DocTOC")
703    formatlistptr->command = comTOC;
704
705  else if (meta == "DocumentButtonDetach")
706     formatlistptr->command = comDocumentButtonDetach;
707 
708  else if (meta == "DocumentButtonHighlight")
709     formatlistptr->command = comDocumentButtonHighlight;
710 
711  else if (meta == "DocumentButtonExpandContents")
712    formatlistptr->command = comDocumentButtonExpandContents;
713
714  else if (meta == "DocumentButtonExpandText")
715     formatlistptr->command = comDocumentButtonExpandText;
716
717  else if (meta == "DocOID")
718     formatlistptr->command = comOID;
719  else if (meta == "DocTopOID")
720    formatlistptr->command = comTopOID;
721  else if (meta == "DocRank")
722    formatlistptr->command = comRank;
723  else if (meta == "DocTermsFreqTotal")
724    formatlistptr->command = comDocTermsFreqTotal;
725  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
726    formatlistptr->command = comCollection;
727    parse_coll_meta(meta, formatlistptr->meta);
728  }
729  else {
730    formatlistptr->command = comMeta;
731    parse_meta (meta, formatlistptr->meta, metadata, getParents);
732  }
733}
734
735
736static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
737              text_tset &metadata, bool &getParents) {
738
739  text_t text;
740  text_t::const_iterator here = formatstring.begin();
741  text_t::const_iterator end = formatstring.end();
742
743  while (here != end) {
744
745    if (*here == '\\') {
746      ++here;
747      if (here != end) text.push_back (*here);
748
749    } else if (*here == '{') {
750      if (!text.empty()) {
751    formatlistptr->command = comText;
752    formatlistptr->text = text;
753    formatlistptr->nextptr = new format_t();
754    formatlistptr = formatlistptr->nextptr;
755   
756    text.clear();
757      }
758      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
759
760    formatlistptr->nextptr = new format_t();
761    formatlistptr = formatlistptr->nextptr;
762    if (here == end) break;
763      }
764    } else if (*here == '[') {
765      if (!text.empty()) {
766    formatlistptr->command = comText;
767    formatlistptr->text = text;
768    formatlistptr->nextptr = new format_t();
769    formatlistptr = formatlistptr->nextptr;
770
771    text.clear();
772      }
773      text_t meta;
774      ++here;
775      while (*here != ']') {
776    if (here == end) return false;
777    meta.push_back (*here);
778    ++here;
779      }
780      parse_meta (meta, formatlistptr, metadata, getParents);
781      formatlistptr->nextptr = new format_t();
782      formatlistptr = formatlistptr->nextptr;
783
784    } else
785      text.push_back (*here);
786
787    if (here != end) ++here;
788  }
789  if (!text.empty()) {
790    formatlistptr->command = comText;
791    formatlistptr->text = text;
792    formatlistptr->nextptr = new format_t();
793    formatlistptr = formatlistptr->nextptr;
794
795  }
796  return true;
797}
798
799
800static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
801              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
802
803  text_t::const_iterator it = findchar (here, end, '}');
804  if (it == end) return false;
805
806  text_t com = substr (here, it);
807  here = findchar (it, end, '{');
808  if (here == end) return false;
809  else ++here;
810
811  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
812  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
813  else return false;
814
815  int commacount = 0;
816  text_t text;
817  while (here != end) {
818
819    if (*here == '\\') {
820      ++here;
821      if (here != end) text.push_back(*here);
822     
823    }
824 
825    else if (*here == ',' || *here == '}' || *here == '{') {
826
827      if (formatlistptr->command == comOr) {
828    // the {Or}{this, or this, or this, or this} statement
829    format_t *or_ptr;
830   
831    // find the next unused orptr
832    if (formatlistptr->orptr == NULL) {
833      formatlistptr->orptr = new format_t();
834      or_ptr = formatlistptr->orptr;
835    } else {
836      or_ptr = formatlistptr->orptr;
837      while (or_ptr->nextptr != NULL)
838        or_ptr = or_ptr->nextptr;
839      or_ptr->nextptr = new format_t();
840      or_ptr = or_ptr->nextptr;
841    }
842
843    if (!text.empty())
844      {
845        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
846      }
847
848    if (*here == '{')
849      {
850        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
851        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
852        // The latter can always be re-written:
853        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
854       
855        if (!text.empty()) // already used up allocated format_t
856          {
857        // => allocate new one for detected action
858        or_ptr->nextptr = new format_t();
859        or_ptr = or_ptr->nextptr;
860          }
861        if (!parse_action(++here, end, or_ptr, metadata, getParents))
862          {
863        return false;
864          }
865      }
866    else
867      {
868        if (*here == '}') break;
869      }
870    text.clear();
871
872      }
873
874      // Parse an {If}{decide,do,else} statement
875      else {
876   
877    // Read the decision component. 
878    if (commacount == 0) {
879      // Decsion can be a metadata element, or a piece of text.
880      // Originally Stefan's code, updated 25/10/2000 by Gordon.
881
882      text_t::const_iterator beginbracket = text.begin();
883      text_t::const_iterator endbracket = (text.end() - 1);
884
885      // Decision is based on a metadata element
886      if ((*beginbracket == '[') && (*endbracket == ']')) {
887        // Ignore the surrounding square brackets
888        text_t meta = substr (beginbracket+1, endbracket);
889        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
890        ++commacount;
891        text.clear();
892      }
893
894      // Decision is a piece of text (probably a macro like _cgiargmode_).
895      else {
896
897        // hunt for any metadata in string, which might be uses in
898        // to test a condition, e.g. [Format] eq 'PDF'
899        format_t* dummyformat = new format_t();
900        // update which metadata fields needed
901        // (not interested in updatng formatlistptr)
902        parse_string (text, dummyformat, metadata, getParents);
903        delete dummyformat;
904
905        formatlistptr->decision.command = dText;
906        formatlistptr->decision.text = text;
907        ++commacount;
908        text.clear();
909      }
910    }
911
912    // Read the "then" and "else" components of the {If} statement.
913    else {
914      format_t** nextlistptr = NULL;
915      if (commacount == 1) {
916        nextlistptr = &formatlistptr->ifptr;
917      } else if (commacount == 2 ) {
918        nextlistptr = &formatlistptr->elseptr;
919      } else {
920        return false;
921      }
922
923      if (!text.empty()) {
924        if (*nextlistptr == NULL) {
925          *nextlistptr = new format_t();
926        } else {
927
928          // skip to the end of any format_t statements already added
929          while ((*nextlistptr)->nextptr != NULL)
930          {
931        nextlistptr = &(*nextlistptr)->nextptr;
932          }
933
934          (*nextlistptr)->nextptr = new format_t();
935          nextlistptr = &(*nextlistptr)->nextptr;
936        }
937
938        if (!parse_string (text, *nextlistptr, metadata, getParents))
939          {
940        return false;
941          }
942        text.clear();
943      }
944     
945      if (*here == '{')
946        {
947          if (*nextlistptr == NULL) {
948        *nextlistptr = new format_t();
949          } else {
950        // skip to the end of any format_t statements already added
951        while ((*nextlistptr)->nextptr != NULL)
952          {
953            nextlistptr = &(*nextlistptr)->nextptr;
954          }
955
956        (*nextlistptr)->nextptr = new format_t();
957        nextlistptr = &(*nextlistptr)->nextptr;
958          }
959
960          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
961        {
962          return false;
963        }
964        }
965      else
966        {
967          if (*here == '}') break;
968          ++commacount;
969        }
970    }
971      }
972     
973    } else text.push_back(*here);
974   
975    if (here != end) ++here;
976  }
977
978  return true;
979}
980
981
982static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
983                const text_t metaname, int metapos=-1)
984{
985
986  text_t tag_type = (metaname == "Text") ? "div" : "span";
987  text_t editable_type = (metaname == "Text") ? "text" : "metadata";
988
989  text_t wrapped_metatext = "<" + tag_type + " ";
990  wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
991
992  wrapped_metatext += "docoid=\"" + OID + "\" "; 
993  wrapped_metatext += "metaname=\"" + metaname + "\"";
994
995  if (metapos>=0) {
996    text_t metapos_str = metapos;
997    wrapped_metatext += " metapos=\"" + metapos_str + "\"";
998  }
999
1000  wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1001
1002  return wrapped_metatext;
1003}
1004
1005   
1006
1007bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1008             text_tset &metadata, bool &getParents) {
1009
1010  formatlistptr->clear();
1011  getParents = false;
1012
1013  return (parse_string (formatstring, formatlistptr, metadata, getParents));
1014}
1015
1016// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1017// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1018
1019static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1020{
1021  text_t no_ns_metaname = remove_namespace(meta.metaname);
1022  text_t formatted_metatext;
1023  bool first = true;
1024 
1025  const int start_i=0;
1026  const int end_i = metainfo.values.size()-1;
1027 
1028  if (position == -1) { // all
1029    for (int i=start_i; i<=end_i; ++i) {
1030      if (!first) formatted_metatext += meta.siblingoptions;
1031     
1032      text_t fresh_metatext;
1033
1034      if (meta.metacommand & mSpecial) {
1035    // special formatting
1036    if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1037    else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1038    else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1039      }
1040      else fresh_metatext = metainfo.values[i];
1041
1042      // New "truncate" special formatting option
1043      if (meta.metacommand & mTruncate)
1044      {
1045    int truncate_length = meta.siblingoptions.getint();
1046    text_t truncated_value = fresh_metatext;
1047    if (truncated_value.size() > truncate_length)
1048    {
1049      truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1050    }
1051    fresh_metatext = truncated_value;
1052      }
1053      // New "xmlsafe" special formatting option
1054      if (meta.metacommand & mXMLSafe)
1055      {
1056    // Make it XML-safe
1057    text_t text_xml_safe = "";
1058    text_t::const_iterator text_iterator = fresh_metatext.begin();
1059    while (text_iterator != fresh_metatext.end())
1060    {
1061      if (*text_iterator == '&') text_xml_safe += "&amp;";
1062      else if (*text_iterator == '<') text_xml_safe += "&lt;";
1063      else if (*text_iterator == '>') text_xml_safe += "&gt;";
1064      else text_xml_safe.push_back(*text_iterator);
1065      text_iterator++;
1066    }
1067    fresh_metatext = text_xml_safe;
1068      }
1069      // New "htmlsafe" special formatting option
1070      if (meta.metacommand & mHTMLSafe)
1071      {
1072    // Make it HTML-safe
1073    text_t text_html_safe = "";
1074    text_t::const_iterator text_iterator = fresh_metatext.begin();
1075    while (text_iterator != fresh_metatext.end())
1076    {
1077      if (*text_iterator == '&') text_html_safe += "&amp;";
1078      else if (*text_iterator == '<') text_html_safe += "&lt;";
1079      else if (*text_iterator == '>') text_html_safe += "&gt;";
1080      else if (*text_iterator == '"') text_html_safe += "&quot;";
1081      else text_html_safe.push_back(*text_iterator);
1082      text_iterator++;
1083    }
1084    fresh_metatext = text_html_safe;
1085      }
1086      // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1087      if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1088      {
1089    // Make it macro-safe
1090    text_t text_dm_safe = dm_safe(fresh_metatext);
1091    fresh_metatext = text_dm_safe;
1092      }
1093
1094      if (metadata_spanwrap) {
1095    fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
1096      }
1097      formatted_metatext += fresh_metatext;
1098
1099      first = false;
1100     
1101    }
1102  } else {
1103    if (position == -2) { // end
1104      position = end_i;
1105    } else if (position < start_i || position > end_i) {
1106      return "";
1107    }
1108
1109    text_t fresh_metatext;
1110    if (meta.metacommand & mSpecial) {
1111
1112      // special formatting
1113      if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1114      else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1115      else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1116    }
1117    else fresh_metatext = metainfo.values[position];
1118
1119    // New "truncate" special formatting option
1120    if (meta.metacommand & mTruncate)
1121    {
1122      int truncate_length = meta.siblingoptions.getint();
1123      text_t truncated_value = fresh_metatext;
1124      if (truncated_value.size() > truncate_length)
1125      {
1126    truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1127      }
1128      fresh_metatext = truncated_value;
1129    }
1130    // New "xmlsafe" special formatting option
1131    if (meta.metacommand & mXMLSafe)
1132    {
1133      // Make it XML-safe
1134      text_t text_xml_safe = "";
1135      text_t::const_iterator text_iterator = fresh_metatext.begin();
1136      while (text_iterator != fresh_metatext.end())
1137      {
1138    if (*text_iterator == '&') text_xml_safe += "&amp;";
1139    else if (*text_iterator == '<') text_xml_safe += "&lt;";
1140    else if (*text_iterator == '>') text_xml_safe += "&gt;";
1141    else text_xml_safe.push_back(*text_iterator);
1142    text_iterator++;
1143      }
1144      fresh_metatext = text_xml_safe;
1145    }
1146    // New "htmlsafe" special formatting option
1147    if (meta.metacommand & mHTMLSafe)
1148    {
1149      // Make it HTML-safe
1150      text_t text_html_safe = "";
1151      text_t::const_iterator text_iterator = fresh_metatext.begin();
1152      while (text_iterator != fresh_metatext.end())
1153      {
1154    if (*text_iterator == '&') text_html_safe += "&amp;";
1155    else if (*text_iterator == '<') text_html_safe += "&lt;";
1156    else if (*text_iterator == '>') text_html_safe += "&gt;";
1157    else if (*text_iterator == '"') text_html_safe += "&quot;";
1158    else if (*text_iterator == '\'') text_html_safe += "&#39;";
1159    else if (*text_iterator == ',') text_html_safe += "&#44;";
1160    else text_html_safe.push_back(*text_iterator);
1161    text_iterator++;
1162      }
1163      fresh_metatext = text_html_safe;
1164    }
1165    // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1166    if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1167    {
1168      // Make it macro-safe
1169      text_t text_dm_safe = dm_safe(fresh_metatext);
1170      fresh_metatext = text_dm_safe;
1171    }
1172
1173    if (metadata_spanwrap) {
1174      fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
1175    }
1176
1177    formatted_metatext += fresh_metatext;
1178  }
1179
1180  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1181  else return formatted_metatext;
1182}
1183
1184static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1185{
1186 
1187  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1188
1189  switch (meta.mqualifier.parent) {
1190  case pNone:
1191    return "Nothing!!";
1192    break;
1193
1194  case pImmediate:
1195    if (parent != NULL) {
1196      text_t parent_oid = get_parent(docinfo.OID);
1197      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1198    }
1199    break;
1200
1201  case pTop:
1202    if (parent != NULL) {
1203      text_t parent_oid = get_parent(docinfo.OID);
1204
1205      while (parent->parent != NULL) {
1206    parent = parent->parent;
1207    parent_oid = get_parent(parent_oid);
1208      }
1209      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1210    }
1211    break;
1212
1213  case pAll:
1214    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1215    if (parent != NULL) {
1216      text_t parent_oid = get_parent(docinfo.OID);
1217
1218      text_tarray tmparray;
1219      while (parent != NULL) {
1220    tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1221    parent = parent->parent;
1222    parent_oid = get_parent(parent_oid);
1223
1224      }
1225      // now join them up - use teh parent separator
1226      bool first = true;
1227      text_t tmp;
1228      text_tarray::reverse_iterator here = tmparray.rbegin();
1229      text_tarray::reverse_iterator end = tmparray.rend();
1230      while (here != end) {
1231    if (!first) tmp += meta.parentoptions;
1232    tmp += *here;
1233    first = false;
1234    ++here;
1235      }
1236      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1237      else return tmp;
1238    }
1239  }
1240  return "";
1241
1242}
1243
1244static text_t get_child_meta (const text_t& collection,
1245                  recptproto* collectproto,
1246                  ResultDocInfo_t &docinfo, displayclass &disp,
1247                  const metadata_t &meta, text_tmap &options,
1248                  ostream& logout, int siblings_values)
1249{
1250  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1251 
1252  const text_t& pre_tree_trav = meta.pre_tree_traverse;
1253  const text_t& child_metaname = meta.metaname;
1254  const text_t& child_field = meta.childoptions;
1255  text_tset child_metadata;
1256  child_metadata.insert(child_metaname);
1257
1258  FilterResponse_t child_response;
1259  if (meta.mqualifier.child == cNum) {
1260    // just one child
1261    //get the information associated with the metadata for child doc
1262    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1263           child_metadata, false, collectproto, child_response,
1264           logout)) return ""; // invalid child number
1265
1266      if (child_response.docInfo.empty()) return false; // no info for the child
1267 
1268      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1269      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1270 
1271      text_t child_metavalue
1272    = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1273      return expand_metadata(child_metavalue,collection,collectproto,
1274             child_docinfo,disp,options,logout);
1275  }
1276 
1277   
1278  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1279
1280
1281  if (!pre_tree_trav.empty()) {
1282    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1283    FilterResponse_t trav_response;
1284
1285    text_tset trav_metadata;
1286    trav_metadata.insert("contains");
1287
1288    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1289           trav_metadata, false, collectproto, trav_response,
1290           logout)) return ""; // invalid pre_tree_trav
1291
1292    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1293 
1294    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1295
1296    // use this for rest of routine
1297    docinfo = trav_docinfo;
1298  }
1299 
1300  // we need to get all children
1301  text_t result = "";
1302  text_tarray children;
1303  text_t contains = docinfo.metadata["contains"].values[0];
1304  splitchar (contains.begin(), contains.end(), ';', children);
1305  text_tarray::const_iterator here = children.begin();
1306  text_tarray::const_iterator end = children.end();
1307  bool first = true;
1308  while (here !=end) {
1309    text_t oid = *here;
1310    here++;
1311    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1312
1313    //get the information associated with the metadata for child doc
1314    if (!get_info (oid, collection, "", child_metadata,
1315           false, collectproto, child_response, logout) ||
1316    child_response.docInfo.empty()) {
1317      first = false;
1318      continue;
1319    }
1320   
1321   
1322    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1323    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1324   
1325    text_t child_metavalue
1326      = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1327
1328
1329    if (!first) result += child_field;
1330    first = false;
1331    // need to do this here cos otherwise we are in the wrong document
1332    text_t em =  expand_metadata(child_metavalue,collection,collectproto,
1333                 child_docinfo,disp,options,logout);
1334
1335    result += em;
1336  }
1337  return result;
1338   
1339}
1340
1341static text_t get_meta (const text_t& collection, recptproto* collectproto,
1342            ResultDocInfo_t &docinfo, displayclass &disp,
1343            const metadata_t &meta, text_tmap &options,
1344            ostream& logout) {
1345 
1346  // make sure we have the requested metadata
1347  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1348  if (it == docinfo.metadata.end()) return "";
1349
1350  int siblings_values = 0; // default is no siblings, just the first metadata available
1351  if (meta.metacommand & mSibling) {
1352    if (meta.mqualifier.sibling == sAll) {
1353      siblings_values = -1; //all
1354    } else if (meta.mqualifier.sibling == sNum) {
1355      siblings_values = meta.siblingoptions.getint();
1356    }
1357  }
1358  if (meta.metacommand & mParent) {
1359    return get_parent_meta(docinfo,meta,siblings_values);
1360  }
1361
1362  else if (meta.metacommand & mChild) {
1363    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1364                options,logout, siblings_values);
1365  }
1366  else if (meta.metacommand & mSibling) { // only siblings
1367    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1368    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1369  }
1370  else {
1371   
1372    // straightforward metadata request (nothing fancy)
1373
1374    text_t classifier_metaname = docinfo.classifier_metadata_type;
1375    int metaname_index
1376      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1377    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1378  }
1379 
1380  return "";
1381}
1382
1383static text_t get_or (const text_t& collection, recptproto* collectproto,
1384              ResultDocInfo_t &docinfo, displayclass &disp,
1385              format_t *orptr, text_tmap &options,
1386              ostream& logout) {
1387
1388  while (orptr != NULL) {
1389
1390    if (metadata_spanwrap) {
1391      // need to be a bit more careful about this
1392      // => test for it *without* spanwrap, and if defined, then
1393      // got back and generate it again, this time with spanwrap on
1394
1395      metadata_spanwrap = false;
1396      text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1397                       options, logout);
1398      metadata_spanwrap = true;
1399      if (!test_tmp.empty()) {
1400
1401    return format_string (collection,collectproto,docinfo, disp, orptr,
1402                  options, logout);
1403      }
1404    }
1405    else {
1406      text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1407                  options, logout);
1408      if (!tmp.empty()) return tmp;
1409    }
1410
1411    orptr = orptr->nextptr;
1412  }
1413  return "";
1414}
1415
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1421
1422static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1423{
1424  int pos = start_pos;
1425  while (pos<outstring.size()) {
1426    if (!char_is_whitespace(outstring[pos])) {
1427      break;
1428    }
1429    ++pos;
1430  }
1431
1432  return pos;
1433}
1434
1435static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1436{
1437  int pos = start_pos;
1438  while (pos>=0) {
1439    if (!char_is_whitespace(outstring[pos])) {
1440      break;
1441    }
1442    --pos;
1443  }
1444
1445  return pos;
1446}
1447
1448static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1449{
1450  int pos = start_pos;
1451  while (pos>=0) {
1452    if (char_is_whitespace(outstring[pos])) {
1453      break;
1454    }
1455    --pos;
1456  }
1457
1458  return pos;
1459}
1460
1461
1462static int rscan_for(const text_t& outstring, const int start_pos,
1463             const char find_c)
1464{
1465  int pos = start_pos;
1466  while (pos>=0) {
1467    char c = outstring[pos];
1468    if (outstring[pos] == find_c) {
1469      break;
1470    }
1471    --pos;
1472  }
1473
1474  return pos;
1475}
1476
1477text_t extract_substr(const text_t& outstring, const int start_pos,
1478              const int end_pos)
1479{
1480  text_t extracted_str;
1481  extracted_str.clear();
1482
1483  for (int pos=start_pos; pos<=end_pos; ++pos) {
1484    extracted_str.push_back(outstring[pos]);
1485  }
1486
1487  return extracted_str;
1488}
1489
1490
1491static text_t expand_potential_metadata(const text_t& collection,
1492                    recptproto* collectproto,
1493                    ResultDocInfo_t &docinfo,
1494                    displayclass &disp,
1495                    const text_t& intext,
1496                    text_tmap &options,
1497                    ostream& logout)
1498{
1499  text_t outtext;
1500
1501  // decide if dealing with metadata or text
1502
1503  text_t::const_iterator beginbracket = intext.begin();
1504  text_t::const_iterator endbracket = (intext.end() - 1);
1505
1506  // Decision is based on a metadata element
1507  if ((*beginbracket == '[') && (*endbracket == ']')) {
1508    // Ignore the surrounding square brackets
1509    text_t meta_text = substr (beginbracket+1, endbracket);
1510
1511    if (meta_text == "Text") {
1512      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1513    }
1514    else {
1515
1516      text_tset metadata;
1517      bool getParents =false;
1518      metadata_t meta;
1519     
1520      parse_meta (meta_text, meta, metadata, getParents);   
1521      outtext
1522    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1523    }
1524
1525  }
1526  else {
1527    outtext = intext;
1528  }
1529
1530  return outtext;
1531}
1532
1533
1534
1535
1536static bool uses_expression(const text_t& collection, recptproto* collectproto,
1537                ResultDocInfo_t &docinfo,
1538                displayclass &disp,
1539                const text_t& outstring, text_t& lhs_expr,
1540                text_t& op_expr, text_t& rhs_expr,
1541                text_tmap &options,
1542                ostream& logout)
1543{
1544  // Note: the string may not be of the form: str1 op str2, however
1545  // to deterine this we have to process it on the assumption it is,
1546  // and if at any point an 'erroneous' value is encountered, return
1547  // false and let something else have a go at evaluating it
1548
1549  // Starting at the end of the string and working backwards ..
1550
1551  const int outstring_len = outstring.size();
1552
1553  // skip over white space
1554  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1555
1556  if (rhs_end<=0) {
1557    // no meaningful text or (rhs_end==0) no room for operator
1558    return false;
1559  }
1560
1561  // check for ' or " and then scan over token
1562  const char potential_quote = outstring[rhs_end];
1563  int rhs_start=rhs_end;
1564  bool quoted = false;
1565
1566  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1567    --rhs_end;
1568    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1569    quoted = true;
1570  }
1571  else {
1572    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1573  }
1574
1575  if ((rhs_end-rhs_start)<0) {
1576    // no meaningful rhs expression
1577    return false;
1578  }
1579
1580  // form rhs_expr
1581  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1582
1583  // skip over white space
1584  const int to_whitespace = (quoted) ? 2 : 1;
1585
1586  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1587  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1588
1589  if ((op_end<0) && (op_start<0)) {
1590    // no meaningful expression operator
1591    return false;
1592  }
1593
1594  if (op_end-op_start<0) {
1595    // no meaningful expression operator
1596    return false;
1597  }
1598
1599  op_expr = extract_substr(outstring,op_start,op_end);
1600
1601
1602  // check for operator
1603  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1604     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1605
1606    // not a valid operator
1607    return false;
1608  }
1609
1610  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1611  if (lhs_end<0) {
1612    // no meaningful lhs expression
1613    return false;
1614  }
1615
1616  int lhs_start = scan_over_whitespace(outstring,0);
1617
1618  // form lhs_expr from remainder of string
1619  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1620
1621  // Now we know we have a valid expression, look up any
1622  // metadata terms
1623
1624  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1625                       disp,rhs_expr,options,logout);
1626  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1627                       disp,lhs_expr,options,logout);
1628
1629  return true;
1630}
1631
1632static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1633                 const text_t& rhs_expr, ostream& logout)
1634{
1635  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1636  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1637  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1638  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1639  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1640  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1641  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1642  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1643  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1644  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1645  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1646  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1647  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1648  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1649  else {
1650    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1651  }
1652 
1653  return false;
1654}
1655
1656
1657static text_t get_if (const text_t& collection, recptproto* collectproto,
1658              ResultDocInfo_t &docinfo, displayclass &disp,
1659              const decision_t &decision,
1660              format_t *ifptr, format_t *elseptr,
1661              text_tmap &options, ostream& logout)
1662{
1663  // If the decision component is a metadata element, then evaluate it
1664  // to see whether we output the "then" or the "else" clause
1665  if (decision.command == dMeta) {
1666
1667    bool store_metadata_spanwrap = metadata_spanwrap;
1668    metadata_spanwrap = 0;
1669
1670    // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1671    bool metadata_exists
1672      = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1673           logout) != "");
1674
1675    metadata_spanwrap = store_metadata_spanwrap;
1676
1677    if (metadata_exists) {
1678      if (ifptr != NULL)
1679    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1680                     options, logout);
1681    }
1682    else {
1683      if (elseptr != NULL)
1684    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1685                     options, logout);
1686    }
1687  }
1688
1689  // If the decision component is text, then evaluate it (it is probably a
1690  // macro like _cgiargmode_) to decide what to output.
1691  else if (decision.command == dText) {
1692
1693    text_t outstring;
1694    disp.expandstring (decision.text, outstring);
1695
1696    // Check for if expression in form: str1 op str2
1697    // (such as [x] eq "y")
1698    text_t lhs_expr, op_expr, rhs_expr;
1699    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1700      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1701    if (ifptr != NULL) {
1702      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1703                       options, logout);
1704    }
1705    else {
1706      return "";
1707    }
1708      } else {
1709    if (elseptr != NULL) {
1710      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1711                       options, logout);
1712    }
1713    else {
1714      return "";
1715    }
1716      }
1717    }
1718
1719
1720    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1721    // a cgi argument macro that has not been set, it evaluates to itself.
1722    // Therefore, were have to say that a piece of text evalautes true if
1723    // it is non-empty and if it is a cgi argument evaulating to itself.
1724
1725    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1726      if (ifptr != NULL)
1727    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1728                     options, logout);
1729    } else {
1730      if (elseptr != NULL)
1731    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1732                     options, logout);
1733    }
1734  }
1735 
1736  return "";
1737}
1738
1739bool includes_metadata(const text_t& text)
1740{
1741  text_t::const_iterator here = text.begin();
1742  text_t::const_iterator end = text.end();
1743  while (here != end) {
1744    if (*here == '[') return true;
1745    ++here;
1746  }
1747
1748  return false;
1749}
1750
1751static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1752                  recptproto* collectproto,
1753                  ResultDocInfo_t &docinfo,
1754                  displayclass &disp, text_tmap &options,
1755                  ostream &logout) {
1756     
1757  if (includes_metadata(metavalue)) {
1758   
1759    // text has embedded metadata in it => expand it
1760    FilterRequest_t request;
1761    FilterResponse_t response;
1762   
1763    request.getParents = false;
1764   
1765    format_t *expanded_formatlistptr = new format_t();
1766    parse_formatstring (metavalue, expanded_formatlistptr,
1767            request.fields, request.getParents);
1768   
1769    // retrieve metadata
1770    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1771         collectproto, response, logout);
1772   
1773    if (!response.docInfo.empty()) {
1774     
1775      text_t expanded_metavalue
1776    = get_formatted_string(collection, collectproto,
1777                   response.docInfo[0], disp, expanded_formatlistptr,
1778                   options, logout);
1779     
1780      return expanded_metavalue;
1781    }
1782    else {
1783      return metavalue;
1784    }
1785  }
1786  else {
1787   
1788    return metavalue;
1789  }
1790}
1791
1792text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1793               displayclass &disp,
1794               text_t meta_name, ostream& logout) {
1795 
1796  ColInfoResponse_t collectinfo;
1797  comerror_t err;
1798  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1799  text_t meta_value = "";
1800  text_t lang;
1801  disp.expandstring("_cgiargl_",lang);
1802  if (lang.empty()) {
1803    lang = "en";
1804  }
1805
1806  if (err == noError) {
1807    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1808  }
1809  return meta_value;
1810 
1811
1812}
1813text_t format_string (const text_t& collection, recptproto* collectproto,
1814              ResultDocInfo_t &docinfo, displayclass &disp,
1815              format_t *formatlistptr, text_tmap &options,
1816              ostream& logout) {
1817
1818  if (formatlistptr == NULL) return "";
1819
1820  switch (formatlistptr->command) {
1821     case comOID:
1822    return docinfo.OID;
1823  case comTopOID:
1824    {
1825      text_t top_id;
1826      get_top(docinfo.OID, top_id);
1827      return top_id;
1828    }
1829  case comRank:
1830    return text_t(docinfo.ranking);
1831     case comText:
1832    return formatlistptr->text;
1833     case comLink:
1834    return options["link"];
1835     case comEndLink:
1836       {
1837     if (options["link"].empty()) return "";
1838    else return "</a>";
1839       }
1840     case comHref:
1841    return get_href(options["link"]);
1842     case comIcon:
1843    return options["icon"];
1844     case comNum:
1845    return docinfo.result_num;
1846     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1847    return get_related_docs(collection, collectproto, docinfo, logout);
1848
1849     case comSummary:
1850       return format_summary(collection, collectproto, docinfo, disp, options, logout);
1851     case comAssocLink:
1852       {
1853     text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1854         if (!link_filename.empty()) {
1855       text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1856       if (formatlistptr->text == "href") {
1857         return href;
1858       }
1859       return "<a href=\""+ href + "\">";
1860     }
1861     return "";
1862       }
1863  case comEndAssocLink:
1864    {
1865    text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1866    if (!link_filename.empty()) {
1867      return "</a>";
1868    }
1869    return "";
1870    }
1871     case comMeta:
1872    {
1873       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1874       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1875    }
1876
1877     case comDoc:
1878       return format_text(collection, collectproto, docinfo, disp, options, logout);
1879
1880     case comImage:
1881    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1882     case comTOC:
1883    return options["DocTOC"];
1884     case comDocumentButtonDetach:
1885    return options["DocumentButtonDetach"];
1886     case comDocumentButtonHighlight:
1887    return options["DocumentButtonHighlight"];
1888     case comDocumentButtonExpandContents:
1889    return options["DocumentButtonExpandContents"];
1890     case comDocumentButtonExpandText:
1891    return options["DocumentButtonExpandText"];
1892     case comHighlight:
1893    if (options["highlight"] == "1") return "<b>";
1894    break;
1895     case comEndHighlight:
1896    if (options["highlight"] == "1") return "</b>";
1897    break;
1898     case comMetadataSpanWrap:
1899        metadata_spanwrap=true;  return "";
1900    break;
1901     case comEndMetadataSpanWrap:
1902    metadata_spanwrap=false; return "";
1903    break;
1904     case comIf:
1905    return get_if (collection, collectproto, docinfo, disp,
1906               formatlistptr->decision, formatlistptr->ifptr,
1907               formatlistptr->elseptr, options, logout);
1908     case comOr:
1909    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1910               options, logout);
1911     case comDocTermsFreqTotal:
1912       return docinfo.num_terms_matched;
1913     case comCollection:
1914       if (formatlistptr->meta.metaname == g_EmptyText) {
1915     return collection;
1916       }
1917       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1918   
1919  }
1920  return "";
1921}
1922
1923text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1924                 ResultDocInfo_t &docinfo, displayclass &disp,
1925                 format_t *formatlistptr, text_tmap &options,
1926                 ostream& logout) {
1927
1928   text_t ft;
1929   while (formatlistptr != NULL)
1930      {
1931     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1932                  options, logout);
1933     formatlistptr = formatlistptr->nextptr;
1934      }
1935   
1936   return ft;
1937}
1938
1939
1940// we have only preloaded the text in DocumentAction. But you may want
1941// to get the text in query, so copy what we have done with
1942// format_summary and get the text here. Probably is quite expensive?
1943text_t format_text (const text_t& collection, recptproto* collectproto,
1944            ResultDocInfo_t &docinfo, displayclass &disp,
1945            text_tmap &options, ostream& logout)
1946{
1947  text_t text;
1948
1949  if (!options["text"].empty()) {
1950    text = options["text"];
1951  }
1952  else {
1953    // get document text here
1954    DocumentRequest_t docrequest;
1955    DocumentResponse_t docresponse;
1956    comerror_t err;
1957    docrequest.OID = docinfo.OID;
1958    collectproto->get_document (collection, docrequest, docresponse, err, logout);
1959    text = docresponse.doc;
1960  }
1961
1962  if (metadata_spanwrap) {
1963    text = spanwrap_metatext(text,docinfo.OID,"Text");
1964  }
1965
1966  return text;
1967}
1968 
1969/* FUNCTION NAME: format_summary
1970 * DESC: this is invoked when a [Summary] special metadata is processed.
1971 * RETURNS: a query-biased summary for the document */
1972
1973text_t format_summary (const text_t& collection, recptproto* collectproto,
1974               ResultDocInfo_t &docinfo, displayclass &disp,
1975               text_tmap &options, ostream& logout) {
1976
1977  // GRB: added code here to ensure that the cstr (and other collections)
1978  //      uses the document metadata item Summary, rather than compressing
1979  //      the text of the document, processed via the methods in
1980  //      summarise.cpp
1981
1982  text_t summary;
1983
1984  if (docinfo.metadata.count("Summary") > 0 &&
1985      docinfo.metadata["Summary"].values.size() > 0) {
1986    summary = docinfo.metadata["Summary"].values[0];
1987  }
1988  else {
1989 
1990    text_t textToSummarise, query;
1991
1992    if(options["text"].empty()) { // get document text
1993      DocumentRequest_t docrequest;
1994      DocumentResponse_t docresponse;
1995      comerror_t err;
1996      docrequest.OID = docinfo.OID;
1997      collectproto->get_document (collection, docrequest, docresponse, err, logout);
1998      textToSummarise = docresponse.doc;
1999    }
2000    else {
2001      // in practice, this would not happen, because text is only
2002      // loaded with the [Text] command
2003      textToSummarise = options["text"];
2004    }
2005   
2006    disp.expandstring("_cgiargq_",query);
2007    summary = summarise(textToSummarise,query,80);
2008    //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2009  }
2010
2011  summary.replace("'","&#039;");
2012  summary.replace("\n","&#013;");
2013
2014  if (metadata_spanwrap) {
2015    summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
2016  }
2017
2018  return summary;
2019}
Note: See TracBrowser for help on using the browser.