root/main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp @ 23515

Revision 23515, 60.4 KB (checked in by davidb, 9 years ago)

Generalization of metadata-spanwrap to be either metadata-spanwrap or metadata-divwrap

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41                 ResultDocInfo_t &docinfo, displayclass &disp,
42                 format_t *formatlistptr, text_tmap &options,
43                 ostream& logout);
44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46              format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49                  ResultDocInfo_t &docinfo, displayclass &disp,
50                  text_tmap &options, ostream& logout);
51static text_t format_text (const text_t& collection, recptproto* collectproto,
52                  ResultDocInfo_t &docinfo, displayclass &disp,
53                  text_tmap &options, ostream& logout);
54
55static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
56                  recptproto* collectproto, ResultDocInfo_t &docinfo,
57                  displayclass &disp, text_tmap &options,
58                  ostream &logout);
59
60
61void metadata_t::clear() {
62  metaname.clear();
63  metacommand = mNone;
64  mqualifier.parent  = pNone;
65  mqualifier.sibling = sNone;
66  mqualifier.child   = cNone;
67  pre_tree_traverse.clear();
68  parentoptions.clear();
69  siblingoptions.clear();
70  childoptions.clear();
71}
72
73void decision_t::clear() {
74  command = dMeta;
75  meta.clear();
76  text.clear();
77}
78
79format_t::~format_t()
80{
81  if (nextptr != NULL) delete nextptr;
82  if (ifptr != NULL) delete ifptr;
83  if (elseptr != NULL) delete elseptr;
84  if (orptr != NULL) delete orptr;
85}
86
87void format_t::clear() {
88  command = comText;
89  decision.clear();
90  text.clear();
91  meta.clear();
92  nextptr = NULL;
93  ifptr = NULL;
94  elseptr = NULL;
95  orptr = NULL;
96}
97
98void formatinfo_t::clear() {
99  DocumentImages = false;
100  DocumentTitles = true;
101  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
102  DocumentContents = true;
103  DocumentArrowsBottom = true;
104  DocumentArrowsTop = false;
105  DocumentSearchResultLinks = false;
106  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
107  //  DocumentButtons.push_back ("Expand Text");
108  //  DocumentButtons.push_back ("Expand Contents");
109  DocumentButtons.push_back ("Detach");
110  DocumentButtons.push_back ("Highlight");
111  RelatedDocuments = "";
112  DocumentText = "[Text]";
113  formatstrings.erase (formatstrings.begin(), formatstrings.end());
114  DocumentUseHTML = false;
115  AllowExtendedOptions = false;
116}
117
118// simply checks to see if formatstring begins with a <td> tag
119bool is_table_content (const text_t &formatstring) {
120  text_t::const_iterator here = formatstring.begin();
121  text_t::const_iterator end = formatstring.end();
122 
123  while (here != end) {
124    if (*here != ' ') {
125      if ((*here == '<') && ((here+3) < end)) {
126    if ((*(here+1) == 't' || *(here+1) == 'T') &&
127        (*(here+2) == 'd' || *(here+2) == 'D') &&
128        (*(here+3) == '>' || *(here+3) == ' '))
129      return true;
130      } else return false;
131    }
132    ++here;
133  }
134  return false;
135}
136
137bool is_table_content (const format_t *formatlistptr) {
138
139  if (formatlistptr == NULL) return false;
140 
141  if (formatlistptr->command == comText)
142    return is_table_content (formatlistptr->text);
143   
144  return false;
145}
146
147// returns false if key isn't in formatstringmap
148bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
149               text_t &formatstring) {
150
151  formatstring.clear();
152  text_tmap::const_iterator it = formatstringmap.find(key);
153  if (it == formatstringmap.end()) return false;
154  formatstring = (*it).second;
155  return true;
156}
157
158// tries to find "key1key2" then "key1" then "key2"
159bool get_formatstring (const text_t &key1, const text_t &key2, 
160               const text_tmap &formatstringmap,
161               text_t &formatstring) {
162
163  formatstring.clear();
164  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
165  if (it != formatstringmap.end()) {
166    formatstring = (*it).second;
167    return true;
168  }
169  it = formatstringmap.find(key1);
170  if (it != formatstringmap.end()) {
171    formatstring = (*it).second;
172    return true;
173  }
174  it = formatstringmap.find(key2);
175  if (it != formatstringmap.end()) {
176    formatstring = (*it).second;
177    return true;
178  }
179  return false;
180}
181
182
183text_t remove_namespace(const text_t &meta_name) {
184  text_t::const_iterator end = meta_name.end();
185  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
186  if (it != end) {
187    return substr(it+1, end);
188  }
189
190  return meta_name;
191
192}
193// returns a date of form _format:date_(year, month, day)
194// input is date of type yyyy-?mm-?dd
195// at least the year must be present in date
196text_t format_date (const text_t &date) {
197
198  if (date.size() < 4) return "";
199
200  text_t::const_iterator datebegin = date.begin();
201
202  text_t year = substr (datebegin, datebegin+4);
203  int chars_seen_so_far = 4;
204  if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
205
206  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
208 
209  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210  int imonth = month.getint();
211  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
212 
213  chars_seen_so_far += 2;
214  if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
215
216  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
217  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
218
219  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
220  if (day[0] == '0') day = substr (day.begin()+1, day.end());
221  int iday = day.getint();
222  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
223   
224  return "_format:date_("+year+","+month+","+day+")";
225
226
227// converts an iso639 language code to its English equivalent
228// should we be checking that the macro exists??
229text_t iso639 (const text_t &langcode) {
230  if (langcode.empty()) return "";
231  return "_iso639:iso639"+langcode+"_";
232}
233
234
235text_t get_href (const text_t &link) {
236
237  text_t href;
238
239  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
240  text_t::const_iterator end = link.end();
241  if (here == end) return g_EmptyText;
242 
243  ++here;
244  while (here != end) {
245    if (*here == '"') break;
246    href.push_back(*here);
247    ++here;
248  }
249
250  return href;
251}
252
253//this function gets the information associated with the relation
254//metadata for the document associated with 'docinfo'. This relation
255//metadata consists of a line of pairs containing 'collection, document OID'
256//(this is the OID of the document related to the current document, and
257//the collection the related document belongs to). For each of these pairs
258//the title metadata is obtained and then an html link between the title
259//of the related doc and the document's position (the document will be
260//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
261//(where collection is the related documents collection, and OID is the
262//related documents OID).  A list of these html links are made for as many
263//related documents as there are. This list is then returned. If there are
264//no related documents available for the current document then the string
265//'.. no related documents .. ' is returned.
266text_t get_related_docs(const text_t& collection, recptproto* collectproto,
267            ResultDocInfo_t &docinfo, ostream& logout){
268 
269  text_tset metadata;
270
271  //insert the metadata we wish to collect
272  metadata.insert("dc.Relation");
273  metadata.insert("Title"); 
274  metadata.insert("Subject"); //for emails, where title data doesn't apply
275 
276  FilterResponse_t response;
277  text_t relation = ""; //string for displaying relation metadata
278  text_t relationTitle = ""; //the related documents Title (or subject)
279  text_t relationOID = ""; //the related documents OID 
280
281  //get the information associated with the metadata for current doc
282  if (get_info (docinfo.OID, collection, "", metadata,
283        false, collectproto, response, logout)) {
284   
285    //if the relation metadata exists, store for displaying
286    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
287      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
288
289      //split relation data into pairs of collectionname,ID number
290      text_tarray relationpairs;
291      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
292     
293      text_tarray::const_iterator currDoc = relationpairs.begin(); 
294      text_tarray::const_iterator lastDoc = relationpairs.end();
295
296      //iterate through the pairs to split and display
297      while(currDoc != lastDoc){
298   
299    //split pairs into collectionname and ID
300    text_tarray relationdata;
301    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
302   
303    //get first element in the array (collection)
304    text_tarray::const_iterator doc_data = relationdata.begin();
305    text_t document_collection = *doc_data;
306    ++doc_data; //increment to get next item in array (oid)
307    text_t document_OID = *doc_data;
308   
309    //create html link to related document
310    relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
311    relation += "&amp;cl=search&amp;d=" + document_OID;
312       
313    //get the information associated with the metadata for related doc
314    if (get_info (document_OID, document_collection, "", metadata,
315              false, collectproto, response, logout)) {
316     
317      //if title metadata doesn't exist, collect subject metadata
318      //if that doesn't exist, just call it 'related document'
319      if (!response.docInfo[0].metadata["Title"].values[0].empty())
320        relationTitle = response.docInfo[0].metadata["Title"].values[0];
321      else if (!response.docInfo[0].metadata["Subject"].values.empty())
322        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
323      else relationTitle =  "RELATED DOCUMENT";
324     
325    }
326   
327    //link the related document's title to its page
328    relation += "\">" + relationTitle + "</a>";
329    relation += "  (" + document_collection + ")<br>";
330   
331    ++currDoc;
332      }
333    }
334   
335  }
336
337  if(relation.empty()) //no relation data for documnet
338    relation = ".. no related documents .. ";
339
340  return relation;
341}
342
343
344
345static void get_parent_options (text_t &instring, metadata_t &metaoption) {
346
347  assert (instring.size() > 7);
348  if (instring.size() <= 7) return;
349
350  text_t meta, com, op;
351  bool inbraces = false;
352  bool inquotes = false;
353  bool foundcolon = false;
354  text_t::const_iterator here = instring.begin()+6;
355  text_t::const_iterator end = instring.end();
356  while (here != end) {
357    if (foundcolon) meta.push_back (*here);
358    else if (*here == '(') inbraces = true;
359    else if (*here == ')') inbraces = false;
360    else if (*here == '\'' && !inquotes) inquotes = true;
361    else if (*here == '\'' && inquotes) inquotes = false;
362    else if (*here == ':' && !inbraces) foundcolon = true;
363    else if (inquotes) op.push_back (*here);
364    else com.push_back (*here);
365    ++here;
366  }
367
368  instring = meta;
369  if (com.empty())
370    metaoption.mqualifier.parent = pImmediate;
371  else if (com == "Top")
372    metaoption.mqualifier.parent = pTop;
373  else if (com == "All") {
374    metaoption.mqualifier.parent = pAll;
375    metaoption.parentoptions = op;
376  }
377}
378
379
380static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
381
382  assert (instring.size() > 8);
383  if (instring.size() <= 8) return;
384  text_t meta, com, op;
385  bool inbraces = false;
386  bool inquotes = false;
387  bool foundcolon = false;
388  text_t::const_iterator here = instring.begin()+7;
389  text_t::const_iterator end = instring.end();
390  while (here != end) {
391    if (foundcolon) meta.push_back (*here);
392    else if (*here == '(') inbraces = true;
393    else if (*here == ')') inbraces = false;
394    else if (*here == '\'' && !inquotes) inquotes = true;
395    else if (*here == '\'' && inquotes) inquotes = false;
396    else if (*here == ':' && !inbraces) foundcolon = true;   
397    else if (inquotes) op.push_back (*here);
398    else com.push_back (*here);
399    ++here;
400  }
401
402  instring = meta;
403  metaoption.siblingoptions.clear();
404
405  if (com.empty()) {
406    metaoption.mqualifier.sibling = sAll;
407    metaoption.siblingoptions = " ";
408  }
409  else if (com == "first") {
410    metaoption.mqualifier.sibling = sNum;
411    metaoption.siblingoptions = "0";
412  }
413  else if (com == "last") {
414    metaoption.mqualifier.sibling = sNum;
415    metaoption.siblingoptions = "-2"; // == last
416  }
417  else if (com.getint()>0) {
418    metaoption.mqualifier.sibling = sNum;
419    int pos = com.getint()-1;
420    metaoption.siblingoptions +=pos;
421  }
422  else {
423    metaoption.mqualifier.sibling = sAll;
424    metaoption.siblingoptions = op;
425  }
426}
427
428static void get_child_options (text_t &instring, metadata_t &metaoption) {
429
430  assert (instring.size() > 6);
431  if (instring.size() <= 6) return;
432  text_t meta, com, op;
433  bool inbraces = false;
434  bool inquotes = false;
435  bool foundcolon = false;
436  text_t::const_iterator here = instring.begin()+5;
437  text_t::const_iterator end = instring.end();
438  while (here != end) {
439    if (foundcolon) meta.push_back (*here);
440    else if (*here == '(') inbraces = true;
441    else if (*here == ')') inbraces = false;
442    else if (*here == '\'' && !inquotes) inquotes = true;
443    else if (*here == '\'' && inquotes) inquotes = false;
444    else if (*here == ':' && !inbraces) foundcolon = true;
445    else if (inquotes) op.push_back (*here);
446    else com.push_back (*here);
447    ++here;
448  }
449
450  instring = meta;
451  if (com.empty()) {
452    metaoption.mqualifier.child = cAll;
453    metaoption.childoptions = " ";
454  }
455  else if (com == "first") {
456    metaoption.mqualifier.child = cNum;
457    metaoption.childoptions = ".fc";
458  }
459  else if (com == "last") {
460    metaoption.mqualifier.child = cNum;
461    metaoption.childoptions = ".lc";
462  }
463  else if (com.getint()>0) {
464    metaoption.mqualifier.child = cNum;
465    metaoption.childoptions = "."+com;
466  }
467  else {
468    metaoption.mqualifier.child = cAll;
469    metaoption.childoptions = op;
470  }
471}
472
473
474static void get_truncate_options (text_t &instring, metadata_t &metaoption)
475{
476  assert (instring.size() > ((text_t) "truncate").size());
477  if (instring.size() <= ((text_t) "truncate").size()) return;
478  text_t meta, com;
479  bool inbraces = false;
480  bool foundcolon = false;
481  text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
482  text_t::const_iterator end = instring.end();
483  while (here != end) {
484    if (foundcolon) meta.push_back (*here);
485    else if (*here == '(') inbraces = true;
486    else if (*here == ')') inbraces = false;
487    else if (*here == ':' && !inbraces) foundcolon = true;   
488    else com.push_back (*here);
489    ++here;
490  }
491
492  instring = meta;
493
494  if (!com.empty())
495  {
496    metaoption.siblingoptions = com;
497  }
498  else
499  {
500    // Default is 100 characters if not specified
501    metaoption.siblingoptions = "100";
502  }
503}
504
505
506
507static void parse_meta (text_t &meta, metadata_t &metaoption,
508            text_tset &metadata, bool &getParents) {
509
510  // Look for the various format statement modifiers
511  // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
512  //   is irrelevant because this is not stored in metaoption.metacommand anyway
513  bool keep_trying = true;
514  while (keep_trying)
515  {
516    keep_trying = false;
517
518    if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
519    {
520      metaoption.metacommand |= mCgiSafe;
521      meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
522      keep_trying = true;
523    }
524    if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
525    {   
526      metaoption.metacommand |= mSpecial;
527      meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
528      keep_trying = true;
529    }
530
531    // New "truncate" special formatting option
532    if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate"))  // No colons due to truncate(X)
533    {
534      metaoption.metacommand |= mTruncate;
535      get_truncate_options (meta, metaoption);
536      keep_trying = true;
537    }
538    // New "htmlsafe" special formatting option
539    if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
540    {
541      metaoption.metacommand |= mHTMLSafe;
542      meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
543      keep_trying = true;
544    }
545    // New "xmlsafe" special formatting option
546    if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
547    {
548      metaoption.metacommand |= mXMLSafe;
549      meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
550      keep_trying = true;
551    }
552    // New "dmsafe" special formatting option
553    if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
554    {
555      metaoption.metacommand |= mDMSafe;
556      meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
557      keep_trying = true;
558    }
559  }
560
561  bool had_parent_or_child = true;
562  bool prev_was_parent = false;
563  bool prev_was_child  = false;
564
565  while (had_parent_or_child) {
566    if (meta.size() > 7
567    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
568
569      // clear out sibling and child (cmd and options)
570      metaoption.metacommand &= ~(mChild|mSibling);
571      metaoption.childoptions.clear();
572      metaoption.siblingoptions.clear();
573
574      getParents = true;
575      metaoption.metacommand |= mParent;
576      get_parent_options (meta, metaoption);
577
578      if (prev_was_parent) {
579    metaoption.pre_tree_traverse += ".pr";
580      }
581      else if (prev_was_child) {
582    metaoption.pre_tree_traverse += ".fc";
583      }
584
585      prev_was_parent = true;
586      prev_was_child  = false;
587    }
588    else if (meta.size() > 6
589         && (substr (meta.begin(), meta.begin()+5) == "child")) {
590
591      // clear out sibling and parent (cmd and options)
592      metaoption.metacommand &= ~(mParent|mSibling);
593      metaoption.parentoptions.clear();
594      metaoption.siblingoptions.clear();
595
596      metaoption.metacommand |= mChild;
597      get_child_options (meta, metaoption);
598      metadata.insert("contains");
599
600      if (prev_was_parent) {
601    metaoption.pre_tree_traverse += ".pr";
602      }
603      else if (prev_was_child) {
604    metaoption.pre_tree_traverse += ".fc";
605      }
606
607      prev_was_child  = true;
608      prev_was_parent = false;
609    }
610    else {
611      prev_was_child  = false;
612      prev_was_parent = false;
613      had_parent_or_child = false;
614    }
615  }
616
617  // parent/child can have sibling tacked on end also
618  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
619    metaoption.metacommand |= mSibling;
620    get_sibling_options (meta, metaoption);
621  }
622 
623  // check for ex. which may occur in format statements
624  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
625    meta = substr (meta.begin()+3, meta.end());
626  }
627  metadata.insert (meta);
628  metaoption.metaname = meta;
629}
630
631static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
632  if (meta == "collection") {
633    // no qualifiers
634    metaoption.metaname = g_EmptyText;
635    return;
636  }
637  meta = substr (meta.begin()+11, meta.end());
638  metaoption.metaname = meta;
639 
640}
641
642static void parse_meta (text_t &meta, format_t *formatlistptr,
643            text_tset &metadata, bool &getParents) {
644 
645  // check for ex. which may occur in format statements
646  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
647    meta = substr (meta.begin()+3, meta.end());
648  }
649  if (meta == "link")
650    formatlistptr->command = comLink;
651  else if (meta == "/link")
652    formatlistptr->command = comEndLink;
653
654  else if (meta == "srclink") {
655    formatlistptr->command = comAssocLink;
656    formatlistptr->meta.metaname = "srclink_file";
657    metadata.insert("srclink_file");
658  }
659  else if (meta == "srchref") {
660    formatlistptr->command = comAssocLink;
661    formatlistptr->text = "href";
662    formatlistptr->meta.metaname = "srclink_file";
663    metadata.insert("srclink_file");
664  }
665  else if (meta == "/srclink") {
666    formatlistptr->command = comEndAssocLink;
667    formatlistptr->meta.metaname = "srclink_file";
668  }
669  // and weblink etc
670  else if (meta == "href")
671    formatlistptr->command = comHref;
672
673  else if (meta == "num")
674    formatlistptr->command = comNum;
675
676  else if (meta == "icon")
677    formatlistptr->command = comIcon;
678
679  else if (meta == "Text")
680    formatlistptr->command = comDoc;
681 
682  else if (meta == "RelatedDocuments")
683   formatlistptr->command = comRel;
684
685  else if (meta == "highlight")
686    formatlistptr->command = comHighlight;
687
688  else if (meta == "/highlight")
689    formatlistptr->command = comEndHighlight;
690
691  else if (meta == "metadata-spanwrap")
692    formatlistptr->command = comMetadataSpanWrap;
693
694  else if (meta == "/metadata-spanwrap")
695    formatlistptr->command = comEndMetadataSpanWrap;
696
697  else if (meta == "metadata-divwrap")
698    formatlistptr->command = comMetadataDivWrap;
699
700  else if (meta == "/metadata-divwrap")
701    formatlistptr->command = comEndMetadataDivWrap;
702
703  else if (meta == "Summary")
704    formatlistptr->command = comSummary;
705
706  else if (meta == "DocImage")
707    formatlistptr->command = comImage;
708
709  else if (meta == "DocTOC")
710    formatlistptr->command = comTOC;
711
712  else if (meta == "DocumentButtonDetach")
713     formatlistptr->command = comDocumentButtonDetach;
714 
715  else if (meta == "DocumentButtonHighlight")
716     formatlistptr->command = comDocumentButtonHighlight;
717 
718  else if (meta == "DocumentButtonExpandContents")
719    formatlistptr->command = comDocumentButtonExpandContents;
720
721  else if (meta == "DocumentButtonExpandText")
722     formatlistptr->command = comDocumentButtonExpandText;
723
724  else if (meta == "DocOID")
725     formatlistptr->command = comOID;
726  else if (meta == "DocTopOID")
727    formatlistptr->command = comTopOID;
728  else if (meta == "DocRank")
729    formatlistptr->command = comRank;
730  else if (meta == "DocTermsFreqTotal")
731    formatlistptr->command = comDocTermsFreqTotal;
732  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
733    formatlistptr->command = comCollection;
734    parse_coll_meta(meta, formatlistptr->meta);
735  }
736  else {
737    formatlistptr->command = comMeta;
738    parse_meta (meta, formatlistptr->meta, metadata, getParents);
739  }
740}
741
742
743static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
744              text_tset &metadata, bool &getParents) {
745
746  text_t text;
747  text_t::const_iterator here = formatstring.begin();
748  text_t::const_iterator end = formatstring.end();
749
750  while (here != end) {
751
752    if (*here == '\\') {
753      ++here;
754      if (here != end) text.push_back (*here);
755
756    } else if (*here == '{') {
757      if (!text.empty()) {
758    formatlistptr->command = comText;
759    formatlistptr->text = text;
760    formatlistptr->nextptr = new format_t();
761    formatlistptr = formatlistptr->nextptr;
762   
763    text.clear();
764      }
765      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
766
767    formatlistptr->nextptr = new format_t();
768    formatlistptr = formatlistptr->nextptr;
769    if (here == end) break;
770      }
771    } else if (*here == '[') {
772      if (!text.empty()) {
773    formatlistptr->command = comText;
774    formatlistptr->text = text;
775    formatlistptr->nextptr = new format_t();
776    formatlistptr = formatlistptr->nextptr;
777
778    text.clear();
779      }
780      text_t meta;
781      ++here;
782      while (*here != ']') {
783    if (here == end) return false;
784    meta.push_back (*here);
785    ++here;
786      }
787      parse_meta (meta, formatlistptr, metadata, getParents);
788      formatlistptr->nextptr = new format_t();
789      formatlistptr = formatlistptr->nextptr;
790
791    } else
792      text.push_back (*here);
793
794    if (here != end) ++here;
795  }
796  if (!text.empty()) {
797    formatlistptr->command = comText;
798    formatlistptr->text = text;
799    formatlistptr->nextptr = new format_t();
800    formatlistptr = formatlistptr->nextptr;
801
802  }
803  return true;
804}
805
806
807static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
808              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
809
810  text_t::const_iterator it = findchar (here, end, '}');
811  if (it == end) return false;
812
813  text_t com = substr (here, it);
814  here = findchar (it, end, '{');
815  if (here == end) return false;
816  else ++here;
817
818  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
819  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
820  else return false;
821
822  int commacount = 0;
823  text_t text;
824  while (here != end) {
825
826    if (*here == '\\') {
827      ++here;
828      if (here != end) text.push_back(*here);
829     
830    }
831 
832    else if (*here == ',' || *here == '}' || *here == '{') {
833
834      if (formatlistptr->command == comOr) {
835    // the {Or}{this, or this, or this, or this} statement
836    format_t *or_ptr;
837   
838    // find the next unused orptr
839    if (formatlistptr->orptr == NULL) {
840      formatlistptr->orptr = new format_t();
841      or_ptr = formatlistptr->orptr;
842    } else {
843      or_ptr = formatlistptr->orptr;
844      while (or_ptr->nextptr != NULL)
845        or_ptr = or_ptr->nextptr;
846      or_ptr->nextptr = new format_t();
847      or_ptr = or_ptr->nextptr;
848    }
849
850    if (!text.empty())
851      {
852        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
853      }
854
855    if (*here == '{')
856      {
857        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
858        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
859        // The latter can always be re-written:
860        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
861       
862        if (!text.empty()) // already used up allocated format_t
863          {
864        // => allocate new one for detected action
865        or_ptr->nextptr = new format_t();
866        or_ptr = or_ptr->nextptr;
867          }
868        if (!parse_action(++here, end, or_ptr, metadata, getParents))
869          {
870        return false;
871          }
872      }
873    else
874      {
875        if (*here == '}') break;
876      }
877    text.clear();
878
879      }
880
881      // Parse an {If}{decide,do,else} statement
882      else {
883   
884    // Read the decision component. 
885    if (commacount == 0) {
886      // Decsion can be a metadata element, or a piece of text.
887      // Originally Stefan's code, updated 25/10/2000 by Gordon.
888
889      text_t::const_iterator beginbracket = text.begin();
890      text_t::const_iterator endbracket = (text.end() - 1);
891
892      // Decision is based on a metadata element
893      if ((*beginbracket == '[') && (*endbracket == ']')) {
894        // Ignore the surrounding square brackets
895        text_t meta = substr (beginbracket+1, endbracket);
896        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
897        ++commacount;
898        text.clear();
899      }
900
901      // Decision is a piece of text (probably a macro like _cgiargmode_).
902      else {
903
904        // hunt for any metadata in string, which might be uses in
905        // to test a condition, e.g. [Format] eq 'PDF'
906        format_t* dummyformat = new format_t();
907        // update which metadata fields needed
908        // (not interested in updatng formatlistptr)
909        parse_string (text, dummyformat, metadata, getParents);
910        delete dummyformat;
911
912        formatlistptr->decision.command = dText;
913        formatlistptr->decision.text = text;
914        ++commacount;
915        text.clear();
916      }
917    }
918
919    // Read the "then" and "else" components of the {If} statement.
920    else {
921      format_t** nextlistptr = NULL;
922      if (commacount == 1) {
923        nextlistptr = &formatlistptr->ifptr;
924      } else if (commacount == 2 ) {
925        nextlistptr = &formatlistptr->elseptr;
926      } else {
927        return false;
928      }
929
930      if (!text.empty()) {
931        if (*nextlistptr == NULL) {
932          *nextlistptr = new format_t();
933        } else {
934
935          // skip to the end of any format_t statements already added
936          while ((*nextlistptr)->nextptr != NULL)
937          {
938        nextlistptr = &(*nextlistptr)->nextptr;
939          }
940
941          (*nextlistptr)->nextptr = new format_t();
942          nextlistptr = &(*nextlistptr)->nextptr;
943        }
944
945        if (!parse_string (text, *nextlistptr, metadata, getParents))
946          {
947        return false;
948          }
949        text.clear();
950      }
951     
952      if (*here == '{')
953        {
954          if (*nextlistptr == NULL) {
955        *nextlistptr = new format_t();
956          } else {
957        // skip to the end of any format_t statements already added
958        while ((*nextlistptr)->nextptr != NULL)
959          {
960            nextlistptr = &(*nextlistptr)->nextptr;
961          }
962
963        (*nextlistptr)->nextptr = new format_t();
964        nextlistptr = &(*nextlistptr)->nextptr;
965          }
966
967          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
968        {
969          return false;
970        }
971        }
972      else
973        {
974          if (*here == '}') break;
975          ++commacount;
976        }
977    }
978      }
979     
980    } else text.push_back(*here);
981   
982    if (here != end) ++here;
983  }
984
985  return true;
986}
987
988
989static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
990                const text_t metaname, int metapos=-1)
991{
992
993  text_t tag_type = metadata_wrap_type;
994  text_t editable_type = (metaname == "Text") ? "text" : "metadata";
995
996  text_t wrapped_metatext = "<" + tag_type + " ";
997  wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
998
999  wrapped_metatext += "docoid=\"" + OID + "\" "; 
1000  wrapped_metatext += "metaname=\"" + metaname + "\"";
1001
1002  if (metapos>=0) {
1003    text_t metapos_str = metapos;
1004    wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1005  }
1006
1007  wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1008
1009  return wrapped_metatext;
1010}
1011
1012   
1013
1014bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1015             text_tset &metadata, bool &getParents) {
1016
1017  formatlistptr->clear();
1018  getParents = false;
1019
1020  return (parse_string (formatstring, formatlistptr, metadata, getParents));
1021}
1022
1023// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1024// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1025
1026static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1027{
1028  text_t no_ns_metaname = remove_namespace(meta.metaname);
1029  text_t formatted_metatext;
1030  bool first = true;
1031 
1032  const int start_i=0;
1033  const int end_i = metainfo.values.size()-1;
1034 
1035  if (position == -1) { // all
1036    for (int i=start_i; i<=end_i; ++i) {
1037      if (!first) formatted_metatext += meta.siblingoptions;
1038     
1039      text_t fresh_metatext;
1040
1041      if (meta.metacommand & mSpecial) {
1042    // special formatting
1043    if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1044    else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1045    else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1046      }
1047      else fresh_metatext = metainfo.values[i];
1048
1049      // New "truncate" special formatting option
1050      if (meta.metacommand & mTruncate)
1051      {
1052    int truncate_length = meta.siblingoptions.getint();
1053    text_t truncated_value = fresh_metatext;
1054    if (truncated_value.size() > truncate_length)
1055    {
1056      truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1057    }
1058    fresh_metatext = truncated_value;
1059      }
1060      // New "xmlsafe" special formatting option
1061      if (meta.metacommand & mXMLSafe)
1062      {
1063    // Make it XML-safe
1064    text_t text_xml_safe = "";
1065    text_t::const_iterator text_iterator = fresh_metatext.begin();
1066    while (text_iterator != fresh_metatext.end())
1067    {
1068      if (*text_iterator == '&') text_xml_safe += "&amp;";
1069      else if (*text_iterator == '<') text_xml_safe += "&lt;";
1070      else if (*text_iterator == '>') text_xml_safe += "&gt;";
1071      else text_xml_safe.push_back(*text_iterator);
1072      text_iterator++;
1073    }
1074    fresh_metatext = text_xml_safe;
1075      }
1076      // New "htmlsafe" special formatting option
1077      if (meta.metacommand & mHTMLSafe)
1078      {
1079    // Make it HTML-safe
1080    text_t text_html_safe = "";
1081    text_t::const_iterator text_iterator = fresh_metatext.begin();
1082    while (text_iterator != fresh_metatext.end())
1083    {
1084      if (*text_iterator == '&') text_html_safe += "&amp;";
1085      else if (*text_iterator == '<') text_html_safe += "&lt;";
1086      else if (*text_iterator == '>') text_html_safe += "&gt;";
1087      else if (*text_iterator == '"') text_html_safe += "&quot;";
1088      else text_html_safe.push_back(*text_iterator);
1089      text_iterator++;
1090    }
1091    fresh_metatext = text_html_safe;
1092      }
1093      // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1094      if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1095      {
1096    // Make it macro-safe
1097    text_t text_dm_safe = dm_safe(fresh_metatext);
1098    fresh_metatext = text_dm_safe;
1099      }
1100
1101      if (metadata_wrap) {
1102    fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1103      }
1104      formatted_metatext += fresh_metatext;
1105
1106      first = false;
1107     
1108    }
1109  } else {
1110    if (position == -2) { // end
1111      position = end_i;
1112    } else if (position < start_i || position > end_i) {
1113      return "";
1114    }
1115
1116    text_t fresh_metatext;
1117    if (meta.metacommand & mSpecial) {
1118
1119      // special formatting
1120      if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1121      else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1122      else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1123    }
1124    else fresh_metatext = metainfo.values[position];
1125
1126    // New "truncate" special formatting option
1127    if (meta.metacommand & mTruncate)
1128    {
1129      int truncate_length = meta.siblingoptions.getint();
1130      text_t truncated_value = fresh_metatext;
1131      if (truncated_value.size() > truncate_length)
1132      {
1133    truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1134      }
1135      fresh_metatext = truncated_value;
1136    }
1137    // New "xmlsafe" special formatting option
1138    if (meta.metacommand & mXMLSafe)
1139    {
1140      // Make it XML-safe
1141      text_t text_xml_safe = "";
1142      text_t::const_iterator text_iterator = fresh_metatext.begin();
1143      while (text_iterator != fresh_metatext.end())
1144      {
1145    if (*text_iterator == '&') text_xml_safe += "&amp;";
1146    else if (*text_iterator == '<') text_xml_safe += "&lt;";
1147    else if (*text_iterator == '>') text_xml_safe += "&gt;";
1148    else text_xml_safe.push_back(*text_iterator);
1149    text_iterator++;
1150      }
1151      fresh_metatext = text_xml_safe;
1152    }
1153    // New "htmlsafe" special formatting option
1154    if (meta.metacommand & mHTMLSafe)
1155    {
1156      // Make it HTML-safe
1157      text_t text_html_safe = "";
1158      text_t::const_iterator text_iterator = fresh_metatext.begin();
1159      while (text_iterator != fresh_metatext.end())
1160      {
1161    if (*text_iterator == '&') text_html_safe += "&amp;";
1162    else if (*text_iterator == '<') text_html_safe += "&lt;";
1163    else if (*text_iterator == '>') text_html_safe += "&gt;";
1164    else if (*text_iterator == '"') text_html_safe += "&quot;";
1165    else if (*text_iterator == '\'') text_html_safe += "&#39;";
1166    else if (*text_iterator == ',') text_html_safe += "&#44;";
1167    else text_html_safe.push_back(*text_iterator);
1168    text_iterator++;
1169      }
1170      fresh_metatext = text_html_safe;
1171    }
1172    // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1173    if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1174    {
1175      // Make it macro-safe
1176      text_t text_dm_safe = dm_safe(fresh_metatext);
1177      fresh_metatext = text_dm_safe;
1178    }
1179
1180    if (metadata_wrap) {
1181      fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1182    }
1183
1184    formatted_metatext += fresh_metatext;
1185  }
1186
1187  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1188  else return formatted_metatext;
1189}
1190
1191static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1192{
1193 
1194  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1195
1196  switch (meta.mqualifier.parent) {
1197  case pNone:
1198    return "Nothing!!";
1199    break;
1200
1201  case pImmediate:
1202    if (parent != NULL) {
1203      text_t parent_oid = get_parent(docinfo.OID);
1204      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1205    }
1206    break;
1207
1208  case pTop:
1209    if (parent != NULL) {
1210      text_t parent_oid = get_parent(docinfo.OID);
1211
1212      while (parent->parent != NULL) {
1213    parent = parent->parent;
1214    parent_oid = get_parent(parent_oid);
1215      }
1216      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1217    }
1218    break;
1219
1220  case pAll:
1221    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1222    if (parent != NULL) {
1223      text_t parent_oid = get_parent(docinfo.OID);
1224
1225      text_tarray tmparray;
1226      while (parent != NULL) {
1227    tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1228    parent = parent->parent;
1229    parent_oid = get_parent(parent_oid);
1230
1231      }
1232      // now join them up - use teh parent separator
1233      bool first = true;
1234      text_t tmp;
1235      text_tarray::reverse_iterator here = tmparray.rbegin();
1236      text_tarray::reverse_iterator end = tmparray.rend();
1237      while (here != end) {
1238    if (!first) tmp += meta.parentoptions;
1239    tmp += *here;
1240    first = false;
1241    ++here;
1242      }
1243      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1244      else return tmp;
1245    }
1246  }
1247  return "";
1248
1249}
1250
1251static text_t get_child_meta (const text_t& collection,
1252                  recptproto* collectproto,
1253                  ResultDocInfo_t &docinfo, displayclass &disp,
1254                  const metadata_t &meta, text_tmap &options,
1255                  ostream& logout, int siblings_values)
1256{
1257  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1258 
1259  const text_t& pre_tree_trav = meta.pre_tree_traverse;
1260  const text_t& child_metaname = meta.metaname;
1261  const text_t& child_field = meta.childoptions;
1262  text_tset child_metadata;
1263  child_metadata.insert(child_metaname);
1264
1265  FilterResponse_t child_response;
1266  if (meta.mqualifier.child == cNum) {
1267    // just one child
1268    //get the information associated with the metadata for child doc
1269    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1270           child_metadata, false, collectproto, child_response,
1271           logout)) return ""; // invalid child number
1272
1273      if (child_response.docInfo.empty()) return false; // no info for the child
1274 
1275      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1276      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1277 
1278      text_t child_metavalue
1279    = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1280      return expand_metadata(child_metavalue,collection,collectproto,
1281             child_docinfo,disp,options,logout);
1282  }
1283 
1284   
1285  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1286
1287
1288  if (!pre_tree_trav.empty()) {
1289    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1290    FilterResponse_t trav_response;
1291
1292    text_tset trav_metadata;
1293    trav_metadata.insert("contains");
1294
1295    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1296           trav_metadata, false, collectproto, trav_response,
1297           logout)) return ""; // invalid pre_tree_trav
1298
1299    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1300 
1301    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1302
1303    // use this for rest of routine
1304    docinfo = trav_docinfo;
1305  }
1306 
1307  // we need to get all children
1308  text_t result = "";
1309  text_tarray children;
1310  text_t contains = docinfo.metadata["contains"].values[0];
1311  splitchar (contains.begin(), contains.end(), ';', children);
1312  text_tarray::const_iterator here = children.begin();
1313  text_tarray::const_iterator end = children.end();
1314  bool first = true;
1315  while (here !=end) {
1316    text_t oid = *here;
1317    here++;
1318    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1319
1320    //get the information associated with the metadata for child doc
1321    if (!get_info (oid, collection, "", child_metadata,
1322           false, collectproto, child_response, logout) ||
1323    child_response.docInfo.empty()) {
1324      first = false;
1325      continue;
1326    }
1327   
1328   
1329    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1330    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1331   
1332    text_t child_metavalue
1333      = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1334
1335
1336    if (!first) result += child_field;
1337    first = false;
1338    // need to do this here cos otherwise we are in the wrong document
1339    text_t em =  expand_metadata(child_metavalue,collection,collectproto,
1340                 child_docinfo,disp,options,logout);
1341
1342    result += em;
1343  }
1344  return result;
1345   
1346}
1347
1348static text_t get_meta (const text_t& collection, recptproto* collectproto,
1349            ResultDocInfo_t &docinfo, displayclass &disp,
1350            const metadata_t &meta, text_tmap &options,
1351            ostream& logout) {
1352 
1353  // make sure we have the requested metadata
1354  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1355  if (it == docinfo.metadata.end()) return "";
1356
1357  int siblings_values = 0; // default is no siblings, just the first metadata available
1358  if (meta.metacommand & mSibling) {
1359    if (meta.mqualifier.sibling == sAll) {
1360      siblings_values = -1; //all
1361    } else if (meta.mqualifier.sibling == sNum) {
1362      siblings_values = meta.siblingoptions.getint();
1363    }
1364  }
1365  if (meta.metacommand & mParent) {
1366    return get_parent_meta(docinfo,meta,siblings_values);
1367  }
1368
1369  else if (meta.metacommand & mChild) {
1370    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1371                options,logout, siblings_values);
1372  }
1373  else if (meta.metacommand & mSibling) { // only siblings
1374    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1375    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1376  }
1377  else {
1378   
1379    // straightforward metadata request (nothing fancy)
1380
1381    text_t classifier_metaname = docinfo.classifier_metadata_type;
1382    int metaname_index
1383      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1384    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1385  }
1386 
1387  return "";
1388}
1389
1390static text_t get_or (const text_t& collection, recptproto* collectproto,
1391              ResultDocInfo_t &docinfo, displayclass &disp,
1392              format_t *orptr, text_tmap &options,
1393              ostream& logout) {
1394
1395  while (orptr != NULL) {
1396
1397    if (metadata_wrap) {
1398      // need to be a bit more careful about this
1399      // => test for it *without* spanwrap or divwrap, and if defined, then
1400      // got back and generate it again, this time with spanwrap/divwrap on
1401
1402      metadata_wrap = false;
1403      text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1404                       options, logout);
1405      metadata_wrap = true;
1406      if (!test_tmp.empty()) {
1407
1408    return format_string (collection,collectproto,docinfo, disp, orptr,
1409                  options, logout);
1410      }
1411    }
1412    else {
1413      text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1414                  options, logout);
1415      if (!tmp.empty()) return tmp;
1416    }
1417
1418    orptr = orptr->nextptr;
1419  }
1420  return "";
1421}
1422
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1428
1429static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1430{
1431  int pos = start_pos;
1432  while (pos<outstring.size()) {
1433    if (!char_is_whitespace(outstring[pos])) {
1434      break;
1435    }
1436    ++pos;
1437  }
1438
1439  return pos;
1440}
1441
1442static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1443{
1444  int pos = start_pos;
1445  while (pos>=0) {
1446    if (!char_is_whitespace(outstring[pos])) {
1447      break;
1448    }
1449    --pos;
1450  }
1451
1452  return pos;
1453}
1454
1455static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1456{
1457  int pos = start_pos;
1458  while (pos>=0) {
1459    if (char_is_whitespace(outstring[pos])) {
1460      break;
1461    }
1462    --pos;
1463  }
1464
1465  return pos;
1466}
1467
1468
1469static int rscan_for(const text_t& outstring, const int start_pos,
1470             const char find_c)
1471{
1472  int pos = start_pos;
1473  while (pos>=0) {
1474    char c = outstring[pos];
1475    if (outstring[pos] == find_c) {
1476      break;
1477    }
1478    --pos;
1479  }
1480
1481  return pos;
1482}
1483
1484text_t extract_substr(const text_t& outstring, const int start_pos,
1485              const int end_pos)
1486{
1487  text_t extracted_str;
1488  extracted_str.clear();
1489
1490  for (int pos=start_pos; pos<=end_pos; ++pos) {
1491    extracted_str.push_back(outstring[pos]);
1492  }
1493
1494  return extracted_str;
1495}
1496
1497
1498static text_t expand_potential_metadata(const text_t& collection,
1499                    recptproto* collectproto,
1500                    ResultDocInfo_t &docinfo,
1501                    displayclass &disp,
1502                    const text_t& intext,
1503                    text_tmap &options,
1504                    ostream& logout)
1505{
1506  text_t outtext;
1507
1508  // decide if dealing with metadata or text
1509
1510  text_t::const_iterator beginbracket = intext.begin();
1511  text_t::const_iterator endbracket = (intext.end() - 1);
1512
1513  // Decision is based on a metadata element
1514  if ((*beginbracket == '[') && (*endbracket == ']')) {
1515    // Ignore the surrounding square brackets
1516    text_t meta_text = substr (beginbracket+1, endbracket);
1517
1518    if (meta_text == "Text") {
1519      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1520    }
1521    else {
1522
1523      text_tset metadata;
1524      bool getParents =false;
1525      metadata_t meta;
1526     
1527      parse_meta (meta_text, meta, metadata, getParents);   
1528      outtext
1529    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1530    }
1531
1532  }
1533  else {
1534    outtext = intext;
1535  }
1536
1537  return outtext;
1538}
1539
1540
1541
1542
1543static bool uses_expression(const text_t& collection, recptproto* collectproto,
1544                ResultDocInfo_t &docinfo,
1545                displayclass &disp,
1546                const text_t& outstring, text_t& lhs_expr,
1547                text_t& op_expr, text_t& rhs_expr,
1548                text_tmap &options,
1549                ostream& logout)
1550{
1551  // Note: the string may not be of the form: str1 op str2, however
1552  // to deterine this we have to process it on the assumption it is,
1553  // and if at any point an 'erroneous' value is encountered, return
1554  // false and let something else have a go at evaluating it
1555
1556  // Starting at the end of the string and working backwards ..
1557
1558  const int outstring_len = outstring.size();
1559
1560  // skip over white space
1561  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1562
1563  if (rhs_end<=0) {
1564    // no meaningful text or (rhs_end==0) no room for operator
1565    return false;
1566  }
1567
1568  // check for ' or " and then scan over token
1569  const char potential_quote = outstring[rhs_end];
1570  int rhs_start=rhs_end;
1571  bool quoted = false;
1572
1573  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1574    --rhs_end;
1575    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1576    quoted = true;
1577  }
1578  else {
1579    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1580  }
1581
1582  if ((rhs_end-rhs_start)<0) {
1583    // no meaningful rhs expression
1584    return false;
1585  }
1586
1587  // form rhs_expr
1588  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1589
1590  // skip over white space
1591  const int to_whitespace = (quoted) ? 2 : 1;
1592
1593  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1594  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1595
1596  if ((op_end<0) && (op_start<0)) {
1597    // no meaningful expression operator
1598    return false;
1599  }
1600
1601  if (op_end-op_start<0) {
1602    // no meaningful expression operator
1603    return false;
1604  }
1605
1606  op_expr = extract_substr(outstring,op_start,op_end);
1607
1608
1609  // check for operator
1610  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1611     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1612
1613    // not a valid operator
1614    return false;
1615  }
1616
1617  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1618  if (lhs_end<0) {
1619    // no meaningful lhs expression
1620    return false;
1621  }
1622
1623  int lhs_start = scan_over_whitespace(outstring,0);
1624
1625  // form lhs_expr from remainder of string
1626  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1627
1628  // Now we know we have a valid expression, look up any
1629  // metadata terms
1630
1631  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1632                       disp,rhs_expr,options,logout);
1633  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1634                       disp,lhs_expr,options,logout);
1635
1636  return true;
1637}
1638
1639static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1640                 const text_t& rhs_expr, ostream& logout)
1641{
1642  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1643  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1644  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1645  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1646  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1647  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1648  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1649  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1650  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1651  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1652  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1653  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1654  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1655  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1656  else {
1657    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1658  }
1659 
1660  return false;
1661}
1662
1663
1664static text_t get_if (const text_t& collection, recptproto* collectproto,
1665              ResultDocInfo_t &docinfo, displayclass &disp,
1666              const decision_t &decision,
1667              format_t *ifptr, format_t *elseptr,
1668              text_tmap &options, ostream& logout)
1669{
1670  // If the decision component is a metadata element, then evaluate it
1671  // to see whether we output the "then" or the "else" clause
1672  if (decision.command == dMeta) {
1673
1674    bool store_metadata_wrap = metadata_wrap;
1675    metadata_wrap = 0;
1676
1677    // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1678    bool metadata_exists
1679      = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1680           logout) != "");
1681
1682    metadata_wrap = store_metadata_wrap;
1683
1684    if (metadata_exists) {
1685      if (ifptr != NULL)
1686    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1687                     options, logout);
1688    }
1689    else {
1690      if (elseptr != NULL)
1691    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1692                     options, logout);
1693    }
1694  }
1695
1696  // If the decision component is text, then evaluate it (it is probably a
1697  // macro like _cgiargmode_) to decide what to output.
1698  else if (decision.command == dText) {
1699
1700    text_t outstring;
1701    disp.expandstring (decision.text, outstring);
1702
1703    // Check for if expression in form: str1 op str2
1704    // (such as [x] eq "y")
1705    text_t lhs_expr, op_expr, rhs_expr;
1706    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1707      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1708    if (ifptr != NULL) {
1709      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1710                       options, logout);
1711    }
1712    else {
1713      return "";
1714    }
1715      } else {
1716    if (elseptr != NULL) {
1717      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1718                       options, logout);
1719    }
1720    else {
1721      return "";
1722    }
1723      }
1724    }
1725
1726
1727    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1728    // a cgi argument macro that has not been set, it evaluates to itself.
1729    // Therefore, were have to say that a piece of text evalautes true if
1730    // it is non-empty and if it is a cgi argument evaulating to itself.
1731
1732    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1733      if (ifptr != NULL)
1734    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1735                     options, logout);
1736    } else {
1737      if (elseptr != NULL)
1738    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1739                     options, logout);
1740    }
1741  }
1742 
1743  return "";
1744}
1745
1746bool includes_metadata(const text_t& text)
1747{
1748  text_t::const_iterator here = text.begin();
1749  text_t::const_iterator end = text.end();
1750  while (here != end) {
1751    if (*here == '[') return true;
1752    ++here;
1753  }
1754
1755  return false;
1756}
1757
1758static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1759                  recptproto* collectproto,
1760                  ResultDocInfo_t &docinfo,
1761                  displayclass &disp, text_tmap &options,
1762                  ostream &logout) {
1763     
1764  if (includes_metadata(metavalue)) {
1765   
1766    // text has embedded metadata in it => expand it
1767    FilterRequest_t request;
1768    FilterResponse_t response;
1769   
1770    request.getParents = false;
1771   
1772    format_t *expanded_formatlistptr = new format_t();
1773    parse_formatstring (metavalue, expanded_formatlistptr,
1774            request.fields, request.getParents);
1775   
1776    // retrieve metadata
1777    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1778         collectproto, response, logout);
1779   
1780    if (!response.docInfo.empty()) {
1781     
1782      text_t expanded_metavalue
1783    = get_formatted_string(collection, collectproto,
1784                   response.docInfo[0], disp, expanded_formatlistptr,
1785                   options, logout);
1786     
1787      return expanded_metavalue;
1788    }
1789    else {
1790      return metavalue;
1791    }
1792  }
1793  else {
1794   
1795    return metavalue;
1796  }
1797}
1798
1799text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1800               displayclass &disp,
1801               text_t meta_name, ostream& logout) {
1802 
1803  ColInfoResponse_t collectinfo;
1804  comerror_t err;
1805  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1806  text_t meta_value = "";
1807  text_t lang;
1808  disp.expandstring("_cgiargl_",lang);
1809  if (lang.empty()) {
1810    lang = "en";
1811  }
1812
1813  if (err == noError) {
1814    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1815  }
1816  return meta_value;
1817 
1818
1819}
1820text_t format_string (const text_t& collection, recptproto* collectproto,
1821              ResultDocInfo_t &docinfo, displayclass &disp,
1822              format_t *formatlistptr, text_tmap &options,
1823              ostream& logout) {
1824
1825  if (formatlistptr == NULL) return "";
1826
1827  switch (formatlistptr->command) {
1828     case comOID:
1829    return docinfo.OID;
1830  case comTopOID:
1831    {
1832      text_t top_id;
1833      get_top(docinfo.OID, top_id);
1834      return top_id;
1835    }
1836  case comRank:
1837    return text_t(docinfo.ranking);
1838     case comText:
1839    return formatlistptr->text;
1840     case comLink:
1841    return options["link"];
1842     case comEndLink:
1843       {
1844     if (options["link"].empty()) return "";
1845    else return "</a>";
1846       }
1847     case comHref:
1848    return get_href(options["link"]);
1849     case comIcon:
1850    return options["icon"];
1851     case comNum:
1852    return docinfo.result_num;
1853     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1854    return get_related_docs(collection, collectproto, docinfo, logout);
1855
1856     case comSummary:
1857       return format_summary(collection, collectproto, docinfo, disp, options, logout);
1858     case comAssocLink:
1859       {
1860     text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1861         if (!link_filename.empty()) {
1862       text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1863       if (formatlistptr->text == "href") {
1864         return href;
1865       }
1866       return "<a href=\""+ href + "\">";
1867     }
1868     return "";
1869       }
1870  case comEndAssocLink:
1871    {
1872    text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1873    if (!link_filename.empty()) {
1874      return "</a>";
1875    }
1876    return "";
1877    }
1878     case comMeta:
1879    {
1880       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1881       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1882    }
1883
1884     case comDoc:
1885       return format_text(collection, collectproto, docinfo, disp, options, logout);
1886
1887     case comImage:
1888    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1889     case comTOC:
1890    return options["DocTOC"];
1891     case comDocumentButtonDetach:
1892    return options["DocumentButtonDetach"];
1893     case comDocumentButtonHighlight:
1894    return options["DocumentButtonHighlight"];
1895     case comDocumentButtonExpandContents:
1896    return options["DocumentButtonExpandContents"];
1897     case comDocumentButtonExpandText:
1898    return options["DocumentButtonExpandText"];
1899     case comHighlight:
1900    if (options["highlight"] == "1") return "<b>";
1901    break;
1902     case comEndHighlight:
1903    if (options["highlight"] == "1") return "</b>";
1904    break;
1905     case comMetadataSpanWrap:
1906        metadata_wrap=true;  metadata_wrap_type="span"; return "";
1907    break;
1908     case comEndMetadataSpanWrap:
1909    metadata_wrap=false; metadata_wrap_type="";     return "";
1910    break;
1911     case comMetadataDivWrap:
1912        metadata_wrap=true;  metadata_wrap_type="div";  return "";
1913    break;
1914     case comEndMetadataDivWrap:
1915    metadata_wrap=false; metadata_wrap_type="";     return "";
1916    break;
1917     case comIf:
1918    return get_if (collection, collectproto, docinfo, disp,
1919               formatlistptr->decision, formatlistptr->ifptr,
1920               formatlistptr->elseptr, options, logout);
1921     case comOr:
1922    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1923               options, logout);
1924     case comDocTermsFreqTotal:
1925       return docinfo.num_terms_matched;
1926     case comCollection:
1927       if (formatlistptr->meta.metaname == g_EmptyText) {
1928     return collection;
1929       }
1930       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1931   
1932  }
1933  return "";
1934}
1935
1936text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1937                 ResultDocInfo_t &docinfo, displayclass &disp,
1938                 format_t *formatlistptr, text_tmap &options,
1939                 ostream& logout) {
1940
1941   text_t ft;
1942   while (formatlistptr != NULL)
1943      {
1944     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1945                  options, logout);
1946     formatlistptr = formatlistptr->nextptr;
1947      }
1948   
1949   return ft;
1950}
1951
1952
1953// we have only preloaded the text in DocumentAction. But you may want
1954// to get the text in query, so copy what we have done with
1955// format_summary and get the text here. Probably is quite expensive?
1956text_t format_text (const text_t& collection, recptproto* collectproto,
1957            ResultDocInfo_t &docinfo, displayclass &disp,
1958            text_tmap &options, ostream& logout)
1959{
1960  text_t text;
1961
1962  if (!options["text"].empty()) {
1963    text = options["text"];
1964  }
1965  else {
1966    // get document text here
1967    DocumentRequest_t docrequest;
1968    DocumentResponse_t docresponse;
1969    comerror_t err;
1970    docrequest.OID = docinfo.OID;
1971    collectproto->get_document (collection, docrequest, docresponse, err, logout);
1972    text = docresponse.doc;
1973  }
1974
1975  if (metadata_wrap) {
1976    text = wrap_metatext(text,docinfo.OID,"Text");
1977  }
1978
1979  return text;
1980}
1981 
1982/* FUNCTION NAME: format_summary
1983 * DESC: this is invoked when a [Summary] special metadata is processed.
1984 * RETURNS: a query-biased summary for the document */
1985
1986text_t format_summary (const text_t& collection, recptproto* collectproto,
1987               ResultDocInfo_t &docinfo, displayclass &disp,
1988               text_tmap &options, ostream& logout) {
1989
1990  // GRB: added code here to ensure that the cstr (and other collections)
1991  //      uses the document metadata item Summary, rather than compressing
1992  //      the text of the document, processed via the methods in
1993  //      summarise.cpp
1994
1995  text_t summary;
1996
1997  if (docinfo.metadata.count("Summary") > 0 &&
1998      docinfo.metadata["Summary"].values.size() > 0) {
1999    summary = docinfo.metadata["Summary"].values[0];
2000  }
2001  else {
2002 
2003    text_t textToSummarise, query;
2004
2005    if(options["text"].empty()) { // get document text
2006      DocumentRequest_t docrequest;
2007      DocumentResponse_t docresponse;
2008      comerror_t err;
2009      docrequest.OID = docinfo.OID;
2010      collectproto->get_document (collection, docrequest, docresponse, err, logout);
2011      textToSummarise = docresponse.doc;
2012    }
2013    else {
2014      // in practice, this would not happen, because text is only
2015      // loaded with the [Text] command
2016      textToSummarise = options["text"];
2017    }
2018   
2019    disp.expandstring("_cgiargq_",query);
2020    summary = summarise(textToSummarise,query,80);
2021    //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2022  }
2023
2024  summary.replace("'","&#039;");
2025  summary.replace("\n","&#013;");
2026
2027  if (metadata_wrap) {
2028    summary = wrap_metatext(summary,docinfo.OID,"Summary");
2029  }
2030
2031  return summary;
2032}
Note: See TracBrowser for help on using the browser.