root/gsdl/trunk/runtime-src/src/recpt/formattools.cpp @ 19058

Revision 19058, 48.9 KB (checked in by davidb, 11 years ago)

Fixed bug that causes Local Library Server to crash with an {If} statement with two chars in it (that did not include any 'op').

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
34// a few function prototypes
35
36static text_t format_string (const text_t& collection, recptproto* collectproto,
37                 ResultDocInfo_t &docinfo, displayclass &disp,
38                 format_t *formatlistptr, text_tmap &options,
39                 ostream& logout);
40
41static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
42              format_t *formatlistptr, text_tset &metadata, bool &getParents);
43
44static text_t format_summary (const text_t& collection, recptproto* collectproto,
45                  ResultDocInfo_t &docinfo, displayclass &disp,
46                  text_tmap &options, ostream& logout);
47static text_t format_text (const text_t& collection, recptproto* collectproto,
48                  ResultDocInfo_t &docinfo, displayclass &disp,
49                  text_tmap &options, ostream& logout);
50
51static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
52                  recptproto* collectproto, ResultDocInfo_t &docinfo,
53                  displayclass &disp, text_tmap &options,
54                  ostream &logout);
55
56
57void metadata_t::clear() {
58  metaname.clear();
59  metacommand = mNone;
60  mqualifier.parent  = pNone;
61  mqualifier.sibling = sNone;
62  mqualifier.child   = cNone;
63  pre_tree_traverse.clear();
64  parentoptions.clear();
65  siblingoptions.clear();
66  childoptions.clear();
67}
68
69void decision_t::clear() {
70  command = dMeta;
71  meta.clear();
72  text.clear();
73}
74
75void format_t::clear() {
76  command = comText;
77  decision.clear();
78  text.clear();
79  meta.clear();
80  nextptr = NULL;
81  ifptr = NULL;
82  elseptr = NULL;
83  orptr = NULL;
84}
85
86void formatinfo_t::clear() {
87  DocumentImages = false;
88  DocumentTitles = true;
89  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
90  DocumentContents = true;
91  DocumentArrowsBottom = true;
92  DocumentArrowsTop = false;
93  DocumentSearchResultLinks = false;
94  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
95  //  DocumentButtons.push_back ("Expand Text");
96  //  DocumentButtons.push_back ("Expand Contents");
97  DocumentButtons.push_back ("Detach");
98  DocumentButtons.push_back ("Highlight");
99  RelatedDocuments = "";
100  DocumentText = "[Text]";
101  formatstrings.erase (formatstrings.begin(), formatstrings.end());
102  DocumentUseHTML = false;
103  AllowExtendedOptions = false;
104}
105
106// simply checks to see if formatstring begins with a <td> tag
107bool is_table_content (const text_t &formatstring) {
108  text_t::const_iterator here = formatstring.begin();
109  text_t::const_iterator end = formatstring.end();
110 
111  while (here != end) {
112    if (*here != ' ') {
113      if ((*here == '<') && ((here+3) < end)) {
114    if ((*(here+1) == 't' || *(here+1) == 'T') &&
115        (*(here+2) == 'd' || *(here+2) == 'D') &&
116        (*(here+3) == '>' || *(here+3) == ' '))
117      return true;
118      } else return false;
119    }
120    ++here;
121  }
122  return false;
123}
124
125bool is_table_content (const format_t *formatlistptr) {
126
127  if (formatlistptr == NULL) return false;
128 
129  if (formatlistptr->command == comText)
130    return is_table_content (formatlistptr->text);
131   
132  return false;
133}
134
135// returns false if key isn't in formatstringmap
136bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
137               text_t &formatstring) {
138
139  formatstring.clear();
140  text_tmap::const_iterator it = formatstringmap.find(key);
141  if (it == formatstringmap.end()) return false;
142  formatstring = (*it).second;
143  return true;
144}
145
146// tries to find "key1key2" then "key1" then "key2"
147bool get_formatstring (const text_t &key1, const text_t &key2, 
148               const text_tmap &formatstringmap,
149               text_t &formatstring) {
150
151  formatstring.clear();
152  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
153  if (it != formatstringmap.end()) {
154    formatstring = (*it).second;
155    return true;
156  }
157  it = formatstringmap.find(key1);
158  if (it != formatstringmap.end()) {
159    formatstring = (*it).second;
160    return true;
161  }
162  it = formatstringmap.find(key2);
163  if (it != formatstringmap.end()) {
164    formatstring = (*it).second;
165    return true;
166  }
167  return false;
168}
169
170
171text_t remove_namespace(const text_t &meta_name) {
172  text_t::const_iterator end = meta_name.end();
173  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
174  if (it != end) {
175    return substr(it+1, end);
176  }
177
178  return meta_name;
179
180}
181// returns a date of form _format:date_(year, month, day)
182// input is date of type yyyy-?mm-?dd
183// at least the year must be present in date
184text_t format_date (const text_t &date) {
185
186  if (date.size() < 4) return "";
187
188  text_t::const_iterator datebegin = date.begin();
189
190  text_t year = substr (datebegin, datebegin+4);
191  int chars_seen_so_far = 4;
192
193  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
194  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
195 
196  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
197  int imonth = month.getint();
198  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
199 
200  chars_seen_so_far += 2;
201  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
202 
203  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
204
205  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
206  if (day[0] == '0') day = substr (day.begin()+1, day.end());
207  int iday = day.getint();
208  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
209   
210  return "_format:date_("+year+","+month+","+day+")";
211
212
213// converts an iso639 language code to its English equivalent
214// should we be checking that the macro exists??
215text_t iso639 (const text_t &langcode) {
216  if (langcode.empty()) return "";
217  return "_iso639:iso639"+langcode+"_";
218}
219
220
221text_t get_href (const text_t &link) {
222
223  text_t href;
224
225  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
226  text_t::const_iterator end = link.end();
227  if (here == end) return g_EmptyText;
228 
229  ++here;
230  while (here != end) {
231    if (*here == '"') break;
232    href.push_back(*here);
233    ++here;
234  }
235
236  return href;
237}
238
239//this function gets the information associated with the relation
240//metadata for the document associated with 'docinfo'. This relation
241//metadata consists of a line of pairs containing 'collection, document OID'
242//(this is the OID of the document related to the current document, and
243//the collection the related document belongs to). For each of these pairs
244//the title metadata is obtained and then an html link between the title
245//of the related doc and the document's position (the document will be
246//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
247//(where collection is the related documents collection, and OID is the
248//related documents OID).  A list of these html links are made for as many
249//related documents as there are. This list is then returned. If there are
250//no related documents available for the current document then the string
251//'.. no related documents .. ' is returned.
252text_t get_related_docs(const text_t& collection, recptproto* collectproto,
253            ResultDocInfo_t &docinfo, ostream& logout){
254 
255  text_tset metadata;
256
257  //insert the metadata we wish to collect
258  metadata.insert("dc.Relation");
259  metadata.insert("Title"); 
260  metadata.insert("Subject"); //for emails, where title data doesn't apply
261 
262  FilterResponse_t response;
263  text_t relation = ""; //string for displaying relation metadata
264  text_t relationTitle = ""; //the related documents Title (or subject)
265  text_t relationOID = ""; //the related documents OID 
266
267  //get the information associated with the metadata for current doc
268  if (get_info (docinfo.OID, collection, "", metadata,
269        false, collectproto, response, logout)) {
270   
271    //if the relation metadata exists, store for displaying
272    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
273      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
274
275      //split relation data into pairs of collectionname,ID number
276      text_tarray relationpairs;
277      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
278     
279      text_tarray::const_iterator currDoc = relationpairs.begin(); 
280      text_tarray::const_iterator lastDoc = relationpairs.end();
281
282      //iterate through the pairs to split and display
283      while(currDoc != lastDoc){
284   
285    //split pairs into collectionname and ID
286    text_tarray relationdata;
287    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
288   
289    //get first element in the array (collection)
290    text_tarray::const_iterator doc_data = relationdata.begin();
291    text_t document_collection = *doc_data;
292    ++doc_data; //increment to get next item in array (oid)
293    text_t document_OID = *doc_data;
294   
295    //create html link to related document
296    relation += "<a href=\"_httpdocument_&c=" + document_collection;
297    relation += "&cl=search&d=" + document_OID;
298       
299    //get the information associated with the metadata for related doc
300    if (get_info (document_OID, document_collection, "", metadata,
301              false, collectproto, response, logout)) {
302     
303      //if title metadata doesn't exist, collect subject metadata
304      //if that doesn't exist, just call it 'related document'
305      if (!response.docInfo[0].metadata["Title"].values[0].empty())
306        relationTitle = response.docInfo[0].metadata["Title"].values[0];
307      else if (!response.docInfo[0].metadata["Subject"].values.empty())
308        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
309      else relationTitle =  "RELATED DOCUMENT";
310     
311    }
312   
313    //link the related document's title to its page
314    relation += "\">" + relationTitle + "</a>";
315    relation += "  (" + document_collection + ")<br>";
316   
317    ++currDoc;
318      }
319    }
320   
321  }
322
323  if(relation.empty()) //no relation data for documnet
324    relation = ".. no related documents .. ";
325
326  return relation;
327}
328
329
330
331static void get_parent_options (text_t &instring, metadata_t &metaoption) {
332
333  assert (instring.size() > 7);
334  if (instring.size() <= 7) return;
335
336  text_t meta, com, op;
337  bool inbraces = false;
338  bool inquotes = false;
339  bool foundcolon = false;
340  text_t::const_iterator here = instring.begin()+6;
341  text_t::const_iterator end = instring.end();
342  while (here != end) {
343    if (foundcolon) meta.push_back (*here);
344    else if (*here == '(') inbraces = true;
345    else if (*here == ')') inbraces = false;
346    else if (*here == '\'' && !inquotes) inquotes = true;
347    else if (*here == '\'' && inquotes) inquotes = false;
348    else if (*here == ':' && !inbraces) foundcolon = true;
349    else if (inquotes) op.push_back (*here);
350    else com.push_back (*here);
351    ++here;
352  }
353
354  instring = meta;
355  if (com.empty())
356    metaoption.mqualifier.parent = pImmediate;
357  else if (com == "Top")
358    metaoption.mqualifier.parent = pTop;
359  else if (com == "All") {
360    metaoption.mqualifier.parent = pAll;
361    metaoption.parentoptions = op;
362  }
363}
364
365
366static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
367
368  assert (instring.size() > 8);
369  if (instring.size() <= 8) return;
370  text_t meta, com, op;
371  bool inbraces = false;
372  bool inquotes = false;
373  bool foundcolon = false;
374  text_t::const_iterator here = instring.begin()+7;
375  text_t::const_iterator end = instring.end();
376  while (here != end) {
377    if (foundcolon) meta.push_back (*here);
378    else if (*here == '(') inbraces = true;
379    else if (*here == ')') inbraces = false;
380    else if (*here == '\'' && !inquotes) inquotes = true;
381    else if (*here == '\'' && inquotes) inquotes = false;
382    else if (*here == ':' && !inbraces) foundcolon = true;   
383    else if (inquotes) op.push_back (*here);
384    else com.push_back (*here);
385    ++here;
386  }
387
388  instring = meta;
389  metaoption.siblingoptions.clear();
390
391  if (com.empty()) {
392    metaoption.mqualifier.sibling = sAll;
393    metaoption.siblingoptions = " ";
394  }
395  else if (com == "first") {
396    metaoption.mqualifier.sibling = sNum;
397    metaoption.siblingoptions = "0";
398  }
399  else if (com == "last") {
400    metaoption.mqualifier.sibling = sNum;
401    metaoption.siblingoptions = "-2"; // == last
402  }
403  else if (com.getint()>0) {
404    metaoption.mqualifier.sibling = sNum;
405    int pos = com.getint()-1;
406    metaoption.siblingoptions +=pos;
407  }
408  else {
409    metaoption.mqualifier.sibling = sAll;
410    metaoption.siblingoptions = op;
411  }
412}
413
414static void get_child_options (text_t &instring, metadata_t &metaoption) {
415
416  assert (instring.size() > 6);
417  if (instring.size() <= 6) return;
418  text_t meta, com, op;
419  bool inbraces = false;
420  bool inquotes = false;
421  bool foundcolon = false;
422  text_t::const_iterator here = instring.begin()+5;
423  text_t::const_iterator end = instring.end();
424  while (here != end) {
425    if (foundcolon) meta.push_back (*here);
426    else if (*here == '(') inbraces = true;
427    else if (*here == ')') inbraces = false;
428    else if (*here == '\'' && !inquotes) inquotes = true;
429    else if (*here == '\'' && inquotes) inquotes = false;
430    else if (*here == ':' && !inbraces) foundcolon = true;
431    else if (inquotes) op.push_back (*here);
432    else com.push_back (*here);
433    ++here;
434  }
435
436  instring = meta;
437  if (com.empty()) {
438    metaoption.mqualifier.child = cAll;
439    metaoption.childoptions = " ";
440  }
441  else if (com == "first") {
442    metaoption.mqualifier.child = cNum;
443    metaoption.childoptions = ".fc";
444  }
445  else if (com == "last") {
446    metaoption.mqualifier.child = cNum;
447    metaoption.childoptions = ".lc";
448  }
449  else if (com.getint()>0) {
450    metaoption.mqualifier.child = cNum;
451    metaoption.childoptions = "."+com;
452  }
453  else {
454    metaoption.mqualifier.child = cAll;
455    metaoption.childoptions = op;
456  }
457}
458
459
460
461static void parse_meta (text_t &meta, metadata_t &metaoption,
462            text_tset &metadata, bool &getParents) {
463
464  if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
465    metaoption.metacommand |= mCgiSafe;
466    meta = substr (meta.begin()+8, meta.end());
467  }
468  if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {   
469    metaoption.metacommand |= mSpecial;
470    meta = substr (meta.begin()+7, meta.end());
471  }
472
473  bool had_parent_or_child = true;
474  bool prev_was_parent = false;
475  bool prev_was_child  = false;
476
477  while (had_parent_or_child) {
478    if (meta.size() > 7
479    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
480
481      // clear out sibling and child (cmd and options)
482      metaoption.metacommand &= ~(mChild|mSibling);
483      metaoption.childoptions.clear();
484      metaoption.siblingoptions.clear();
485
486      getParents = true;
487      metaoption.metacommand |= mParent;
488      get_parent_options (meta, metaoption);
489
490      if (prev_was_parent) {
491    metaoption.pre_tree_traverse += ".pr";
492      }
493      else if (prev_was_child) {
494    metaoption.pre_tree_traverse += ".fc";
495      }
496
497      prev_was_parent = true;
498      prev_was_child  = false;
499    }
500    else if (meta.size() > 6
501         && (substr (meta.begin(), meta.begin()+5) == "child")) {
502
503      // clear out sibling and parent (cmd and options)
504      metaoption.metacommand &= ~(mParent|mSibling);
505      metaoption.parentoptions.clear();
506      metaoption.siblingoptions.clear();
507
508      metaoption.metacommand |= mChild;
509      get_child_options (meta, metaoption);
510      metadata.insert("contains");
511
512      if (prev_was_parent) {
513    metaoption.pre_tree_traverse += ".pr";
514      }
515      else if (prev_was_child) {
516    metaoption.pre_tree_traverse += ".fc";
517      }
518
519      prev_was_child  = true;
520      prev_was_parent = false;
521    }
522    else {
523      prev_was_child  = false;
524      prev_was_parent = false;
525      had_parent_or_child = false;
526    }
527  }
528
529  // parent/child can have sibling tacked on end also
530  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
531    metaoption.metacommand |= mSibling;
532    get_sibling_options (meta, metaoption);
533  }
534 
535  // check for ex. which may occur in format statements
536  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
537    meta = substr (meta.begin()+3, meta.end());
538  }
539  metadata.insert (meta);
540  metaoption.metaname = meta;
541}
542
543static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
544  if (meta == "collection") {
545    // no qualifiers
546    metaoption.metaname = g_EmptyText;
547    return;
548  }
549  meta = substr (meta.begin()+11, meta.end());
550  metaoption.metaname = meta;
551 
552}
553
554static void parse_meta (text_t &meta, format_t *formatlistptr,
555            text_tset &metadata, bool &getParents) {
556 
557  if (meta == "link")
558    formatlistptr->command = comLink;
559  else if (meta == "/link")
560    formatlistptr->command = comEndLink;
561
562  else if (meta == "href")
563    formatlistptr->command = comHref;
564
565  else if (meta == "num")
566    formatlistptr->command = comNum;
567
568  else if (meta == "icon")
569    formatlistptr->command = comIcon;
570
571  else if (meta == "Text")
572    formatlistptr->command = comDoc;
573 
574  else if (meta == "RelatedDocuments")
575   formatlistptr->command = comRel;
576
577  else if (meta == "highlight")
578    formatlistptr->command = comHighlight;
579
580  else if (meta == "/highlight")
581    formatlistptr->command = comEndHighlight;
582
583  else if (meta == "Summary")
584    formatlistptr->command = comSummary;
585
586  else if (meta == "DocImage")
587    formatlistptr->command = comImage;
588
589  else if (meta == "DocTOC")
590    formatlistptr->command = comTOC;
591
592  else if (meta == "DocumentButtonDetach")
593     formatlistptr->command = comDocumentButtonDetach;
594 
595  else if (meta == "DocumentButtonHighlight")
596     formatlistptr->command = comDocumentButtonHighlight;
597 
598  else if (meta == "DocumentButtonExpandContents")
599    formatlistptr->command = comDocumentButtonExpandContents;
600
601  else if (meta == "DocumentButtonExpandText")
602     formatlistptr->command = comDocumentButtonExpandText;
603
604  else if (meta == "DocOID")
605     formatlistptr->command = comOID;
606  else if (meta == "DocTopOID")
607    formatlistptr->command = comTopOID;
608  else if (meta == "DocRank")
609    formatlistptr->command = comRank;
610  else if (meta == "DocTermsFreqTotal")
611    formatlistptr->command = comDocTermsFreqTotal;
612  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
613    formatlistptr->command = comCollection;
614    parse_coll_meta(meta, formatlistptr->meta);
615  }
616  else {
617    formatlistptr->command = comMeta;
618    parse_meta (meta, formatlistptr->meta, metadata, getParents);
619  }
620}
621
622
623static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
624              text_tset &metadata, bool &getParents) {
625
626  text_t text;
627  text_t::const_iterator here = formatstring.begin();
628  text_t::const_iterator end = formatstring.end();
629
630  while (here != end) {
631
632    if (*here == '\\') {
633      ++here;
634      if (here != end) text.push_back (*here);
635
636    } else if (*here == '{') {
637      if (!text.empty()) {
638    formatlistptr->command = comText;
639    formatlistptr->text = text;
640    formatlistptr->nextptr = new format_t();
641    formatlistptr = formatlistptr->nextptr;
642   
643    text.clear();
644      }
645      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
646
647    formatlistptr->nextptr = new format_t();
648    formatlistptr = formatlistptr->nextptr;
649    if (here == end) break;
650      }
651    } else if (*here == '[') {
652      if (!text.empty()) {
653    formatlistptr->command = comText;
654    formatlistptr->text = text;
655    formatlistptr->nextptr = new format_t();
656    formatlistptr = formatlistptr->nextptr;
657
658    text.clear();
659      }
660      text_t meta;
661      ++here;
662      while (*here != ']') {
663    if (here == end) return false;
664    meta.push_back (*here);
665    ++here;
666      }
667      parse_meta (meta, formatlistptr, metadata, getParents);
668      formatlistptr->nextptr = new format_t();
669      formatlistptr = formatlistptr->nextptr;
670
671    } else
672      text.push_back (*here);
673
674    if (here != end) ++here;
675  }
676  if (!text.empty()) {
677    formatlistptr->command = comText;
678    formatlistptr->text = text;
679    formatlistptr->nextptr = new format_t();
680    formatlistptr = formatlistptr->nextptr;
681
682  }
683  return true;
684}
685
686
687static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
688              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
689
690  text_t::const_iterator it = findchar (here, end, '}');
691  if (it == end) return false;
692
693  text_t com = substr (here, it);
694  here = findchar (it, end, '{');
695  if (here == end) return false;
696  else ++here;
697
698  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
699  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
700  else return false;
701
702  int commacount = 0;
703  text_t text;
704  while (here != end) {
705
706    if (*here == '\\') {
707      ++here;
708      if (here != end) text.push_back(*here);
709     
710    }
711 
712    else if (*here == ',' || *here == '}' || *here == '{') {
713
714      if (formatlistptr->command == comOr) {
715    // the {Or}{this, or this, or this, or this} statement
716    format_t *or_ptr;
717   
718    // find the next unused orptr
719    if (formatlistptr->orptr == NULL) {
720      formatlistptr->orptr = new format_t();
721      or_ptr = formatlistptr->orptr;
722    } else {
723      or_ptr = formatlistptr->orptr;
724      while (or_ptr->nextptr != NULL)
725        or_ptr = or_ptr->nextptr;
726      or_ptr->nextptr = new format_t();
727      or_ptr = or_ptr->nextptr;
728    }
729
730    if (!text.empty())
731      {
732        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
733      }
734
735    if (*here == '{')
736      {
737        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
738        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
739        // The latter can always be re-written:
740        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
741       
742        if (!text.empty()) // already used up allocated format_t
743          {
744        // => allocate new one for detected action
745        or_ptr->nextptr = new format_t();
746        or_ptr = or_ptr->nextptr;
747          }
748        if (!parse_action(++here, end, or_ptr, metadata, getParents))
749          {
750        return false;
751          }
752      }
753    else
754      {
755        if (*here == '}') break;
756      }
757    text.clear();
758
759      }
760
761      // Parse an {If}{decide,do,else} statement
762      else {
763   
764    // Read the decision component. 
765    if (commacount == 0) {
766      // Decsion can be a metadata element, or a piece of text.
767      // Originally Stefan's code, updated 25/10/2000 by Gordon.
768
769      text_t::const_iterator beginbracket = text.begin();
770      text_t::const_iterator endbracket = (text.end() - 1);
771
772      // Decision is based on a metadata element
773      if ((*beginbracket == '[') && (*endbracket == ']')) {
774        // Ignore the surrounding square brackets
775        text_t meta = substr (beginbracket+1, endbracket);
776        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
777        ++commacount;
778        text.clear();
779      }
780
781      // Decision is a piece of text (probably a macro like _cgiargmode_).
782      else {
783
784        // hunt for any metadata in string, which might be uses in
785        // to test a condition, e.g. [Format] eq 'PDF'
786        format_t* dummyformat = new format_t();
787        // update which metadata fields needed
788        // (not interested in updatng formatlistptr)
789        parse_string (text, dummyformat, metadata, getParents);
790        delete dummyformat;
791
792        formatlistptr->decision.command = dText;
793        formatlistptr->decision.text = text;
794        ++commacount;
795        text.clear();
796      }
797    }
798
799    // Read the "then" and "else" components of the {If} statement.
800    else {
801      format_t** nextlistptr = NULL;
802      if (commacount == 1) {
803        nextlistptr = &formatlistptr->ifptr;
804      } else if (commacount == 2 ) {
805        nextlistptr = &formatlistptr->elseptr;
806      } else {
807        return false;
808      }
809
810      if (!text.empty()) {
811        if (*nextlistptr == NULL) {
812          *nextlistptr = new format_t();
813        } else {
814
815          // skip to the end of any format_t statements already added
816          while ((*nextlistptr)->nextptr != NULL)
817          {
818        nextlistptr = &(*nextlistptr)->nextptr;
819          }
820
821          (*nextlistptr)->nextptr = new format_t();
822          nextlistptr = &(*nextlistptr)->nextptr;
823        }
824
825        if (!parse_string (text, *nextlistptr, metadata, getParents))
826          {
827        return false;
828          }
829        text.clear();
830      }
831     
832      if (*here == '{')
833        {
834          if (*nextlistptr == NULL) {
835        *nextlistptr = new format_t();
836          } else {
837        // skip to the end of any format_t statements already added
838        while ((*nextlistptr)->nextptr != NULL)
839          {
840            nextlistptr = &(*nextlistptr)->nextptr;
841          }
842
843        (*nextlistptr)->nextptr = new format_t();
844        nextlistptr = &(*nextlistptr)->nextptr;
845          }
846
847          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
848        {
849          return false;
850        }
851        }
852      else
853        {
854          if (*here == '}') break;
855          ++commacount;
856        }
857    }
858      }
859     
860    } else text.push_back(*here);
861   
862    if (here != end) ++here;
863  }
864
865  return true;
866}
867
868
869bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
870             text_tset &metadata, bool &getParents) {
871
872  formatlistptr->clear();
873  getParents = false;
874
875  return (parse_string (formatstring, formatlistptr, metadata, getParents));
876}
877
878// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
879// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
880static text_t get_formatted_meta_text(MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
881{
882  text_t no_ns_metaname = remove_namespace(meta.metaname);
883  text_t tmp;
884  bool first = true;
885 
886  const int start_i=0;
887  const int end_i = metainfo.values.size()-1;
888 
889  if (position == -1) { // all
890    for (int i=start_i; i<=end_i; ++i) {
891      if (!first) tmp += meta.siblingoptions;
892      if (meta.metacommand & mSpecial) {
893    // special formatting
894    if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[i]);
895    else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[i]);
896    else tmp += "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
897      }
898      else tmp += metainfo.values[i];
899      first = false;
900     
901    }
902  } else {
903    if (position == -2) { // end
904      position = end_i;
905    } else if (position < start_i || position > end_i) {
906      return "";
907    }
908    if (meta.metacommand & mSpecial) {
909      // special formatting
910      if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[position]);
911      else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[position]);
912      else tmp += "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
913    }
914    else tmp += metainfo.values[position];
915  }
916  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (tmp);
917  else return tmp;
918}
919
920static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
921{
922 
923  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
924  switch (meta.mqualifier.parent) {
925  case pNone:
926    return "Nothing!!";
927    break;
928
929  case pImmediate:
930    if (parent != NULL) {
931      return get_formatted_meta_text(*parent, meta, siblings_values);
932    }
933    break;
934
935  case pTop:
936    if (parent != NULL) {
937      while (parent->parent != NULL) parent = parent->parent;
938      return get_formatted_meta_text(*parent, meta, siblings_values);
939    }
940    break;
941
942  case pAll:
943    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
944    if (parent != NULL) {
945      text_tarray tmparray;
946      while (parent != NULL) {
947    tmparray.push_back (get_formatted_meta_text(*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
948    parent = parent->parent;
949      }
950      // now join them up - use teh parent separator
951      bool first = true;
952      text_t tmp;
953      text_tarray::reverse_iterator here = tmparray.rbegin();
954      text_tarray::reverse_iterator end = tmparray.rend();
955      while (here != end) {
956    if (!first) tmp += meta.parentoptions;
957    tmp += *here;
958    first = false;
959    ++here;
960      }
961      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
962      else return tmp;
963    }
964  }
965  return "";
966
967}
968
969static text_t get_child_meta (const text_t& collection,
970                  recptproto* collectproto,
971                  ResultDocInfo_t &docinfo, displayclass &disp,
972                  const metadata_t &meta, text_tmap &options,
973                  ostream& logout, int siblings_values)
974{
975  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
976 
977  const text_t& pre_tree_trav = meta.pre_tree_traverse;
978  const text_t& child_metaname = meta.metaname;
979  const text_t& child_field = meta.childoptions;
980  text_tset child_metadata;
981  child_metadata.insert(child_metaname);
982
983  FilterResponse_t child_response;
984  if (meta.mqualifier.child == cNum) {
985    // just one child
986    //get the information associated with the metadata for child doc
987    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
988           child_metadata, false, collectproto, child_response,
989           logout)) return ""; // invalid child number
990
991      if (child_response.docInfo.empty()) return false; // no info for the child
992 
993      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
994      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
995 
996      text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
997      return expand_metadata(child_metavalue,collection,collectproto,
998             child_docinfo,disp,options,logout);
999  }
1000 
1001   
1002  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1003
1004
1005  if (!pre_tree_trav.empty()) {
1006    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1007    FilterResponse_t trav_response;
1008
1009    text_tset trav_metadata;
1010    trav_metadata.insert("contains");
1011
1012    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1013           trav_metadata, false, collectproto, trav_response,
1014           logout)) return ""; // invalid pre_tree_trav
1015
1016    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1017 
1018    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1019    // use this for rest of routine
1020    docinfo = trav_docinfo;
1021  }
1022 
1023  // we need to get all children
1024  text_t result = "";
1025  text_tarray children;
1026  text_t contains = docinfo.metadata["contains"].values[0];
1027  splitchar (contains.begin(), contains.end(), ';', children);
1028  text_tarray::const_iterator here = children.begin();
1029  text_tarray::const_iterator end = children.end();
1030  bool first = true;
1031  while (here !=end) {
1032    text_t oid = *here;
1033    here++;
1034    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1035   
1036    //get the information associated with the metadata for child doc
1037    if (!get_info (oid, collection, "", child_metadata,
1038           false, collectproto, child_response, logout) ||
1039    child_response.docInfo.empty()) {
1040      first = false;
1041      continue;
1042    }
1043   
1044   
1045    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1046    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1047   
1048    text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
1049    if (!first) result += child_field;
1050    first = false;
1051    // need to do this here cos otherwise we are in the wrong document
1052    result +=  expand_metadata(child_metavalue,collection,collectproto,
1053                   child_docinfo,disp,options,logout);
1054  }
1055  return result;
1056   
1057}
1058
1059static text_t get_meta (const text_t& collection, recptproto* collectproto,
1060            ResultDocInfo_t &docinfo, displayclass &disp,
1061            const metadata_t &meta, text_tmap &options,
1062            ostream& logout) {
1063 
1064  // make sure we have the requested metadata
1065  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1066  if (it == docinfo.metadata.end()) return "";
1067
1068  int siblings_values = 0; // default is no siblings, just the first metadata available
1069  if (meta.metacommand & mSibling) {
1070    if (meta.mqualifier.sibling == sAll) {
1071      siblings_values = -1; //all
1072    } else if (meta.mqualifier.sibling == sNum) {
1073      siblings_values = meta.siblingoptions.getint();
1074    }
1075  }
1076  if (meta.metacommand & mParent) {
1077    return get_parent_meta(docinfo,meta,siblings_values);
1078  }
1079
1080  else if (meta.metacommand & mChild) {
1081    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1082                options,logout, siblings_values);
1083  }
1084  else if (meta.metacommand & mSibling) { // only siblings
1085    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1086    return get_formatted_meta_text(docinfo.metadata[meta.metaname],meta, siblings_values);
1087  }
1088  else {
1089   
1090    // straightforward metadata request (nothing fancy)
1091
1092    text_t classifier_metaname = docinfo.classifier_metadata_type;
1093    int metaname_index
1094      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1095    return get_formatted_meta_text(docinfo.metadata[meta.metaname], meta, metaname_index);
1096  }
1097 
1098  return "";
1099}
1100
1101static text_t get_or (const text_t& collection, recptproto* collectproto,
1102              ResultDocInfo_t &docinfo, displayclass &disp,
1103              format_t *orptr, text_tmap &options,
1104              ostream& logout) {
1105
1106  text_t tmp;
1107  while (orptr != NULL) {
1108
1109    tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1110             options, logout);
1111    if (!tmp.empty()) return tmp;
1112
1113    orptr = orptr->nextptr;
1114  }
1115  return "";
1116}
1117
// True for space, horizontal tab, newline and carriage return only.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1123
1124static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1125{
1126  int pos = start_pos;
1127  while (pos<outstring.size()) {
1128    if (!char_is_whitespace(outstring[pos])) {
1129      break;
1130    }
1131    ++pos;
1132  }
1133
1134  return pos;
1135}
1136
1137static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1138{
1139  int pos = start_pos;
1140  while (pos>=0) {
1141    if (!char_is_whitespace(outstring[pos])) {
1142      break;
1143    }
1144    --pos;
1145  }
1146
1147  return pos;
1148}
1149
1150static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1151{
1152  int pos = start_pos;
1153  while (pos>=0) {
1154    if (char_is_whitespace(outstring[pos])) {
1155      break;
1156    }
1157    --pos;
1158  }
1159
1160  return pos;
1161}
1162
1163
1164static int rscan_for(const text_t& outstring, const int start_pos,
1165             const char find_c)
1166{
1167  int pos = start_pos;
1168  while (pos>=0) {
1169    char c = outstring[pos];
1170    if (outstring[pos] == find_c) {
1171      break;
1172    }
1173    --pos;
1174  }
1175
1176  return pos;
1177}
1178
1179text_t extract_substr(const text_t& outstring, const int start_pos,
1180              const int end_pos)
1181{
1182  text_t extracted_str;
1183  extracted_str.clear();
1184
1185  for (int pos=start_pos; pos<=end_pos; ++pos) {
1186    extracted_str.push_back(outstring[pos]);
1187  }
1188
1189  return extracted_str;
1190}
1191
1192
1193static text_t expand_potential_metadata(const text_t& collection,
1194                    recptproto* collectproto,
1195                    ResultDocInfo_t &docinfo,
1196                    displayclass &disp,
1197                    const text_t& intext,
1198                    text_tmap &options,
1199                    ostream& logout)
1200{
1201  text_t outtext;
1202
1203  // decide if dealing with metadata or text
1204
1205  text_t::const_iterator beginbracket = intext.begin();
1206  text_t::const_iterator endbracket = (intext.end() - 1);
1207
1208  // Decision is based on a metadata element
1209  if ((*beginbracket == '[') && (*endbracket == ']')) {
1210    // Ignore the surrounding square brackets
1211    text_t meta_text = substr (beginbracket+1, endbracket);
1212
1213    if (meta_text == "Text") {
1214      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1215    } else {
1216
1217      text_tset metadata;
1218      bool getParents =false;
1219      metadata_t meta;
1220     
1221      parse_meta (meta_text, meta, metadata, getParents);   
1222      outtext
1223    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1224    }
1225
1226  }
1227  else {
1228    outtext = intext;
1229  }
1230
1231  return outtext;
1232}
1233
1234
1235
1236
1237static bool uses_expression(const text_t& collection, recptproto* collectproto,
1238                ResultDocInfo_t &docinfo,
1239                displayclass &disp,
1240                const text_t& outstring, text_t& lhs_expr,
1241                text_t& op_expr, text_t& rhs_expr,
1242                text_tmap &options,
1243                ostream& logout)
1244{
1245  // Note: the string may not be of the form: str1 op str2, however
1246  // to deterine this we have to process it on the assumption it is,
1247  // and if at any point an 'erroneous' value is encountered, return
1248  // false and let something else have a go at evaluating it
1249
1250  // Starting at the end of the string and working backwards ..
1251
1252  const int outstring_len = outstring.size();
1253
1254  // skip over white space
1255  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1256
1257  if (rhs_end<=0) {
1258    // no meaningful text or (rhs_end==0) no room for operator
1259    return false;
1260  }
1261
1262  // check for ' or " and then scan over token
1263  const char potential_quote = outstring[rhs_end];
1264  int rhs_start=rhs_end;
1265  bool quoted = false;
1266
1267  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1268    --rhs_end;
1269    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1270    quoted = true;
1271  }
1272  else {
1273    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1274  }
1275
1276  if ((rhs_end-rhs_start)<0) {
1277    // no meaningful rhs expression
1278    return false;
1279  }
1280
1281  // form rhs_expr
1282  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1283
1284  // skip over white space
1285  const int to_whitespace = (quoted) ? 2 : 1;
1286
1287  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1288  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1289
1290  if ((op_end<0) && (op_start<0)) {
1291    // no meaningful expression operator
1292    return false;
1293  }
1294
1295  if (op_end-op_start<0) {
1296    // no meaningful expression operator
1297    return false;
1298  }
1299
1300  op_expr = extract_substr(outstring,op_start,op_end);
1301
1302
1303  // check for operator
1304  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1305     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1306
1307    // not a valid operator
1308    return false;
1309  }
1310
1311  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1312  if (lhs_end<0) {
1313    // no meaningful lhs expression
1314    return false;
1315  }
1316
1317  int lhs_start = scan_over_whitespace(outstring,0);
1318
1319  // form lhs_expr from remainder of string
1320  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1321
1322  // Now we know we have a valid expression, look up any
1323  // metadata terms
1324
1325  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1326                       disp,rhs_expr,options,logout);
1327  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1328                       disp,lhs_expr,options,logout);
1329
1330  return true;
1331}
1332
1333static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1334                 const text_t& rhs_expr, ostream& logout)
1335{
1336  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1337  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1338  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1339  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1340  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1341  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1342  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1343  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1344  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1345  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1346  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1347  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1348  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1349  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1350  else {
1351    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1352  }
1353 
1354  return false;
1355}
1356
1357
1358static text_t get_if (const text_t& collection, recptproto* collectproto,
1359              ResultDocInfo_t &docinfo, displayclass &disp,
1360              const decision_t &decision,
1361              format_t *ifptr, format_t *elseptr,
1362              text_tmap &options, ostream& logout)
1363{
1364  // If the decision component is a metadata element, then evaluate it
1365  // to see whether we output the "then" or the "else" clause
1366  if (decision.command == dMeta) {
1367    if (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1368          logout) != "") {
1369      if (ifptr != NULL)
1370    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1371                     options, logout);
1372    }
1373    else {
1374      if (elseptr != NULL)
1375    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1376                     options, logout);
1377    }
1378  }
1379
1380  // If the decision component is text, then evaluate it (it is probably a
1381  // macro like _cgiargmode_) to decide what to output.
1382  else if (decision.command == dText) {
1383
1384    text_t outstring;
1385    disp.expandstring (decision.text, outstring);
1386
1387    // Check for if expression in form: str1 op str2
1388    // (such as [x] eq "y")
1389    text_t lhs_expr, op_expr, rhs_expr;
1390    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1391      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1392    if (ifptr != NULL) {
1393      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1394                       options, logout);
1395    }
1396    else {
1397      return "";
1398    }
1399      } else {
1400    if (elseptr != NULL) {
1401      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1402                       options, logout);
1403    }
1404    else {
1405      return "";
1406    }
1407      }
1408    }
1409
1410
1411    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1412    // a cgi argument macro that has not been set, it evaluates to itself.
1413    // Therefore, were have to say that a piece of text evalautes true if
1414    // it is non-empty and if it is a cgi argument evaulating to itself.
1415
1416    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1417      if (ifptr != NULL)
1418    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1419                     options, logout);
1420    } else {
1421      if (elseptr != NULL)
1422    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1423                     options, logout);
1424    }
1425  }
1426 
1427  return "";
1428}
1429
1430bool includes_metadata(const text_t& text)
1431{
1432  text_t::const_iterator here = text.begin();
1433  text_t::const_iterator end = text.end();
1434  while (here != end) {
1435    if (*here == '[') return true;
1436    ++here;
1437  }
1438
1439  return false;
1440}
1441
1442static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1443                  recptproto* collectproto,
1444                  ResultDocInfo_t &docinfo,
1445                  displayclass &disp, text_tmap &options,
1446                  ostream &logout) {
1447     
1448  if (includes_metadata(metavalue)) {
1449   
1450    // text has embedded metadata in it => expand it
1451    FilterRequest_t request;
1452    FilterResponse_t response;
1453   
1454    request.getParents = false;
1455   
1456    format_t *expanded_formatlistptr = new format_t();
1457    parse_formatstring (metavalue, expanded_formatlistptr,
1458            request.fields, request.getParents);
1459   
1460    // retrieve metadata
1461    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1462         collectproto, response, logout);
1463   
1464    if (!response.docInfo.empty()) {
1465     
1466      text_t expanded_metavalue
1467    = get_formatted_string(collection, collectproto,
1468                   response.docInfo[0], disp, expanded_formatlistptr,
1469                   options, logout);
1470     
1471      return expanded_metavalue;
1472    }
1473    else {
1474      return metavalue;
1475    }
1476  }
1477  else {
1478   
1479    return metavalue;
1480  }
1481}
1482
1483text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1484               displayclass &disp,
1485               text_t meta_name, ostream& logout) {
1486 
1487  ColInfoResponse_t collectinfo;
1488  comerror_t err;
1489  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1490  text_t meta_value = "";
1491  text_t lang;
1492  disp.expandstring("_cgiargl_",lang);
1493  if (lang.empty()) {
1494    lang = "en";
1495  }
1496
1497  if (err == noError) {
1498    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1499  }
1500  return meta_value;
1501 
1502
1503}
1504text_t format_string (const text_t& collection, recptproto* collectproto,
1505              ResultDocInfo_t &docinfo, displayclass &disp,
1506              format_t *formatlistptr, text_tmap &options,
1507              ostream& logout) {
1508
1509  if (formatlistptr == NULL) return "";
1510
1511  switch (formatlistptr->command) {
1512     case comOID:
1513    return docinfo.OID;
1514  case comTopOID:
1515    {
1516      text_t top_id;
1517      get_top(docinfo.OID, top_id);
1518      return top_id;
1519    }
1520  case comRank:
1521    return text_t(docinfo.ranking);
1522     case comText:
1523    return formatlistptr->text;
1524     case comLink:
1525    return options["link"];
1526     case comEndLink:
1527    if (options["link"].empty()) return "";
1528    else return "</a>";
1529     case comHref:
1530    return get_href(options["link"]);
1531     case comIcon:
1532    return options["icon"];
1533     case comNum:
1534    return docinfo.result_num;
1535     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1536    return get_related_docs(collection, collectproto, docinfo, logout);
1537     case comSummary:
1538    return format_summary(collection, collectproto, docinfo, disp, options, logout);
1539     case comMeta:
1540    {
1541       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1542       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1543    }
1544     case comDoc:
1545    return format_text(collection, collectproto, docinfo, disp, options, logout);
1546    //return options["text"];
1547     case comImage:
1548    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1549     case comTOC:
1550    return options["DocTOC"];
1551     case comDocumentButtonDetach:
1552    return options["DocumentButtonDetach"];
1553     case comDocumentButtonHighlight:
1554    return options["DocumentButtonHighlight"];
1555     case comDocumentButtonExpandContents:
1556    return options["DocumentButtonExpandContents"];
1557     case comDocumentButtonExpandText:
1558    return options["DocumentButtonExpandText"];
1559     case comHighlight:
1560    if (options["highlight"] == "1") return "<b>";
1561    break;
1562     case comEndHighlight:
1563    if (options["highlight"] == "1") return "</b>";
1564    break;
1565     case comIf:
1566    return get_if (collection, collectproto, docinfo, disp,
1567               formatlistptr->decision, formatlistptr->ifptr,
1568               formatlistptr->elseptr, options, logout);
1569     case comOr:
1570    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1571               options, logout);
1572     case comDocTermsFreqTotal:
1573       return docinfo.num_terms_matched;
1574     case comCollection:
1575       if (formatlistptr->meta.metaname == g_EmptyText) {
1576     return collection;
1577       }
1578       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1579   
1580  }
1581  return "";
1582}
1583
1584text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1585                 ResultDocInfo_t &docinfo, displayclass &disp,
1586                 format_t *formatlistptr, text_tmap &options,
1587                 ostream& logout) {
1588
1589   text_t ft;
1590   while (formatlistptr != NULL)
1591      {
1592     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1593                  options, logout);
1594     formatlistptr = formatlistptr->nextptr;
1595      }
1596   
1597   return ft;
1598}
1599
1600
1601// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1602text_t format_text (const text_t& collection, recptproto* collectproto,
1603            ResultDocInfo_t &docinfo, displayclass &disp,
1604            text_tmap &options, ostream& logout) {
1605  if(!options["text"].empty()) {
1606    return options["text"];
1607  }
1608  // else get document text here
1609  DocumentRequest_t docrequest;
1610  DocumentResponse_t docresponse;
1611  comerror_t err;
1612  docrequest.OID = docinfo.OID;
1613  collectproto->get_document (collection, docrequest, docresponse, err, logout);
1614  return docresponse.doc;
1615
1616}
1617 
1618/* FUNCTION NAME: format_summary
1619 * DESC: this is invoked when a [Summary] special metadata is processed.
1620 * RETURNS: a query-biased summary for the document */
1621
1622text_t format_summary (const text_t& collection, recptproto* collectproto,
1623               ResultDocInfo_t &docinfo, displayclass &disp,
1624               text_tmap &options, ostream& logout) {
1625
1626  // GRB: added code here to ensure that the cstr (and other collections)
1627  //      uses the document metadata item Summary, rather than compressing
1628  //      the text of the document, processed via the methods in
1629  //      summarise.cpp
1630  if (docinfo.metadata.count("Summary") > 0 &&
1631      docinfo.metadata["Summary"].values.size() > 0) {
1632    return docinfo.metadata["Summary"].values[0];
1633  }
1634
1635  text_t textToSummarise, query;
1636  if(options["text"].empty()) { // get document text
1637     DocumentRequest_t docrequest;
1638     DocumentResponse_t docresponse;
1639     comerror_t err;
1640     docrequest.OID = docinfo.OID;
1641     collectproto->get_document (collection, docrequest, docresponse, err, logout);
1642     textToSummarise = docresponse.doc;
1643  } else // in practice, this would not happen, because text is only
1644         // loaded with the [Text] command
1645     textToSummarise = options["text"];
1646  disp.expandstring("_cgiargq_",query);
1647  return summarise(textToSummarise,query,80);
1648}
Note: See TracBrowser for help on using the browser.