root/gsdl/trunk/runtime-src/src/recpt/formattools.cpp @ 19312

Revision 19312, 51.9 KB (checked in by davidb, 10 years ago)

DocOID is now stored in the span-wrap tag to help with set-metadata call

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
// File-wide switch consulted by get_formatted_meta_text(): when true, each
// metadata value is passed through spanwrap_metatext() before being appended
// to the formatted output.  Nothing in the code visible here sets it to true
// -- NOTE(review): presumably toggled by the metadata-spanwrap format
// commands further down the file; confirm.
static bool metadata_spanwrap = false;

// a few function prototypes

// Forward declaration; the definition is not in the part of the file
// preceding this point.
static text_t format_string (const text_t& collection, recptproto* collectproto,
                 ResultDocInfo_t &docinfo, displayclass &disp,
                 format_t *formatlistptr, text_tmap &options,
                 ostream& logout);

// Declared up front because parse_string() calls parse_action() before its
// definition, and parse_action() in turn calls parse_string().
static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
              format_t *formatlistptr, text_tset &metadata, bool &getParents);

// Forward declarations; the definitions are not in the part of the file
// preceding this point.
static text_t format_summary (const text_t& collection, recptproto* collectproto,
                  ResultDocInfo_t &docinfo, displayclass &disp,
                  text_tmap &options, ostream& logout);
static text_t format_text (const text_t& collection, recptproto* collectproto,
                  ResultDocInfo_t &docinfo, displayclass &disp,
                  text_tmap &options, ostream& logout);

static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
                  recptproto* collectproto, ResultDocInfo_t &docinfo,
                  displayclass &disp, text_tmap &options,
                  ostream &logout);
57
58
59void metadata_t::clear() {
60  metaname.clear();
61  metacommand = mNone;
62  mqualifier.parent  = pNone;
63  mqualifier.sibling = sNone;
64  mqualifier.child   = cNone;
65  pre_tree_traverse.clear();
66  parentoptions.clear();
67  siblingoptions.clear();
68  childoptions.clear();
69}
70
71void decision_t::clear() {
72  command = dMeta;
73  meta.clear();
74  text.clear();
75}
76
77void format_t::clear() {
78  command = comText;
79  decision.clear();
80  text.clear();
81  meta.clear();
82  nextptr = NULL;
83  ifptr = NULL;
84  elseptr = NULL;
85  orptr = NULL;
86}
87
88void formatinfo_t::clear() {
89  DocumentImages = false;
90  DocumentTitles = true;
91  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
92  DocumentContents = true;
93  DocumentArrowsBottom = true;
94  DocumentArrowsTop = false;
95  DocumentSearchResultLinks = false;
96  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
97  //  DocumentButtons.push_back ("Expand Text");
98  //  DocumentButtons.push_back ("Expand Contents");
99  DocumentButtons.push_back ("Detach");
100  DocumentButtons.push_back ("Highlight");
101  RelatedDocuments = "";
102  DocumentText = "[Text]";
103  formatstrings.erase (formatstrings.begin(), formatstrings.end());
104  DocumentUseHTML = false;
105  AllowExtendedOptions = false;
106}
107
108// simply checks to see if formatstring begins with a <td> tag
109bool is_table_content (const text_t &formatstring) {
110  text_t::const_iterator here = formatstring.begin();
111  text_t::const_iterator end = formatstring.end();
112 
113  while (here != end) {
114    if (*here != ' ') {
115      if ((*here == '<') && ((here+3) < end)) {
116    if ((*(here+1) == 't' || *(here+1) == 'T') &&
117        (*(here+2) == 'd' || *(here+2) == 'D') &&
118        (*(here+3) == '>' || *(here+3) == ' '))
119      return true;
120      } else return false;
121    }
122    ++here;
123  }
124  return false;
125}
126
127bool is_table_content (const format_t *formatlistptr) {
128
129  if (formatlistptr == NULL) return false;
130 
131  if (formatlistptr->command == comText)
132    return is_table_content (formatlistptr->text);
133   
134  return false;
135}
136
137// returns false if key isn't in formatstringmap
138bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
139               text_t &formatstring) {
140
141  formatstring.clear();
142  text_tmap::const_iterator it = formatstringmap.find(key);
143  if (it == formatstringmap.end()) return false;
144  formatstring = (*it).second;
145  return true;
146}
147
148// tries to find "key1key2" then "key1" then "key2"
149bool get_formatstring (const text_t &key1, const text_t &key2, 
150               const text_tmap &formatstringmap,
151               text_t &formatstring) {
152
153  formatstring.clear();
154  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
155  if (it != formatstringmap.end()) {
156    formatstring = (*it).second;
157    return true;
158  }
159  it = formatstringmap.find(key1);
160  if (it != formatstringmap.end()) {
161    formatstring = (*it).second;
162    return true;
163  }
164  it = formatstringmap.find(key2);
165  if (it != formatstringmap.end()) {
166    formatstring = (*it).second;
167    return true;
168  }
169  return false;
170}
171
172
173text_t remove_namespace(const text_t &meta_name) {
174  text_t::const_iterator end = meta_name.end();
175  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
176  if (it != end) {
177    return substr(it+1, end);
178  }
179
180  return meta_name;
181
182}
183// returns a date of form _format:date_(year, month, day)
184// input is date of type yyyy-?mm-?dd
185// at least the year must be present in date
186text_t format_date (const text_t &date) {
187
188  if (date.size() < 4) return "";
189
190  text_t::const_iterator datebegin = date.begin();
191
192  text_t year = substr (datebegin, datebegin+4);
193  int chars_seen_so_far = 4;
194
195  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
196  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
197 
198  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
199  int imonth = month.getint();
200  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
201 
202  chars_seen_so_far += 2;
203  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
204 
205  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
206
207  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
208  if (day[0] == '0') day = substr (day.begin()+1, day.end());
209  int iday = day.getint();
210  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
211   
212  return "_format:date_("+year+","+month+","+day+")";
213
214
215// converts an iso639 language code to its English equivalent
216// should we be checking that the macro exists??
217text_t iso639 (const text_t &langcode) {
218  if (langcode.empty()) return "";
219  return "_iso639:iso639"+langcode+"_";
220}
221
222
223text_t get_href (const text_t &link) {
224
225  text_t href;
226
227  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
228  text_t::const_iterator end = link.end();
229  if (here == end) return g_EmptyText;
230 
231  ++here;
232  while (here != end) {
233    if (*here == '"') break;
234    href.push_back(*here);
235    ++here;
236  }
237
238  return href;
239}
240
241//this function gets the information associated with the relation
242//metadata for the document associated with 'docinfo'. This relation
243//metadata consists of a line of pairs containing 'collection, document OID'
244//(this is the OID of the document related to the current document, and
245//the collection the related document belongs to). For each of these pairs
246//the title metadata is obtained and then an html link between the title
247//of the related doc and the document's position (the document will be
248//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
249//(where collection is the related documents collection, and OID is the
250//related documents OID).  A list of these html links are made for as many
251//related documents as there are. This list is then returned. If there are
252//no related documents available for the current document then the string
253//'.. no related documents .. ' is returned.
254text_t get_related_docs(const text_t& collection, recptproto* collectproto,
255            ResultDocInfo_t &docinfo, ostream& logout){
256 
257  text_tset metadata;
258
259  //insert the metadata we wish to collect
260  metadata.insert("dc.Relation");
261  metadata.insert("Title"); 
262  metadata.insert("Subject"); //for emails, where title data doesn't apply
263 
264  FilterResponse_t response;
265  text_t relation = ""; //string for displaying relation metadata
266  text_t relationTitle = ""; //the related documents Title (or subject)
267  text_t relationOID = ""; //the related documents OID 
268
269  //get the information associated with the metadata for current doc
270  if (get_info (docinfo.OID, collection, "", metadata,
271        false, collectproto, response, logout)) {
272   
273    //if the relation metadata exists, store for displaying
274    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
275      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
276
277      //split relation data into pairs of collectionname,ID number
278      text_tarray relationpairs;
279      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
280     
281      text_tarray::const_iterator currDoc = relationpairs.begin(); 
282      text_tarray::const_iterator lastDoc = relationpairs.end();
283
284      //iterate through the pairs to split and display
285      while(currDoc != lastDoc){
286   
287    //split pairs into collectionname and ID
288    text_tarray relationdata;
289    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
290   
291    //get first element in the array (collection)
292    text_tarray::const_iterator doc_data = relationdata.begin();
293    text_t document_collection = *doc_data;
294    ++doc_data; //increment to get next item in array (oid)
295    text_t document_OID = *doc_data;
296   
297    //create html link to related document
298    relation += "<a href=\"_httpdocument_&c=" + document_collection;
299    relation += "&cl=search&d=" + document_OID;
300       
301    //get the information associated with the metadata for related doc
302    if (get_info (document_OID, document_collection, "", metadata,
303              false, collectproto, response, logout)) {
304     
305      //if title metadata doesn't exist, collect subject metadata
306      //if that doesn't exist, just call it 'related document'
307      if (!response.docInfo[0].metadata["Title"].values[0].empty())
308        relationTitle = response.docInfo[0].metadata["Title"].values[0];
309      else if (!response.docInfo[0].metadata["Subject"].values.empty())
310        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
311      else relationTitle =  "RELATED DOCUMENT";
312     
313    }
314   
315    //link the related document's title to its page
316    relation += "\">" + relationTitle + "</a>";
317    relation += "  (" + document_collection + ")<br>";
318   
319    ++currDoc;
320      }
321    }
322   
323  }
324
325  if(relation.empty()) //no relation data for documnet
326    relation = ".. no related documents .. ";
327
328  return relation;
329}
330
331
332
333static void get_parent_options (text_t &instring, metadata_t &metaoption) {
334
335  assert (instring.size() > 7);
336  if (instring.size() <= 7) return;
337
338  text_t meta, com, op;
339  bool inbraces = false;
340  bool inquotes = false;
341  bool foundcolon = false;
342  text_t::const_iterator here = instring.begin()+6;
343  text_t::const_iterator end = instring.end();
344  while (here != end) {
345    if (foundcolon) meta.push_back (*here);
346    else if (*here == '(') inbraces = true;
347    else if (*here == ')') inbraces = false;
348    else if (*here == '\'' && !inquotes) inquotes = true;
349    else if (*here == '\'' && inquotes) inquotes = false;
350    else if (*here == ':' && !inbraces) foundcolon = true;
351    else if (inquotes) op.push_back (*here);
352    else com.push_back (*here);
353    ++here;
354  }
355
356  instring = meta;
357  if (com.empty())
358    metaoption.mqualifier.parent = pImmediate;
359  else if (com == "Top")
360    metaoption.mqualifier.parent = pTop;
361  else if (com == "All") {
362    metaoption.mqualifier.parent = pAll;
363    metaoption.parentoptions = op;
364  }
365}
366
367
368static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
369
370  assert (instring.size() > 8);
371  if (instring.size() <= 8) return;
372  text_t meta, com, op;
373  bool inbraces = false;
374  bool inquotes = false;
375  bool foundcolon = false;
376  text_t::const_iterator here = instring.begin()+7;
377  text_t::const_iterator end = instring.end();
378  while (here != end) {
379    if (foundcolon) meta.push_back (*here);
380    else if (*here == '(') inbraces = true;
381    else if (*here == ')') inbraces = false;
382    else if (*here == '\'' && !inquotes) inquotes = true;
383    else if (*here == '\'' && inquotes) inquotes = false;
384    else if (*here == ':' && !inbraces) foundcolon = true;   
385    else if (inquotes) op.push_back (*here);
386    else com.push_back (*here);
387    ++here;
388  }
389
390  instring = meta;
391  metaoption.siblingoptions.clear();
392
393  if (com.empty()) {
394    metaoption.mqualifier.sibling = sAll;
395    metaoption.siblingoptions = " ";
396  }
397  else if (com == "first") {
398    metaoption.mqualifier.sibling = sNum;
399    metaoption.siblingoptions = "0";
400  }
401  else if (com == "last") {
402    metaoption.mqualifier.sibling = sNum;
403    metaoption.siblingoptions = "-2"; // == last
404  }
405  else if (com.getint()>0) {
406    metaoption.mqualifier.sibling = sNum;
407    int pos = com.getint()-1;
408    metaoption.siblingoptions +=pos;
409  }
410  else {
411    metaoption.mqualifier.sibling = sAll;
412    metaoption.siblingoptions = op;
413  }
414}
415
416static void get_child_options (text_t &instring, metadata_t &metaoption) {
417
418  assert (instring.size() > 6);
419  if (instring.size() <= 6) return;
420  text_t meta, com, op;
421  bool inbraces = false;
422  bool inquotes = false;
423  bool foundcolon = false;
424  text_t::const_iterator here = instring.begin()+5;
425  text_t::const_iterator end = instring.end();
426  while (here != end) {
427    if (foundcolon) meta.push_back (*here);
428    else if (*here == '(') inbraces = true;
429    else if (*here == ')') inbraces = false;
430    else if (*here == '\'' && !inquotes) inquotes = true;
431    else if (*here == '\'' && inquotes) inquotes = false;
432    else if (*here == ':' && !inbraces) foundcolon = true;
433    else if (inquotes) op.push_back (*here);
434    else com.push_back (*here);
435    ++here;
436  }
437
438  instring = meta;
439  if (com.empty()) {
440    metaoption.mqualifier.child = cAll;
441    metaoption.childoptions = " ";
442  }
443  else if (com == "first") {
444    metaoption.mqualifier.child = cNum;
445    metaoption.childoptions = ".fc";
446  }
447  else if (com == "last") {
448    metaoption.mqualifier.child = cNum;
449    metaoption.childoptions = ".lc";
450  }
451  else if (com.getint()>0) {
452    metaoption.mqualifier.child = cNum;
453    metaoption.childoptions = "."+com;
454  }
455  else {
456    metaoption.mqualifier.child = cAll;
457    metaoption.childoptions = op;
458  }
459}
460
461
462
463static void parse_meta (text_t &meta, metadata_t &metaoption,
464            text_tset &metadata, bool &getParents) {
465
466  if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
467    metaoption.metacommand |= mCgiSafe;
468    meta = substr (meta.begin()+8, meta.end());
469  }
470  if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {   
471    metaoption.metacommand |= mSpecial;
472    meta = substr (meta.begin()+7, meta.end());
473  }
474
475  bool had_parent_or_child = true;
476  bool prev_was_parent = false;
477  bool prev_was_child  = false;
478
479  while (had_parent_or_child) {
480    if (meta.size() > 7
481    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
482
483      // clear out sibling and child (cmd and options)
484      metaoption.metacommand &= ~(mChild|mSibling);
485      metaoption.childoptions.clear();
486      metaoption.siblingoptions.clear();
487
488      getParents = true;
489      metaoption.metacommand |= mParent;
490      get_parent_options (meta, metaoption);
491
492      if (prev_was_parent) {
493    metaoption.pre_tree_traverse += ".pr";
494      }
495      else if (prev_was_child) {
496    metaoption.pre_tree_traverse += ".fc";
497      }
498
499      prev_was_parent = true;
500      prev_was_child  = false;
501    }
502    else if (meta.size() > 6
503         && (substr (meta.begin(), meta.begin()+5) == "child")) {
504
505      // clear out sibling and parent (cmd and options)
506      metaoption.metacommand &= ~(mParent|mSibling);
507      metaoption.parentoptions.clear();
508      metaoption.siblingoptions.clear();
509
510      metaoption.metacommand |= mChild;
511      get_child_options (meta, metaoption);
512      metadata.insert("contains");
513
514      if (prev_was_parent) {
515    metaoption.pre_tree_traverse += ".pr";
516      }
517      else if (prev_was_child) {
518    metaoption.pre_tree_traverse += ".fc";
519      }
520
521      prev_was_child  = true;
522      prev_was_parent = false;
523    }
524    else {
525      prev_was_child  = false;
526      prev_was_parent = false;
527      had_parent_or_child = false;
528    }
529  }
530
531  // parent/child can have sibling tacked on end also
532  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
533    metaoption.metacommand |= mSibling;
534    get_sibling_options (meta, metaoption);
535  }
536 
537  // check for ex. which may occur in format statements
538  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
539    meta = substr (meta.begin()+3, meta.end());
540  }
541  metadata.insert (meta);
542  metaoption.metaname = meta;
543}
544
545static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
546  if (meta == "collection") {
547    // no qualifiers
548    metaoption.metaname = g_EmptyText;
549    return;
550  }
551  meta = substr (meta.begin()+11, meta.end());
552  metaoption.metaname = meta;
553 
554}
555
556static void parse_meta (text_t &meta, format_t *formatlistptr,
557            text_tset &metadata, bool &getParents) {
558 
559  if (meta == "link")
560    formatlistptr->command = comLink;
561  else if (meta == "/link")
562    formatlistptr->command = comEndLink;
563
564  else if (meta == "href")
565    formatlistptr->command = comHref;
566
567  else if (meta == "num")
568    formatlistptr->command = comNum;
569
570  else if (meta == "icon")
571    formatlistptr->command = comIcon;
572
573  else if (meta == "Text")
574    formatlistptr->command = comDoc;
575 
576  else if (meta == "RelatedDocuments")
577   formatlistptr->command = comRel;
578
579  else if (meta == "highlight")
580    formatlistptr->command = comHighlight;
581
582  else if (meta == "/highlight")
583    formatlistptr->command = comEndHighlight;
584
585  else if (meta == "metadata-spanwrap")
586    formatlistptr->command = comMetadataSpanWrap;
587
588  else if (meta == "/metadata-spanwrap")
589    formatlistptr->command = comEndMetadataSpanWrap;
590
591  else if (meta == "Summary")
592    formatlistptr->command = comSummary;
593
594  else if (meta == "DocImage")
595    formatlistptr->command = comImage;
596
597  else if (meta == "DocTOC")
598    formatlistptr->command = comTOC;
599
600  else if (meta == "DocumentButtonDetach")
601     formatlistptr->command = comDocumentButtonDetach;
602 
603  else if (meta == "DocumentButtonHighlight")
604     formatlistptr->command = comDocumentButtonHighlight;
605 
606  else if (meta == "DocumentButtonExpandContents")
607    formatlistptr->command = comDocumentButtonExpandContents;
608
609  else if (meta == "DocumentButtonExpandText")
610     formatlistptr->command = comDocumentButtonExpandText;
611
612  else if (meta == "DocOID")
613     formatlistptr->command = comOID;
614  else if (meta == "DocTopOID")
615    formatlistptr->command = comTopOID;
616  else if (meta == "DocRank")
617    formatlistptr->command = comRank;
618  else if (meta == "DocTermsFreqTotal")
619    formatlistptr->command = comDocTermsFreqTotal;
620  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
621    formatlistptr->command = comCollection;
622    parse_coll_meta(meta, formatlistptr->meta);
623  }
624  else {
625    formatlistptr->command = comMeta;
626    parse_meta (meta, formatlistptr->meta, metadata, getParents);
627  }
628}
629
630
631static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
632              text_tset &metadata, bool &getParents) {
633
634  text_t text;
635  text_t::const_iterator here = formatstring.begin();
636  text_t::const_iterator end = formatstring.end();
637
638  while (here != end) {
639
640    if (*here == '\\') {
641      ++here;
642      if (here != end) text.push_back (*here);
643
644    } else if (*here == '{') {
645      if (!text.empty()) {
646    formatlistptr->command = comText;
647    formatlistptr->text = text;
648    formatlistptr->nextptr = new format_t();
649    formatlistptr = formatlistptr->nextptr;
650   
651    text.clear();
652      }
653      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
654
655    formatlistptr->nextptr = new format_t();
656    formatlistptr = formatlistptr->nextptr;
657    if (here == end) break;
658      }
659    } else if (*here == '[') {
660      if (!text.empty()) {
661    formatlistptr->command = comText;
662    formatlistptr->text = text;
663    formatlistptr->nextptr = new format_t();
664    formatlistptr = formatlistptr->nextptr;
665
666    text.clear();
667      }
668      text_t meta;
669      ++here;
670      while (*here != ']') {
671    if (here == end) return false;
672    meta.push_back (*here);
673    ++here;
674      }
675      parse_meta (meta, formatlistptr, metadata, getParents);
676      formatlistptr->nextptr = new format_t();
677      formatlistptr = formatlistptr->nextptr;
678
679    } else
680      text.push_back (*here);
681
682    if (here != end) ++here;
683  }
684  if (!text.empty()) {
685    formatlistptr->command = comText;
686    formatlistptr->text = text;
687    formatlistptr->nextptr = new format_t();
688    formatlistptr = formatlistptr->nextptr;
689
690  }
691  return true;
692}
693
694
695static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
696              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
697
698  text_t::const_iterator it = findchar (here, end, '}');
699  if (it == end) return false;
700
701  text_t com = substr (here, it);
702  here = findchar (it, end, '{');
703  if (here == end) return false;
704  else ++here;
705
706  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
707  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
708  else return false;
709
710  int commacount = 0;
711  text_t text;
712  while (here != end) {
713
714    if (*here == '\\') {
715      ++here;
716      if (here != end) text.push_back(*here);
717     
718    }
719 
720    else if (*here == ',' || *here == '}' || *here == '{') {
721
722      if (formatlistptr->command == comOr) {
723    // the {Or}{this, or this, or this, or this} statement
724    format_t *or_ptr;
725   
726    // find the next unused orptr
727    if (formatlistptr->orptr == NULL) {
728      formatlistptr->orptr = new format_t();
729      or_ptr = formatlistptr->orptr;
730    } else {
731      or_ptr = formatlistptr->orptr;
732      while (or_ptr->nextptr != NULL)
733        or_ptr = or_ptr->nextptr;
734      or_ptr->nextptr = new format_t();
735      or_ptr = or_ptr->nextptr;
736    }
737
738    if (!text.empty())
739      {
740        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
741      }
742
743    if (*here == '{')
744      {
745        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
746        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
747        // The latter can always be re-written:
748        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
749       
750        if (!text.empty()) // already used up allocated format_t
751          {
752        // => allocate new one for detected action
753        or_ptr->nextptr = new format_t();
754        or_ptr = or_ptr->nextptr;
755          }
756        if (!parse_action(++here, end, or_ptr, metadata, getParents))
757          {
758        return false;
759          }
760      }
761    else
762      {
763        if (*here == '}') break;
764      }
765    text.clear();
766
767      }
768
769      // Parse an {If}{decide,do,else} statement
770      else {
771   
772    // Read the decision component. 
773    if (commacount == 0) {
774      // Decsion can be a metadata element, or a piece of text.
775      // Originally Stefan's code, updated 25/10/2000 by Gordon.
776
777      text_t::const_iterator beginbracket = text.begin();
778      text_t::const_iterator endbracket = (text.end() - 1);
779
780      // Decision is based on a metadata element
781      if ((*beginbracket == '[') && (*endbracket == ']')) {
782        // Ignore the surrounding square brackets
783        text_t meta = substr (beginbracket+1, endbracket);
784        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
785        ++commacount;
786        text.clear();
787      }
788
789      // Decision is a piece of text (probably a macro like _cgiargmode_).
790      else {
791
792        // hunt for any metadata in string, which might be uses in
793        // to test a condition, e.g. [Format] eq 'PDF'
794        format_t* dummyformat = new format_t();
795        // update which metadata fields needed
796        // (not interested in updatng formatlistptr)
797        parse_string (text, dummyformat, metadata, getParents);
798        delete dummyformat;
799
800        formatlistptr->decision.command = dText;
801        formatlistptr->decision.text = text;
802        ++commacount;
803        text.clear();
804      }
805    }
806
807    // Read the "then" and "else" components of the {If} statement.
808    else {
809      format_t** nextlistptr = NULL;
810      if (commacount == 1) {
811        nextlistptr = &formatlistptr->ifptr;
812      } else if (commacount == 2 ) {
813        nextlistptr = &formatlistptr->elseptr;
814      } else {
815        return false;
816      }
817
818      if (!text.empty()) {
819        if (*nextlistptr == NULL) {
820          *nextlistptr = new format_t();
821        } else {
822
823          // skip to the end of any format_t statements already added
824          while ((*nextlistptr)->nextptr != NULL)
825          {
826        nextlistptr = &(*nextlistptr)->nextptr;
827          }
828
829          (*nextlistptr)->nextptr = new format_t();
830          nextlistptr = &(*nextlistptr)->nextptr;
831        }
832
833        if (!parse_string (text, *nextlistptr, metadata, getParents))
834          {
835        return false;
836          }
837        text.clear();
838      }
839     
840      if (*here == '{')
841        {
842          if (*nextlistptr == NULL) {
843        *nextlistptr = new format_t();
844          } else {
845        // skip to the end of any format_t statements already added
846        while ((*nextlistptr)->nextptr != NULL)
847          {
848            nextlistptr = &(*nextlistptr)->nextptr;
849          }
850
851        (*nextlistptr)->nextptr = new format_t();
852        nextlistptr = &(*nextlistptr)->nextptr;
853          }
854
855          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
856        {
857          return false;
858        }
859        }
860      else
861        {
862          if (*here == '}') break;
863          ++commacount;
864        }
865    }
866      }
867     
868    } else text.push_back(*here);
869   
870    if (here != end) ++here;
871  }
872
873  return true;
874}
875
876
877static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
878                const text_t metaname, int metapos=-1)
879{
880
881  text_t tag_type = (metaname == "Text") ? "div" : "span";
882  text_t editable_type = (metaname == "Text") ? "text" : "metadata";
883
884  text_t wrapped_metatext = "<" + tag_type + " ";
885  wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
886
887  wrapped_metatext += "docoid=\"" + OID + "\" "; 
888  wrapped_metatext += "metaname=\"" + metaname + "\"";
889
890  if (metapos>=0) {
891    text_t metapos_str = metapos;
892    wrapped_metatext += " metapos=\"" + metapos_str + "\"";
893  }
894
895  wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
896
897  return wrapped_metatext;
898}
899
900   
901
902bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
903             text_tset &metadata, bool &getParents) {
904
905  formatlistptr->clear();
906  getParents = false;
907
908  return (parse_string (formatstring, formatlistptr, metadata, getParents));
909}
910
911// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
912// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
913
914static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
915{
916  text_t no_ns_metaname = remove_namespace(meta.metaname);
917  text_t formatted_metatext;
918  bool first = true;
919 
920  const int start_i=0;
921  const int end_i = metainfo.values.size()-1;
922 
923  if (position == -1) { // all
924    for (int i=start_i; i<=end_i; ++i) {
925      if (!first) formatted_metatext += meta.siblingoptions;
926     
927      text_t fresh_metatext;
928
929      if (meta.metacommand & mSpecial) {
930    // special formatting
931    if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
932    else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
933    else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
934      }
935      else fresh_metatext = metainfo.values[i];
936
937      if (metadata_spanwrap) {
938    fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
939      }
940      formatted_metatext += fresh_metatext;
941
942      first = false;
943     
944    }
945  } else {
946    if (position == -2) { // end
947      position = end_i;
948    } else if (position < start_i || position > end_i) {
949      return "";
950    }
951
952    text_t fresh_metatext;
953    if (meta.metacommand & mSpecial) {
954
955      // special formatting
956      if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
957      else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
958      else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
959    }
960    else fresh_metatext = metainfo.values[position];
961
962    if (metadata_spanwrap) {
963      fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
964    }
965
966    formatted_metatext += fresh_metatext;
967  }
968
969  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
970  else return formatted_metatext;
971}
972
973static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
974{
975 
976  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
977
978  switch (meta.mqualifier.parent) {
979  case pNone:
980    return "Nothing!!";
981    break;
982
983  case pImmediate:
984    if (parent != NULL) {
985      text_t parent_oid = get_parent(docinfo.OID);
986      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
987    }
988    break;
989
990  case pTop:
991    if (parent != NULL) {
992      text_t parent_oid = get_parent(docinfo.OID);
993
994      while (parent->parent != NULL) {
995    parent = parent->parent;
996    parent_oid = get_parent(parent_oid);
997      }
998      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
999    }
1000    break;
1001
1002  case pAll:
1003    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1004    if (parent != NULL) {
1005      text_t parent_oid = get_parent(docinfo.OID);
1006
1007      text_tarray tmparray;
1008      while (parent != NULL) {
1009    tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1010    parent = parent->parent;
1011    parent_oid = get_parent(parent_oid);
1012
1013      }
1014      // now join them up - use teh parent separator
1015      bool first = true;
1016      text_t tmp;
1017      text_tarray::reverse_iterator here = tmparray.rbegin();
1018      text_tarray::reverse_iterator end = tmparray.rend();
1019      while (here != end) {
1020    if (!first) tmp += meta.parentoptions;
1021    tmp += *here;
1022    first = false;
1023    ++here;
1024      }
1025      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1026      else return tmp;
1027    }
1028  }
1029  return "";
1030
1031}
1032
1033static text_t get_child_meta (const text_t& collection,
1034                  recptproto* collectproto,
1035                  ResultDocInfo_t &docinfo, displayclass &disp,
1036                  const metadata_t &meta, text_tmap &options,
1037                  ostream& logout, int siblings_values)
1038{
1039  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1040 
1041  const text_t& pre_tree_trav = meta.pre_tree_traverse;
1042  const text_t& child_metaname = meta.metaname;
1043  const text_t& child_field = meta.childoptions;
1044  text_tset child_metadata;
1045  child_metadata.insert(child_metaname);
1046
1047  FilterResponse_t child_response;
1048  if (meta.mqualifier.child == cNum) {
1049    // just one child
1050    //get the information associated with the metadata for child doc
1051    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1052           child_metadata, false, collectproto, child_response,
1053           logout)) return ""; // invalid child number
1054
1055      if (child_response.docInfo.empty()) return false; // no info for the child
1056 
1057      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1058      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1059 
1060      text_t child_metavalue
1061    = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1062      return expand_metadata(child_metavalue,collection,collectproto,
1063             child_docinfo,disp,options,logout);
1064  }
1065 
1066   
1067  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1068
1069
1070  if (!pre_tree_trav.empty()) {
1071    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1072    FilterResponse_t trav_response;
1073
1074    text_tset trav_metadata;
1075    trav_metadata.insert("contains");
1076
1077    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1078           trav_metadata, false, collectproto, trav_response,
1079           logout)) return ""; // invalid pre_tree_trav
1080
1081    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1082 
1083    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1084    // use this for rest of routine
1085    docinfo = trav_docinfo;
1086  }
1087 
1088  // we need to get all children
1089  text_t result = "";
1090  text_tarray children;
1091  text_t contains = docinfo.metadata["contains"].values[0];
1092  splitchar (contains.begin(), contains.end(), ';', children);
1093  text_tarray::const_iterator here = children.begin();
1094  text_tarray::const_iterator end = children.end();
1095  bool first = true;
1096  while (here !=end) {
1097    text_t oid = *here;
1098    here++;
1099    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1100   
1101    //get the information associated with the metadata for child doc
1102    if (!get_info (oid, collection, "", child_metadata,
1103           false, collectproto, child_response, logout) ||
1104    child_response.docInfo.empty()) {
1105      first = false;
1106      continue;
1107    }
1108   
1109   
1110    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1111    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1112   
1113    text_t child_metavalue
1114      = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1115    if (!first) result += child_field;
1116    first = false;
1117    // need to do this here cos otherwise we are in the wrong document
1118    result +=  expand_metadata(child_metavalue,collection,collectproto,
1119                   child_docinfo,disp,options,logout);
1120  }
1121  return result;
1122   
1123}
1124
1125static text_t get_meta (const text_t& collection, recptproto* collectproto,
1126            ResultDocInfo_t &docinfo, displayclass &disp,
1127            const metadata_t &meta, text_tmap &options,
1128            ostream& logout) {
1129 
1130  // make sure we have the requested metadata
1131  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1132  if (it == docinfo.metadata.end()) return "";
1133
1134  int siblings_values = 0; // default is no siblings, just the first metadata available
1135  if (meta.metacommand & mSibling) {
1136    if (meta.mqualifier.sibling == sAll) {
1137      siblings_values = -1; //all
1138    } else if (meta.mqualifier.sibling == sNum) {
1139      siblings_values = meta.siblingoptions.getint();
1140    }
1141  }
1142  if (meta.metacommand & mParent) {
1143    return get_parent_meta(docinfo,meta,siblings_values);
1144  }
1145
1146  else if (meta.metacommand & mChild) {
1147    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1148                options,logout, siblings_values);
1149  }
1150  else if (meta.metacommand & mSibling) { // only siblings
1151    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1152    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1153  }
1154  else {
1155   
1156    // straightforward metadata request (nothing fancy)
1157
1158    text_t classifier_metaname = docinfo.classifier_metadata_type;
1159    int metaname_index
1160      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1161    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1162  }
1163 
1164  return "";
1165}
1166
1167static text_t get_or (const text_t& collection, recptproto* collectproto,
1168              ResultDocInfo_t &docinfo, displayclass &disp,
1169              format_t *orptr, text_tmap &options,
1170              ostream& logout) {
1171
1172  while (orptr != NULL) {
1173
1174    if (metadata_spanwrap) {
1175      // need to be a bit more careful about this
1176      // => test for it *without* spanwrap, and if defined, then
1177      // got back and generate it again, this time with spanwrap on
1178
1179      metadata_spanwrap = false;
1180      text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1181                       options, logout);
1182      metadata_spanwrap = true;
1183      if (!test_tmp.empty()) {
1184
1185    return format_string (collection,collectproto,docinfo, disp, orptr,
1186                  options, logout);
1187      }
1188    }
1189    else {
1190      text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1191                  options, logout);
1192      if (!tmp.empty()) return tmp;
1193    }
1194
1195    orptr = orptr->nextptr;
1196  }
1197  return "";
1198}
1199
// True when 'c' is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1205
1206static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1207{
1208  int pos = start_pos;
1209  while (pos<outstring.size()) {
1210    if (!char_is_whitespace(outstring[pos])) {
1211      break;
1212    }
1213    ++pos;
1214  }
1215
1216  return pos;
1217}
1218
1219static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1220{
1221  int pos = start_pos;
1222  while (pos>=0) {
1223    if (!char_is_whitespace(outstring[pos])) {
1224      break;
1225    }
1226    --pos;
1227  }
1228
1229  return pos;
1230}
1231
1232static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1233{
1234  int pos = start_pos;
1235  while (pos>=0) {
1236    if (char_is_whitespace(outstring[pos])) {
1237      break;
1238    }
1239    --pos;
1240  }
1241
1242  return pos;
1243}
1244
1245
1246static int rscan_for(const text_t& outstring, const int start_pos,
1247             const char find_c)
1248{
1249  int pos = start_pos;
1250  while (pos>=0) {
1251    char c = outstring[pos];
1252    if (outstring[pos] == find_c) {
1253      break;
1254    }
1255    --pos;
1256  }
1257
1258  return pos;
1259}
1260
1261text_t extract_substr(const text_t& outstring, const int start_pos,
1262              const int end_pos)
1263{
1264  text_t extracted_str;
1265  extracted_str.clear();
1266
1267  for (int pos=start_pos; pos<=end_pos; ++pos) {
1268    extracted_str.push_back(outstring[pos]);
1269  }
1270
1271  return extracted_str;
1272}
1273
1274
1275static text_t expand_potential_metadata(const text_t& collection,
1276                    recptproto* collectproto,
1277                    ResultDocInfo_t &docinfo,
1278                    displayclass &disp,
1279                    const text_t& intext,
1280                    text_tmap &options,
1281                    ostream& logout)
1282{
1283  text_t outtext;
1284
1285  // decide if dealing with metadata or text
1286
1287  text_t::const_iterator beginbracket = intext.begin();
1288  text_t::const_iterator endbracket = (intext.end() - 1);
1289
1290  // Decision is based on a metadata element
1291  if ((*beginbracket == '[') && (*endbracket == ']')) {
1292    // Ignore the surrounding square brackets
1293    text_t meta_text = substr (beginbracket+1, endbracket);
1294
1295    if (meta_text == "Text") {
1296      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1297    }
1298    else {
1299
1300      text_tset metadata;
1301      bool getParents =false;
1302      metadata_t meta;
1303     
1304      parse_meta (meta_text, meta, metadata, getParents);   
1305      outtext
1306    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1307    }
1308
1309  }
1310  else {
1311    outtext = intext;
1312  }
1313
1314  return outtext;
1315}
1316
1317
1318
1319
1320static bool uses_expression(const text_t& collection, recptproto* collectproto,
1321                ResultDocInfo_t &docinfo,
1322                displayclass &disp,
1323                const text_t& outstring, text_t& lhs_expr,
1324                text_t& op_expr, text_t& rhs_expr,
1325                text_tmap &options,
1326                ostream& logout)
1327{
1328  // Note: the string may not be of the form: str1 op str2, however
1329  // to deterine this we have to process it on the assumption it is,
1330  // and if at any point an 'erroneous' value is encountered, return
1331  // false and let something else have a go at evaluating it
1332
1333  // Starting at the end of the string and working backwards ..
1334
1335  const int outstring_len = outstring.size();
1336
1337  // skip over white space
1338  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1339
1340  if (rhs_end<=0) {
1341    // no meaningful text or (rhs_end==0) no room for operator
1342    return false;
1343  }
1344
1345  // check for ' or " and then scan over token
1346  const char potential_quote = outstring[rhs_end];
1347  int rhs_start=rhs_end;
1348  bool quoted = false;
1349
1350  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1351    --rhs_end;
1352    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1353    quoted = true;
1354  }
1355  else {
1356    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1357  }
1358
1359  if ((rhs_end-rhs_start)<0) {
1360    // no meaningful rhs expression
1361    return false;
1362  }
1363
1364  // form rhs_expr
1365  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1366
1367  // skip over white space
1368  const int to_whitespace = (quoted) ? 2 : 1;
1369
1370  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1371  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1372
1373  if ((op_end<0) && (op_start<0)) {
1374    // no meaningful expression operator
1375    return false;
1376  }
1377
1378  if (op_end-op_start<0) {
1379    // no meaningful expression operator
1380    return false;
1381  }
1382
1383  op_expr = extract_substr(outstring,op_start,op_end);
1384
1385
1386  // check for operator
1387  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1388     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1389
1390    // not a valid operator
1391    return false;
1392  }
1393
1394  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1395  if (lhs_end<0) {
1396    // no meaningful lhs expression
1397    return false;
1398  }
1399
1400  int lhs_start = scan_over_whitespace(outstring,0);
1401
1402  // form lhs_expr from remainder of string
1403  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1404
1405  // Now we know we have a valid expression, look up any
1406  // metadata terms
1407
1408  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1409                       disp,rhs_expr,options,logout);
1410  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1411                       disp,lhs_expr,options,logout);
1412
1413  return true;
1414}
1415
1416static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1417                 const text_t& rhs_expr, ostream& logout)
1418{
1419  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1420  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1421  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1422  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1423  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1424  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1425  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1426  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1427  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1428  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1429  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1430  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1431  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1432  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1433  else {
1434    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1435  }
1436 
1437  return false;
1438}
1439
1440
1441static text_t get_if (const text_t& collection, recptproto* collectproto,
1442              ResultDocInfo_t &docinfo, displayclass &disp,
1443              const decision_t &decision,
1444              format_t *ifptr, format_t *elseptr,
1445              text_tmap &options, ostream& logout)
1446{
1447  // If the decision component is a metadata element, then evaluate it
1448  // to see whether we output the "then" or the "else" clause
1449  if (decision.command == dMeta) {
1450
1451    bool store_metadata_spanwrap = metadata_spanwrap;
1452    metadata_spanwrap = 0;
1453
1454    // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1455    bool metadata_exists
1456      = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1457           logout) != "");
1458
1459    metadata_spanwrap = store_metadata_spanwrap;
1460
1461    if (metadata_exists) {
1462      if (ifptr != NULL)
1463    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1464                     options, logout);
1465    }
1466    else {
1467      if (elseptr != NULL)
1468    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1469                     options, logout);
1470    }
1471  }
1472
1473  // If the decision component is text, then evaluate it (it is probably a
1474  // macro like _cgiargmode_) to decide what to output.
1475  else if (decision.command == dText) {
1476
1477    text_t outstring;
1478    disp.expandstring (decision.text, outstring);
1479
1480    // Check for if expression in form: str1 op str2
1481    // (such as [x] eq "y")
1482    text_t lhs_expr, op_expr, rhs_expr;
1483    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1484      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1485    if (ifptr != NULL) {
1486      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1487                       options, logout);
1488    }
1489    else {
1490      return "";
1491    }
1492      } else {
1493    if (elseptr != NULL) {
1494      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1495                       options, logout);
1496    }
1497    else {
1498      return "";
1499    }
1500      }
1501    }
1502
1503
1504    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1505    // a cgi argument macro that has not been set, it evaluates to itself.
1506    // Therefore, were have to say that a piece of text evalautes true if
1507    // it is non-empty and if it is a cgi argument evaulating to itself.
1508
1509    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1510      if (ifptr != NULL)
1511    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1512                     options, logout);
1513    } else {
1514      if (elseptr != NULL)
1515    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1516                     options, logout);
1517    }
1518  }
1519 
1520  return "";
1521}
1522
1523bool includes_metadata(const text_t& text)
1524{
1525  text_t::const_iterator here = text.begin();
1526  text_t::const_iterator end = text.end();
1527  while (here != end) {
1528    if (*here == '[') return true;
1529    ++here;
1530  }
1531
1532  return false;
1533}
1534
1535static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1536                  recptproto* collectproto,
1537                  ResultDocInfo_t &docinfo,
1538                  displayclass &disp, text_tmap &options,
1539                  ostream &logout) {
1540     
1541  if (includes_metadata(metavalue)) {
1542   
1543    // text has embedded metadata in it => expand it
1544    FilterRequest_t request;
1545    FilterResponse_t response;
1546   
1547    request.getParents = false;
1548   
1549    format_t *expanded_formatlistptr = new format_t();
1550    parse_formatstring (metavalue, expanded_formatlistptr,
1551            request.fields, request.getParents);
1552   
1553    // retrieve metadata
1554    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1555         collectproto, response, logout);
1556   
1557    if (!response.docInfo.empty()) {
1558     
1559      text_t expanded_metavalue
1560    = get_formatted_string(collection, collectproto,
1561                   response.docInfo[0], disp, expanded_formatlistptr,
1562                   options, logout);
1563     
1564      return expanded_metavalue;
1565    }
1566    else {
1567      return metavalue;
1568    }
1569  }
1570  else {
1571   
1572    return metavalue;
1573  }
1574}
1575
1576text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1577               displayclass &disp,
1578               text_t meta_name, ostream& logout) {
1579 
1580  ColInfoResponse_t collectinfo;
1581  comerror_t err;
1582  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1583  text_t meta_value = "";
1584  text_t lang;
1585  disp.expandstring("_cgiargl_",lang);
1586  if (lang.empty()) {
1587    lang = "en";
1588  }
1589
1590  if (err == noError) {
1591    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1592  }
1593  return meta_value;
1594 
1595
1596}
1597text_t format_string (const text_t& collection, recptproto* collectproto,
1598              ResultDocInfo_t &docinfo, displayclass &disp,
1599              format_t *formatlistptr, text_tmap &options,
1600              ostream& logout) {
1601
1602  if (formatlistptr == NULL) return "";
1603
1604  switch (formatlistptr->command) {
1605     case comOID:
1606    return docinfo.OID;
1607  case comTopOID:
1608    {
1609      text_t top_id;
1610      get_top(docinfo.OID, top_id);
1611      return top_id;
1612    }
1613  case comRank:
1614    return text_t(docinfo.ranking);
1615     case comText:
1616    return formatlistptr->text;
1617     case comLink:
1618    return options["link"];
1619     case comEndLink:
1620    if (options["link"].empty()) return "";
1621    else return "</a>";
1622     case comHref:
1623    return get_href(options["link"]);
1624     case comIcon:
1625    return options["icon"];
1626     case comNum:
1627    return docinfo.result_num;
1628     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1629    return get_related_docs(collection, collectproto, docinfo, logout);
1630
1631     case comSummary:
1632       return format_summary(collection, collectproto, docinfo, disp, options, logout);
1633
1634     case comMeta:
1635    {
1636       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1637       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1638    }
1639
1640     case comDoc:
1641       return format_text(collection, collectproto, docinfo, disp, options, logout);
1642
1643     case comImage:
1644    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1645     case comTOC:
1646    return options["DocTOC"];
1647     case comDocumentButtonDetach:
1648    return options["DocumentButtonDetach"];
1649     case comDocumentButtonHighlight:
1650    return options["DocumentButtonHighlight"];
1651     case comDocumentButtonExpandContents:
1652    return options["DocumentButtonExpandContents"];
1653     case comDocumentButtonExpandText:
1654    return options["DocumentButtonExpandText"];
1655     case comHighlight:
1656    if (options["highlight"] == "1") return "<b>";
1657    break;
1658     case comEndHighlight:
1659    if (options["highlight"] == "1") return "</b>";
1660    break;
1661     case comMetadataSpanWrap:
1662        metadata_spanwrap=true;  return "";
1663    break;
1664     case comEndMetadataSpanWrap:
1665    metadata_spanwrap=false; return "";
1666    break;
1667     case comIf:
1668    return get_if (collection, collectproto, docinfo, disp,
1669               formatlistptr->decision, formatlistptr->ifptr,
1670               formatlistptr->elseptr, options, logout);
1671     case comOr:
1672    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1673               options, logout);
1674     case comDocTermsFreqTotal:
1675       return docinfo.num_terms_matched;
1676     case comCollection:
1677       if (formatlistptr->meta.metaname == g_EmptyText) {
1678     return collection;
1679       }
1680       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1681   
1682  }
1683  return "";
1684}
1685
1686text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1687                 ResultDocInfo_t &docinfo, displayclass &disp,
1688                 format_t *formatlistptr, text_tmap &options,
1689                 ostream& logout) {
1690
1691   text_t ft;
1692   while (formatlistptr != NULL)
1693      {
1694     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1695                  options, logout);
1696     formatlistptr = formatlistptr->nextptr;
1697      }
1698   
1699   return ft;
1700}
1701
1702
1703// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1704text_t format_text (const text_t& collection, recptproto* collectproto,
1705            ResultDocInfo_t &docinfo, displayclass &disp,
1706            text_tmap &options, ostream& logout)
1707{
1708  text_t text;
1709
1710  if(!options["text"].empty()) {
1711    text = options["text"];
1712  }
1713  else {
1714    // get document text here
1715    DocumentRequest_t docrequest;
1716    DocumentResponse_t docresponse;
1717    comerror_t err;
1718    docrequest.OID = docinfo.OID;
1719    collectproto->get_document (collection, docrequest, docresponse, err, logout);
1720    text = docresponse.doc;
1721  }
1722
1723  if (metadata_spanwrap) {
1724    text = spanwrap_metatext(text,docinfo.OID,"Text");
1725  }
1726
1727  return text;
1728}
1729 
1730/* FUNCTION NAME: format_summary
1731 * DESC: this is invoked when a [Summary] special metadata is processed.
1732 * RETURNS: a query-biased summary for the document */
1733
1734text_t format_summary (const text_t& collection, recptproto* collectproto,
1735               ResultDocInfo_t &docinfo, displayclass &disp,
1736               text_tmap &options, ostream& logout) {
1737
1738  // GRB: added code here to ensure that the cstr (and other collections)
1739  //      uses the document metadata item Summary, rather than compressing
1740  //      the text of the document, processed via the methods in
1741  //      summarise.cpp
1742
1743  text_t summary;
1744
1745  if (docinfo.metadata.count("Summary") > 0 &&
1746      docinfo.metadata["Summary"].values.size() > 0) {
1747    summary = docinfo.metadata["Summary"].values[0];
1748  }
1749  else {
1750 
1751    text_t textToSummarise, query;
1752
1753    if(options["text"].empty()) { // get document text
1754      DocumentRequest_t docrequest;
1755      DocumentResponse_t docresponse;
1756      comerror_t err;
1757      docrequest.OID = docinfo.OID;
1758      collectproto->get_document (collection, docrequest, docresponse, err, logout);
1759      textToSummarise = docresponse.doc;
1760    }
1761    else {
1762      // in practice, this would not happen, because text is only
1763      // loaded with the [Text] command
1764    textToSummarise = options["text"];
1765    }
1766   
1767    disp.expandstring("_cgiargq_",query);
1768    summary = summarise(textToSummarise,query,80);
1769  }
1770
1771  if (metadata_spanwrap) {
1772    summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1773  }
1774
1775  return summary;
1776}
Note: See TracBrowser for help on using the browser.