root/main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp @ 24306

Revision 24306, 61.6 KB (checked in by ak19, 8 years ago)

More changes to do with the ex. prefixed to embedded metadata (that may have an additional metadata set as namespace qualifier). The C code now removes the ex. prefix only if there are no other metadataset qualifiers in the metadata name.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41                 ResultDocInfo_t &docinfo, displayclass &disp,
42                 format_t *formatlistptr, text_tmap &options,
43                 ostream& logout);
44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46              format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49                  ResultDocInfo_t &docinfo, displayclass &disp,
50                  text_tmap &options, ostream& logout);
51static text_t format_text (const text_t& collection, recptproto* collectproto,
52                  ResultDocInfo_t &docinfo, displayclass &disp,
53                  text_tmap &options, ostream& logout);
54
55static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
56                  recptproto* collectproto, ResultDocInfo_t &docinfo,
57                  displayclass &disp, text_tmap &options,
58                  ostream &logout);
59
60
61void metadata_t::clear() {
62  metaname.clear();
63  metacommand = mNone;
64  mqualifier.parent  = pNone;
65  mqualifier.sibling = sNone;
66  mqualifier.child   = cNone;
67  pre_tree_traverse.clear();
68  parentoptions.clear();
69  siblingoptions.clear();
70  childoptions.clear();
71}
72
73void decision_t::clear() {
74  command = dMeta;
75  meta.clear();
76  text.clear();
77}
78
79format_t::~format_t()
80{
81  if (nextptr != NULL) delete nextptr;
82  if (ifptr != NULL) delete ifptr;
83  if (elseptr != NULL) delete elseptr;
84  if (orptr != NULL) delete orptr;
85}
86
87void format_t::clear() {
88  command = comText;
89  decision.clear();
90  text.clear();
91  meta.clear();
92  nextptr = NULL;
93  ifptr = NULL;
94  elseptr = NULL;
95  orptr = NULL;
96}
97
98void formatinfo_t::clear() {
99  DocumentImages = false;
100  DocumentTitles = true;
101  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
102  DocumentContents = true;
103  DocumentArrowsBottom = true;
104  DocumentArrowsTop = false;
105  DocumentSearchResultLinks = false;
106  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
107  //  DocumentButtons.push_back ("Expand Text");
108  //  DocumentButtons.push_back ("Expand Contents");
109  DocumentButtons.push_back ("Detach");
110  DocumentButtons.push_back ("Highlight");
111  RelatedDocuments = "";
112  DocumentText = "[Text]";
113  formatstrings.erase (formatstrings.begin(), formatstrings.end());
114  DocumentUseHTML = false;
115  AllowExtendedOptions = false;
116}
117
118// simply checks to see if formatstring begins with a <td> tag
119bool is_table_content (const text_t &formatstring) {
120  text_t::const_iterator here = formatstring.begin();
121  text_t::const_iterator end = formatstring.end();
122 
123  while (here != end) {
124    if (*here != ' ') {
125      if ((*here == '<') && ((here+3) < end)) {
126    if ((*(here+1) == 't' || *(here+1) == 'T') &&
127        (*(here+2) == 'd' || *(here+2) == 'D') &&
128        (*(here+3) == '>' || *(here+3) == ' '))
129      return true;
130      } else return false;
131    }
132    ++here;
133  }
134  return false;
135}
136
137bool is_table_content (const format_t *formatlistptr) {
138
139  if (formatlistptr == NULL) return false;
140 
141  if (formatlistptr->command == comText)
142    return is_table_content (formatlistptr->text);
143   
144  return false;
145}
146
147// returns false if key isn't in formatstringmap
148bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
149               text_t &formatstring) {
150
151  formatstring.clear();
152  text_tmap::const_iterator it = formatstringmap.find(key);
153  if (it == formatstringmap.end()) return false;
154  formatstring = (*it).second;
155  return true;
156}
157
158// tries to find "key1key2" then "key1" then "key2"
159bool get_formatstring (const text_t &key1, const text_t &key2, 
160               const text_tmap &formatstringmap,
161               text_t &formatstring) {
162
163  formatstring.clear();
164  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
165  if (it != formatstringmap.end()) {
166    formatstring = (*it).second;
167    return true;
168  }
169  it = formatstringmap.find(key1);
170  if (it != formatstringmap.end()) {
171    formatstring = (*it).second;
172    return true;
173  }
174  it = formatstringmap.find(key2);
175  if (it != formatstringmap.end()) {
176    formatstring = (*it).second;
177    return true;
178  }
179  return false;
180}
181
182
183text_t remove_namespace(const text_t &meta_name) {
184  text_t::const_iterator end = meta_name.end();
185  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
186  if (it != end) {
187    return substr(it+1, end);
188  }
189
190  return meta_name;
191
192}
193// returns a date of form _format:date_(year, month, day)
194// input is date of type yyyy-?mm-?dd
195// at least the year must be present in date
196text_t format_date (const text_t &date) {
197
198  if (date.size() < 4) return "";
199
200  text_t::const_iterator datebegin = date.begin();
201
202  text_t year = substr (datebegin, datebegin+4);
203  int chars_seen_so_far = 4;
204  if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
205
206  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
208 
209  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210  int imonth = month.getint();
211  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
212 
213  chars_seen_so_far += 2;
214  if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
215
216  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
217  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
218
219  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
220  if (day[0] == '0') day = substr (day.begin()+1, day.end());
221  int iday = day.getint();
222  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
223   
224  return "_format:date_("+year+","+month+","+day+")";
225
226
227// converts an iso639 language code to its English equivalent
228// should we be checking that the macro exists??
229text_t iso639 (const text_t &langcode) {
230  if (langcode.empty()) return "";
231  return "_iso639:iso639"+langcode+"_";
232}
233
234
235text_t get_href (const text_t &link) {
236
237  text_t href;
238
239  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
240  text_t::const_iterator end = link.end();
241  if (here == end) return g_EmptyText;
242 
243  ++here;
244  while (here != end) {
245    if (*here == '"') break;
246    href.push_back(*here);
247    ++here;
248  }
249
250  return href;
251}
252
253//this function gets the information associated with the relation
254//metadata for the document associated with 'docinfo'. This relation
255//metadata consists of a line of pairs containing 'collection, document OID'
256//(this is the OID of the document related to the current document, and
257//the collection the related document belongs to). For each of these pairs
258//the title metadata is obtained and then an html link between the title
259//of the related doc and the document's position (the document will be
260//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
261//(where collection is the related documents collection, and OID is the
262//related documents OID).  A list of these html links are made for as many
263//related documents as there are. This list is then returned. If there are
264//no related documents available for the current document then the string
265//'.. no related documents .. ' is returned.
266text_t get_related_docs(const text_t& collection, recptproto* collectproto,
267            ResultDocInfo_t &docinfo, ostream& logout){
268 
269  text_tset metadata;
270
271  //insert the metadata we wish to collect
272  metadata.insert("dc.Relation");
273  metadata.insert("Title"); 
274  metadata.insert("Subject"); //for emails, where title data doesn't apply
275 
276  FilterResponse_t response;
277  text_t relation = ""; //string for displaying relation metadata
278  text_t relationTitle = ""; //the related documents Title (or subject)
279  text_t relationOID = ""; //the related documents OID 
280
281  //get the information associated with the metadata for current doc
282  if (get_info (docinfo.OID, collection, "", metadata,
283        false, collectproto, response, logout)) {
284   
285    //if the relation metadata exists, store for displaying
286    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
287      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
288
289      //split relation data into pairs of collectionname,ID number
290      text_tarray relationpairs;
291      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
292     
293      text_tarray::const_iterator currDoc = relationpairs.begin(); 
294      text_tarray::const_iterator lastDoc = relationpairs.end();
295
296      //iterate through the pairs to split and display
297      while(currDoc != lastDoc){
298   
299    //split pairs into collectionname and ID
300    text_tarray relationdata;
301    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
302   
303    //get first element in the array (collection)
304    text_tarray::const_iterator doc_data = relationdata.begin();
305    text_t document_collection = *doc_data;
306    ++doc_data; //increment to get next item in array (oid)
307    text_t document_OID = *doc_data;
308   
309    //create html link to related document
310    relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
311    relation += "&amp;cl=search&amp;d=" + document_OID;
312       
313    //get the information associated with the metadata for related doc
314    if (get_info (document_OID, document_collection, "", metadata,
315              false, collectproto, response, logout)) {
316     
317      //if title metadata doesn't exist, collect subject metadata
318      //if that doesn't exist, just call it 'related document'
319      if (!response.docInfo[0].metadata["Title"].values[0].empty())
320        relationTitle = response.docInfo[0].metadata["Title"].values[0];
321      else if (!response.docInfo[0].metadata["Subject"].values.empty())
322        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
323      else relationTitle =  "RELATED DOCUMENT";
324     
325    }
326   
327    //link the related document's title to its page
328    relation += "\">" + relationTitle + "</a>";
329    relation += "  (" + document_collection + ")<br>";
330   
331    ++currDoc;
332      }
333    }
334   
335  }
336
337  if(relation.empty()) //no relation data for documnet
338    relation = ".. no related documents .. ";
339
340  return relation;
341}
342
343
344
345static void get_parent_options (text_t &instring, metadata_t &metaoption) {
346
347  assert (instring.size() > 7);
348  if (instring.size() <= 7) return;
349
350  text_t meta, com, op;
351  bool inbraces = false;
352  bool inquotes = false;
353  bool foundcolon = false;
354  text_t::const_iterator here = instring.begin()+6;
355  text_t::const_iterator end = instring.end();
356  while (here != end) {
357    if (foundcolon) meta.push_back (*here);
358    else if (*here == '(') inbraces = true;
359    else if (*here == ')') inbraces = false;
360    else if (*here == '\'' && !inquotes) inquotes = true;
361    else if (*here == '\'' && inquotes) inquotes = false;
362    else if (*here == ':' && !inbraces) foundcolon = true;
363    else if (inquotes) op.push_back (*here);
364    else com.push_back (*here);
365    ++here;
366  }
367
368  instring = meta;
369  if (com.empty())
370    metaoption.mqualifier.parent = pImmediate;
371  else if (com == "Top")
372    metaoption.mqualifier.parent = pTop;
373  else if (com == "All") {
374    metaoption.mqualifier.parent = pAll;
375    metaoption.parentoptions = op;
376  }
377}
378
379
380static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
381
382  assert (instring.size() > 8);
383  if (instring.size() <= 8) return;
384  text_t meta, com, op;
385  bool inbraces = false;
386  bool inquotes = false;
387  bool foundcolon = false;
388  text_t::const_iterator here = instring.begin()+7;
389  text_t::const_iterator end = instring.end();
390  while (here != end) {
391    if (foundcolon) meta.push_back (*here);
392    else if (*here == '(') inbraces = true;
393    else if (*here == ')') inbraces = false;
394    else if (*here == '\'' && !inquotes) inquotes = true;
395    else if (*here == '\'' && inquotes) inquotes = false;
396    else if (*here == ':' && !inbraces) foundcolon = true;   
397    else if (inquotes) op.push_back (*here);
398    else com.push_back (*here);
399    ++here;
400  }
401
402  instring = meta;
403  metaoption.siblingoptions.clear();
404
405  if (com.empty()) {
406    metaoption.mqualifier.sibling = sAll;
407    metaoption.siblingoptions = " ";
408  }
409  else if (com == "first") {
410    metaoption.mqualifier.sibling = sNum;
411    metaoption.siblingoptions = "0";
412  }
413  else if (com == "last") {
414    metaoption.mqualifier.sibling = sNum;
415    metaoption.siblingoptions = "-2"; // == last
416  }
417  else if (com.getint()>0) {
418    metaoption.mqualifier.sibling = sNum;
419    int pos = com.getint()-1;
420    metaoption.siblingoptions +=pos;
421  }
422  else {
423    metaoption.mqualifier.sibling = sAll;
424    metaoption.siblingoptions = op;
425  }
426}
427
428static void get_child_options (text_t &instring, metadata_t &metaoption) {
429
430  assert (instring.size() > 6);
431  if (instring.size() <= 6) return;
432  text_t meta, com, op;
433  bool inbraces = false;
434  bool inquotes = false;
435  bool foundcolon = false;
436  text_t::const_iterator here = instring.begin()+5;
437  text_t::const_iterator end = instring.end();
438  while (here != end) {
439    if (foundcolon) meta.push_back (*here);
440    else if (*here == '(') inbraces = true;
441    else if (*here == ')') inbraces = false;
442    else if (*here == '\'' && !inquotes) inquotes = true;
443    else if (*here == '\'' && inquotes) inquotes = false;
444    else if (*here == ':' && !inbraces) foundcolon = true;
445    else if (inquotes) op.push_back (*here);
446    else com.push_back (*here);
447    ++here;
448  }
449
450  instring = meta;
451  if (com.empty()) {
452    metaoption.mqualifier.child = cAll;
453    metaoption.childoptions = " ";
454  }
455  else if (com == "first") {
456    metaoption.mqualifier.child = cNum;
457    metaoption.childoptions = ".fc";
458  }
459  else if (com == "last") {
460    metaoption.mqualifier.child = cNum;
461    metaoption.childoptions = ".lc";
462  }
463  else if (com.getint()>0) {
464    metaoption.mqualifier.child = cNum;
465    metaoption.childoptions = "."+com;
466  }
467  else {
468    metaoption.mqualifier.child = cAll;
469    metaoption.childoptions = op;
470  }
471}
472
473
474static void get_truncate_options (text_t &instring, metadata_t &metaoption)
475{
476  assert (instring.size() > ((text_t) "truncate").size());
477  if (instring.size() <= ((text_t) "truncate").size()) return;
478  text_t meta, com;
479  bool inbraces = false;
480  bool foundcolon = false;
481  text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
482  text_t::const_iterator end = instring.end();
483  while (here != end) {
484    if (foundcolon) meta.push_back (*here);
485    else if (*here == '(') inbraces = true;
486    else if (*here == ')') inbraces = false;
487    else if (*here == ':' && !inbraces) foundcolon = true;   
488    else com.push_back (*here);
489    ++here;
490  }
491
492  instring = meta;
493
494  if (!com.empty())
495  {
496    metaoption.siblingoptions = com;
497  }
498  else
499  {
500    // Default is 100 characters if not specified
501    metaoption.siblingoptions = "100";
502  }
503}
504
505
506
507static void parse_meta (text_t &meta, metadata_t &metaoption,
508            text_tset &metadata, bool &getParents) {
509
510  // Look for the various format statement modifiers
511  // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
512  //   is irrelevant because this is not stored in metaoption.metacommand anyway
513  bool keep_trying = true;
514  while (keep_trying)
515  {
516    keep_trying = false;
517
518    if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
519    {
520      metaoption.metacommand |= mCgiSafe;
521      meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
522      keep_trying = true;
523    }
524    if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
525    {   
526      metaoption.metacommand |= mSpecial;
527      meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
528      keep_trying = true;
529    }
530
531    // New "truncate" special formatting option
532    if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate"))  // No colons due to truncate(X)
533    {
534      metaoption.metacommand |= mTruncate;
535      get_truncate_options (meta, metaoption);
536      keep_trying = true;
537    }
538    // New "htmlsafe" special formatting option
539    if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
540    {
541      metaoption.metacommand |= mHTMLSafe;
542      meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
543      keep_trying = true;
544    }
545    // New "xmlsafe" special formatting option
546    if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
547    {
548      metaoption.metacommand |= mXMLSafe;
549      meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
550      keep_trying = true;
551    }
552    // New "dmsafe" special formatting option
553    if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
554    {
555      metaoption.metacommand |= mDMSafe;
556      meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
557      keep_trying = true;
558    }
559  }
560
561  bool had_parent_or_child = true;
562  bool prev_was_parent = false;
563  bool prev_was_child  = false;
564
565  while (had_parent_or_child) {
566    if (meta.size() > 7
567    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
568
569      // clear out sibling and child (cmd and options)
570      metaoption.metacommand &= ~(mChild|mSibling);
571      metaoption.childoptions.clear();
572      metaoption.siblingoptions.clear();
573
574      getParents = true;
575      metaoption.metacommand |= mParent;
576      get_parent_options (meta, metaoption);
577
578      if (prev_was_parent) {
579    metaoption.pre_tree_traverse += ".pr";
580      }
581      else if (prev_was_child) {
582    metaoption.pre_tree_traverse += ".fc";
583      }
584
585      prev_was_parent = true;
586      prev_was_child  = false;
587    }
588    else if (meta.size() > 6
589         && (substr (meta.begin(), meta.begin()+5) == "child")) {
590
591      // clear out sibling and parent (cmd and options)
592      metaoption.metacommand &= ~(mParent|mSibling);
593      metaoption.parentoptions.clear();
594      metaoption.siblingoptions.clear();
595
596      metaoption.metacommand |= mChild;
597      get_child_options (meta, metaoption);
598      metadata.insert("contains");
599
600      if (prev_was_parent) {
601    metaoption.pre_tree_traverse += ".pr";
602      }
603      else if (prev_was_child) {
604    metaoption.pre_tree_traverse += ".fc";
605      }
606
607      prev_was_child  = true;
608      prev_was_parent = false;
609    }
610    else {
611      prev_was_child  = false;
612      prev_was_parent = false;
613      had_parent_or_child = false;
614    }
615  }
616
617  // parent/child can have sibling tacked on end also
618  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
619    metaoption.metacommand |= mSibling;
620    get_sibling_options (meta, metaoption);
621  }
622 
623  // check for ex. which may occur in format statements
624  // remove "ex." prefix, but only if there are no other metadata set qualifiers
625  // in the metaname, since we want to retain prefixes like "ex.dc." as-is
626  text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
627  text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
628
629  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
630    meta = substr (meta.begin()+3, meta.end());
631  }
632  metadata.insert (meta);
633  metaoption.metaname = meta;
634}
635
636static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
637  if (meta == "collection") {
638    // no qualifiers
639    metaoption.metaname = g_EmptyText;
640    return;
641  }
642  meta = substr (meta.begin()+11, meta.end());
643  metaoption.metaname = meta;
644 
645}
646
647static void parse_meta (text_t &meta, format_t *formatlistptr,
648            text_tset &metadata, bool &getParents) {
649 
650  // check for ex. which may occur in format statements
651  // remove "ex." prefix, but only if there are no other metadata set qualifiers
652  // in the metaname, since we want to retain prefixes like "ex.dc." as-is
653  text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
654  text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
655
656  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
657    meta = substr (meta.begin()+3, meta.end());
658  }
659  if (meta == "link")
660    formatlistptr->command = comLink;
661  else if (meta == "/link")
662    formatlistptr->command = comEndLink;
663
664  // the metaname "srclink_file" is deprecated, use "srclinkFile"
665  else if (meta == "srclink") {
666    formatlistptr->command = comAssocLink;
667    formatlistptr->meta.metaname = "srclinkFile";
668    metadata.insert("srclinkFile");
669  }
670  else if (meta == "srchref") {
671    formatlistptr->command = comAssocLink;
672    formatlistptr->text = "href";
673    formatlistptr->meta.metaname = "srclinkFile";
674    metadata.insert("srclinkFile");
675  }
676  else if (meta == "/srclink") {
677    formatlistptr->command = comEndAssocLink;
678    formatlistptr->meta.metaname = "srclinkFile";
679  }
680  // and weblink etc
681  else if (meta == "href")
682    formatlistptr->command = comHref;
683
684  else if (meta == "num")
685    formatlistptr->command = comNum;
686
687  else if (meta == "icon")
688    formatlistptr->command = comIcon;
689
690  else if (meta == "Text")
691    formatlistptr->command = comDoc;
692 
693  else if (meta == "RelatedDocuments")
694   formatlistptr->command = comRel;
695
696  else if (meta == "highlight")
697    formatlistptr->command = comHighlight;
698
699  else if (meta == "/highlight")
700    formatlistptr->command = comEndHighlight;
701
702  else if (meta == "metadata-spanwrap")
703    formatlistptr->command = comMetadataSpanWrap;
704
705  else if (meta == "/metadata-spanwrap")
706    formatlistptr->command = comEndMetadataSpanWrap;
707
708  else if (meta == "metadata-divwrap")
709    formatlistptr->command = comMetadataDivWrap;
710
711  else if (meta == "/metadata-divwrap")
712    formatlistptr->command = comEndMetadataDivWrap;
713
714  else if (meta == "Summary")
715    formatlistptr->command = comSummary;
716
717  else if (meta == "DocImage")
718    formatlistptr->command = comImage;
719
720  else if (meta == "DocTOC")
721    formatlistptr->command = comTOC;
722
723  else if (meta == "DocumentButtonDetach")
724     formatlistptr->command = comDocumentButtonDetach;
725 
726  else if (meta == "DocumentButtonHighlight")
727     formatlistptr->command = comDocumentButtonHighlight;
728 
729  else if (meta == "DocumentButtonExpandContents")
730    formatlistptr->command = comDocumentButtonExpandContents;
731
732  else if (meta == "DocumentButtonExpandText")
733     formatlistptr->command = comDocumentButtonExpandText;
734
735  else if (meta == "DocOID")
736     formatlistptr->command = comOID;
737  else if (meta == "DocTopOID")
738    formatlistptr->command = comTopOID;
739  else if (meta == "DocRank")
740    formatlistptr->command = comRank;
741  else if (meta == "DocTermsFreqTotal")
742    formatlistptr->command = comDocTermsFreqTotal;
743  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
744    formatlistptr->command = comCollection;
745    parse_coll_meta(meta, formatlistptr->meta);
746  }
747  else {
748    formatlistptr->command = comMeta;
749    parse_meta (meta, formatlistptr->meta, metadata, getParents);
750  }
751}
752
753
754static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
755              text_tset &metadata, bool &getParents) {
756
757  text_t text;
758  text_t::const_iterator here = formatstring.begin();
759  text_t::const_iterator end = formatstring.end();
760
761  while (here != end) {
762
763    if (*here == '\\') {
764      ++here;
765      if (here != end) text.push_back (*here);
766
767    } else if (*here == '{') {
768      if (!text.empty()) {
769    formatlistptr->command = comText;
770    formatlistptr->text = text;
771    formatlistptr->nextptr = new format_t();
772    formatlistptr = formatlistptr->nextptr;
773   
774    text.clear();
775      }
776      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
777
778    formatlistptr->nextptr = new format_t();
779    formatlistptr = formatlistptr->nextptr;
780    if (here == end) break;
781      }
782    } else if (*here == '[') {
783      if (!text.empty()) {
784    formatlistptr->command = comText;
785    formatlistptr->text = text;
786    formatlistptr->nextptr = new format_t();
787    formatlistptr = formatlistptr->nextptr;
788
789    text.clear();
790      }
791      text_t meta;
792      ++here;
793      while (*here != ']') {
794    if (here == end) return false;
795    meta.push_back (*here);
796    ++here;
797      }
798      parse_meta (meta, formatlistptr, metadata, getParents);
799      formatlistptr->nextptr = new format_t();
800      formatlistptr = formatlistptr->nextptr;
801
802    } else
803      text.push_back (*here);
804
805    if (here != end) ++here;
806  }
807  if (!text.empty()) {
808    formatlistptr->command = comText;
809    formatlistptr->text = text;
810    formatlistptr->nextptr = new format_t();
811    formatlistptr = formatlistptr->nextptr;
812
813  }
814  return true;
815}
816
817
818static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
819              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
820
821  text_t::const_iterator it = findchar (here, end, '}');
822  if (it == end) return false;
823
824  text_t com = substr (here, it);
825  here = findchar (it, end, '{');
826  if (here == end) return false;
827  else ++here;
828
829  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
830  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
831  else return false;
832
833  int commacount = 0;
834  text_t text;
835  while (here != end) {
836
837    if (*here == '\\') {
838      ++here;
839      if (here != end) text.push_back(*here);
840     
841    }
842 
843    else if (*here == ',' || *here == '}' || *here == '{') {
844
845      if (formatlistptr->command == comOr) {
846    // the {Or}{this, or this, or this, or this} statement
847    format_t *or_ptr;
848   
849    // find the next unused orptr
850    if (formatlistptr->orptr == NULL) {
851      formatlistptr->orptr = new format_t();
852      or_ptr = formatlistptr->orptr;
853    } else {
854      or_ptr = formatlistptr->orptr;
855      while (or_ptr->nextptr != NULL)
856        or_ptr = or_ptr->nextptr;
857      or_ptr->nextptr = new format_t();
858      or_ptr = or_ptr->nextptr;
859    }
860
861    if (!text.empty())
862      {
863        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
864      }
865
866    if (*here == '{')
867      {
868        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
869        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
870        // The latter can always be re-written:
871        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
872       
873        if (!text.empty()) // already used up allocated format_t
874          {
875        // => allocate new one for detected action
876        or_ptr->nextptr = new format_t();
877        or_ptr = or_ptr->nextptr;
878          }
879        if (!parse_action(++here, end, or_ptr, metadata, getParents))
880          {
881        return false;
882          }
883      }
884    else
885      {
886        if (*here == '}') break;
887      }
888    text.clear();
889
890      }
891
892      // Parse an {If}{decide,do,else} statement
893      else {
894   
895    // Read the decision component. 
896    if (commacount == 0) {
897      // Decsion can be a metadata element, or a piece of text.
898      // Originally Stefan's code, updated 25/10/2000 by Gordon.
899
900      text_t::const_iterator beginbracket = text.begin();
901      text_t::const_iterator endbracket = (text.end() - 1);
902
903      // Decision is based on a metadata element
904      if ((*beginbracket == '[') && (*endbracket == ']')) {
905        // Ignore the surrounding square brackets
906        text_t meta = substr (beginbracket+1, endbracket);
907        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
908        ++commacount;
909        text.clear();
910      }
911
912      // Decision is a piece of text (probably a macro like _cgiargmode_).
913      else {
914
915        // hunt for any metadata in string, which might be uses in
916        // to test a condition, e.g. [Format] eq 'PDF'
917        format_t* dummyformat = new format_t();
918        // update which metadata fields needed
919        // (not interested in updatng formatlistptr)
920        parse_string (text, dummyformat, metadata, getParents);
921        delete dummyformat;
922
923        formatlistptr->decision.command = dText;
924        formatlistptr->decision.text = text;
925        ++commacount;
926        text.clear();
927      }
928    }
929
930    // Read the "then" and "else" components of the {If} statement.
931    else {
932      format_t** nextlistptr = NULL;
933      if (commacount == 1) {
934        nextlistptr = &formatlistptr->ifptr;
935      } else if (commacount == 2 ) {
936        nextlistptr = &formatlistptr->elseptr;
937      } else {
938        return false;
939      }
940
941      if (!text.empty()) {
942        if (*nextlistptr == NULL) {
943          *nextlistptr = new format_t();
944        } else {
945
946          // skip to the end of any format_t statements already added
947          while ((*nextlistptr)->nextptr != NULL)
948          {
949        nextlistptr = &(*nextlistptr)->nextptr;
950          }
951
952          (*nextlistptr)->nextptr = new format_t();
953          nextlistptr = &(*nextlistptr)->nextptr;
954        }
955
956        if (!parse_string (text, *nextlistptr, metadata, getParents))
957          {
958        return false;
959          }
960        text.clear();
961      }
962     
963      if (*here == '{')
964        {
965          if (*nextlistptr == NULL) {
966        *nextlistptr = new format_t();
967          } else {
968        // skip to the end of any format_t statements already added
969        while ((*nextlistptr)->nextptr != NULL)
970          {
971            nextlistptr = &(*nextlistptr)->nextptr;
972          }
973
974        (*nextlistptr)->nextptr = new format_t();
975        nextlistptr = &(*nextlistptr)->nextptr;
976          }
977
978          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
979        {
980          return false;
981        }
982        }
983      else
984        {
985          if (*here == '}') break;
986          ++commacount;
987        }
988    }
989      }
990     
991    } else text.push_back(*here);
992   
993    if (here != end) ++here;
994  }
995
996  return true;
997}
998
999
1000static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1001                const text_t metaname, int metapos=-1)
1002{
1003
1004  text_t tag_type = metadata_wrap_type;
1005  text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1006
1007  text_t wrapped_metatext = "<" + tag_type + " ";
1008  wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1009
1010  wrapped_metatext += "docoid=\"" + OID + "\" "; 
1011  wrapped_metatext += "metaname=\"" + metaname + "\"";
1012
1013  if (metapos>=0) {
1014    text_t metapos_str = metapos;
1015    wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1016  }
1017
1018  wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1019
1020  return wrapped_metatext;
1021}
1022
1023   
1024
1025bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1026             text_tset &metadata, bool &getParents) {
1027
1028  formatlistptr->clear();
1029  getParents = false;
1030
1031  return (parse_string (formatstring, formatlistptr, metadata, getParents));
1032}
1033
1034// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1035// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1036
1037static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1038{
1039  text_t no_ns_metaname = remove_namespace(meta.metaname);
1040  text_t formatted_metatext;
1041  bool first = true;
1042 
1043  const int start_i=0;
1044  const int end_i = metainfo.values.size()-1;
1045 
1046  if (position == -1) { // all
1047    for (int i=start_i; i<=end_i; ++i) {
1048      if (!first) formatted_metatext += meta.siblingoptions;
1049     
1050      text_t fresh_metatext;
1051
1052      if (meta.metacommand & mSpecial) {
1053    // special formatting
1054    if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1055    else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1056    else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1057      }
1058      else fresh_metatext = metainfo.values[i];
1059
1060      // New "truncate" special formatting option
1061      if (meta.metacommand & mTruncate)
1062      {
1063    int truncate_length = meta.siblingoptions.getint();
1064    text_t truncated_value = fresh_metatext;
1065    if (truncated_value.size() > truncate_length)
1066    {
1067      truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1068    }
1069    fresh_metatext = truncated_value;
1070      }
1071      // New "xmlsafe" special formatting option
1072      if (meta.metacommand & mXMLSafe)
1073      {
1074    // Make it XML-safe
1075    text_t text_xml_safe = "";
1076    text_t::const_iterator text_iterator = fresh_metatext.begin();
1077    while (text_iterator != fresh_metatext.end())
1078    {
1079      if (*text_iterator == '&') text_xml_safe += "&amp;";
1080      else if (*text_iterator == '<') text_xml_safe += "&lt;";
1081      else if (*text_iterator == '>') text_xml_safe += "&gt;";
1082      else text_xml_safe.push_back(*text_iterator);
1083      text_iterator++;
1084    }
1085    fresh_metatext = text_xml_safe;
1086      }
1087      // New "htmlsafe" special formatting option
1088      if (meta.metacommand & mHTMLSafe)
1089      {
1090    // Make it HTML-safe
1091    text_t text_html_safe = "";
1092    text_t::const_iterator text_iterator = fresh_metatext.begin();
1093    while (text_iterator != fresh_metatext.end())
1094    {
1095      if (*text_iterator == '&') text_html_safe += "&amp;";
1096      else if (*text_iterator == '<') text_html_safe += "&lt;";
1097      else if (*text_iterator == '>') text_html_safe += "&gt;";
1098      else if (*text_iterator == '"') text_html_safe += "&quot;";
1099      else text_html_safe.push_back(*text_iterator);
1100      text_iterator++;
1101    }
1102    fresh_metatext = text_html_safe;
1103      }
1104      // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1105      // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1106      if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1107      {
1108    // Make it macro-safe
1109    text_t text_dm_safe = dm_safe(fresh_metatext);
1110    fresh_metatext = text_dm_safe;
1111      }
1112
1113      if (metadata_wrap) {
1114    fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1115      }
1116      formatted_metatext += fresh_metatext;
1117
1118      first = false;
1119     
1120    }
1121  } else {
1122    if (position == -2) { // end
1123      position = end_i;
1124    } else if (position < start_i || position > end_i) {
1125      return "";
1126    }
1127
1128    text_t fresh_metatext;
1129    if (meta.metacommand & mSpecial) {
1130
1131      // special formatting
1132      if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1133      else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1134      else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1135    }
1136    else fresh_metatext = metainfo.values[position];
1137
1138    // New "truncate" special formatting option
1139    if (meta.metacommand & mTruncate)
1140    {
1141      int truncate_length = meta.siblingoptions.getint();
1142      text_t truncated_value = fresh_metatext;
1143      if (truncated_value.size() > truncate_length)
1144      {
1145    truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1146      }
1147      fresh_metatext = truncated_value;
1148    }
1149    // New "xmlsafe" special formatting option
1150    if (meta.metacommand & mXMLSafe)
1151    {
1152      // Make it XML-safe
1153      text_t text_xml_safe = "";
1154      text_t::const_iterator text_iterator = fresh_metatext.begin();
1155      while (text_iterator != fresh_metatext.end())
1156      {
1157    if (*text_iterator == '&') text_xml_safe += "&amp;";
1158    else if (*text_iterator == '<') text_xml_safe += "&lt;";
1159    else if (*text_iterator == '>') text_xml_safe += "&gt;";
1160    else text_xml_safe.push_back(*text_iterator);
1161    text_iterator++;
1162      }
1163      fresh_metatext = text_xml_safe;
1164    }
1165    // New "htmlsafe" special formatting option
1166    if (meta.metacommand & mHTMLSafe)
1167    {
1168      // Make it HTML-safe
1169      text_t text_html_safe = "";
1170      text_t::const_iterator text_iterator = fresh_metatext.begin();
1171      while (text_iterator != fresh_metatext.end())
1172      {
1173    if (*text_iterator == '&') text_html_safe += "&amp;";
1174    else if (*text_iterator == '<') text_html_safe += "&lt;";
1175    else if (*text_iterator == '>') text_html_safe += "&gt;";
1176    else if (*text_iterator == '"') text_html_safe += "&quot;";
1177    else if (*text_iterator == '\'') text_html_safe += "&#39;";
1178    else if (*text_iterator == ',') text_html_safe += "&#44;";
1179    else text_html_safe.push_back(*text_iterator);
1180    text_iterator++;
1181      }
1182      fresh_metatext = text_html_safe;
1183    }
1184    // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1185    // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1186    if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1187    {
1188      // Make it macro-safe
1189      text_t text_dm_safe = dm_safe(fresh_metatext);
1190      fresh_metatext = text_dm_safe;
1191    }
1192
1193    if (metadata_wrap) {
1194      fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1195    }
1196
1197    formatted_metatext += fresh_metatext;
1198  }
1199
1200  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1201  else return formatted_metatext;
1202}
1203
1204static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1205{
1206 
1207  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1208
1209  switch (meta.mqualifier.parent) {
1210  case pNone:
1211    return "Nothing!!";
1212    break;
1213
1214  case pImmediate:
1215    if (parent != NULL) {
1216      text_t parent_oid = get_parent(docinfo.OID);
1217      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1218    }
1219    break;
1220
1221  case pTop:
1222    if (parent != NULL) {
1223      text_t parent_oid = get_parent(docinfo.OID);
1224
1225      while (parent->parent != NULL) {
1226    parent = parent->parent;
1227    parent_oid = get_parent(parent_oid);
1228      }
1229      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1230    }
1231    break;
1232
1233  case pAll:
1234    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1235    if (parent != NULL) {
1236      text_t parent_oid = get_parent(docinfo.OID);
1237
1238      text_tarray tmparray;
1239      while (parent != NULL) {
1240    tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1241    parent = parent->parent;
1242    parent_oid = get_parent(parent_oid);
1243
1244      }
1245      // now join them up - use teh parent separator
1246      bool first = true;
1247      text_t tmp;
1248      text_tarray::reverse_iterator here = tmparray.rbegin();
1249      text_tarray::reverse_iterator end = tmparray.rend();
1250      while (here != end) {
1251    if (!first) tmp += meta.parentoptions;
1252    tmp += *here;
1253    first = false;
1254    ++here;
1255      }
1256      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1257      else return tmp;
1258    }
1259  }
1260  return "";
1261
1262}
1263
1264static text_t get_child_meta (const text_t& collection,
1265                  recptproto* collectproto,
1266                  ResultDocInfo_t &docinfo, displayclass &disp,
1267                  const metadata_t &meta, text_tmap &options,
1268                  ostream& logout, int siblings_values)
1269{
1270  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1271 
1272  const text_t& pre_tree_trav = meta.pre_tree_traverse;
1273  const text_t& child_metaname = meta.metaname;
1274  const text_t& child_field = meta.childoptions;
1275  text_tset child_metadata;
1276  child_metadata.insert(child_metaname);
1277
1278  FilterResponse_t child_response;
1279  if (meta.mqualifier.child == cNum) {
1280    // just one child
1281    //get the information associated with the metadata for child doc
1282    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1283           child_metadata, false, collectproto, child_response,
1284           logout)) return ""; // invalid child number
1285
1286      if (child_response.docInfo.empty()) return false; // no info for the child
1287 
1288      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1289      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1290 
1291      text_t child_metavalue
1292    = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1293      return expand_metadata(child_metavalue,collection,collectproto,
1294             child_docinfo,disp,options,logout);
1295  }
1296 
1297   
1298  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1299
1300
1301  if (!pre_tree_trav.empty()) {
1302    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1303    FilterResponse_t trav_response;
1304
1305    text_tset trav_metadata;
1306    trav_metadata.insert("contains");
1307
1308    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1309           trav_metadata, false, collectproto, trav_response,
1310           logout)) return ""; // invalid pre_tree_trav
1311
1312    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1313 
1314    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1315
1316    // use this for rest of routine
1317    docinfo = trav_docinfo;
1318  }
1319 
1320  // we need to get all children
1321  text_t result = "";
1322  text_tarray children;
1323  text_t contains = docinfo.metadata["contains"].values[0];
1324  splitchar (contains.begin(), contains.end(), ';', children);
1325  text_tarray::const_iterator here = children.begin();
1326  text_tarray::const_iterator end = children.end();
1327  bool first = true;
1328  while (here !=end) {
1329    text_t oid = *here;
1330    here++;
1331    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1332
1333    //get the information associated with the metadata for child doc
1334    if (!get_info (oid, collection, "", child_metadata,
1335           false, collectproto, child_response, logout) ||
1336    child_response.docInfo.empty()) {
1337      first = false;
1338      continue;
1339    }
1340   
1341   
1342    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1343    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1344   
1345    text_t child_metavalue
1346      = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1347
1348
1349    if (!first) result += child_field;
1350    first = false;
1351    // need to do this here cos otherwise we are in the wrong document
1352    text_t em =  expand_metadata(child_metavalue,collection,collectproto,
1353                 child_docinfo,disp,options,logout);
1354
1355    result += em;
1356  }
1357  return result;
1358   
1359}
1360
1361static text_t get_meta (const text_t& collection, recptproto* collectproto,
1362            ResultDocInfo_t &docinfo, displayclass &disp,
1363            const metadata_t &meta, text_tmap &options,
1364            ostream& logout) {
1365 
1366  // make sure we have the requested metadata
1367  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1368  if (it == docinfo.metadata.end()) return "";
1369
1370  int siblings_values = 0; // default is no siblings, just the first metadata available
1371  if (meta.metacommand & mSibling) {
1372    if (meta.mqualifier.sibling == sAll) {
1373      siblings_values = -1; //all
1374    } else if (meta.mqualifier.sibling == sNum) {
1375      siblings_values = meta.siblingoptions.getint();
1376    }
1377  }
1378  if (meta.metacommand & mParent) {
1379    return get_parent_meta(docinfo,meta,siblings_values);
1380  }
1381
1382  else if (meta.metacommand & mChild) {
1383    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1384                options,logout, siblings_values);
1385  }
1386  else if (meta.metacommand & mSibling) { // only siblings
1387    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1388    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1389  }
1390  else {
1391   
1392    // straightforward metadata request (nothing fancy)
1393
1394    text_t classifier_metaname = docinfo.classifier_metadata_type;
1395    int metaname_index
1396      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1397    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1398  }
1399 
1400  return "";
1401}
1402
1403static text_t get_or (const text_t& collection, recptproto* collectproto,
1404              ResultDocInfo_t &docinfo, displayclass &disp,
1405              format_t *orptr, text_tmap &options,
1406              ostream& logout) {
1407
1408  while (orptr != NULL) {
1409
1410    if (metadata_wrap) {
1411      // need to be a bit more careful about this
1412      // => test for it *without* spanwrap or divwrap, and if defined, then
1413      // got back and generate it again, this time with spanwrap/divwrap on
1414
1415      metadata_wrap = false;
1416      text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1417                       options, logout);
1418      metadata_wrap = true;
1419      if (!test_tmp.empty()) {
1420
1421    return format_string (collection,collectproto,docinfo, disp, orptr,
1422                  options, logout);
1423      }
1424    }
1425    else {
1426      text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1427                  options, logout);
1428      if (!tmp.empty()) return tmp;
1429    }
1430
1431    orptr = orptr->nextptr;
1432  }
1433  return "";
1434}
1435
// True for a space, horizontal tab, line feed or carriage return;
// false for every other character.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1441
1442static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1443{
1444  int pos = start_pos;
1445  while (pos<outstring.size()) {
1446    if (!char_is_whitespace(outstring[pos])) {
1447      break;
1448    }
1449    ++pos;
1450  }
1451
1452  return pos;
1453}
1454
1455static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1456{
1457  int pos = start_pos;
1458  while (pos>=0) {
1459    if (!char_is_whitespace(outstring[pos])) {
1460      break;
1461    }
1462    --pos;
1463  }
1464
1465  return pos;
1466}
1467
1468static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1469{
1470  int pos = start_pos;
1471  while (pos>=0) {
1472    if (char_is_whitespace(outstring[pos])) {
1473      break;
1474    }
1475    --pos;
1476  }
1477
1478  return pos;
1479}
1480
1481
1482static int rscan_for(const text_t& outstring, const int start_pos,
1483             const char find_c)
1484{
1485  int pos = start_pos;
1486  while (pos>=0) {
1487    char c = outstring[pos];
1488    if (outstring[pos] == find_c) {
1489      break;
1490    }
1491    --pos;
1492  }
1493
1494  return pos;
1495}
1496
1497text_t extract_substr(const text_t& outstring, const int start_pos,
1498              const int end_pos)
1499{
1500  text_t extracted_str;
1501  extracted_str.clear();
1502
1503  for (int pos=start_pos; pos<=end_pos; ++pos) {
1504    extracted_str.push_back(outstring[pos]);
1505  }
1506
1507  return extracted_str;
1508}
1509
1510
1511static text_t expand_potential_metadata(const text_t& collection,
1512                    recptproto* collectproto,
1513                    ResultDocInfo_t &docinfo,
1514                    displayclass &disp,
1515                    const text_t& intext,
1516                    text_tmap &options,
1517                    ostream& logout)
1518{
1519  text_t outtext;
1520
1521  // decide if dealing with metadata or text
1522
1523  text_t::const_iterator beginbracket = intext.begin();
1524  text_t::const_iterator endbracket = (intext.end() - 1);
1525
1526  // Decision is based on a metadata element
1527  if ((*beginbracket == '[') && (*endbracket == ']')) {
1528    // Ignore the surrounding square brackets
1529    text_t meta_text = substr (beginbracket+1, endbracket);
1530
1531    if (meta_text == "Text") {
1532      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1533    }
1534    else {
1535
1536      text_tset metadata;
1537      bool getParents =false;
1538      metadata_t meta;
1539     
1540      parse_meta (meta_text, meta, metadata, getParents);   
1541      outtext
1542    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1543    }
1544
1545  }
1546  else {
1547    outtext = intext;
1548  }
1549
1550  return outtext;
1551}
1552
1553
1554
1555
1556static bool uses_expression(const text_t& collection, recptproto* collectproto,
1557                ResultDocInfo_t &docinfo,
1558                displayclass &disp,
1559                const text_t& outstring, text_t& lhs_expr,
1560                text_t& op_expr, text_t& rhs_expr,
1561                text_tmap &options,
1562                ostream& logout)
1563{
1564  // Note: the string may not be of the form: str1 op str2, however
1565  // to deterine this we have to process it on the assumption it is,
1566  // and if at any point an 'erroneous' value is encountered, return
1567  // false and let something else have a go at evaluating it
1568
1569  // Starting at the end of the string and working backwards ..
1570
1571  const int outstring_len = outstring.size();
1572
1573  // skip over white space
1574  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1575
1576  if (rhs_end<=0) {
1577    // no meaningful text or (rhs_end==0) no room for operator
1578    return false;
1579  }
1580
1581  // check for ' or " and then scan over token
1582  const char potential_quote = outstring[rhs_end];
1583  int rhs_start=rhs_end;
1584  bool quoted = false;
1585
1586  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1587    --rhs_end;
1588    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1589    quoted = true;
1590  }
1591  else {
1592    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1593  }
1594
1595  if ((rhs_end-rhs_start)<0) {
1596    // no meaningful rhs expression
1597    return false;
1598  }
1599
1600  // form rhs_expr
1601  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1602
1603  // skip over white space
1604  const int to_whitespace = (quoted) ? 2 : 1;
1605
1606  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1607  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1608
1609  if ((op_end<0) && (op_start<0)) {
1610    // no meaningful expression operator
1611    return false;
1612  }
1613
1614  if (op_end-op_start<0) {
1615    // no meaningful expression operator
1616    return false;
1617  }
1618
1619  op_expr = extract_substr(outstring,op_start,op_end);
1620
1621
1622  // check for operator
1623  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1624     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1625
1626    // not a valid operator
1627    return false;
1628  }
1629
1630  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1631  if (lhs_end<0) {
1632    // no meaningful lhs expression
1633    return false;
1634  }
1635
1636  int lhs_start = scan_over_whitespace(outstring,0);
1637
1638  // form lhs_expr from remainder of string
1639  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1640
1641  // Now we know we have a valid expression, look up any
1642  // metadata terms
1643
1644  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1645                       disp,rhs_expr,options,logout);
1646  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1647                       disp,lhs_expr,options,logout);
1648
1649  return true;
1650}
1651
1652static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1653                 const text_t& rhs_expr, ostream& logout)
1654{
1655  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1656  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1657  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1658  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1659  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1660  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1661  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1662  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1663  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1664  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1665  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1666  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1667  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1668  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1669  else {
1670    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1671  }
1672 
1673  return false;
1674}
1675
1676
1677static text_t get_if (const text_t& collection, recptproto* collectproto,
1678              ResultDocInfo_t &docinfo, displayclass &disp,
1679              const decision_t &decision,
1680              format_t *ifptr, format_t *elseptr,
1681              text_tmap &options, ostream& logout)
1682{
1683  // If the decision component is a metadata element, then evaluate it
1684  // to see whether we output the "then" or the "else" clause
1685  if (decision.command == dMeta) {
1686
1687    bool store_metadata_wrap = metadata_wrap;
1688    metadata_wrap = 0;
1689
1690    // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1691    bool metadata_exists
1692      = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1693           logout) != "");
1694
1695    metadata_wrap = store_metadata_wrap;
1696
1697    if (metadata_exists) {
1698      if (ifptr != NULL)
1699    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1700                     options, logout);
1701    }
1702    else {
1703      if (elseptr != NULL)
1704    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1705                     options, logout);
1706    }
1707  }
1708
1709  // If the decision component is text, then evaluate it (it is probably a
1710  // macro like _cgiargmode_) to decide what to output.
1711  else if (decision.command == dText) {
1712
1713    text_t outstring;
1714    disp.expandstring (decision.text, outstring);
1715
1716    // Check for if expression in form: str1 op str2
1717    // (such as [x] eq "y")
1718    text_t lhs_expr, op_expr, rhs_expr;
1719    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1720      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1721    if (ifptr != NULL) {
1722      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1723                       options, logout);
1724    }
1725    else {
1726      return "";
1727    }
1728      } else {
1729    if (elseptr != NULL) {
1730      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1731                       options, logout);
1732    }
1733    else {
1734      return "";
1735    }
1736      }
1737    }
1738
1739
1740    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1741    // a cgi argument macro that has not been set, it evaluates to itself.
1742    // Therefore, were have to say that a piece of text evalautes true if
1743    // it is non-empty and if it is a cgi argument evaulating to itself.
1744
1745    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1746      if (ifptr != NULL)
1747    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1748                     options, logout);
1749    } else {
1750      if (elseptr != NULL)
1751    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1752                     options, logout);
1753    }
1754  }
1755 
1756  return "";
1757}
1758
1759bool includes_metadata(const text_t& text)
1760{
1761  text_t::const_iterator here = text.begin();
1762  text_t::const_iterator end = text.end();
1763
1764  char startbracket = '[';
1765  char endbracket = ']';
1766
1767  char bracket = startbracket;
1768  while (here != end) {
1769      if (*here == bracket) {
1770          if(bracket == startbracket) {
1771              // seen a [, next look for a ] to confirm it's metadata
1772              bracket = endbracket;
1773          } else if(bracket == endbracket) {
1774              // found [ ... ] in text, so we think it includes metadata
1775              return true;
1776          }
1777      }
1778    ++here;
1779  }
1780
1781  return false;
1782}
1783
1784static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1785                  recptproto* collectproto,
1786                  ResultDocInfo_t &docinfo,
1787                  displayclass &disp, text_tmap &options,
1788                  ostream &logout) {
1789     
1790  if (includes_metadata(metavalue)) {
1791   
1792    // text has embedded metadata in it => expand it
1793    FilterRequest_t request;
1794    FilterResponse_t response;
1795   
1796    request.getParents = false;
1797   
1798    format_t *expanded_formatlistptr = new format_t();
1799    parse_formatstring (metavalue, expanded_formatlistptr,
1800            request.fields, request.getParents);
1801   
1802    // retrieve metadata
1803    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1804         collectproto, response, logout);
1805   
1806    if (!response.docInfo.empty()) {
1807     
1808      text_t expanded_metavalue
1809    = get_formatted_string(collection, collectproto,
1810                   response.docInfo[0], disp, expanded_formatlistptr,
1811                   options, logout);
1812     
1813      return expanded_metavalue;
1814    }
1815    else {
1816      return metavalue;
1817    }
1818  }
1819  else {
1820   
1821    return metavalue;
1822  }
1823}
1824
1825text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1826               displayclass &disp,
1827               text_t meta_name, ostream& logout) {
1828 
1829  ColInfoResponse_t collectinfo;
1830  comerror_t err;
1831  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1832  text_t meta_value = "";
1833  text_t lang;
1834  disp.expandstring("_cgiargl_",lang);
1835  if (lang.empty()) {
1836    lang = "en";
1837  }
1838
1839  if (err == noError) {
1840    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1841  }
1842  return meta_value;
1843 
1844
1845}
1846text_t format_string (const text_t& collection, recptproto* collectproto,
1847              ResultDocInfo_t &docinfo, displayclass &disp,
1848              format_t *formatlistptr, text_tmap &options,
1849              ostream& logout) {
1850
1851  if (formatlistptr == NULL) return "";
1852
1853  switch (formatlistptr->command) {
1854     case comOID:
1855    return docinfo.OID;
1856  case comTopOID:
1857    {
1858      text_t top_id;
1859      get_top(docinfo.OID, top_id);
1860      return top_id;
1861    }
1862  case comRank:
1863    return text_t(docinfo.ranking);
1864     case comText:
1865    return formatlistptr->text;
1866     case comLink:
1867    return options["link"];
1868     case comEndLink:
1869       {
1870     if (options["link"].empty()) return "";
1871    else return "</a>";
1872       }
1873     case comHref:
1874    return get_href(options["link"]);
1875     case comIcon:
1876    return options["icon"];
1877     case comNum:
1878    return docinfo.result_num;
1879     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1880    return get_related_docs(collection, collectproto, docinfo, logout);
1881
1882     case comSummary:
1883       return format_summary(collection, collectproto, docinfo, disp, options, logout);
1884     case comAssocLink:
1885       {
1886     text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1887         if (!link_filename.empty()) {
1888       text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1889       if (formatlistptr->text == "href") {
1890         return href;
1891       }
1892       return "<a href=\""+ href + "\">";
1893     }
1894     return "";
1895       }
1896  case comEndAssocLink:
1897    {
1898    text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1899    if (!link_filename.empty()) {
1900      return "</a>";
1901    }
1902    return "";
1903    }
1904     case comMeta:
1905    {
1906       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1907       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1908    }
1909
1910     case comDoc:
1911       return format_text(collection, collectproto, docinfo, disp, options, logout);
1912
1913     case comImage:
1914    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1915     case comTOC:
1916    return options["DocTOC"];
1917     case comDocumentButtonDetach:
1918    return options["DocumentButtonDetach"];
1919     case comDocumentButtonHighlight:
1920    return options["DocumentButtonHighlight"];
1921     case comDocumentButtonExpandContents:
1922    return options["DocumentButtonExpandContents"];
1923     case comDocumentButtonExpandText:
1924    return options["DocumentButtonExpandText"];
1925     case comHighlight:
1926    if (options["highlight"] == "1") return "<b>";
1927    break;
1928     case comEndHighlight:
1929    if (options["highlight"] == "1") return "</b>";
1930    break;
1931     case comMetadataSpanWrap:
1932        metadata_wrap=true;  metadata_wrap_type="span"; return "";
1933    break;
1934     case comEndMetadataSpanWrap:
1935    metadata_wrap=false; metadata_wrap_type="";     return "";
1936    break;
1937     case comMetadataDivWrap:
1938        metadata_wrap=true;  metadata_wrap_type="div";  return "";
1939    break;
1940     case comEndMetadataDivWrap:
1941    metadata_wrap=false; metadata_wrap_type="";     return "";
1942    break;
1943     case comIf:
1944    return get_if (collection, collectproto, docinfo, disp,
1945               formatlistptr->decision, formatlistptr->ifptr,
1946               formatlistptr->elseptr, options, logout);
1947     case comOr:
1948    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1949               options, logout);
1950     case comDocTermsFreqTotal:
1951       return docinfo.num_terms_matched;
1952     case comCollection:
1953       if (formatlistptr->meta.metaname == g_EmptyText) {
1954     return collection;
1955       }
1956       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1957   
1958  }
1959  return "";
1960}
1961
1962text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1963                 ResultDocInfo_t &docinfo, displayclass &disp,
1964                 format_t *formatlistptr, text_tmap &options,
1965                 ostream& logout) {
1966
1967   text_t ft;
1968   while (formatlistptr != NULL)
1969      {
1970     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1971                  options, logout);
1972     formatlistptr = formatlistptr->nextptr;
1973      }
1974   
1975   return ft;
1976}
1977
1978
1979// we have only preloaded the text in DocumentAction. But you may want
1980// to get the text in query, so copy what we have done with
1981// format_summary and get the text here. Probably is quite expensive?
1982text_t format_text (const text_t& collection, recptproto* collectproto,
1983            ResultDocInfo_t &docinfo, displayclass &disp,
1984            text_tmap &options, ostream& logout)
1985{
1986  text_t text;
1987
1988  if (!options["text"].empty()) {
1989    text = options["text"];
1990  }
1991  else {
1992    // get document text here
1993    DocumentRequest_t docrequest;
1994    DocumentResponse_t docresponse;
1995    comerror_t err;
1996    docrequest.OID = docinfo.OID;
1997    collectproto->get_document (collection, docrequest, docresponse, err, logout);
1998    text = docresponse.doc;
1999  }
2000
2001  if (metadata_wrap) {
2002    text = wrap_metatext(text,docinfo.OID,"Text");
2003  }
2004
2005  return text;
2006}
2007 
2008/* FUNCTION NAME: format_summary
2009 * DESC: this is invoked when a [Summary] special metadata is processed.
2010 * RETURNS: a query-biased summary for the document */
2011
2012text_t format_summary (const text_t& collection, recptproto* collectproto,
2013               ResultDocInfo_t &docinfo, displayclass &disp,
2014               text_tmap &options, ostream& logout) {
2015
2016  // GRB: added code here to ensure that the cstr (and other collections)
2017  //      uses the document metadata item Summary, rather than compressing
2018  //      the text of the document, processed via the methods in
2019  //      summarise.cpp
2020
2021  text_t summary;
2022
2023  if (docinfo.metadata.count("Summary") > 0 &&
2024      docinfo.metadata["Summary"].values.size() > 0) {
2025    summary = docinfo.metadata["Summary"].values[0];
2026  }
2027  else {
2028 
2029    text_t textToSummarise, query;
2030
2031    if(options["text"].empty()) { // get document text
2032      DocumentRequest_t docrequest;
2033      DocumentResponse_t docresponse;
2034      comerror_t err;
2035      docrequest.OID = docinfo.OID;
2036      collectproto->get_document (collection, docrequest, docresponse, err, logout);
2037      textToSummarise = docresponse.doc;
2038    }
2039    else {
2040      // in practice, this would not happen, because text is only
2041      // loaded with the [Text] command
2042      textToSummarise = options["text"];
2043    }
2044   
2045    disp.expandstring("_cgiargq_",query);
2046    summary = summarise(textToSummarise,query,80);
2047    //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2048  }
2049
2050  summary.replace("'","&#039;");
2051  summary.replace("\n","&#013;");
2052
2053  if (metadata_wrap) {
2054    summary = wrap_metatext(summary,docinfo.OID,"Summary");
2055  }
2056
2057  return summary;
2058}
Note: See TracBrowser for help on using the browser.