root/main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp @ 28760

Revision 28760, 71.1 KB (checked in by ak19, 6 years ago)

Adding in the first working version of the formatconverter program which uses formattools to convert GS2 statements to GS3. Not all the GS2 terms have GS3 equivalents yet and the current ones still need to be run by Dr Bainbridge, but nested IFs and ORs seem to work alright in general. Kathy made the important changes to Makefile.in to get the new formatconverter.cpp to compile. formatconverter.cpp uses the new GS2-to-GS3 specific functions added to formattools.cpp

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41                 ResultDocInfo_t &docinfo, displayclass &disp,
42                 format_t *formatlistptr, text_tmap &options,
43                 ostream& logout);
44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46              format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49                  ResultDocInfo_t &docinfo, displayclass &disp,
50                  text_tmap &options, ostream& logout);
51
52static text_t format_text (const text_t& collection, recptproto* collectproto,
53                  ResultDocInfo_t &docinfo, displayclass &disp,
54                  text_tmap &options, ostream& logout);
55
56static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
57                  recptproto* collectproto, ResultDocInfo_t &docinfo,
58                  displayclass &disp, text_tmap &options,
59                  ostream &logout);
60
61static text_t transform_to_GS3_format (format_t *formatlistptr);
62
63void metadata_t::clear() {
64  metaname.clear();
65  metacommand = mNone;
66  mqualifier.parent  = pNone;
67  mqualifier.sibling = sNone;
68  mqualifier.child   = cNone;
69  pre_tree_traverse.clear();
70  parentoptions.clear();
71  siblingoptions.clear();
72  childoptions.clear();
73}
74
75void decision_t::clear() {
76  command = dMeta;
77  meta.clear();
78  text.clear();
79}
80
81format_t::~format_t()
82{
83  if (nextptr != NULL) delete nextptr;
84  if (ifptr != NULL) delete ifptr;
85  if (elseptr != NULL) delete elseptr;
86  if (orptr != NULL) delete orptr;
87}
88
89void format_t::clear() {
90  command = comText;
91  decision.clear();
92  text.clear();
93  meta.clear();
94  nextptr = NULL;
95  ifptr = NULL;
96  elseptr = NULL;
97  orptr = NULL;
98}
99
100void formatinfo_t::clear() {
101  DocumentImages = false;
102  DocumentTitles = true;
103  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
104  DocumentContents = true;
105  DocumentArrowsBottom = true;
106  DocumentArrowsTop = false;
107  DocumentSearchResultLinks = false;
108  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
109  //  DocumentButtons.push_back ("Expand Text");
110  //  DocumentButtons.push_back ("Expand Contents");
111  DocumentButtons.push_back ("Detach");
112  DocumentButtons.push_back ("Highlight");
113  RelatedDocuments = "";
114  DocumentText = "[Text]";
115  formatstrings.erase (formatstrings.begin(), formatstrings.end());
116  DocumentUseHTML = false;
117  AllowExtendedOptions = false;
118}
119
120// simply checks to see if formatstring begins with a <td> tag
121bool is_table_content (const text_t &formatstring) {
122  text_t::const_iterator here = formatstring.begin();
123  text_t::const_iterator end = formatstring.end();
124 
125  while (here != end) {
126    if (*here != ' ') {
127      if ((*here == '<') && ((here+3) < end)) {
128    if ((*(here+1) == 't' || *(here+1) == 'T') &&
129        (*(here+2) == 'd' || *(here+2) == 'D') &&
130        (*(here+3) == '>' || *(here+3) == ' '))
131      //|| *(here+3) == '\t' || *(here+3) == '\n'))
132      return true;
133      } else return false;
134    }
135    ++here;
136  }
137  return false;
138}
139
140bool is_table_content (const format_t *formatlistptr) {
141
142  if (formatlistptr == NULL) return false;
143 
144  if (formatlistptr->command == comText)
145    return is_table_content (formatlistptr->text);
146   
147  return false;
148}
149
150// returns false if key isn't in formatstringmap
151bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
152               text_t &formatstring) {
153
154  formatstring.clear();
155  text_tmap::const_iterator it = formatstringmap.find(key);
156  if (it == formatstringmap.end()) return false;
157  formatstring = (*it).second;
158  return true;
159}
160
161// tries to find "key1key2" then "key1" then "key2"
162bool get_formatstring (const text_t &key1, const text_t &key2, 
163               const text_tmap &formatstringmap,
164               text_t &formatstring) {
165
166  formatstring.clear();
167  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
168  if (it != formatstringmap.end()) {
169    formatstring = (*it).second;
170    return true;
171  }
172  it = formatstringmap.find(key1);
173  if (it != formatstringmap.end()) {
174    formatstring = (*it).second;
175    return true;
176  }
177  it = formatstringmap.find(key2);
178  if (it != formatstringmap.end()) {
179    formatstring = (*it).second;
180    return true;
181  }
182  return false;
183}
184
185
186text_t remove_namespace(const text_t &meta_name) {
187  text_t::const_iterator end = meta_name.end();
188  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
189  if (it != end) {
190    return substr(it+1, end);
191  }
192
193  return meta_name;
194
195}
196// returns a date of form _format:date_(year, month, day)
197// input is date of type yyyy-?mm-?dd
198// at least the year must be present in date
199text_t format_date (const text_t &date) {
200
201  if (date.size() < 4) return "";
202
203  text_t::const_iterator datebegin = date.begin();
204
205  text_t year = substr (datebegin, datebegin+4);
206  int chars_seen_so_far = 4;
207  if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
208
209  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
210  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
211 
212  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
213  int imonth = month.getint();
214  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
215 
216  chars_seen_so_far += 2;
217  if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
218
219  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
220  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
221
222  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
223  if (day[0] == '0') day = substr (day.begin()+1, day.end());
224  int iday = day.getint();
225  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
226   
227  return "_format:date_("+year+","+month+","+day+")";
228
229
230// converts an iso639 language code to its English equivalent
231// should we be checking that the macro exists??
232text_t iso639 (const text_t &langcode) {
233  if (langcode.empty()) return "";
234  return "_iso639:iso639"+langcode+"_";
235}
236
237
238text_t get_href (const text_t &link) {
239
240  text_t href;
241
242  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
243  text_t::const_iterator end = link.end();
244  if (here == end) return g_EmptyText;
245 
246  ++here;
247  while (here != end) {
248    if (*here == '"') break;
249    href.push_back(*here);
250    ++here;
251  }
252
253  return href;
254}
255
256//this function gets the information associated with the relation
257//metadata for the document associated with 'docinfo'. This relation
258//metadata consists of a line of pairs containing 'collection, document OID'
259//(this is the OID of the document related to the current document, and
260//the collection the related document belongs to). For each of these pairs
261//the title metadata is obtained and then an html link between the title
262//of the related doc and the document's position (the document will be
263//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
264//(where collection is the related documents collection, and OID is the
265//related documents OID).  A list of these html links are made for as many
266//related documents as there are. This list is then returned. If there are
267//no related documents available for the current document then the string
268//'.. no related documents .. ' is returned.
269text_t get_related_docs(const text_t& collection, recptproto* collectproto,
270            ResultDocInfo_t &docinfo, ostream& logout){
271 
272  text_tset metadata;
273
274  //insert the metadata we wish to collect
275  metadata.insert("dc.Relation");
276  metadata.insert("Title"); 
277  metadata.insert("Subject"); //for emails, where title data doesn't apply
278 
279  FilterResponse_t response;
280  text_t relation = ""; //string for displaying relation metadata
281  text_t relationTitle = ""; //the related documents Title (or subject)
282  text_t relationOID = ""; //the related documents OID 
283
284  //get the information associated with the metadata for current doc
285  if (get_info (docinfo.OID, collection, "", metadata,
286        false, collectproto, response, logout)) {
287   
288    //if the relation metadata exists, store for displaying
289    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
290      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
291
292      //split relation data into pairs of collectionname,ID number
293      text_tarray relationpairs;
294      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
295     
296      text_tarray::const_iterator currDoc = relationpairs.begin(); 
297      text_tarray::const_iterator lastDoc = relationpairs.end();
298
299      //iterate through the pairs to split and display
300      while(currDoc != lastDoc){
301   
302    //split pairs into collectionname and ID
303    text_tarray relationdata;
304    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
305   
306    //get first element in the array (collection)
307    text_tarray::const_iterator doc_data = relationdata.begin();
308    text_t document_collection = *doc_data;
309    ++doc_data; //increment to get next item in array (oid)
310    text_t document_OID = *doc_data;
311   
312    //create html link to related document
313    relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
314    relation += "&amp;cl=search&amp;d=" + document_OID;
315       
316    //get the information associated with the metadata for related doc
317    if (get_info (document_OID, document_collection, "", metadata,
318              false, collectproto, response, logout)) {
319     
320      //if title metadata doesn't exist, collect subject metadata
321      //if that doesn't exist, just call it 'related document'
322      if (!response.docInfo[0].metadata["Title"].values[0].empty())
323        relationTitle = response.docInfo[0].metadata["Title"].values[0];
324      else if (!response.docInfo[0].metadata["Subject"].values.empty())
325        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
326      else relationTitle =  "RELATED DOCUMENT";
327     
328    }
329   
330    //link the related document's title to its page
331    relation += "\">" + relationTitle + "</a>";
332    relation += "  (" + document_collection + ")<br>";
333   
334    ++currDoc;
335      }
336    }
337   
338  }
339
340  if(relation.empty()) //no relation data for documnet
341    relation = ".. no related documents .. ";
342
343  return relation;
344}
345
346
347
348static void get_parent_options (text_t &instring, metadata_t &metaoption) {
349
350  assert (instring.size() > 7);
351  if (instring.size() <= 7) return;
352
353  text_t meta, com, op;
354  bool inbraces = false;
355  bool inquotes = false;
356  bool foundcolon = false;
357  text_t::const_iterator here = instring.begin()+6;
358  text_t::const_iterator end = instring.end();
359  while (here != end) {
360    if (foundcolon) meta.push_back (*here);
361    else if (*here == '(') inbraces = true;
362    else if (*here == ')') inbraces = false;
363    else if (*here == '\'' && !inquotes) inquotes = true;
364    else if (*here == '\'' && inquotes) inquotes = false;
365    else if (*here == ':' && !inbraces) foundcolon = true;
366    else if (inquotes) op.push_back (*here);
367    else com.push_back (*here);
368    ++here;
369  }
370
371  instring = meta;
372  if (com.empty())
373    metaoption.mqualifier.parent = pImmediate;
374  else if (com == "Top")
375    metaoption.mqualifier.parent = pTop;
376  else if (com == "All") {
377    metaoption.mqualifier.parent = pAll;
378    metaoption.parentoptions = op;
379  }
380}
381
382
383static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
384
385  assert (instring.size() > 8);
386  if (instring.size() <= 8) return;
387  text_t meta, com, op;
388  bool inbraces = false;
389  bool inquotes = false;
390  bool foundcolon = false;
391  text_t::const_iterator here = instring.begin()+7;
392  text_t::const_iterator end = instring.end();
393  while (here != end) {
394    if (foundcolon) meta.push_back (*here);
395    else if (*here == '(') inbraces = true;
396    else if (*here == ')') inbraces = false;
397    else if (*here == '\'' && !inquotes) inquotes = true;
398    else if (*here == '\'' && inquotes) inquotes = false;
399    else if (*here == ':' && !inbraces) foundcolon = true;   
400    else if (inquotes) op.push_back (*here);
401    else com.push_back (*here);
402    ++here;
403  }
404
405  instring = meta;
406  metaoption.siblingoptions.clear();
407
408  if (com.empty()) {
409    metaoption.mqualifier.sibling = sAll;
410    metaoption.siblingoptions = " ";
411  }
412  else if (com == "first") {
413    metaoption.mqualifier.sibling = sNum;
414    metaoption.siblingoptions = "0";
415  }
416  else if (com == "last") {
417    metaoption.mqualifier.sibling = sNum;
418    metaoption.siblingoptions = "-2"; // == last
419  }
420  else if (com.getint()>0) {
421    metaoption.mqualifier.sibling = sNum;
422    int pos = com.getint()-1;
423    metaoption.siblingoptions +=pos;
424  }
425  else {
426    metaoption.mqualifier.sibling = sAll;
427    metaoption.siblingoptions = op;
428  }
429}
430
431static void get_child_options (text_t &instring, metadata_t &metaoption) {
432
433  assert (instring.size() > 6);
434  if (instring.size() <= 6) return;
435  text_t meta, com, op;
436  bool inbraces = false;
437  bool inquotes = false;
438  bool foundcolon = false;
439  text_t::const_iterator here = instring.begin()+5;
440  text_t::const_iterator end = instring.end();
441  while (here != end) {
442    if (foundcolon) meta.push_back (*here);
443    else if (*here == '(') inbraces = true;
444    else if (*here == ')') inbraces = false;
445    else if (*here == '\'' && !inquotes) inquotes = true;
446    else if (*here == '\'' && inquotes) inquotes = false;
447    else if (*here == ':' && !inbraces) foundcolon = true;
448    else if (inquotes) op.push_back (*here);
449    else com.push_back (*here);
450    ++here;
451  }
452
453  instring = meta;
454  if (com.empty()) {
455    metaoption.mqualifier.child = cAll;
456    metaoption.childoptions = " ";
457  }
458  else if (com == "first") {
459    metaoption.mqualifier.child = cNum;
460    metaoption.childoptions = ".fc";
461  }
462  else if (com == "last") {
463    metaoption.mqualifier.child = cNum;
464    metaoption.childoptions = ".lc";
465  }
466  else if (com.getint()>0) {
467    metaoption.mqualifier.child = cNum;
468    metaoption.childoptions = "."+com;
469  }
470  else {
471    metaoption.mqualifier.child = cAll;
472    metaoption.childoptions = op;
473  }
474}
475
476
477static void get_truncate_options (text_t &instring, metadata_t &metaoption)
478{
479  assert (instring.size() > ((text_t) "truncate").size());
480  if (instring.size() <= ((text_t) "truncate").size()) return;
481  text_t meta, com;
482  bool inbraces = false;
483  bool foundcolon = false;
484  text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
485  text_t::const_iterator end = instring.end();
486  while (here != end) {
487    if (foundcolon) meta.push_back (*here);
488    else if (*here == '(') inbraces = true;
489    else if (*here == ')') inbraces = false;
490    else if (*here == ':' && !inbraces) foundcolon = true;   
491    else com.push_back (*here);
492    ++here;
493  }
494
495  instring = meta;
496
497  if (!com.empty())
498  {
499    metaoption.siblingoptions = com;
500  }
501  else
502  {
503    // Default is 100 characters if not specified
504    metaoption.siblingoptions = "100";
505  }
506}
507
508
509
510static void parse_meta (text_t &meta, metadata_t &metaoption,
511            text_tset &metadata, bool &getParents) {
512
513  // Look for the various format statement modifiers
514  // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
515  //   is irrelevant because this is not stored in metaoption.metacommand anyway
516  bool keep_trying = true;
517  while (keep_trying)
518  {
519    keep_trying = false;
520
521    if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
522    {
523      metaoption.metacommand |= mCgiSafe;
524      meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
525      keep_trying = true;
526    }
527    if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
528    {   
529      metaoption.metacommand |= mSpecial;
530      meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
531      keep_trying = true;
532    }
533
534    // New "truncate" special formatting option
535    if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate"))  // No colons due to truncate(X)
536    {
537      metaoption.metacommand |= mTruncate;
538      get_truncate_options (meta, metaoption);
539      keep_trying = true;
540    }
541    // New "htmlsafe" special formatting option
542    if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
543    {
544      metaoption.metacommand |= mHTMLSafe;
545      meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
546      keep_trying = true;
547    }
548    // New "xmlsafe" special formatting option
549    if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
550    {
551      metaoption.metacommand |= mXMLSafe;
552      meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
553      keep_trying = true;
554    }
555    // New "dmsafe" special formatting option
556    if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
557    {
558      metaoption.metacommand |= mDMSafe;
559      meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
560      keep_trying = true;
561    }
562  }
563
564  bool had_parent_or_child = true;
565  bool prev_was_parent = false;
566  bool prev_was_child  = false;
567
568  while (had_parent_or_child) {
569    if (meta.size() > 7
570    && (substr (meta.begin(), meta.begin()+6) == "parent")) {
571
572      // clear out sibling and child (cmd and options)
573      metaoption.metacommand &= ~(mChild|mSibling);
574      metaoption.childoptions.clear();
575      metaoption.siblingoptions.clear();
576
577      getParents = true;
578      metaoption.metacommand |= mParent;
579      get_parent_options (meta, metaoption);
580
581      if (prev_was_parent) {
582    metaoption.pre_tree_traverse += ".pr";
583      }
584      else if (prev_was_child) {
585    metaoption.pre_tree_traverse += ".fc";
586      }
587
588      prev_was_parent = true;
589      prev_was_child  = false;
590    }
591    else if (meta.size() > 6
592         && (substr (meta.begin(), meta.begin()+5) == "child")) {
593
594      // clear out sibling and parent (cmd and options)
595      metaoption.metacommand &= ~(mParent|mSibling);
596      metaoption.parentoptions.clear();
597      metaoption.siblingoptions.clear();
598
599      metaoption.metacommand |= mChild;
600      get_child_options (meta, metaoption);
601      metadata.insert("contains");
602
603      if (prev_was_parent) {
604    metaoption.pre_tree_traverse += ".pr";
605      }
606      else if (prev_was_child) {
607    metaoption.pre_tree_traverse += ".fc";
608      }
609
610      prev_was_child  = true;
611      prev_was_parent = false;
612    }
613    else {
614      prev_was_child  = false;
615      prev_was_parent = false;
616      had_parent_or_child = false;
617    }
618  }
619
620  // parent/child can have sibling tacked on end also
621  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
622    metaoption.metacommand |= mSibling;
623    get_sibling_options (meta, metaoption);
624  }
625 
626  // check for ex. which may occur in format statements
627  // remove "ex." prefix, but only if there are no other metadata set qualifiers
628  // in the metaname, since we want to retain prefixes like "ex.dc." as-is
629  text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
630  text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
631
632  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
633    meta = substr (meta.begin()+3, meta.end());
634  }
635  metadata.insert (meta);
636  metaoption.metaname = meta;
637}
638
639static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
640  if (meta == "collection") {
641    // no qualifiers
642    metaoption.metaname = g_EmptyText;
643    return;
644  }
645  meta = substr (meta.begin()+11, meta.end());
646  metaoption.metaname = meta;
647 
648}
649
650static void parse_meta (text_t &meta, format_t *formatlistptr,
651            text_tset &metadata, bool &getParents) {
652 
653  // check for ex. which may occur in format statements
654  // remove "ex." prefix, but only if there are no other metadata set qualifiers
655  // in the metaname, since we want to retain prefixes like "ex.dc." as-is
656  text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
657  text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
658
659  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
660    meta = substr (meta.begin()+3, meta.end());
661  }
662  if (meta == "link")
663    formatlistptr->command = comLink;
664  else if (meta == "/link")
665    formatlistptr->command = comEndLink;
666
667  // the metaname "srclink_file" is deprecated, use "srclinkFile"
668  else if (meta == "srclink") {
669    formatlistptr->command = comAssocLink;
670    formatlistptr->meta.metaname = "srclinkFile";
671    metadata.insert("srclinkFile");
672  }
673  else if (meta == "srchref") {
674    formatlistptr->command = comAssocLink;
675    formatlistptr->text = "href";
676    formatlistptr->meta.metaname = "srclinkFile";
677    metadata.insert("srclinkFile");
678  }
679  else if (meta == "/srclink") {
680    formatlistptr->command = comEndAssocLink;
681    formatlistptr->meta.metaname = "srclinkFile";
682  }
683  // and weblink etc
684  else if (meta == "href")
685    formatlistptr->command = comHref;
686
687  else if (meta == "num")
688    formatlistptr->command = comNum;
689
690  else if (meta == "icon")
691    formatlistptr->command = comIcon;
692
693  else if (meta == "Text")
694    formatlistptr->command = comDoc;
695 
696  else if (meta == "RelatedDocuments")
697   formatlistptr->command = comRel;
698
699  else if (meta == "highlight")
700    formatlistptr->command = comHighlight;
701
702  else if (meta == "/highlight")
703    formatlistptr->command = comEndHighlight;
704
705  else if (meta == "metadata-spanwrap")
706    formatlistptr->command = comMetadataSpanWrap;
707
708  else if (meta == "/metadata-spanwrap")
709    formatlistptr->command = comEndMetadataSpanWrap;
710
711  else if (meta == "metadata-divwrap")
712    formatlistptr->command = comMetadataDivWrap;
713
714  else if (meta == "/metadata-divwrap")
715    formatlistptr->command = comEndMetadataDivWrap;
716
717  else if (meta == "Summary")
718    formatlistptr->command = comSummary;
719
720  else if (meta == "DocImage")
721    formatlistptr->command = comImage;
722
723  else if (meta == "DocTOC")
724    formatlistptr->command = comTOC;
725
726  else if (meta == "DocumentButtonDetach")
727     formatlistptr->command = comDocumentButtonDetach;
728 
729  else if (meta == "DocumentButtonHighlight")
730     formatlistptr->command = comDocumentButtonHighlight;
731 
732  else if (meta == "DocumentButtonExpandContents")
733    formatlistptr->command = comDocumentButtonExpandContents;
734
735  else if (meta == "DocumentButtonExpandText")
736     formatlistptr->command = comDocumentButtonExpandText;
737
738  else if (meta == "DocOID")
739     formatlistptr->command = comOID;
740  else if (meta == "DocTopOID")
741    formatlistptr->command = comTopOID;
742  else if (meta == "DocRank")
743    formatlistptr->command = comRank;
744  else if (meta == "DocTermsFreqTotal")
745    formatlistptr->command = comDocTermsFreqTotal;
746  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
747    formatlistptr->command = comCollection;
748    parse_coll_meta(meta, formatlistptr->meta);
749  }
750  else {
751    formatlistptr->command = comMeta;
752    parse_meta (meta, formatlistptr->meta, metadata, getParents);
753  }
754}
755
756
757static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
758              text_tset &metadata, bool &getParents) {
759
760  text_t text;
761  text_t::const_iterator here = formatstring.begin();
762  text_t::const_iterator end = formatstring.end();
763
764  while (here != end) {
765
766    if (*here == '\\') {
767      ++here;
768      if (here != end) text.push_back (*here);
769
770    } else if (*here == '{') {
771      if (!text.empty()) {
772    formatlistptr->command = comText;
773    formatlistptr->text = text;
774    formatlistptr->nextptr = new format_t();
775    formatlistptr = formatlistptr->nextptr;
776   
777    text.clear();
778      }
779      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
780
781    formatlistptr->nextptr = new format_t();
782    formatlistptr = formatlistptr->nextptr;
783    if (here == end) break;
784      }
785    } else if (*here == '[') {
786      if (!text.empty()) {
787    formatlistptr->command = comText;
788    formatlistptr->text = text;
789    formatlistptr->nextptr = new format_t();
790    formatlistptr = formatlistptr->nextptr;
791
792    text.clear();
793      }
794      text_t meta;
795      ++here;
796      while (*here != ']') {
797    if (here == end) return false;
798    meta.push_back (*here);
799    ++here;
800      }
801      parse_meta (meta, formatlistptr, metadata, getParents);
802      formatlistptr->nextptr = new format_t();
803      formatlistptr = formatlistptr->nextptr;
804
805    } else
806      text.push_back (*here);
807
808    if (here != end) ++here;
809  }
810  if (!text.empty()) {
811    formatlistptr->command = comText;
812    formatlistptr->text = text;
813    formatlistptr->nextptr = new format_t();
814    formatlistptr = formatlistptr->nextptr;
815
816  }
817  return true;
818}
819
820
821static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
822              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
823
824  text_t::const_iterator it = findchar (here, end, '}');
825  if (it == end) return false;
826
827  text_t com = substr (here, it);
828  here = findchar (it, end, '{');
829  if (here == end) return false;
830  else ++here;
831
832  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
833  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
834  else return false;
835
836  int commacount = 0;
837  text_t text;
838  while (here != end) {
839
840    if (*here == '\\') {
841      ++here;
842      if (here != end) text.push_back(*here);
843     
844    }
845 
846    else if (*here == ',' || *here == '}' || *here == '{') {
847
848      if (formatlistptr->command == comOr) {
849    // the {Or}{this, or this, or this, or this} statement
850    format_t *or_ptr;
851   
852    // find the next unused orptr
853    if (formatlistptr->orptr == NULL) {
854      formatlistptr->orptr = new format_t();
855      or_ptr = formatlistptr->orptr;
856    } else {
857      or_ptr = formatlistptr->orptr;
858      while (or_ptr->nextptr != NULL)
859        or_ptr = or_ptr->nextptr;
860      or_ptr->nextptr = new format_t();
861      or_ptr = or_ptr->nextptr;
862    }
863
864    if (!text.empty())
865      {
866        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
867      }
868
869    if (*here == '{')
870      {
871        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
872        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
873        // The latter can always be re-written:
874        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
875       
876        if (!text.empty()) // already used up allocated format_t
877          {
878        // => allocate new one for detected action
879        or_ptr->nextptr = new format_t();
880        or_ptr = or_ptr->nextptr;
881          }
882        if (!parse_action(++here, end, or_ptr, metadata, getParents))
883          {
884        return false;
885          }
886      }
887    else
888      {
889        if (*here == '}') break;
890      }
891    text.clear();
892
893      }
894
895      // Parse an {If}{decide,do,else} statement
896      else {
897   
898    // Read the decision component. 
899    if (commacount == 0) {
900      // Decsion can be a metadata element, or a piece of text.
901      // Originally Stefan's code, updated 25/10/2000 by Gordon.
902
903      text_t::const_iterator beginbracket = text.begin();
904      text_t::const_iterator endbracket = (text.end() - 1);
905
906      // Decision is based on a metadata element
907      if ((*beginbracket == '[') && (*endbracket == ']')) {
908        // Ignore the surrounding square brackets
909        text_t meta = substr (beginbracket+1, endbracket);
910        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
911        ++commacount;
912        text.clear();
913      }
914
915      // Decision is a piece of text (probably a macro like _cgiargmode_).
916      else {
917
918        // hunt for any metadata in string, which might be uses in
919        // to test a condition, e.g. [Format] eq 'PDF'
920        format_t* dummyformat = new format_t();
921        // update which metadata fields needed
922        // (not interested in updatng formatlistptr)
923        parse_string (text, dummyformat, metadata, getParents);
924        delete dummyformat;
925
926        formatlistptr->decision.command = dText;
927        formatlistptr->decision.text = text;
928        ++commacount;
929        text.clear();
930      }
931    }
932
933    // Read the "then" and "else" components of the {If} statement.
934    else {
935      format_t** nextlistptr = NULL;
936      if (commacount == 1) {
937        nextlistptr = &formatlistptr->ifptr;
938      } else if (commacount == 2 ) {
939        nextlistptr = &formatlistptr->elseptr;
940      } else {
941        return false;
942      }
943
944      if (!text.empty()) {
945        if (*nextlistptr == NULL) {
946          *nextlistptr = new format_t();
947        } else {
948
949          // skip to the end of any format_t statements already added
950          while ((*nextlistptr)->nextptr != NULL)
951          {
952        nextlistptr = &(*nextlistptr)->nextptr;
953          }
954
955          (*nextlistptr)->nextptr = new format_t();
956          nextlistptr = &(*nextlistptr)->nextptr;
957        }
958
959        if (!parse_string (text, *nextlistptr, metadata, getParents))
960          {
961        return false;
962          }
963        text.clear();
964      }
965     
966      if (*here == '{')
967        {
968          if (*nextlistptr == NULL) {
969        *nextlistptr = new format_t();
970          } else {
971        // skip to the end of any format_t statements already added
972        while ((*nextlistptr)->nextptr != NULL)
973          {
974            nextlistptr = &(*nextlistptr)->nextptr;
975          }
976
977        (*nextlistptr)->nextptr = new format_t();
978        nextlistptr = &(*nextlistptr)->nextptr;
979          }
980
981          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
982        {
983          return false;
984        }
985        }
986      else
987        {
988          if (*here == '}') break;
989          ++commacount;
990        }
991    }
992      }
993     
994    } else text.push_back(*here);
995   
996    if (here != end) ++here;
997  }
998
999  return true;
1000}
1001
1002
1003static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1004                const text_t metaname, int metapos=-1)
1005{
1006
1007  text_t tag_type = metadata_wrap_type;
1008  text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1009
1010  text_t wrapped_metatext = "<" + tag_type + " ";
1011  wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1012
1013  wrapped_metatext += "docoid=\"" + OID + "\" "; 
1014  wrapped_metatext += "metaname=\"" + metaname + "\"";
1015
1016  if (metapos>=0) {
1017    text_t metapos_str = metapos;
1018    wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1019  }
1020
1021  wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1022
1023  return wrapped_metatext;
1024}
1025
1026   
1027
1028bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1029             text_tset &metadata, bool &getParents) {
1030
1031  formatlistptr->clear();
1032  getParents = false;
1033
1034  return (parse_string (formatstring, formatlistptr, metadata, getParents));
1035}
1036
1037// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1038// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1039
1040static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1041{
1042  text_t no_ns_metaname = remove_namespace(meta.metaname);
1043  text_t formatted_metatext;
1044  bool first = true;
1045 
1046  const int start_i=0;
1047  const int end_i = metainfo.values.size()-1;
1048 
1049  if (position == -1) { // all
1050    for (int i=start_i; i<=end_i; ++i) {
1051      if (!first) formatted_metatext += meta.siblingoptions;
1052     
1053      text_t fresh_metatext;
1054
1055      if (meta.metacommand & mSpecial) {
1056    // special formatting
1057    if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1058    else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1059    else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1060      }
1061      else fresh_metatext = metainfo.values[i];
1062
1063      // New "truncate" special formatting option
1064      if (meta.metacommand & mTruncate)
1065      {
1066    int truncate_length = meta.siblingoptions.getint();
1067    text_t truncated_value = fresh_metatext;
1068    if (truncated_value.size() > truncate_length)
1069    {
1070      truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1071    }
1072    fresh_metatext = truncated_value;
1073      }
1074      // New "xmlsafe" special formatting option
1075      if (meta.metacommand & mXMLSafe)
1076      {
1077    // Make it XML-safe
1078    text_t text_xml_safe = "";
1079    text_t::const_iterator text_iterator = fresh_metatext.begin();
1080    while (text_iterator != fresh_metatext.end())
1081    {
1082      if (*text_iterator == '&') text_xml_safe += "&amp;";
1083      else if (*text_iterator == '<') text_xml_safe += "&lt;";
1084      else if (*text_iterator == '>') text_xml_safe += "&gt;";
1085      else text_xml_safe.push_back(*text_iterator);
1086      text_iterator++;
1087    }
1088    fresh_metatext = text_xml_safe;
1089      }
1090      // New "htmlsafe" special formatting option
1091      if (meta.metacommand & mHTMLSafe)
1092      {
1093    // Make it HTML-safe
1094    text_t text_html_safe = "";
1095    text_t::const_iterator text_iterator = fresh_metatext.begin();
1096    while (text_iterator != fresh_metatext.end())
1097    {
1098      if (*text_iterator == '&') text_html_safe += "&amp;";
1099      else if (*text_iterator == '<') text_html_safe += "&lt;";
1100      else if (*text_iterator == '>') text_html_safe += "&gt;";
1101      else if (*text_iterator == '"') text_html_safe += "&quot;";
1102      else text_html_safe.push_back(*text_iterator);
1103      text_iterator++;
1104    }
1105    fresh_metatext = text_html_safe;
1106      }
1107      // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1108      // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1109      if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1110      {
1111    // Make it macro-safe
1112    text_t text_dm_safe = dm_safe(fresh_metatext);
1113    fresh_metatext = text_dm_safe;
1114      }
1115
1116      if (metadata_wrap) {
1117    fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1118      }
1119      formatted_metatext += fresh_metatext;
1120
1121      first = false;
1122     
1123    }
1124  } else {
1125    if (position == -2) { // end
1126      position = end_i;
1127    } else if (position < start_i || position > end_i) {
1128      return "";
1129    }
1130
1131    text_t fresh_metatext;
1132    if (meta.metacommand & mSpecial) {
1133
1134      // special formatting
1135      if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1136      else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1137      else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1138    }
1139    else fresh_metatext = metainfo.values[position];
1140
1141    // New "truncate" special formatting option
1142    if (meta.metacommand & mTruncate)
1143    {
1144      int truncate_length = meta.siblingoptions.getint();
1145      text_t truncated_value = fresh_metatext;
1146      if (truncated_value.size() > truncate_length)
1147      {
1148    truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1149      }
1150      fresh_metatext = truncated_value;
1151    }
1152    // New "xmlsafe" special formatting option
1153    if (meta.metacommand & mXMLSafe)
1154    {
1155      // Make it XML-safe
1156      text_t text_xml_safe = "";
1157      text_t::const_iterator text_iterator = fresh_metatext.begin();
1158      while (text_iterator != fresh_metatext.end())
1159      {
1160    if (*text_iterator == '&') text_xml_safe += "&amp;";
1161    else if (*text_iterator == '<') text_xml_safe += "&lt;";
1162    else if (*text_iterator == '>') text_xml_safe += "&gt;";
1163    else text_xml_safe.push_back(*text_iterator);
1164    text_iterator++;
1165      }
1166      fresh_metatext = text_xml_safe;
1167    }
1168    // New "htmlsafe" special formatting option
1169    if (meta.metacommand & mHTMLSafe)
1170    {
1171      // Make it HTML-safe
1172      text_t text_html_safe = "";
1173      text_t::const_iterator text_iterator = fresh_metatext.begin();
1174      while (text_iterator != fresh_metatext.end())
1175      {
1176    if (*text_iterator == '&') text_html_safe += "&amp;";
1177    else if (*text_iterator == '<') text_html_safe += "&lt;";
1178    else if (*text_iterator == '>') text_html_safe += "&gt;";
1179    else if (*text_iterator == '"') text_html_safe += "&quot;";
1180    else if (*text_iterator == '\'') text_html_safe += "&#39;";
1181    else if (*text_iterator == ',') text_html_safe += "&#44;";
1182    else text_html_safe.push_back(*text_iterator);
1183    text_iterator++;
1184      }
1185      fresh_metatext = text_html_safe;
1186    }
1187    // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1188    // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1189    if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1190    {
1191      // Make it macro-safe
1192      text_t text_dm_safe = dm_safe(fresh_metatext);
1193      fresh_metatext = text_dm_safe;
1194    }
1195
1196    if (metadata_wrap) {
1197      fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1198    }
1199
1200    formatted_metatext += fresh_metatext;
1201  }
1202
1203  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1204  else return formatted_metatext;
1205}
1206
1207static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1208{
1209 
1210  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1211
1212  switch (meta.mqualifier.parent) {
1213  case pNone:
1214    return "Nothing!!";
1215    break;
1216
1217  case pImmediate:
1218    if (parent != NULL) {
1219      text_t parent_oid = get_parent(docinfo.OID);
1220      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1221    }
1222    break;
1223
1224  case pTop:
1225    if (parent != NULL) {
1226      text_t parent_oid = get_parent(docinfo.OID);
1227
1228      while (parent->parent != NULL) {
1229    parent = parent->parent;
1230    parent_oid = get_parent(parent_oid);
1231      }
1232      return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1233    }
1234    break;
1235
1236  case pAll:
1237    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1238    if (parent != NULL) {
1239      text_t parent_oid = get_parent(docinfo.OID);
1240
1241      text_tarray tmparray;
1242      while (parent != NULL) {
1243    tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1244    parent = parent->parent;
1245    parent_oid = get_parent(parent_oid);
1246
1247      }
1248      // now join them up - use teh parent separator
1249      bool first = true;
1250      text_t tmp;
1251      text_tarray::reverse_iterator here = tmparray.rbegin();
1252      text_tarray::reverse_iterator end = tmparray.rend();
1253      while (here != end) {
1254    if (!first) tmp += meta.parentoptions;
1255    tmp += *here;
1256    first = false;
1257    ++here;
1258      }
1259      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1260      else return tmp;
1261    }
1262  }
1263  return "";
1264
1265}
1266
1267static text_t get_child_meta (const text_t& collection,
1268                  recptproto* collectproto,
1269                  ResultDocInfo_t &docinfo, displayclass &disp,
1270                  const metadata_t &meta, text_tmap &options,
1271                  ostream& logout, int siblings_values)
1272{
1273  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1274 
1275  const text_t& pre_tree_trav = meta.pre_tree_traverse;
1276  const text_t& child_metaname = meta.metaname;
1277  const text_t& child_field = meta.childoptions;
1278  text_tset child_metadata;
1279  child_metadata.insert(child_metaname);
1280
1281  FilterResponse_t child_response;
1282  if (meta.mqualifier.child == cNum) {
1283    // just one child
1284    //get the information associated with the metadata for child doc
1285    if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1286           child_metadata, false, collectproto, child_response,
1287           logout)) return ""; // invalid child number
1288
1289      if (child_response.docInfo.empty()) return false; // no info for the child
1290 
1291      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1292      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1293 
1294      text_t child_metavalue
1295    = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1296      return expand_metadata(child_metavalue,collection,collectproto,
1297             child_docinfo,disp,options,logout);
1298  }
1299 
1300   
1301  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1302
1303
1304  if (!pre_tree_trav.empty()) {
1305    // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1306    FilterResponse_t trav_response;
1307
1308    text_tset trav_metadata;
1309    trav_metadata.insert("contains");
1310
1311    if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1312           trav_metadata, false, collectproto, trav_response,
1313           logout)) return ""; // invalid pre_tree_trav
1314
1315    if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1316 
1317    ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1318
1319    // use this for rest of routine
1320    docinfo = trav_docinfo;
1321  }
1322 
1323  // we need to get all children
1324  text_t result = "";
1325  text_tarray children;
1326  text_t contains = docinfo.metadata["contains"].values[0];
1327  splitchar (contains.begin(), contains.end(), ';', children);
1328  text_tarray::const_iterator here = children.begin();
1329  text_tarray::const_iterator end = children.end();
1330  bool first = true;
1331  while (here !=end) {
1332    text_t oid = *here;
1333    here++;
1334    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1335
1336    //get the information associated with the metadata for child doc
1337    if (!get_info (oid, collection, "", child_metadata,
1338           false, collectproto, child_response, logout) ||
1339    child_response.docInfo.empty()) {
1340      first = false;
1341      continue;
1342    }
1343   
1344   
1345    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1346    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1347   
1348    text_t child_metavalue
1349      = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1350
1351
1352    if (!first) result += child_field;
1353    first = false;
1354    // need to do this here cos otherwise we are in the wrong document
1355    text_t em =  expand_metadata(child_metavalue,collection,collectproto,
1356                 child_docinfo,disp,options,logout);
1357
1358    result += em;
1359  }
1360  return result;
1361   
1362}
1363
1364static text_t get_meta (const text_t& collection, recptproto* collectproto,
1365            ResultDocInfo_t &docinfo, displayclass &disp,
1366            const metadata_t &meta, text_tmap &options,
1367            ostream& logout) {
1368 
1369  // make sure we have the requested metadata
1370  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1371  if (it == docinfo.metadata.end()) return "";
1372
1373  int siblings_values = 0; // default is no siblings, just the first metadata available
1374  if (meta.metacommand & mSibling) {
1375    if (meta.mqualifier.sibling == sAll) {
1376      siblings_values = -1; //all
1377    } else if (meta.mqualifier.sibling == sNum) {
1378      siblings_values = meta.siblingoptions.getint();
1379    }
1380  }
1381  if (meta.metacommand & mParent) {
1382    return get_parent_meta(docinfo,meta,siblings_values);
1383  }
1384
1385  else if (meta.metacommand & mChild) {
1386    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1387                options,logout, siblings_values);
1388  }
1389  else if (meta.metacommand & mSibling) { // only siblings
1390    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1391    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1392  }
1393  else {
1394   
1395    // straightforward metadata request (nothing fancy)
1396
1397    text_t classifier_metaname = docinfo.classifier_metadata_type;
1398    int metaname_index
1399      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1400    return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1401  }
1402 
1403  return "";
1404}
1405
1406static text_t get_or (const text_t& collection, recptproto* collectproto,
1407              ResultDocInfo_t &docinfo, displayclass &disp,
1408              format_t *orptr, text_tmap &options,
1409              ostream& logout) {
1410
1411  while (orptr != NULL) {
1412
1413    if (metadata_wrap) {
1414      // need to be a bit more careful about this
1415      // => test for it *without* spanwrap or divwrap, and if defined, then
1416      // got back and generate it again, this time with spanwrap/divwrap on
1417
1418      metadata_wrap = false;
1419      text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1420                       options, logout);
1421      metadata_wrap = true;
1422      if (!test_tmp.empty()) {
1423
1424    return format_string (collection,collectproto,docinfo, disp, orptr,
1425                  options, logout);
1426      }
1427    }
1428    else {
1429      text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1430                  options, logout);
1431      if (!tmp.empty()) return tmp;
1432    }
1433
1434    orptr = orptr->nextptr;
1435  }
1436  return "";
1437}
1438
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1444
1445static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1446{
1447  int pos = start_pos;
1448  while (pos<outstring.size()) {
1449    if (!char_is_whitespace(outstring[pos])) {
1450      break;
1451    }
1452    ++pos;
1453  }
1454
1455  return pos;
1456}
1457
1458static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1459{
1460  int pos = start_pos;
1461  while (pos>=0) {
1462    if (!char_is_whitespace(outstring[pos])) {
1463      break;
1464    }
1465    --pos;
1466  }
1467
1468  return pos;
1469}
1470
1471static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1472{
1473  int pos = start_pos;
1474  while (pos>=0) {
1475    if (char_is_whitespace(outstring[pos])) {
1476      break;
1477    }
1478    --pos;
1479  }
1480
1481  return pos;
1482}
1483
1484
1485static int rscan_for(const text_t& outstring, const int start_pos,
1486             const char find_c)
1487{
1488  int pos = start_pos;
1489  while (pos>=0) {
1490    char c = outstring[pos];
1491    if (outstring[pos] == find_c) {
1492      break;
1493    }
1494    --pos;
1495  }
1496
1497  return pos;
1498}
1499
1500text_t extract_substr(const text_t& outstring, const int start_pos,
1501              const int end_pos)
1502{
1503  text_t extracted_str;
1504  extracted_str.clear();
1505
1506  for (int pos=start_pos; pos<=end_pos; ++pos) {
1507    extracted_str.push_back(outstring[pos]);
1508  }
1509
1510  return extracted_str;
1511}
1512
1513
1514static text_t expand_potential_metadata(const text_t& collection,
1515                    recptproto* collectproto,
1516                    ResultDocInfo_t &docinfo,
1517                    displayclass &disp,
1518                    const text_t& intext,
1519                    text_tmap &options,
1520                    ostream& logout)
1521{
1522  text_t outtext;
1523
1524  // decide if dealing with metadata or text
1525
1526  text_t::const_iterator beginbracket = intext.begin();
1527  text_t::const_iterator endbracket = (intext.end() - 1);
1528
1529  // Decision is based on a metadata element
1530  if ((*beginbracket == '[') && (*endbracket == ']')) {
1531    // Ignore the surrounding square brackets
1532    text_t meta_text = substr (beginbracket+1, endbracket);
1533
1534    if (meta_text == "Text") {
1535      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1536    }
1537    else {
1538
1539      text_tset metadata;
1540      bool getParents =false;
1541      metadata_t meta;
1542     
1543      parse_meta (meta_text, meta, metadata, getParents);   
1544      outtext
1545    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1546    }
1547
1548  }
1549  else {
1550    outtext = intext;
1551  }
1552
1553  return outtext;
1554}
1555
1556
1557static bool uses_expression(const text_t& collection, recptproto* collectproto,
1558                ResultDocInfo_t &docinfo,
1559                displayclass &disp,
1560                const text_t& outstring, text_t& lhs_expr,
1561                text_t& op_expr, text_t& rhs_expr,
1562                text_tmap &options,
1563                ostream& logout)
1564{
1565  // Note: the string may not be of the form: str1 op str2, however
1566  // to deterine this we have to process it on the assumption it is,
1567  // and if at any point an 'erroneous' value is encountered, return
1568  // false and let something else have a go at evaluating it
1569
1570  // Starting at the end of the string and working backwards ..
1571
1572  const int outstring_len = outstring.size();
1573
1574  // skip over white space
1575  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1576
1577  if (rhs_end<=0) {
1578    // no meaningful text or (rhs_end==0) no room for operator
1579    return false;
1580  }
1581
1582  // check for ' or " and then scan over token
1583  const char potential_quote = outstring[rhs_end];
1584  int rhs_start=rhs_end;
1585  bool quoted = false;
1586
1587  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1588    --rhs_end;
1589    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1590    quoted = true;
1591  }
1592  else {
1593    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1594  }
1595
1596  if ((rhs_end-rhs_start)<0) {
1597    // no meaningful rhs expression
1598    return false;
1599  }
1600
1601  // form rhs_expr
1602  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1603
1604  // skip over white space
1605  const int to_whitespace = (quoted) ? 2 : 1;
1606
1607  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1608  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1609
1610  if ((op_end<0) && (op_start<0)) {
1611    // no meaningful expression operator
1612    return false;
1613  }
1614
1615  if (op_end-op_start<0) {
1616    // no meaningful expression operator
1617    return false;
1618  }
1619
1620  op_expr = extract_substr(outstring,op_start,op_end);
1621
1622
1623  // check for operator
1624  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1625     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1626
1627    // not a valid operator
1628    return false;
1629  }
1630
1631  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1632  if (lhs_end<0) {
1633    // no meaningful lhs expression
1634    return false;
1635  }
1636
1637  int lhs_start = scan_over_whitespace(outstring,0);
1638
1639  // form lhs_expr from remainder of string
1640  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1641
1642  // Now we know we have a valid expression, look up any
1643  // metadata terms
1644
1645  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1646                       disp,rhs_expr,options,logout);
1647  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1648                       disp,lhs_expr,options,logout);
1649
1650  return true;
1651}
1652
1653static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1654                 const text_t& rhs_expr, ostream& logout)
1655{
1656  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1657  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1658  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1659  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1660  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1661  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1662  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1663  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1664  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1665  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1666  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1667  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1668  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1669  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1670  else {
1671    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1672  }
1673 
1674  return false;
1675}
1676
1677
1678static text_t get_if (const text_t& collection, recptproto* collectproto,
1679              ResultDocInfo_t &docinfo, displayclass &disp,
1680              const decision_t &decision,
1681              format_t *ifptr, format_t *elseptr,
1682              text_tmap &options, ostream& logout)
1683{
1684  // If the decision component is a metadata element, then evaluate it
1685  // to see whether we output the "then" or the "else" clause
1686  if (decision.command == dMeta) {
1687
1688    bool store_metadata_wrap = metadata_wrap;
1689    metadata_wrap = 0;
1690
1691    // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1692    bool metadata_exists
1693      = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1694           logout) != "");
1695
1696    metadata_wrap = store_metadata_wrap;
1697
1698    if (metadata_exists) {
1699      if (ifptr != NULL)
1700    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1701                     options, logout);
1702    }
1703    else {
1704      if (elseptr != NULL)
1705    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1706                     options, logout);
1707    }
1708  }
1709
1710  // If the decision component is text, then evaluate it (it is probably a
1711  // macro like _cgiargmode_) to decide what to output.
1712  else if (decision.command == dText) {
1713
1714    text_t outstring;
1715    disp.expandstring (decision.text, outstring);
1716
1717    // Check for if expression in form: str1 op str2
1718    // (such as [x] eq "y")
1719    text_t lhs_expr, op_expr, rhs_expr;
1720    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1721      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1722    if (ifptr != NULL) {
1723      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1724                       options, logout);
1725    }
1726    else {
1727      return "";
1728    }
1729      } else {
1730    if (elseptr != NULL) {
1731      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1732                       options, logout);
1733    }
1734    else {
1735      return "";
1736    }
1737      }
1738    }
1739
1740
1741    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1742    // a cgi argument macro that has not been set, it evaluates to itself.
1743    // Therefore, we have to say that a piece of text evaluates true if
1744    // it is non-empty and if it is a cgi argument evaulating to itself.
1745
1746    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1747      if (ifptr != NULL)
1748    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1749                     options, logout);
1750    } else {
1751      if (elseptr != NULL)
1752    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1753                     options, logout);
1754    }
1755  }
1756 
1757  return "";
1758}
1759
1760bool includes_metadata(const text_t& text)
1761{
1762  text_t::const_iterator here = text.begin();
1763  text_t::const_iterator end = text.end();
1764
1765  char startbracket = '[';
1766  char endbracket = ']';
1767
1768  char bracket = startbracket;
1769  while (here != end) {
1770      if (*here == bracket) {
1771          if(bracket == startbracket) {
1772              // seen a [, next look for a ] to confirm it's metadata
1773              bracket = endbracket;
1774          } else if(bracket == endbracket) {
1775              // found [ ... ] in text, so we think it includes metadata
1776              return true;
1777          }
1778      }
1779    ++here;
1780  }
1781
1782  return false;
1783}
1784
1785static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1786                  recptproto* collectproto,
1787                  ResultDocInfo_t &docinfo,
1788                  displayclass &disp, text_tmap &options,
1789                  ostream &logout) {
1790     
1791  if (includes_metadata(metavalue)) {
1792   
1793    // text has embedded metadata in it => expand it
1794    FilterRequest_t request;
1795    FilterResponse_t response;
1796   
1797    request.getParents = false;
1798   
1799    format_t *expanded_formatlistptr = new format_t();
1800    parse_formatstring (metavalue, expanded_formatlistptr,
1801            request.fields, request.getParents);
1802   
1803    // retrieve metadata
1804    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1805         collectproto, response, logout);
1806   
1807    if (!response.docInfo.empty()) {
1808     
1809      text_t expanded_metavalue
1810    = get_formatted_string(collection, collectproto,
1811                   response.docInfo[0], disp, expanded_formatlistptr,
1812                   options, logout);
1813     
1814      return expanded_metavalue;
1815    }
1816    else {
1817      return metavalue;
1818    }
1819  }
1820  else {
1821   
1822    return metavalue;
1823  }
1824}
1825
1826text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1827               displayclass &disp,
1828               text_t meta_name, ostream& logout) {
1829 
1830  ColInfoResponse_t collectinfo;
1831  comerror_t err;
1832  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1833  text_t meta_value = "";
1834  text_t lang;
1835  disp.expandstring("_cgiargl_",lang);
1836  if (lang.empty()) {
1837    lang = "en";
1838  }
1839
1840  if (err == noError) {
1841    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1842  }
1843  return meta_value;
1844 
1845
1846}
1847text_t format_string (const text_t& collection, recptproto* collectproto,
1848              ResultDocInfo_t &docinfo, displayclass &disp,
1849              format_t *formatlistptr, text_tmap &options,
1850              ostream& logout) {
1851
1852  if (formatlistptr == NULL) return "";
1853
1854  switch (formatlistptr->command) {
1855     case comOID:
1856    return docinfo.OID;
1857  case comTopOID:
1858    {
1859      text_t top_id;
1860      get_top(docinfo.OID, top_id);
1861      return top_id;
1862    }
1863  case comRank:
1864    return text_t(docinfo.ranking);
1865     case comText:
1866    return formatlistptr->text;
1867     case comLink:
1868    return options["link"];
1869     case comEndLink:
1870       {
1871     if (options["link"].empty()) return "";
1872    else return "</a>";
1873       }
1874     case comHref:
1875    return get_href(options["link"]);
1876     case comIcon:
1877    return options["icon"];
1878     case comNum:
1879    return docinfo.result_num;
1880     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1881    return get_related_docs(collection, collectproto, docinfo, logout);
1882
1883     case comSummary:
1884       return format_summary(collection, collectproto, docinfo, disp, options, logout);
1885     case comAssocLink:
1886       {
1887     text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1888         if (!link_filename.empty()) {
1889       text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1890       if (formatlistptr->text == "href") {
1891         return href;
1892       }
1893       return "<a href=\""+ href + "\">";
1894     }
1895     return "";
1896       }
1897  case comEndAssocLink:
1898    {
1899    text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1900    if (!link_filename.empty()) {
1901      return "</a>";
1902    }
1903    return "";
1904    }
1905     case comMeta:
1906    {
1907       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1908       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1909    }
1910
1911     case comDoc:
1912       return format_text(collection, collectproto, docinfo, disp, options, logout);
1913
1914     case comImage:
1915    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1916     case comTOC:
1917    return options["DocTOC"];
1918     case comDocumentButtonDetach:
1919    return options["DocumentButtonDetach"];
1920     case comDocumentButtonHighlight:
1921    return options["DocumentButtonHighlight"];
1922     case comDocumentButtonExpandContents:
1923    return options["DocumentButtonExpandContents"];
1924     case comDocumentButtonExpandText:
1925    return options["DocumentButtonExpandText"];
1926     case comHighlight:
1927    if (options["highlight"] == "1") return "<b>";
1928    break;
1929     case comEndHighlight:
1930    if (options["highlight"] == "1") return "</b>";
1931    break;
1932     case comMetadataSpanWrap:
1933        metadata_wrap=true;  metadata_wrap_type="span"; return "";
1934    break;
1935     case comEndMetadataSpanWrap:
1936    metadata_wrap=false; metadata_wrap_type="";     return "";
1937    break;
1938     case comMetadataDivWrap:
1939        metadata_wrap=true;  metadata_wrap_type="div";  return "";
1940    break;
1941     case comEndMetadataDivWrap:
1942    metadata_wrap=false; metadata_wrap_type="";     return "";
1943    break;
1944     case comIf:
1945    return get_if (collection, collectproto, docinfo, disp,
1946               formatlistptr->decision, formatlistptr->ifptr,
1947               formatlistptr->elseptr, options, logout);
1948     case comOr:
1949    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1950               options, logout);
1951     case comDocTermsFreqTotal:
1952       return docinfo.num_terms_matched;
1953     case comCollection:
1954       if (formatlistptr->meta.metaname == g_EmptyText) {
1955     return collection;
1956       }
1957       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1958   
1959  }
1960  return "";
1961}
1962
1963text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1964                 ResultDocInfo_t &docinfo, displayclass &disp,
1965                 format_t *formatlistptr, text_tmap &options,
1966                 ostream& logout) {
1967
1968   text_t ft;
1969   while (formatlistptr != NULL)
1970      {
1971     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1972                  options, logout);
1973     formatlistptr = formatlistptr->nextptr;
1974      }
1975   
1976   return ft;
1977}
1978
1979
1980// we have only preloaded the text in DocumentAction. But you may want
1981// to get the text in query, so copy what we have done with
1982// format_summary and get the text here. Probably is quite expensive?
1983text_t format_text (const text_t& collection, recptproto* collectproto,
1984            ResultDocInfo_t &docinfo, displayclass &disp,
1985            text_tmap &options, ostream& logout)
1986{
1987  text_t text;
1988
1989  if (!options["text"].empty()) {
1990    text = options["text"];
1991  }
1992  else {
1993    // get document text here
1994    DocumentRequest_t docrequest;
1995    DocumentResponse_t docresponse;
1996    comerror_t err;
1997    docrequest.OID = docinfo.OID;
1998    collectproto->get_document (collection, docrequest, docresponse, err, logout);
1999    text = docresponse.doc;
2000  }
2001
2002  if (metadata_wrap) {
2003    text = wrap_metatext(text,docinfo.OID,"Text");
2004  }
2005
2006  return text;
2007}
2008 
2009/* FUNCTION NAME: format_summary
2010 * DESC: this is invoked when a [Summary] special metadata is processed.
2011 * RETURNS: a query-biased summary for the document */
2012
2013text_t format_summary (const text_t& collection, recptproto* collectproto,
2014               ResultDocInfo_t &docinfo, displayclass &disp,
2015               text_tmap &options, ostream& logout) {
2016
2017  // GRB: added code here to ensure that the cstr (and other collections)
2018  //      uses the document metadata item Summary, rather than compressing
2019  //      the text of the document, processed via the methods in
2020  //      summarise.cpp
2021
2022  text_t summary;
2023
2024  if (docinfo.metadata.count("Summary") > 0 &&
2025      docinfo.metadata["Summary"].values.size() > 0) {
2026    summary = docinfo.metadata["Summary"].values[0];
2027  }
2028  else {
2029 
2030    text_t textToSummarise, query;
2031
2032    if(options["text"].empty()) { // get document text
2033      DocumentRequest_t docrequest;
2034      DocumentResponse_t docresponse;
2035      comerror_t err;
2036      docrequest.OID = docinfo.OID;
2037      collectproto->get_document (collection, docrequest, docresponse, err, logout);
2038      textToSummarise = docresponse.doc;
2039    }
2040    else {
2041      // in practice, this would not happen, because text is only
2042      // loaded with the [Text] command
2043      textToSummarise = options["text"];
2044    }
2045   
2046    disp.expandstring("_cgiargq_",query);
2047    summary = summarise(textToSummarise,query,80);
2048    //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2049  }
2050
2051  summary.replace("'","&#039;");
2052  summary.replace("\n","&#013;");
2053
2054  if (metadata_wrap) {
2055    summary = wrap_metatext(summary,docinfo.OID,"Summary");
2056  }
2057
2058  return summary;
2059}
2060
2061//-------------- GS3 related functions --------------
2062// copy of the other uses_expression function, but without using the extra GS2-runtime-specific parameters
2063static bool uses_expression(const text_t& outstring, text_t& lhs_expr,
2064                text_t& op_expr, text_t& rhs_expr)
2065{
2066  // Note: the string may not be of the form: str1 op str2, however
2067  // to deterine this we have to process it on the assumption it is,
2068  // and if at any point an 'erroneous' value is encountered, return
2069  // false and let something else have a go at evaluating it
2070
2071  // Starting at the end of the string and working backwards ..
2072
2073  const int outstring_len = outstring.size();
2074
2075  // skip over white space
2076  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
2077
2078  if (rhs_end<=0) {
2079    // no meaningful text or (rhs_end==0) no room for operator
2080    return false;
2081  }
2082
2083  // check for ' or " and then scan over token
2084  const char potential_quote = outstring[rhs_end];
2085  int rhs_start=rhs_end;
2086  bool quoted = false;
2087
2088  if ((potential_quote == '\'') || (potential_quote == '\"')) {
2089    --rhs_end;
2090    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
2091    quoted = true;
2092  }
2093  else {
2094    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
2095  }
2096
2097  if ((rhs_end-rhs_start)<0) {
2098    // no meaningful rhs expression
2099    return false;
2100  }
2101
2102  // form rhs_expr
2103  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
2104
2105  // skip over white space
2106  const int to_whitespace = (quoted) ? 2 : 1;
2107
2108  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
2109  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
2110
2111  if ((op_end<0) && (op_start<0)) {
2112    // no meaningful expression operator
2113    return false;
2114  }
2115
2116  if (op_end-op_start<0) {
2117    // no meaningful expression operator
2118    return false;
2119  }
2120
2121  op_expr = extract_substr(outstring,op_start,op_end);
2122
2123
2124  // check for operator
2125  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
2126     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
2127
2128    // not a valid operator
2129    return false;
2130  }
2131
2132  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
2133  if (lhs_end<0) {
2134    // no meaningful lhs expression
2135    return false;
2136  }
2137
2138  int lhs_start = scan_over_whitespace(outstring,0);
2139
2140  // form lhs_expr from remainder of string
2141  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
2142
2143  return true;
2144}
2145
2146// [ex.Title] -> ex.Title
2147static text_t remove_bracket_bookends(const text_t &str) {
2148 
2149  if(str[0] == '[' && str[str.size()-1] == ']') {
2150    return substr (str.begin()+1, str.end()-1);
2151  } else {
2152    return str;
2153  }
2154}
2155
2156static text_t get_gs3_if (const decision_t &decision, format_t *ifptr, format_t *elseptr)
2157{
2158  text_t ifstmt ="<gsf:switch>";
2159
2160
2161  if (decision.command == dMeta) {
2162    ifstmt += "<gsf:metadata name=\"";
2163    ifstmt += remove_bracket_bookends(decision.meta.metaname);
2164    ifstmt += "\"/>";
2165  }
2166
2167  else { //if(decision.command == dText)
2168
2169    text_t outstring = decision.text;
2170
2171    // Check for if expression in form: str1 op str2
2172    // (such as [x] eq "y")
2173    text_t lhs_expr, op_expr, rhs_expr;
2174    if (uses_expression(outstring,lhs_expr,op_expr,rhs_expr)) {
2175
2176      text_t if_operator = op_expr;
2177      if (op_expr == "eq" || op_expr == "==") {
2178    if_operator = "equals";
2179      } else if (op_expr == "ne" || op_expr == "!=") {
2180    if_operator = "notEquals";
2181      } else if (op_expr == "gt" || op_expr == ">") {
2182    if_operator = "greaterThan";
2183      } else if (op_expr == "lt" || op_expr == "<") {
2184    if_operator = "lessThan";
2185      } else if (op_expr == "ge" || op_expr == ">=") {
2186    if_operator = "greaterThanOrEquals";
2187      } else if (op_expr == "le" || op_expr == "<=") {
2188    if_operator = "lessThanOrEquals";
2189      } else if (op_expr == "sw") {
2190    if_operator = "startsWith";
2191      } else if (op_expr == "ew") {
2192    if_operator = "endsWith";
2193      }
2194
2195      ifstmt += "<gsf:metadata name=\"";
2196      ifstmt += remove_bracket_bookends(lhs_expr);
2197      ifstmt += "\"/>";
2198
2199      ifstmt += "<gsf:when test=\"";
2200      ifstmt += if_operator; // the test operator
2201      ifstmt += "\" test-value=\"";
2202      ifstmt += remove_bracket_bookends(rhs_expr); // the test-value
2203      ifstmt += "\">";
2204    }
2205    else {
2206      ifstmt += "<gsf:metadata name=\"";
2207      ifstmt += remove_bracket_bookends(decision.text);
2208      ifstmt += "\"/>";
2209      ifstmt += "<gsf:when test=\"exists\">";
2210    }
2211  }
2212   
2213  // if portion
2214  text_t if_body = "";
2215  while(ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
2216    if_body += transform_to_GS3_format (ifptr);
2217    ifptr = ifptr->nextptr;
2218  }
2219  ifstmt += if_body;
2220  ifstmt += "</gsf:when>";
2221
2222  // else portion
2223  if(elseptr != NULL) {
2224
2225    ifstmt += "<gsf:otherwise>";
2226    text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
2227    while(elseptr != NULL) {
2228      else_body += transform_to_GS3_format (elseptr);
2229      elseptr = elseptr->nextptr;
2230    }
2231    ifstmt += else_body;
2232    ifstmt += "</gsf:otherwise>";
2233  } 
2234
2235  ifstmt += "</gsf:switch>";
2236  return ifstmt;
2237}
2238
2239
2240static text_t get_gs3_or (format_t *orptr) {
2241  text_t result = "<gsf:choose-metadata>";
2242
2243  while(orptr != NULL) {
2244    text_t or_body = transform_to_GS3_format (orptr);
2245    if (!or_body.empty()) {
2246      result += or_body;
2247    }
2248
2249    orptr = orptr->nextptr;   
2250  }
2251  result += "</gsf:choose-metadata>";
2252  return result;
2253}
2254
2255// what about all the <td>? Does that get stored in formatlistptr, such as under the ->text field?
2256text_t get_GS3_formatstring (format_t *formatlistptr) {
2257  text_t result;
2258
2259  while (formatlistptr != NULL) {
2260    result += transform_to_GS3_format(formatlistptr);   
2261    formatlistptr = formatlistptr->nextptr;
2262  }
2263 
2264  return result;
2265}
2266
2267text_t transform_to_GS3_format (format_t *formatlistptr) {
2268
2269  if (formatlistptr == NULL) return "";
2270 
2271  switch (formatlistptr->command) {
2272  case comOID:
2273    return "<gsf:OID/>";
2274  case comTopOID:
2275    return "<gsf:metadata name='OID' select='root' />"; // for now try this
2276  case comRank:
2277    return "<gsf:rank/>";
2278  case comText:
2279    return formatlistptr->text; // [text]? or any string that is not a command or reserved
2280  case comLink:
2281    return "<gsf:link type='document'>"; // type?
2282  case comEndLink:
2283    return "</gsf:link>";
2284  case comHref:
2285    return "<gsf:lib name=\"href\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2286  case comIcon:
2287    return "<gsf:icon type='document'/>";
2288  case comNum:
2289    return "<gsf:lib name=\"num\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2290  case comRel: //if [RelatedDocuments] appears in format string, collect relation data
2291    return "<gsf:lib name=\"RelatedDocuments\"/>"; // output comment marking this as deprecated or to be implemented for GS3 in gslib xslt
2292  case comSummary:
2293    return "<gsf:lib name=\"Summary\"/>"; // in gslib xslt output comment marking this as to be implemented for GS3
2294    // need to invent this for GS3 based on what GS2 does
2295  case comAssocLink:
2296    return "<gsf:link type='source'>";
2297  case comEndAssocLink:
2298    return "</gsf:link>";
2299  case comMeta:
2300    return "<gsf:metadata name=\"" + formatlistptr->meta.metaname + "\" />";//?
2301  case comDoc:
2302    return "<gsf:text/>";
2303  case comImage: // the cover img seems to be handled by some magic code in GS3
2304    return "<gsf:lib name=\"image\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2305  case comTOC:
2306    return "<gsf:lib name=\"TOC\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2307    // need to think about whether an equivalent actually exists
2308    // return "<gsf:option name=\"TOC\" value=\"true\"/>"; // this is wrong
2309  case comDocumentButtonDetach:
2310    return "<gsf:lib name=\"DocumentButtonDetach\"/>"; // output comment marking this as deprecated in gslib xslt
2311  case comDocumentButtonHighlight:
2312    return "<gsf:lib name=\"DocumentButtonHighlight\"/>"; // output comment marking this as deprecated in gslib xslt
2313  case comDocumentButtonExpandContents:
2314    return "<gsf:lib name=\"DocumentButtonExpandContents\"/>"; // output comment marking this as deprecated in gslib xslt
2315  case comDocumentButtonExpandText:
2316    return "<gsf:lib name=\"DocumentButtonExpandText\"/>"; // output comment marking this as deprecated in gslib xslt
2317  case comHighlight:
2318    return "<span class=\"highlight\">";
2319    break;
2320  case comEndHighlight:
2321    return "</span>";
2322    break;
2323  case comMetadataSpanWrap:
2324    metadata_wrap=true;  metadata_wrap_type="span"; return "";
2325    break;
2326  case comEndMetadataSpanWrap:
2327    metadata_wrap=false; metadata_wrap_type="";     return "";
2328    break;
2329  case comMetadataDivWrap:
2330    metadata_wrap=true;  metadata_wrap_type="div";  return "";
2331    break;
2332  case comEndMetadataDivWrap:
2333    metadata_wrap=false; metadata_wrap_type="";     return "";
2334    break;
2335  case comIf:
2336    return get_gs3_if (formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr);
2337  case comOr:
2338    return get_gs3_or (formatlistptr->orptr);
2339    //return "<gsf:choose-metadata>"+get_gs3_or (formatlistptr->orptr)+"</gsf:choose-metadata>";
2340  case comDocTermsFreqTotal:
2341    return "<gsf:lib name=\"DocTermsFreqTotal\"/>";
2342  case comCollection: // trying to get all the metadata for a collection. How is this done in GS3???
2343    return "<gsf:lib name=\"collection\"/>";
2344  }
2345  return "";
2346}
Note: See TracBrowser for help on using the browser.