root/gsdl/trunk/runtime-src/src/recpt/formattools.cpp @ 16915

Revision 16915, 47.0 KB (checked in by mdewsnip, 12 years ago)

Changes made by Richard Managh at DL Consulting Ltd for returning document-level term frequency totals.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
34// a few function prototypes
35
36static text_t format_string (const text_t& collection, recptproto* collectproto,
37                 ResultDocInfo_t &docinfo, displayclass &disp,
38                 format_t *formatlistptr, text_tmap &options,
39                 ostream& logout);
40
41static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
42              format_t *formatlistptr, text_tset &metadata, bool &getParents);
43
44static text_t format_summary (const text_t& collection, recptproto* collectproto,
45                  ResultDocInfo_t &docinfo, displayclass &disp,
46                  text_tmap &options, ostream& logout);
47static text_t format_text (const text_t& collection, recptproto* collectproto,
48                  ResultDocInfo_t &docinfo, displayclass &disp,
49                  text_tmap &options, ostream& logout);
50
51static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
52                  recptproto* collectproto, ResultDocInfo_t &docinfo,
53                  displayclass &disp, text_tmap &options,
54                  ostream &logout);
55
56
57void metadata_t::clear() {
58  metaname.clear();
59  metacommand = mNone;
60  mqualifier.parent  = pNone;
61  mqualifier.sibling = sNone;
62  mqualifier.child   = cNone;
63  parentoptions.clear();
64  siblingoptions.clear();
65  childoptions.clear();
66}
67
68void decision_t::clear() {
69  command = dMeta;
70  meta.clear();
71  text.clear();
72}
73
74void format_t::clear() {
75  command = comText;
76  decision.clear();
77  text.clear();
78  meta.clear();
79  nextptr = NULL;
80  ifptr = NULL;
81  elseptr = NULL;
82  orptr = NULL;
83}
84
85void formatinfo_t::clear() {
86  DocumentImages = false;
87  DocumentTitles = true;
88  DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
89  DocumentContents = true;
90  DocumentArrowsBottom = true;
91  DocumentArrowsTop = false;
92  DocumentSearchResultLinks = false;
93  DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
94  //  DocumentButtons.push_back ("Expand Text");
95  //  DocumentButtons.push_back ("Expand Contents");
96  DocumentButtons.push_back ("Detach");
97  DocumentButtons.push_back ("Highlight");
98  RelatedDocuments = "";
99  DocumentText = "[Text]";
100  formatstrings.erase (formatstrings.begin(), formatstrings.end());
101  DocumentUseHTML = false;
102  AllowExtendedOptions = false;
103}
104
105// simply checks to see if formatstring begins with a <td> tag
106bool is_table_content (const text_t &formatstring) {
107  text_t::const_iterator here = formatstring.begin();
108  text_t::const_iterator end = formatstring.end();
109 
110  while (here != end) {
111    if (*here != ' ') {
112      if ((*here == '<') && ((here+3) < end)) {
113    if ((*(here+1) == 't' || *(here+1) == 'T') &&
114        (*(here+2) == 'd' || *(here+2) == 'D') &&
115        (*(here+3) == '>' || *(here+3) == ' '))
116      return true;
117      } else return false;
118    }
119    ++here;
120  }
121  return false;
122}
123
124bool is_table_content (const format_t *formatlistptr) {
125
126  if (formatlistptr == NULL) return false;
127 
128  if (formatlistptr->command == comText)
129    return is_table_content (formatlistptr->text);
130   
131  return false;
132}
133
134// returns false if key isn't in formatstringmap
135bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
136               text_t &formatstring) {
137
138  formatstring.clear();
139  text_tmap::const_iterator it = formatstringmap.find(key);
140  if (it == formatstringmap.end()) return false;
141  formatstring = (*it).second;
142  return true;
143}
144
145// tries to find "key1key2" then "key1" then "key2"
146bool get_formatstring (const text_t &key1, const text_t &key2, 
147               const text_tmap &formatstringmap,
148               text_t &formatstring) {
149
150  formatstring.clear();
151  text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
152  if (it != formatstringmap.end()) {
153    formatstring = (*it).second;
154    return true;
155  }
156  it = formatstringmap.find(key1);
157  if (it != formatstringmap.end()) {
158    formatstring = (*it).second;
159    return true;
160  }
161  it = formatstringmap.find(key2);
162  if (it != formatstringmap.end()) {
163    formatstring = (*it).second;
164    return true;
165  }
166  return false;
167}
168
169
170text_t remove_namespace(const text_t &meta_name) {
171  text_t::const_iterator end = meta_name.end();
172  text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
173  if (it != end) {
174    return substr(it+1, end);
175  }
176
177  return meta_name;
178
179}
180// returns a date of form _format:date_(year, month, day)
181// input is date of type yyyy-?mm-?dd
182// at least the year must be present in date
183text_t format_date (const text_t &date) {
184
185  if (date.size() < 4) return "";
186
187  text_t::const_iterator datebegin = date.begin();
188
189  text_t year = substr (datebegin, datebegin+4);
190  int chars_seen_so_far = 4;
191
192  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
193  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
194 
195  text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
196  int imonth = month.getint();
197  if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
198 
199  chars_seen_so_far += 2;
200  if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
201 
202  if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
203
204  text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
205  if (day[0] == '0') day = substr (day.begin()+1, day.end());
206  int iday = day.getint();
207  if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
208   
209  return "_format:date_("+year+","+month+","+day+")";
210
211
212// converts an iso639 language code to its English equivalent
213// should we be checking that the macro exists??
214text_t iso639 (const text_t &langcode) {
215  if (langcode.empty()) return "";
216  return "_iso639:iso639"+langcode+"_";
217}
218
219
220text_t get_href (const text_t &link) {
221
222  text_t href;
223
224  text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
225  text_t::const_iterator end = link.end();
226  if (here == end) return g_EmptyText;
227 
228  ++here;
229  while (here != end) {
230    if (*here == '"') break;
231    href.push_back(*here);
232    ++here;
233  }
234
235  return href;
236}
237
238//this function gets the information associated with the relation
239//metadata for the document associated with 'docinfo'. This relation
240//metadata consists of a line of pairs containing 'collection, document OID'
241//(this is the OID of the document related to the current document, and
242//the collection the related document belongs to). For each of these pairs
243//the title metadata is obtained and then an html link between the title
244//of the related doc and the document's position (the document will be
245//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
246//(where collection is the related documents collection, and OID is the
247//related documents OID).  A list of these html links are made for as many
248//related documents as there are. This list is then returned. If there are
249//no related documents available for the current document then the string
250//'.. no related documents .. ' is returned.
251text_t get_related_docs(const text_t& collection, recptproto* collectproto,
252            ResultDocInfo_t &docinfo, ostream& logout){
253 
254  text_tset metadata;
255
256  //insert the metadata we wish to collect
257  metadata.insert("dc.Relation");
258  metadata.insert("Title"); 
259  metadata.insert("Subject"); //for emails, where title data doesn't apply
260 
261  FilterResponse_t response;
262  text_t relation = ""; //string for displaying relation metadata
263  text_t relationTitle = ""; //the related documents Title (or subject)
264  text_t relationOID = ""; //the related documents OID 
265
266  //get the information associated with the metadata for current doc
267  if (get_info (docinfo.OID, collection, "", metadata,
268        false, collectproto, response, logout)) {
269   
270    //if the relation metadata exists, store for displaying
271    if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
272      relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
273
274      //split relation data into pairs of collectionname,ID number
275      text_tarray relationpairs;
276      splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
277     
278      text_tarray::const_iterator currDoc = relationpairs.begin(); 
279      text_tarray::const_iterator lastDoc = relationpairs.end();
280
281      //iterate through the pairs to split and display
282      while(currDoc != lastDoc){
283   
284    //split pairs into collectionname and ID
285    text_tarray relationdata;
286    splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
287   
288    //get first element in the array (collection)
289    text_tarray::const_iterator doc_data = relationdata.begin();
290    text_t document_collection = *doc_data;
291    ++doc_data; //increment to get next item in array (oid)
292    text_t document_OID = *doc_data;
293   
294    //create html link to related document
295    relation += "<a href=\"_httpdocument_&c=" + document_collection;
296    relation += "&cl=search&d=" + document_OID;
297       
298    //get the information associated with the metadata for related doc
299    if (get_info (document_OID, document_collection, "", metadata,
300              false, collectproto, response, logout)) {
301     
302      //if title metadata doesn't exist, collect subject metadata
303      //if that doesn't exist, just call it 'related document'
304      if (!response.docInfo[0].metadata["Title"].values[0].empty())
305        relationTitle = response.docInfo[0].metadata["Title"].values[0];
306      else if (!response.docInfo[0].metadata["Subject"].values.empty())
307        relationTitle = response.docInfo[0].metadata["Subject"].values[0];
308      else relationTitle =  "RELATED DOCUMENT";
309     
310    }
311   
312    //link the related document's title to its page
313    relation += "\">" + relationTitle + "</a>";
314    relation += "  (" + document_collection + ")<br>";
315   
316    ++currDoc;
317      }
318    }
319   
320  }
321
322  if(relation.empty()) //no relation data for documnet
323    relation = ".. no related documents .. ";
324
325  return relation;
326}
327
328
329
330static void get_parent_options (text_t &instring, metadata_t &metaoption) {
331
332  assert (instring.size() > 7);
333  if (instring.size() <= 7) return;
334
335  text_t meta, com, op;
336  bool inbraces = false;
337  bool inquotes = false;
338  bool foundcolon = false;
339  text_t::const_iterator here = instring.begin()+6;
340  text_t::const_iterator end = instring.end();
341  while (here != end) {
342    if (foundcolon) meta.push_back (*here);
343    else if (*here == '(') inbraces = true;
344    else if (*here == ')') inbraces = false;
345    else if (*here == '\'' && !inquotes) inquotes = true;
346    else if (*here == '\'' && inquotes) inquotes = false;
347    else if (*here == ':' && !inbraces) foundcolon = true;
348    else if (inquotes) op.push_back (*here);
349    else com.push_back (*here);
350    ++here;
351  }
352
353  instring = meta;
354  if (com.empty())
355    metaoption.mqualifier.parent = pImmediate;
356  else if (com == "Top")
357    metaoption.mqualifier.parent = pTop;
358  else if (com == "All") {
359    metaoption.mqualifier.parent = pAll;
360    metaoption.parentoptions = op;
361  }
362}
363
364
365static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
366
367  assert (instring.size() > 8);
368  if (instring.size() <= 8) return;
369  text_t meta, com, op;
370  bool inbraces = false;
371  bool inquotes = false;
372  bool foundcolon = false;
373  text_t::const_iterator here = instring.begin()+7;
374  text_t::const_iterator end = instring.end();
375  while (here != end) {
376    if (foundcolon) meta.push_back (*here);
377    else if (*here == '(') inbraces = true;
378    else if (*here == ')') inbraces = false;
379    else if (*here == '\'' && !inquotes) inquotes = true;
380    else if (*here == '\'' && inquotes) inquotes = false;
381    else if (*here == ':' && !inbraces) foundcolon = true;   
382    else if (inquotes) op.push_back (*here);
383    else com.push_back (*here);
384    ++here;
385  }
386
387  instring = meta;
388  metaoption.siblingoptions.clear();
389
390  if (com.empty()) {
391    metaoption.mqualifier.sibling = sAll;
392    metaoption.siblingoptions = " ";
393  }
394  else if (com == "first") {
395    metaoption.mqualifier.sibling = sNum;
396    metaoption.siblingoptions = "0";
397  }
398  else if (com == "last") {
399    metaoption.mqualifier.sibling = sNum;
400    metaoption.siblingoptions = "-2"; // == last
401  }
402  else if (com.getint()>0) {
403    metaoption.mqualifier.sibling = sNum;
404    int pos = com.getint()-1;
405    metaoption.siblingoptions +=pos;
406  }
407  else {
408    metaoption.mqualifier.sibling = sAll;
409    metaoption.siblingoptions = op;
410  }
411}
412
413static void get_child_options (text_t &instring, metadata_t &metaoption) {
414
415  assert (instring.size() > 6);
416  if (instring.size() <= 6) return;
417  text_t meta, com, op;
418  bool inbraces = false;
419  bool inquotes = false;
420  bool foundcolon = false;
421  text_t::const_iterator here = instring.begin()+5;
422  text_t::const_iterator end = instring.end();
423  while (here != end) {
424    if (foundcolon) meta.push_back (*here);
425    else if (*here == '(') inbraces = true;
426    else if (*here == ')') inbraces = false;
427    else if (*here == '\'' && !inquotes) inquotes = true;
428    else if (*here == '\'' && inquotes) inquotes = false;
429    else if (*here == ':' && !inbraces) foundcolon = true;
430    else if (inquotes) op.push_back (*here);
431    else com.push_back (*here);
432    ++here;
433  }
434
435  instring = meta;
436  if (com.empty()) {
437    metaoption.mqualifier.child = cAll;
438    metaoption.childoptions = " ";
439  }
440  else if (com == "first") {
441    metaoption.mqualifier.child = cNum;
442    metaoption.childoptions = ".fc";
443  }
444  else if (com == "last") {
445    metaoption.mqualifier.child = cNum;
446    metaoption.childoptions = ".lc";
447  }
448  else if (com.getint()>0) {
449    metaoption.mqualifier.child = cNum;
450    metaoption.childoptions = "."+com;
451  }
452  else {
453    metaoption.mqualifier.child = cAll;
454    metaoption.childoptions = op;
455  }
456}
457
458
459
460static void parse_meta (text_t &meta, metadata_t &metaoption,
461            text_tset &metadata, bool &getParents) {
462
463  if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
464    metaoption.metacommand |= mCgiSafe;
465    meta = substr (meta.begin()+8, meta.end());
466  }
467  if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {   
468    metaoption.metacommand |= mSpecial;
469    meta = substr (meta.begin()+7, meta.end());
470  }
471
472  if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
473    getParents = true;
474    metaoption.metacommand |= mParent;
475    get_parent_options (meta, metaoption);
476  }
477  else if (meta.size() > 6 && (substr (meta.begin(), meta.begin()+5) == "child")) {
478    metaoption.metacommand |= mChild;
479    get_child_options (meta, metaoption);
480    metadata.insert("contains");
481  }
482  // parent and child can have sibling also
483  if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
484    metaoption.metacommand |= mSibling;
485    get_sibling_options (meta, metaoption);
486  }
487 
488  // check for ex. which may occur in format statements
489  if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
490    meta = substr (meta.begin()+3, meta.end());
491  }
492  metadata.insert (meta);
493  metaoption.metaname = meta;
494}
495
496static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
497  if (meta == "collection") {
498    // no qualifiers
499    metaoption.metaname = g_EmptyText;
500    return;
501  }
502  meta = substr (meta.begin()+11, meta.end());
503  metaoption.metaname = meta;
504 
505}
506
507static void parse_meta (text_t &meta, format_t *formatlistptr,
508            text_tset &metadata, bool &getParents) {
509 
510  if (meta == "link")
511    formatlistptr->command = comLink;
512  else if (meta == "/link")
513    formatlistptr->command = comEndLink;
514
515  else if (meta == "href")
516    formatlistptr->command = comHref;
517
518  else if (meta == "num")
519    formatlistptr->command = comNum;
520
521  else if (meta == "icon")
522    formatlistptr->command = comIcon;
523
524  else if (meta == "Text")
525    formatlistptr->command = comDoc;
526 
527  else if (meta == "RelatedDocuments")
528   formatlistptr->command = comRel;
529
530  else if (meta == "highlight")
531    formatlistptr->command = comHighlight;
532
533  else if (meta == "/highlight")
534    formatlistptr->command = comEndHighlight;
535
536  else if (meta == "Summary")
537    formatlistptr->command = comSummary;
538
539  else if (meta == "DocImage")
540    formatlistptr->command = comImage;
541
542  else if (meta == "DocTOC")
543    formatlistptr->command = comTOC;
544
545  else if (meta == "DocumentButtonDetach")
546     formatlistptr->command = comDocumentButtonDetach;
547 
548  else if (meta == "DocumentButtonHighlight")
549     formatlistptr->command = comDocumentButtonHighlight;
550 
551  else if (meta == "DocumentButtonExpandContents")
552    formatlistptr->command = comDocumentButtonExpandContents;
553
554  else if (meta == "DocumentButtonExpandText")
555     formatlistptr->command = comDocumentButtonExpandText;
556
557  else if (meta == "DocOID")
558     formatlistptr->command = comOID;
559  else if (meta == "DocTopOID")
560    formatlistptr->command = comTopOID;
561  else if (meta == "DocRank")
562    formatlistptr->command = comRank;
563  else if (meta == "DocTermsFreqTotal")
564    formatlistptr->command = comDocTermsFreqTotal;
565  else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
566    formatlistptr->command = comCollection;
567    parse_coll_meta(meta, formatlistptr->meta);
568  }
569  else {
570    formatlistptr->command = comMeta;
571    parse_meta (meta, formatlistptr->meta, metadata, getParents);
572  }
573}
574
575
576static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
577              text_tset &metadata, bool &getParents) {
578
579  text_t text;
580  text_t::const_iterator here = formatstring.begin();
581  text_t::const_iterator end = formatstring.end();
582
583  while (here != end) {
584
585    if (*here == '\\') {
586      ++here;
587      if (here != end) text.push_back (*here);
588
589    } else if (*here == '{') {
590      if (!text.empty()) {
591    formatlistptr->command = comText;
592    formatlistptr->text = text;
593    formatlistptr->nextptr = new format_t();
594    formatlistptr = formatlistptr->nextptr;
595   
596    text.clear();
597      }
598      if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
599
600    formatlistptr->nextptr = new format_t();
601    formatlistptr = formatlistptr->nextptr;
602    if (here == end) break;
603      }
604    } else if (*here == '[') {
605      if (!text.empty()) {
606    formatlistptr->command = comText;
607    formatlistptr->text = text;
608    formatlistptr->nextptr = new format_t();
609    formatlistptr = formatlistptr->nextptr;
610
611    text.clear();
612      }
613      text_t meta;
614      ++here;
615      while (*here != ']') {
616    if (here == end) return false;
617    meta.push_back (*here);
618    ++here;
619      }
620      parse_meta (meta, formatlistptr, metadata, getParents);
621      formatlistptr->nextptr = new format_t();
622      formatlistptr = formatlistptr->nextptr;
623
624    } else
625      text.push_back (*here);
626
627    if (here != end) ++here;
628  }
629  if (!text.empty()) {
630    formatlistptr->command = comText;
631    formatlistptr->text = text;
632    formatlistptr->nextptr = new format_t();
633    formatlistptr = formatlistptr->nextptr;
634
635  }
636  return true;
637}
638
639
640static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
641              format_t *formatlistptr, text_tset &metadata, bool &getParents) {
642
643  text_t::const_iterator it = findchar (here, end, '}');
644  if (it == end) return false;
645
646  text_t com = substr (here, it);
647  here = findchar (it, end, '{');
648  if (here == end) return false;
649  else ++here;
650
651  if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
652  else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
653  else return false;
654
655  int commacount = 0;
656  text_t text;
657  while (here != end) {
658
659    if (*here == '\\') {
660      ++here;
661      if (here != end) text.push_back(*here);
662     
663    }
664 
665    else if (*here == ',' || *here == '}' || *here == '{') {
666
667      if (formatlistptr->command == comOr) {
668    // the {Or}{this, or this, or this, or this} statement
669    format_t *or_ptr;
670   
671    // find the next unused orptr
672    if (formatlistptr->orptr == NULL) {
673      formatlistptr->orptr = new format_t();
674      or_ptr = formatlistptr->orptr;
675    } else {
676      or_ptr = formatlistptr->orptr;
677      while (or_ptr->nextptr != NULL)
678        or_ptr = or_ptr->nextptr;
679      or_ptr->nextptr = new format_t();
680      or_ptr = or_ptr->nextptr;
681    }
682
683    if (!text.empty())
684      {
685        if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
686      }
687
688    if (*here == '{')
689      {
690        // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
691        // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
692        // The latter can always be re-written:
693        // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
694       
695        if (!text.empty()) // already used up allocated format_t
696          {
697        // => allocate new one for detected action
698        or_ptr->nextptr = new format_t();
699        or_ptr = or_ptr->nextptr;
700          }
701        if (!parse_action(++here, end, or_ptr, metadata, getParents))
702          {
703        return false;
704          }
705      }
706    else
707      {
708        if (*here == '}') break;
709      }
710    text.clear();
711
712      }
713
714      // Parse an {If}{decide,do,else} statement
715      else {
716   
717    // Read the decision component. 
718    if (commacount == 0) {
719      // Decsion can be a metadata element, or a piece of text.
720      // Originally Stefan's code, updated 25/10/2000 by Gordon.
721
722      text_t::const_iterator beginbracket = text.begin();
723      text_t::const_iterator endbracket = (text.end() - 1);
724
725      // Decision is based on a metadata element
726      if ((*beginbracket == '[') && (*endbracket == ']')) {
727        // Ignore the surrounding square brackets
728        text_t meta = substr (beginbracket+1, endbracket);
729        parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
730        ++commacount;
731        text.clear();
732      }
733
734      // Decision is a piece of text (probably a macro like _cgiargmode_).
735      else {
736
737        // hunt for any metadata in string, which might be uses in
738        // to test a condition, e.g. [Format] eq 'PDF'
739        format_t* dummyformat = new format_t();
740        // update which metadata fields needed
741        // (not interested in updatng formatlistptr)
742        parse_string (text, dummyformat, metadata, getParents);
743        delete dummyformat;
744
745        formatlistptr->decision.command = dText;
746        formatlistptr->decision.text = text;
747        ++commacount;
748        text.clear();
749      }
750    }
751
752    // Read the "then" and "else" components of the {If} statement.
753    else {
754      format_t** nextlistptr = NULL;
755      if (commacount == 1) {
756        nextlistptr = &formatlistptr->ifptr;
757      } else if (commacount == 2 ) {
758        nextlistptr = &formatlistptr->elseptr;
759      } else {
760        return false;
761      }
762
763      if (!text.empty()) {
764        if (*nextlistptr == NULL) {
765          *nextlistptr = new format_t();
766        } else {
767
768          // skip to the end of any format_t statements already added
769          while ((*nextlistptr)->nextptr != NULL)
770          {
771        nextlistptr = &(*nextlistptr)->nextptr;
772          }
773
774          (*nextlistptr)->nextptr = new format_t();
775          nextlistptr = &(*nextlistptr)->nextptr;
776        }
777
778        if (!parse_string (text, *nextlistptr, metadata, getParents))
779          {
780        return false;
781          }
782        text.clear();
783      }
784     
785      if (*here == '{')
786        {
787          if (*nextlistptr == NULL) {
788        *nextlistptr = new format_t();
789          } else {
790        // skip to the end of any format_t statements already added
791        while ((*nextlistptr)->nextptr != NULL)
792          {
793            nextlistptr = &(*nextlistptr)->nextptr;
794          }
795
796        (*nextlistptr)->nextptr = new format_t();
797        nextlistptr = &(*nextlistptr)->nextptr;
798          }
799
800          if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
801        {
802          return false;
803        }
804        }
805      else
806        {
807          if (*here == '}') break;
808          ++commacount;
809        }
810    }
811      }
812     
813    } else text.push_back(*here);
814   
815    if (here != end) ++here;
816  }
817
818  return true;
819}
820
821
822bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
823             text_tset &metadata, bool &getParents) {
824
825  formatlistptr->clear();
826  getParents = false;
827
828  return (parse_string (formatstring, formatlistptr, metadata, getParents));
829}
830
831// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
832// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
833static text_t get_formatted_meta_text(MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
834{
835  text_t no_ns_metaname = remove_namespace(meta.metaname);
836  text_t tmp;
837  bool first = true;
838 
839  const int start_i=0;
840  const int end_i = metainfo.values.size()-1;
841 
842  if (position == -1) { // all
843    for (int i=start_i; i<=end_i; ++i) {
844      if (!first) tmp += meta.siblingoptions;
845      if (meta.metacommand & mSpecial) {
846    // special formatting
847    if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[i]);
848    else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[i]);
849    else tmp += "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
850      }
851      else tmp += metainfo.values[i];
852      first = false;
853     
854    }
855  } else {
856    if (position == -2) { // end
857      position = end_i;
858    } else if (position < start_i || position > end_i) {
859      return "";
860    }
861    if (meta.metacommand & mSpecial) {
862      // special formatting
863      if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[position]);
864      else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[position]);
865      else tmp += "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
866    }
867    else tmp += metainfo.values[position];
868  }
869  if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (tmp);
870  else return tmp;
871}
872
873static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
874{
875 
876  MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
877  switch (meta.mqualifier.parent) {
878  case pNone:
879    return "Nothing!!";
880    break;
881
882  case pImmediate:
883    if (parent != NULL) {
884      return get_formatted_meta_text(*parent, meta, siblings_values);
885    }
886    break;
887
888  case pTop:
889    if (parent != NULL) {
890      while (parent->parent != NULL) parent = parent->parent;
891      return get_formatted_meta_text(*parent, meta, siblings_values);
892    }
893    break;
894
895  case pAll:
896    MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
897    if (parent != NULL) {
898      text_tarray tmparray;
899      while (parent != NULL) {
900    tmparray.push_back (get_formatted_meta_text(*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
901    parent = parent->parent;
902      }
903      // now join them up - use teh parent separator
904      bool first = true;
905      text_t tmp;
906      text_tarray::reverse_iterator here = tmparray.rbegin();
907      text_tarray::reverse_iterator end = tmparray.rend();
908      while (here != end) {
909    if (!first) tmp += meta.parentoptions;
910    tmp += *here;
911    first = false;
912    ++here;
913      }
914      if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
915      else return tmp;
916    }
917  }
918  return "";
919
920}
921
922static text_t get_child_meta (const text_t& collection,
923                  recptproto* collectproto,
924                  ResultDocInfo_t &docinfo, displayclass &disp,
925                  const metadata_t &meta, text_tmap &options,
926                  ostream& logout, int siblings_values)
927{
928  if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
929 
930  const text_t& child_metaname = meta.metaname;
931  const text_t& child_field = meta.childoptions;
932  text_tset child_metadata;
933  child_metadata.insert(child_metaname);
934
935  FilterResponse_t child_response;
936  if (meta.mqualifier.child == cNum) {
937    // just one child
938    //get the information associated with the metadata for child doc
939    if (!get_info (docinfo.OID+child_field, collection, "", child_metadata,
940           false, collectproto, child_response, logout)) return ""; // invalid child number
941
942      if (child_response.docInfo.empty()) return false; // no info for the child
943 
944      ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
945      MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
946 
947      text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
948      return expand_metadata(child_metavalue,collection,collectproto,
949             child_docinfo,disp,options,logout);
950  }
951 
952   
953  if (meta.mqualifier.child != cAll) return false; // invalid qualifier
954 
955  // we need to get all children
956  text_t result = "";
957  text_tarray children;
958  text_t contains = docinfo.metadata["contains"].values[0];
959  splitchar (contains.begin(), contains.end(), ';', children);
960  text_tarray::const_iterator here = children.begin();
961  text_tarray::const_iterator end = children.end();
962  bool first = true;
963  while (here !=end) {
964    text_t oid = *here;
965    here++;
966    if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
967   
968   
969    //get the information associated with the metadata for child doc
970    if (!get_info (oid, collection, "", child_metadata,
971           false, collectproto, child_response, logout) ||
972    child_response.docInfo.empty()) {
973      first = false;
974      continue;
975    }
976   
977   
978    ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
979    MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
980   
981    text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
982    if (!first) result += child_field;
983    first = false;
984    // need to do this here cos otherwise we are in the wrong document
985    result +=  expand_metadata(child_metavalue,collection,collectproto,
986                   child_docinfo,disp,options,logout);
987  }
988  return result;
989   
990}
991
992static text_t get_meta (const text_t& collection, recptproto* collectproto,
993            ResultDocInfo_t &docinfo, displayclass &disp,
994            const metadata_t &meta, text_tmap &options,
995            ostream& logout) {
996 
997  // make sure we have the requested metadata
998  MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
999  if (it == docinfo.metadata.end()) return "";
1000
1001  int siblings_values = 0; // default is no siblings, just the first metadata available
1002  if (meta.metacommand & mSibling) {
1003    if (meta.mqualifier.sibling == sAll) {
1004      siblings_values = -1; //all
1005    } else if (meta.mqualifier.sibling == sNum) {
1006      siblings_values = meta.siblingoptions.getint();
1007    }
1008  }
1009  if (meta.metacommand & mParent) {
1010    return get_parent_meta(docinfo,meta,siblings_values);
1011  }
1012
1013  else if (meta.metacommand & mChild) {
1014    return get_child_meta(collection,collectproto,docinfo,disp,meta,
1015                options,logout, siblings_values);
1016  }
1017  else if (meta.metacommand & mSibling) { // only siblings
1018    MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1019    return get_formatted_meta_text(docinfo.metadata[meta.metaname],meta, siblings_values);
1020  }
1021  else {
1022   
1023    // straightforward metadata request (nothing fancy)
1024
1025    text_t classifier_metaname = docinfo.classifier_metadata_type;
1026    int metaname_index
1027      = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1028    return get_formatted_meta_text(docinfo.metadata[meta.metaname], meta, metaname_index);
1029  }
1030 
1031  return "";
1032}
1033
1034static text_t get_or (const text_t& collection, recptproto* collectproto,
1035              ResultDocInfo_t &docinfo, displayclass &disp,
1036              format_t *orptr, text_tmap &options,
1037              ostream& logout) {
1038
1039  text_t tmp;
1040  while (orptr != NULL) {
1041
1042    tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1043             options, logout);
1044    if (!tmp.empty()) return tmp;
1045
1046    orptr = orptr->nextptr;
1047  }
1048  return "";
1049}
1050
// Returns true when c is a space, horizontal tab, line feed or carriage
// return; false for every other character.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1056
1057static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1058{
1059  int pos = start_pos;
1060  while (pos<outstring.size()) {
1061    if (!char_is_whitespace(outstring[pos])) {
1062      break;
1063    }
1064    ++pos;
1065  }
1066
1067  return pos;
1068}
1069
1070static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1071{
1072  int pos = start_pos;
1073  while (pos>=0) {
1074    if (!char_is_whitespace(outstring[pos])) {
1075      break;
1076    }
1077    --pos;
1078  }
1079
1080  return pos;
1081}
1082
1083static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1084{
1085  int pos = start_pos;
1086  while (pos>=0) {
1087    if (char_is_whitespace(outstring[pos])) {
1088      break;
1089    }
1090    --pos;
1091  }
1092
1093  return pos;
1094}
1095
1096
1097static int rscan_for(const text_t& outstring, const int start_pos,
1098             const char find_c)
1099{
1100  int pos = start_pos;
1101  while (pos>=0) {
1102    char c = outstring[pos];
1103    if (outstring[pos] == find_c) {
1104      break;
1105    }
1106    --pos;
1107  }
1108
1109  return pos;
1110}
1111
1112text_t extract_substr(const text_t& outstring, const int start_pos,
1113              const int end_pos)
1114{
1115  text_t extracted_str;
1116  extracted_str.clear();
1117
1118  for (int pos=start_pos; pos<=end_pos; ++pos) {
1119    extracted_str.push_back(outstring[pos]);
1120  }
1121
1122  return extracted_str;
1123}
1124
1125
1126static text_t expand_potential_metadata(const text_t& collection,
1127                    recptproto* collectproto,
1128                    ResultDocInfo_t &docinfo,
1129                    displayclass &disp,
1130                    const text_t& intext,
1131                    text_tmap &options,
1132                    ostream& logout)
1133{
1134  text_t outtext;
1135
1136  // decide if dealing with metadata or text
1137
1138  text_t::const_iterator beginbracket = intext.begin();
1139  text_t::const_iterator endbracket = (intext.end() - 1);
1140
1141  // Decision is based on a metadata element
1142  if ((*beginbracket == '[') && (*endbracket == ']')) {
1143    // Ignore the surrounding square brackets
1144    text_t meta_text = substr (beginbracket+1, endbracket);
1145
1146    if (meta_text == "Text") {
1147      outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1148    } else {
1149
1150      text_tset metadata;
1151      bool getParents =false;
1152      metadata_t meta;
1153     
1154      parse_meta (meta_text, meta, metadata, getParents);   
1155      outtext
1156    = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1157    }
1158
1159  }
1160  else {
1161    outtext = intext;
1162  }
1163
1164  return outtext;
1165}
1166
1167
1168
1169
1170static bool uses_expression(const text_t& collection, recptproto* collectproto,
1171                ResultDocInfo_t &docinfo,
1172                displayclass &disp,
1173                const text_t& outstring, text_t& lhs_expr,
1174                text_t& op_expr, text_t& rhs_expr,
1175                text_tmap &options,
1176                ostream& logout)
1177{
1178  // Note: the string may not be of the form: str1 op str2, however
1179  // to deterine this we have to process it on the assumption it is,
1180  // and if at any point an 'erroneous' value is encountered, return
1181  // false and let something else have a go at evaluating it
1182
1183  // Starting at the end of the string and working backwards ..
1184
1185  const int outstring_len = outstring.size();
1186
1187  // skip over white space
1188  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1189
1190  if (rhs_end<=0) {
1191    // no meaningful text or (rhs_end==0) no room for operator
1192    return false;
1193  }
1194
1195  // check for ' or " and then scan over token
1196  const char potential_quote = outstring[rhs_end];
1197  int rhs_start=rhs_end;
1198  bool quoted = false;
1199
1200  if ((potential_quote == '\'') || (potential_quote == '\"')) {
1201    --rhs_end;
1202    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1203    quoted = true;
1204  }
1205  else {
1206    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1207  }
1208
1209  if ((rhs_end-rhs_start)<0) {
1210    // no meaningful rhs expression
1211    return false;
1212  }
1213
1214  // form rhs_expr
1215  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1216
1217  // skip over white space
1218  const int to_whitespace = (quoted) ? 2 : 1;
1219
1220  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1221  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1222
1223
1224  if (op_end-op_start<0) {
1225    // no meaningful expression operator
1226    return false;
1227  }
1228
1229  op_expr = extract_substr(outstring,op_start,op_end);
1230
1231
1232  // check for operator
1233  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1234     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1235
1236    // not a valid operator
1237    return false;
1238  }
1239
1240  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1241  if (lhs_end<0) {
1242    // no meaningful lhs expression
1243    return false;
1244  }
1245
1246  int lhs_start = scan_over_whitespace(outstring,0);
1247
1248  // form lhs_expr from remainder of string
1249  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1250
1251  // Now we know we have a valid expression, look up any
1252  // metadata terms
1253
1254  rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1255                       disp,rhs_expr,options,logout);
1256  lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1257                       disp,lhs_expr,options,logout);
1258
1259  return true;
1260}
1261
1262static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1263                 const text_t& rhs_expr, ostream& logout)
1264{
1265  if (op_expr == "eq") return (lhs_expr == rhs_expr);
1266  else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1267  else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1268  else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1269  else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1270  else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1271  else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1272  else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1273  else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1274  else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1275  else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1276  else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1277  else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1278  else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1279  else {
1280    logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1281  }
1282 
1283  return false;
1284}
1285
1286
1287static text_t get_if (const text_t& collection, recptproto* collectproto,
1288              ResultDocInfo_t &docinfo, displayclass &disp,
1289              const decision_t &decision,
1290              format_t *ifptr, format_t *elseptr,
1291              text_tmap &options, ostream& logout)
1292{
1293  // If the decision component is a metadata element, then evaluate it
1294  // to see whether we output the "then" or the "else" clause
1295  if (decision.command == dMeta) {
1296    if (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1297          logout) != "") {
1298      if (ifptr != NULL)
1299    return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1300                     options, logout);
1301    }
1302    else {
1303      if (elseptr != NULL)
1304    return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1305                     options, logout);
1306    }
1307  }
1308
1309  // If the decision component is text, then evaluate it (it is probably a
1310  // macro like _cgiargmode_) to decide what to output.
1311  else if (decision.command == dText) {
1312
1313    text_t outstring;
1314    disp.expandstring (decision.text, outstring);
1315
1316    // Check for if expression in form: str1 op str2
1317    // (such as [x] eq "y")
1318    text_t lhs_expr, op_expr, rhs_expr;
1319    if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1320      if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1321    if (ifptr != NULL) {
1322      return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1323                       options, logout);
1324    }
1325    else {
1326      return "";
1327    }
1328      } else {
1329    if (elseptr != NULL) {
1330      return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1331                       options, logout);
1332    }
1333    else {
1334      return "";
1335    }
1336      }
1337    }
1338
1339
1340    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
1341    // a cgi argument macro that has not been set, it evaluates to itself.
1342    // Therefore, were have to say that a piece of text evalautes true if
1343    // it is non-empty and if it is a cgi argument evaulating to itself.
1344
1345    if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1346      if (ifptr != NULL)
1347    return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1348                     options, logout);
1349    } else {
1350      if (elseptr != NULL)
1351    return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1352                     options, logout);
1353    }
1354  }
1355 
1356  return "";
1357}
1358
1359bool includes_metadata(const text_t& text)
1360{
1361  text_t::const_iterator here = text.begin();
1362  text_t::const_iterator end = text.end();
1363  while (here != end) {
1364    if (*here == '[') return true;
1365    ++here;
1366  }
1367
1368  return false;
1369}
1370
1371static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1372                  recptproto* collectproto,
1373                  ResultDocInfo_t &docinfo,
1374                  displayclass &disp, text_tmap &options,
1375                  ostream &logout) {
1376     
1377  if (includes_metadata(metavalue)) {
1378   
1379    // text has embedded metadata in it => expand it
1380    FilterRequest_t request;
1381    FilterResponse_t response;
1382   
1383    request.getParents = false;
1384   
1385    format_t *expanded_formatlistptr = new format_t();
1386    parse_formatstring (metavalue, expanded_formatlistptr,
1387            request.fields, request.getParents);
1388   
1389    // retrieve metadata
1390    get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1391         collectproto, response, logout);
1392   
1393    if (!response.docInfo.empty()) {
1394     
1395      text_t expanded_metavalue
1396    = get_formatted_string(collection, collectproto,
1397                   response.docInfo[0], disp, expanded_formatlistptr,
1398                   options, logout);
1399     
1400      return expanded_metavalue;
1401    }
1402    else {
1403      return metavalue;
1404    }
1405  }
1406  else {
1407   
1408    return metavalue;
1409  }
1410}
1411
1412text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1413               displayclass &disp,
1414               text_t meta_name, ostream& logout) {
1415 
1416  ColInfoResponse_t collectinfo;
1417  comerror_t err;
1418  collectproto->get_collectinfo (collection, collectinfo,err,logout);
1419  text_t meta_value = "";
1420  text_t lang;
1421  disp.expandstring("_cgiargl_",lang);
1422  if (lang.empty()) {
1423    lang = "en";
1424  }
1425
1426  if (err == noError) {
1427    meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1428  }
1429  return meta_value;
1430 
1431
1432}
1433text_t format_string (const text_t& collection, recptproto* collectproto,
1434              ResultDocInfo_t &docinfo, displayclass &disp,
1435              format_t *formatlistptr, text_tmap &options,
1436              ostream& logout) {
1437
1438  if (formatlistptr == NULL) return "";
1439
1440  switch (formatlistptr->command) {
1441     case comOID:
1442    return docinfo.OID;
1443  case comTopOID:
1444    {
1445      text_t top_id;
1446      get_top(docinfo.OID, top_id);
1447      return top_id;
1448    }
1449  case comRank:
1450    return text_t(docinfo.ranking);
1451     case comText:
1452    return formatlistptr->text;
1453     case comLink:
1454    return options["link"];
1455     case comEndLink:
1456    if (options["link"].empty()) return "";
1457    else return "</a>";
1458     case comHref:
1459    return get_href(options["link"]);
1460     case comIcon:
1461    return options["icon"];
1462     case comNum:
1463    return docinfo.result_num;
1464     case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1465    return get_related_docs(collection, collectproto, docinfo, logout);
1466     case comSummary:
1467    return format_summary(collection, collectproto, docinfo, disp, options, logout);
1468     case comMeta:
1469    {
1470       const text_t& metavalue =  get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1471       return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1472    }
1473     case comDoc:
1474    return format_text(collection, collectproto, docinfo, disp, options, logout);
1475    //return options["text"];
1476     case comImage:
1477    return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1478     case comTOC:
1479    return options["DocTOC"];
1480     case comDocumentButtonDetach:
1481    return options["DocumentButtonDetach"];
1482     case comDocumentButtonHighlight:
1483    return options["DocumentButtonHighlight"];
1484     case comDocumentButtonExpandContents:
1485    return options["DocumentButtonExpandContents"];
1486     case comDocumentButtonExpandText:
1487    return options["DocumentButtonExpandText"];
1488     case comHighlight:
1489    if (options["highlight"] == "1") return "<b>";
1490    break;
1491     case comEndHighlight:
1492    if (options["highlight"] == "1") return "</b>";
1493    break;
1494     case comIf:
1495    return get_if (collection, collectproto, docinfo, disp,
1496               formatlistptr->decision, formatlistptr->ifptr,
1497               formatlistptr->elseptr, options, logout);
1498     case comOr:
1499    return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1500               options, logout);
1501     case comDocTermsFreqTotal:
1502       return docinfo.num_terms_matched;
1503     case comCollection:
1504       if (formatlistptr->meta.metaname == g_EmptyText) {
1505     return collection;
1506       }
1507       return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1508   
1509  }
1510  return "";
1511}
1512
1513text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1514                 ResultDocInfo_t &docinfo, displayclass &disp,
1515                 format_t *formatlistptr, text_tmap &options,
1516                 ostream& logout) {
1517
1518   text_t ft;
1519   while (formatlistptr != NULL)
1520      {
1521     ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1522                  options, logout);
1523     formatlistptr = formatlistptr->nextptr;
1524      }
1525   
1526   return ft;
1527}
1528
1529
1530// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1531text_t format_text (const text_t& collection, recptproto* collectproto,
1532            ResultDocInfo_t &docinfo, displayclass &disp,
1533            text_tmap &options, ostream& logout) {
1534  if(!options["text"].empty()) {
1535    return options["text"];
1536  }
1537  // else get document text here
1538  DocumentRequest_t docrequest;
1539  DocumentResponse_t docresponse;
1540  comerror_t err;
1541  docrequest.OID = docinfo.OID;
1542  collectproto->get_document (collection, docrequest, docresponse, err, logout);
1543  return docresponse.doc;
1544
1545}
1546 
1547/* FUNCTION NAME: format_summary
1548 * DESC: this is invoked when a [Summary] special metadata is processed.
1549 * RETURNS: a query-biased summary for the document */
1550
1551text_t format_summary (const text_t& collection, recptproto* collectproto,
1552               ResultDocInfo_t &docinfo, displayclass &disp,
1553               text_tmap &options, ostream& logout) {
1554
1555  // GRB: added code here to ensure that the cstr (and other collections)
1556  //      uses the document metadata item Summary, rather than compressing
1557  //      the text of the document, processed via the methods in
1558  //      summarise.cpp
1559  if (docinfo.metadata.count("Summary") > 0 &&
1560      docinfo.metadata["Summary"].values.size() > 0) {
1561    return docinfo.metadata["Summary"].values[0];
1562  }
1563
1564  text_t textToSummarise, query;
1565  if(options["text"].empty()) { // get document text
1566     DocumentRequest_t docrequest;
1567     DocumentResponse_t docresponse;
1568     comerror_t err;
1569     docrequest.OID = docinfo.OID;
1570     collectproto->get_document (collection, docrequest, docresponse, err, logout);
1571     textToSummarise = docresponse.doc;
1572  } else // in practice, this would not happen, because text is only
1573         // loaded with the [Text] command
1574     textToSummarise = options["text"];
1575  disp.expandstring("_cgiargq_",query);
1576  return summarise(textToSummarise,query,80);
1577}
Note: See TracBrowser for help on using the browser.