source: gsdl/trunk/runtime-src/src/recpt/formattools.cpp@ 19298

Last change on this file since 19298 was 19298, checked in by davidb, 15 years ago

optional support for metadata wrapped in span tags added. *OFF* by default.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 49.7 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
34bool spanwrap = false;
35
36// a few function prototypes
37
38static text_t format_string (const text_t& collection, recptproto* collectproto,
39 ResultDocInfo_t &docinfo, displayclass &disp,
40 format_t *formatlistptr, text_tmap &options,
41 ostream& logout);
42
43static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
44 format_t *formatlistptr, text_tset &metadata, bool &getParents);
45
46static text_t format_summary (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
49static text_t format_text (const text_t& collection, recptproto* collectproto,
50 ResultDocInfo_t &docinfo, displayclass &disp,
51 text_tmap &options, ostream& logout);
52
53static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
54 recptproto* collectproto, ResultDocInfo_t &docinfo,
55 displayclass &disp, text_tmap &options,
56 ostream &logout);
57
58
59void metadata_t::clear() {
60 metaname.clear();
61 metacommand = mNone;
62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
65 pre_tree_traverse.clear();
66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
69}
70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
74 text.clear();
75}
76
77void format_t::clear() {
78 command = comText;
79 decision.clear();
80 text.clear();
81 meta.clear();
82 nextptr = NULL;
83 ifptr = NULL;
84 elseptr = NULL;
85 orptr = NULL;
86}
87
88void formatinfo_t::clear() {
89 DocumentImages = false;
90 DocumentTitles = true;
91 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
92 DocumentContents = true;
93 DocumentArrowsBottom = true;
94 DocumentArrowsTop = false;
95 DocumentSearchResultLinks = false;
96 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
97 // DocumentButtons.push_back ("Expand Text");
98 // DocumentButtons.push_back ("Expand Contents");
99 DocumentButtons.push_back ("Detach");
100 DocumentButtons.push_back ("Highlight");
101 RelatedDocuments = "";
102 DocumentText = "[Text]";
103 formatstrings.erase (formatstrings.begin(), formatstrings.end());
104 DocumentUseHTML = false;
105 AllowExtendedOptions = false;
106}
107
108// simply checks to see if formatstring begins with a <td> tag
109bool is_table_content (const text_t &formatstring) {
110 text_t::const_iterator here = formatstring.begin();
111 text_t::const_iterator end = formatstring.end();
112
113 while (here != end) {
114 if (*here != ' ') {
115 if ((*here == '<') && ((here+3) < end)) {
116 if ((*(here+1) == 't' || *(here+1) == 'T') &&
117 (*(here+2) == 'd' || *(here+2) == 'D') &&
118 (*(here+3) == '>' || *(here+3) == ' '))
119 return true;
120 } else return false;
121 }
122 ++here;
123 }
124 return false;
125}
126
127bool is_table_content (const format_t *formatlistptr) {
128
129 if (formatlistptr == NULL) return false;
130
131 if (formatlistptr->command == comText)
132 return is_table_content (formatlistptr->text);
133
134 return false;
135}
136
137// returns false if key isn't in formatstringmap
138bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
139 text_t &formatstring) {
140
141 formatstring.clear();
142 text_tmap::const_iterator it = formatstringmap.find(key);
143 if (it == formatstringmap.end()) return false;
144 formatstring = (*it).second;
145 return true;
146}
147
148// tries to find "key1key2" then "key1" then "key2"
149bool get_formatstring (const text_t &key1, const text_t &key2,
150 const text_tmap &formatstringmap,
151 text_t &formatstring) {
152
153 formatstring.clear();
154 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
155 if (it != formatstringmap.end()) {
156 formatstring = (*it).second;
157 return true;
158 }
159 it = formatstringmap.find(key1);
160 if (it != formatstringmap.end()) {
161 formatstring = (*it).second;
162 return true;
163 }
164 it = formatstringmap.find(key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 return false;
170}
171
172
173text_t remove_namespace(const text_t &meta_name) {
174 text_t::const_iterator end = meta_name.end();
175 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
176 if (it != end) {
177 return substr(it+1, end);
178 }
179
180 return meta_name;
181
182}
183// returns a date of form _format:date_(year, month, day)
184// input is date of type yyyy-?mm-?dd
185// at least the year must be present in date
186text_t format_date (const text_t &date) {
187
188 if (date.size() < 4) return "";
189
190 text_t::const_iterator datebegin = date.begin();
191
192 text_t year = substr (datebegin, datebegin+4);
193 int chars_seen_so_far = 4;
194
195 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
196 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
197
198 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
199 int imonth = month.getint();
200 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
201
202 chars_seen_so_far += 2;
203 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
204
205 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
206
207 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
208 if (day[0] == '0') day = substr (day.begin()+1, day.end());
209 int iday = day.getint();
210 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
211
212 return "_format:date_("+year+","+month+","+day+")";
213}
214
215// converts an iso639 language code to its English equivalent
216// should we be checking that the macro exists??
217text_t iso639 (const text_t &langcode) {
218 if (langcode.empty()) return "";
219 return "_iso639:iso639"+langcode+"_";
220}
221
222
223text_t get_href (const text_t &link) {
224
225 text_t href;
226
227 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
228 text_t::const_iterator end = link.end();
229 if (here == end) return g_EmptyText;
230
231 ++here;
232 while (here != end) {
233 if (*here == '"') break;
234 href.push_back(*here);
235 ++here;
236 }
237
238 return href;
239}
240
241//this function gets the information associated with the relation
242//metadata for the document associated with 'docinfo'. This relation
243//metadata consists of a line of pairs containing 'collection, document OID'
244//(this is the OID of the document related to the current document, and
245//the collection the related document belongs to). For each of these pairs
246//the title metadata is obtained and then an html link between the title
247//of the related doc and the document's position (the document will be
248//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
249//(where collection is the related documents collection, and OID is the
250//related documents OID). A list of these html links are made for as many
251//related documents as there are. This list is then returned. If there are
252//no related documents available for the current document then the string
253//'.. no related documents .. ' is returned.
254text_t get_related_docs(const text_t& collection, recptproto* collectproto,
255 ResultDocInfo_t &docinfo, ostream& logout){
256
257 text_tset metadata;
258
259 //insert the metadata we wish to collect
260 metadata.insert("dc.Relation");
261 metadata.insert("Title");
262 metadata.insert("Subject"); //for emails, where title data doesn't apply
263
264 FilterResponse_t response;
265 text_t relation = ""; //string for displaying relation metadata
266 text_t relationTitle = ""; //the related documents Title (or subject)
267 text_t relationOID = ""; //the related documents OID
268
269 //get the information associated with the metadata for current doc
270 if (get_info (docinfo.OID, collection, "", metadata,
271 false, collectproto, response, logout)) {
272
273 //if the relation metadata exists, store for displaying
274 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
275 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
276
277 //split relation data into pairs of collectionname,ID number
278 text_tarray relationpairs;
279 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
280
281 text_tarray::const_iterator currDoc = relationpairs.begin();
282 text_tarray::const_iterator lastDoc = relationpairs.end();
283
284 //iterate through the pairs to split and display
285 while(currDoc != lastDoc){
286
287 //split pairs into collectionname and ID
288 text_tarray relationdata;
289 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
290
291 //get first element in the array (collection)
292 text_tarray::const_iterator doc_data = relationdata.begin();
293 text_t document_collection = *doc_data;
294 ++doc_data; //increment to get next item in array (oid)
295 text_t document_OID = *doc_data;
296
297 //create html link to related document
298 relation += "<a href=\"_httpdocument_&c=" + document_collection;
299 relation += "&cl=search&d=" + document_OID;
300
301 //get the information associated with the metadata for related doc
302 if (get_info (document_OID, document_collection, "", metadata,
303 false, collectproto, response, logout)) {
304
305 //if title metadata doesn't exist, collect subject metadata
306 //if that doesn't exist, just call it 'related document'
307 if (!response.docInfo[0].metadata["Title"].values[0].empty())
308 relationTitle = response.docInfo[0].metadata["Title"].values[0];
309 else if (!response.docInfo[0].metadata["Subject"].values.empty())
310 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
311 else relationTitle = "RELATED DOCUMENT";
312
313 }
314
315 //link the related document's title to its page
316 relation += "\">" + relationTitle + "</a>";
317 relation += " (" + document_collection + ")<br>";
318
319 ++currDoc;
320 }
321 }
322
323 }
324
325 if(relation.empty()) //no relation data for documnet
326 relation = ".. no related documents .. ";
327
328 return relation;
329}
330
331
332
333static void get_parent_options (text_t &instring, metadata_t &metaoption) {
334
335 assert (instring.size() > 7);
336 if (instring.size() <= 7) return;
337
338 text_t meta, com, op;
339 bool inbraces = false;
340 bool inquotes = false;
341 bool foundcolon = false;
342 text_t::const_iterator here = instring.begin()+6;
343 text_t::const_iterator end = instring.end();
344 while (here != end) {
345 if (foundcolon) meta.push_back (*here);
346 else if (*here == '(') inbraces = true;
347 else if (*here == ')') inbraces = false;
348 else if (*here == '\'' && !inquotes) inquotes = true;
349 else if (*here == '\'' && inquotes) inquotes = false;
350 else if (*here == ':' && !inbraces) foundcolon = true;
351 else if (inquotes) op.push_back (*here);
352 else com.push_back (*here);
353 ++here;
354 }
355
356 instring = meta;
357 if (com.empty())
358 metaoption.mqualifier.parent = pImmediate;
359 else if (com == "Top")
360 metaoption.mqualifier.parent = pTop;
361 else if (com == "All") {
362 metaoption.mqualifier.parent = pAll;
363 metaoption.parentoptions = op;
364 }
365}
366
367
368static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
369
370 assert (instring.size() > 8);
371 if (instring.size() <= 8) return;
372 text_t meta, com, op;
373 bool inbraces = false;
374 bool inquotes = false;
375 bool foundcolon = false;
376 text_t::const_iterator here = instring.begin()+7;
377 text_t::const_iterator end = instring.end();
378 while (here != end) {
379 if (foundcolon) meta.push_back (*here);
380 else if (*here == '(') inbraces = true;
381 else if (*here == ')') inbraces = false;
382 else if (*here == '\'' && !inquotes) inquotes = true;
383 else if (*here == '\'' && inquotes) inquotes = false;
384 else if (*here == ':' && !inbraces) foundcolon = true;
385 else if (inquotes) op.push_back (*here);
386 else com.push_back (*here);
387 ++here;
388 }
389
390 instring = meta;
391 metaoption.siblingoptions.clear();
392
393 if (com.empty()) {
394 metaoption.mqualifier.sibling = sAll;
395 metaoption.siblingoptions = " ";
396 }
397 else if (com == "first") {
398 metaoption.mqualifier.sibling = sNum;
399 metaoption.siblingoptions = "0";
400 }
401 else if (com == "last") {
402 metaoption.mqualifier.sibling = sNum;
403 metaoption.siblingoptions = "-2"; // == last
404 }
405 else if (com.getint()>0) {
406 metaoption.mqualifier.sibling = sNum;
407 int pos = com.getint()-1;
408 metaoption.siblingoptions +=pos;
409 }
410 else {
411 metaoption.mqualifier.sibling = sAll;
412 metaoption.siblingoptions = op;
413 }
414}
415
416static void get_child_options (text_t &instring, metadata_t &metaoption) {
417
418 assert (instring.size() > 6);
419 if (instring.size() <= 6) return;
420 text_t meta, com, op;
421 bool inbraces = false;
422 bool inquotes = false;
423 bool foundcolon = false;
424 text_t::const_iterator here = instring.begin()+5;
425 text_t::const_iterator end = instring.end();
426 while (here != end) {
427 if (foundcolon) meta.push_back (*here);
428 else if (*here == '(') inbraces = true;
429 else if (*here == ')') inbraces = false;
430 else if (*here == '\'' && !inquotes) inquotes = true;
431 else if (*here == '\'' && inquotes) inquotes = false;
432 else if (*here == ':' && !inbraces) foundcolon = true;
433 else if (inquotes) op.push_back (*here);
434 else com.push_back (*here);
435 ++here;
436 }
437
438 instring = meta;
439 if (com.empty()) {
440 metaoption.mqualifier.child = cAll;
441 metaoption.childoptions = " ";
442 }
443 else if (com == "first") {
444 metaoption.mqualifier.child = cNum;
445 metaoption.childoptions = ".fc";
446 }
447 else if (com == "last") {
448 metaoption.mqualifier.child = cNum;
449 metaoption.childoptions = ".lc";
450 }
451 else if (com.getint()>0) {
452 metaoption.mqualifier.child = cNum;
453 metaoption.childoptions = "."+com;
454 }
455 else {
456 metaoption.mqualifier.child = cAll;
457 metaoption.childoptions = op;
458 }
459}
460
461
462
463static void parse_meta (text_t &meta, metadata_t &metaoption,
464 text_tset &metadata, bool &getParents) {
465
466 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
467 metaoption.metacommand |= mCgiSafe;
468 meta = substr (meta.begin()+8, meta.end());
469 }
470 if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {
471 metaoption.metacommand |= mSpecial;
472 meta = substr (meta.begin()+7, meta.end());
473 }
474
475 bool had_parent_or_child = true;
476 bool prev_was_parent = false;
477 bool prev_was_child = false;
478
479 while (had_parent_or_child) {
480 if (meta.size() > 7
481 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
482
483 // clear out sibling and child (cmd and options)
484 metaoption.metacommand &= ~(mChild|mSibling);
485 metaoption.childoptions.clear();
486 metaoption.siblingoptions.clear();
487
488 getParents = true;
489 metaoption.metacommand |= mParent;
490 get_parent_options (meta, metaoption);
491
492 if (prev_was_parent) {
493 metaoption.pre_tree_traverse += ".pr";
494 }
495 else if (prev_was_child) {
496 metaoption.pre_tree_traverse += ".fc";
497 }
498
499 prev_was_parent = true;
500 prev_was_child = false;
501 }
502 else if (meta.size() > 6
503 && (substr (meta.begin(), meta.begin()+5) == "child")) {
504
505 // clear out sibling and parent (cmd and options)
506 metaoption.metacommand &= ~(mParent|mSibling);
507 metaoption.parentoptions.clear();
508 metaoption.siblingoptions.clear();
509
510 metaoption.metacommand |= mChild;
511 get_child_options (meta, metaoption);
512 metadata.insert("contains");
513
514 if (prev_was_parent) {
515 metaoption.pre_tree_traverse += ".pr";
516 }
517 else if (prev_was_child) {
518 metaoption.pre_tree_traverse += ".fc";
519 }
520
521 prev_was_child = true;
522 prev_was_parent = false;
523 }
524 else {
525 prev_was_child = false;
526 prev_was_parent = false;
527 had_parent_or_child = false;
528 }
529 }
530
531 // parent/child can have sibling tacked on end also
532 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
533 metaoption.metacommand |= mSibling;
534 get_sibling_options (meta, metaoption);
535 }
536
537 // check for ex. which may occur in format statements
538 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
539 meta = substr (meta.begin()+3, meta.end());
540 }
541 metadata.insert (meta);
542 metaoption.metaname = meta;
543}
544
545static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
546 if (meta == "collection") {
547 // no qualifiers
548 metaoption.metaname = g_EmptyText;
549 return;
550 }
551 meta = substr (meta.begin()+11, meta.end());
552 metaoption.metaname = meta;
553
554}
555
556static void parse_meta (text_t &meta, format_t *formatlistptr,
557 text_tset &metadata, bool &getParents) {
558
559 if (meta == "link")
560 formatlistptr->command = comLink;
561 else if (meta == "/link")
562 formatlistptr->command = comEndLink;
563
564 else if (meta == "href")
565 formatlistptr->command = comHref;
566
567 else if (meta == "num")
568 formatlistptr->command = comNum;
569
570 else if (meta == "icon")
571 formatlistptr->command = comIcon;
572
573 else if (meta == "Text")
574 formatlistptr->command = comDoc;
575
576 else if (meta == "RelatedDocuments")
577 formatlistptr->command = comRel;
578
579 else if (meta == "highlight")
580 formatlistptr->command = comHighlight;
581
582 else if (meta == "/highlight")
583 formatlistptr->command = comEndHighlight;
584
585 else if (meta == "Summary")
586 formatlistptr->command = comSummary;
587
588 else if (meta == "DocImage")
589 formatlistptr->command = comImage;
590
591 else if (meta == "DocTOC")
592 formatlistptr->command = comTOC;
593
594 else if (meta == "DocumentButtonDetach")
595 formatlistptr->command = comDocumentButtonDetach;
596
597 else if (meta == "DocumentButtonHighlight")
598 formatlistptr->command = comDocumentButtonHighlight;
599
600 else if (meta == "DocumentButtonExpandContents")
601 formatlistptr->command = comDocumentButtonExpandContents;
602
603 else if (meta == "DocumentButtonExpandText")
604 formatlistptr->command = comDocumentButtonExpandText;
605
606 else if (meta == "DocOID")
607 formatlistptr->command = comOID;
608 else if (meta == "DocTopOID")
609 formatlistptr->command = comTopOID;
610 else if (meta == "DocRank")
611 formatlistptr->command = comRank;
612 else if (meta == "DocTermsFreqTotal")
613 formatlistptr->command = comDocTermsFreqTotal;
614 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
615 formatlistptr->command = comCollection;
616 parse_coll_meta(meta, formatlistptr->meta);
617 }
618 else {
619 formatlistptr->command = comMeta;
620 parse_meta (meta, formatlistptr->meta, metadata, getParents);
621 }
622}
623
624
625static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
626 text_tset &metadata, bool &getParents) {
627
628 text_t text;
629 text_t::const_iterator here = formatstring.begin();
630 text_t::const_iterator end = formatstring.end();
631
632 while (here != end) {
633
634 if (*here == '\\') {
635 ++here;
636 if (here != end) text.push_back (*here);
637
638 } else if (*here == '{') {
639 if (!text.empty()) {
640 formatlistptr->command = comText;
641 formatlistptr->text = text;
642 formatlistptr->nextptr = new format_t();
643 formatlistptr = formatlistptr->nextptr;
644
645 text.clear();
646 }
647 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
648
649 formatlistptr->nextptr = new format_t();
650 formatlistptr = formatlistptr->nextptr;
651 if (here == end) break;
652 }
653 } else if (*here == '[') {
654 if (!text.empty()) {
655 formatlistptr->command = comText;
656 formatlistptr->text = text;
657 formatlistptr->nextptr = new format_t();
658 formatlistptr = formatlistptr->nextptr;
659
660 text.clear();
661 }
662 text_t meta;
663 ++here;
664 while (*here != ']') {
665 if (here == end) return false;
666 meta.push_back (*here);
667 ++here;
668 }
669 parse_meta (meta, formatlistptr, metadata, getParents);
670 formatlistptr->nextptr = new format_t();
671 formatlistptr = formatlistptr->nextptr;
672
673 } else
674 text.push_back (*here);
675
676 if (here != end) ++here;
677 }
678 if (!text.empty()) {
679 formatlistptr->command = comText;
680 formatlistptr->text = text;
681 formatlistptr->nextptr = new format_t();
682 formatlistptr = formatlistptr->nextptr;
683
684 }
685 return true;
686}
687
688
689static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
690 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
691
692 text_t::const_iterator it = findchar (here, end, '}');
693 if (it == end) return false;
694
695 text_t com = substr (here, it);
696 here = findchar (it, end, '{');
697 if (here == end) return false;
698 else ++here;
699
700 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
701 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
702 else return false;
703
704 int commacount = 0;
705 text_t text;
706 while (here != end) {
707
708 if (*here == '\\') {
709 ++here;
710 if (here != end) text.push_back(*here);
711
712 }
713
714 else if (*here == ',' || *here == '}' || *here == '{') {
715
716 if (formatlistptr->command == comOr) {
717 // the {Or}{this, or this, or this, or this} statement
718 format_t *or_ptr;
719
720 // find the next unused orptr
721 if (formatlistptr->orptr == NULL) {
722 formatlistptr->orptr = new format_t();
723 or_ptr = formatlistptr->orptr;
724 } else {
725 or_ptr = formatlistptr->orptr;
726 while (or_ptr->nextptr != NULL)
727 or_ptr = or_ptr->nextptr;
728 or_ptr->nextptr = new format_t();
729 or_ptr = or_ptr->nextptr;
730 }
731
732 if (!text.empty())
733 {
734 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
735 }
736
737 if (*here == '{')
738 {
739 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
740 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
741 // The latter can always be re-written:
742 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
743
744 if (!text.empty()) // already used up allocated format_t
745 {
746 // => allocate new one for detected action
747 or_ptr->nextptr = new format_t();
748 or_ptr = or_ptr->nextptr;
749 }
750 if (!parse_action(++here, end, or_ptr, metadata, getParents))
751 {
752 return false;
753 }
754 }
755 else
756 {
757 if (*here == '}') break;
758 }
759 text.clear();
760
761 }
762
763 // Parse an {If}{decide,do,else} statement
764 else {
765
766 // Read the decision component.
767 if (commacount == 0) {
768 // Decsion can be a metadata element, or a piece of text.
769 // Originally Stefan's code, updated 25/10/2000 by Gordon.
770
771 text_t::const_iterator beginbracket = text.begin();
772 text_t::const_iterator endbracket = (text.end() - 1);
773
774 // Decision is based on a metadata element
775 if ((*beginbracket == '[') && (*endbracket == ']')) {
776 // Ignore the surrounding square brackets
777 text_t meta = substr (beginbracket+1, endbracket);
778 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
779 ++commacount;
780 text.clear();
781 }
782
783 // Decision is a piece of text (probably a macro like _cgiargmode_).
784 else {
785
786 // hunt for any metadata in string, which might be uses in
787 // to test a condition, e.g. [Format] eq 'PDF'
788 format_t* dummyformat = new format_t();
789 // update which metadata fields needed
790 // (not interested in updatng formatlistptr)
791 parse_string (text, dummyformat, metadata, getParents);
792 delete dummyformat;
793
794 formatlistptr->decision.command = dText;
795 formatlistptr->decision.text = text;
796 ++commacount;
797 text.clear();
798 }
799 }
800
801 // Read the "then" and "else" components of the {If} statement.
802 else {
803 format_t** nextlistptr = NULL;
804 if (commacount == 1) {
805 nextlistptr = &formatlistptr->ifptr;
806 } else if (commacount == 2 ) {
807 nextlistptr = &formatlistptr->elseptr;
808 } else {
809 return false;
810 }
811
812 if (!text.empty()) {
813 if (*nextlistptr == NULL) {
814 *nextlistptr = new format_t();
815 } else {
816
817 // skip to the end of any format_t statements already added
818 while ((*nextlistptr)->nextptr != NULL)
819 {
820 nextlistptr = &(*nextlistptr)->nextptr;
821 }
822
823 (*nextlistptr)->nextptr = new format_t();
824 nextlistptr = &(*nextlistptr)->nextptr;
825 }
826
827 if (!parse_string (text, *nextlistptr, metadata, getParents))
828 {
829 return false;
830 }
831 text.clear();
832 }
833
834 if (*here == '{')
835 {
836 if (*nextlistptr == NULL) {
837 *nextlistptr = new format_t();
838 } else {
839 // skip to the end of any format_t statements already added
840 while ((*nextlistptr)->nextptr != NULL)
841 {
842 nextlistptr = &(*nextlistptr)->nextptr;
843 }
844
845 (*nextlistptr)->nextptr = new format_t();
846 nextlistptr = &(*nextlistptr)->nextptr;
847 }
848
849 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
850 {
851 return false;
852 }
853 }
854 else
855 {
856 if (*here == '}') break;
857 ++commacount;
858 }
859 }
860 }
861
862 } else text.push_back(*here);
863
864 if (here != end) ++here;
865 }
866
867 return true;
868}
869
870
871bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
872 text_tset &metadata, bool &getParents) {
873
874 formatlistptr->clear();
875 getParents = false;
876
877 return (parse_string (formatstring, formatlistptr, metadata, getParents));
878}
879
880// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
881// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
882static text_t get_formatted_meta_text(MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
883{
884 text_t no_ns_metaname = remove_namespace(meta.metaname);
885 text_t formatted_metatext;
886 bool first = true;
887
888 const int start_i=0;
889 const int end_i = metainfo.values.size()-1;
890
891 if (position == -1) { // all
892 for (int i=start_i; i<=end_i; ++i) {
893 if (!first) formatted_metatext += meta.siblingoptions;
894
895 text_t fresh_metatext;
896
897 if (meta.metacommand & mSpecial) {
898 // special formatting
899 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
900 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
901 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
902 }
903 else fresh_metatext = metainfo.values[i];
904
905 if (spanwrap) {
906 fresh_metatext = "<span metaname=\"" + meta.metaname + "\" metapos=\"" + i + "\">" + fresh_metatext + "</span>";
907 }
908 formatted_metatext += fresh_metatext;
909
910 first = false;
911
912 }
913 } else {
914 if (position == -2) { // end
915 position = end_i;
916 } else if (position < start_i || position > end_i) {
917 return "";
918 }
919
920 text_t fresh_metatext;
921 if (meta.metacommand & mSpecial) {
922
923 // special formatting
924 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
925 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
926 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
927 }
928 else fresh_metatext = metainfo.values[position];
929
930 if (spanwrap) {
931 fresh_metatext = "<span metaname=\"" + meta.metaname + "\" metapos=\"" + position + "\">" + fresh_metatext + "</span>";
932 }
933
934 formatted_metatext += fresh_metatext;
935 }
936
937 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
938 else return formatted_metatext;
939}
940
941static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
942{
943
944 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
945 switch (meta.mqualifier.parent) {
946 case pNone:
947 return "Nothing!!";
948 break;
949
950 case pImmediate:
951 if (parent != NULL) {
952 return get_formatted_meta_text(*parent, meta, siblings_values);
953 }
954 break;
955
956 case pTop:
957 if (parent != NULL) {
958 while (parent->parent != NULL) parent = parent->parent;
959 return get_formatted_meta_text(*parent, meta, siblings_values);
960 }
961 break;
962
963 case pAll:
964 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
965 if (parent != NULL) {
966 text_tarray tmparray;
967 while (parent != NULL) {
968 tmparray.push_back (get_formatted_meta_text(*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
969 parent = parent->parent;
970 }
971 // now join them up - use teh parent separator
972 bool first = true;
973 text_t tmp;
974 text_tarray::reverse_iterator here = tmparray.rbegin();
975 text_tarray::reverse_iterator end = tmparray.rend();
976 while (here != end) {
977 if (!first) tmp += meta.parentoptions;
978 tmp += *here;
979 first = false;
980 ++here;
981 }
982 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
983 else return tmp;
984 }
985 }
986 return "";
987
988}
989
990static text_t get_child_meta (const text_t& collection,
991 recptproto* collectproto,
992 ResultDocInfo_t &docinfo, displayclass &disp,
993 const metadata_t &meta, text_tmap &options,
994 ostream& logout, int siblings_values)
995{
996 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
997
998 const text_t& pre_tree_trav = meta.pre_tree_traverse;
999 const text_t& child_metaname = meta.metaname;
1000 const text_t& child_field = meta.childoptions;
1001 text_tset child_metadata;
1002 child_metadata.insert(child_metaname);
1003
1004 FilterResponse_t child_response;
1005 if (meta.mqualifier.child == cNum) {
1006 // just one child
1007 //get the information associated with the metadata for child doc
1008 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1009 child_metadata, false, collectproto, child_response,
1010 logout)) return ""; // invalid child number
1011
1012 if (child_response.docInfo.empty()) return false; // no info for the child
1013
1014 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1015 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1016
1017 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
1018 return expand_metadata(child_metavalue,collection,collectproto,
1019 child_docinfo,disp,options,logout);
1020 }
1021
1022
1023 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1024
1025
1026 if (!pre_tree_trav.empty()) {
1027 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1028 FilterResponse_t trav_response;
1029
1030 text_tset trav_metadata;
1031 trav_metadata.insert("contains");
1032
1033 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1034 trav_metadata, false, collectproto, trav_response,
1035 logout)) return ""; // invalid pre_tree_trav
1036
1037 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1038
1039 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1040 // use this for rest of routine
1041 docinfo = trav_docinfo;
1042 }
1043
1044 // we need to get all children
1045 text_t result = "";
1046 text_tarray children;
1047 text_t contains = docinfo.metadata["contains"].values[0];
1048 splitchar (contains.begin(), contains.end(), ';', children);
1049 text_tarray::const_iterator here = children.begin();
1050 text_tarray::const_iterator end = children.end();
1051 bool first = true;
1052 while (here !=end) {
1053 text_t oid = *here;
1054 here++;
1055 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1056
1057 //get the information associated with the metadata for child doc
1058 if (!get_info (oid, collection, "", child_metadata,
1059 false, collectproto, child_response, logout) ||
1060 child_response.docInfo.empty()) {
1061 first = false;
1062 continue;
1063 }
1064
1065
1066 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1067 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1068
1069 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
1070 if (!first) result += child_field;
1071 first = false;
1072 // need to do this here cos otherwise we are in the wrong document
1073 result += expand_metadata(child_metavalue,collection,collectproto,
1074 child_docinfo,disp,options,logout);
1075 }
1076 return result;
1077
1078}
1079
1080static text_t get_meta (const text_t& collection, recptproto* collectproto,
1081 ResultDocInfo_t &docinfo, displayclass &disp,
1082 const metadata_t &meta, text_tmap &options,
1083 ostream& logout) {
1084
1085 // make sure we have the requested metadata
1086 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1087 if (it == docinfo.metadata.end()) return "";
1088
1089 int siblings_values = 0; // default is no siblings, just the first metadata available
1090 if (meta.metacommand & mSibling) {
1091 if (meta.mqualifier.sibling == sAll) {
1092 siblings_values = -1; //all
1093 } else if (meta.mqualifier.sibling == sNum) {
1094 siblings_values = meta.siblingoptions.getint();
1095 }
1096 }
1097 if (meta.metacommand & mParent) {
1098 return get_parent_meta(docinfo,meta,siblings_values);
1099 }
1100
1101 else if (meta.metacommand & mChild) {
1102 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1103 options,logout, siblings_values);
1104 }
1105 else if (meta.metacommand & mSibling) { // only siblings
1106 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1107 return get_formatted_meta_text(docinfo.metadata[meta.metaname],meta, siblings_values);
1108 }
1109 else {
1110
1111 // straightforward metadata request (nothing fancy)
1112
1113 text_t classifier_metaname = docinfo.classifier_metadata_type;
1114 int metaname_index
1115 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1116 return get_formatted_meta_text(docinfo.metadata[meta.metaname], meta, metaname_index);
1117 }
1118
1119 return "";
1120}
1121
1122static text_t get_or (const text_t& collection, recptproto* collectproto,
1123 ResultDocInfo_t &docinfo, displayclass &disp,
1124 format_t *orptr, text_tmap &options,
1125 ostream& logout) {
1126
1127 text_t tmp;
1128 while (orptr != NULL) {
1129
1130 tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1131 options, logout);
1132 if (!tmp.empty()) return tmp;
1133
1134 orptr = orptr->nextptr;
1135 }
1136 return "";
1137}
1138
// True when 'c' is a space, tab, line feed or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1144
1145static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1146{
1147 int pos = start_pos;
1148 while (pos<outstring.size()) {
1149 if (!char_is_whitespace(outstring[pos])) {
1150 break;
1151 }
1152 ++pos;
1153 }
1154
1155 return pos;
1156}
1157
1158static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1159{
1160 int pos = start_pos;
1161 while (pos>=0) {
1162 if (!char_is_whitespace(outstring[pos])) {
1163 break;
1164 }
1165 --pos;
1166 }
1167
1168 return pos;
1169}
1170
1171static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1172{
1173 int pos = start_pos;
1174 while (pos>=0) {
1175 if (char_is_whitespace(outstring[pos])) {
1176 break;
1177 }
1178 --pos;
1179 }
1180
1181 return pos;
1182}
1183
1184
1185static int rscan_for(const text_t& outstring, const int start_pos,
1186 const char find_c)
1187{
1188 int pos = start_pos;
1189 while (pos>=0) {
1190 char c = outstring[pos];
1191 if (outstring[pos] == find_c) {
1192 break;
1193 }
1194 --pos;
1195 }
1196
1197 return pos;
1198}
1199
1200text_t extract_substr(const text_t& outstring, const int start_pos,
1201 const int end_pos)
1202{
1203 text_t extracted_str;
1204 extracted_str.clear();
1205
1206 for (int pos=start_pos; pos<=end_pos; ++pos) {
1207 extracted_str.push_back(outstring[pos]);
1208 }
1209
1210 return extracted_str;
1211}
1212
1213
1214static text_t expand_potential_metadata(const text_t& collection,
1215 recptproto* collectproto,
1216 ResultDocInfo_t &docinfo,
1217 displayclass &disp,
1218 const text_t& intext,
1219 text_tmap &options,
1220 ostream& logout)
1221{
1222 text_t outtext;
1223
1224 // decide if dealing with metadata or text
1225
1226 text_t::const_iterator beginbracket = intext.begin();
1227 text_t::const_iterator endbracket = (intext.end() - 1);
1228
1229 // Decision is based on a metadata element
1230 if ((*beginbracket == '[') && (*endbracket == ']')) {
1231 // Ignore the surrounding square brackets
1232 text_t meta_text = substr (beginbracket+1, endbracket);
1233
1234 if (meta_text == "Text") {
1235 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1236 } else {
1237
1238 text_tset metadata;
1239 bool getParents =false;
1240 metadata_t meta;
1241
1242 parse_meta (meta_text, meta, metadata, getParents);
1243 outtext
1244 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1245 }
1246
1247 }
1248 else {
1249 outtext = intext;
1250 }
1251
1252 return outtext;
1253}
1254
1255
1256
1257
1258static bool uses_expression(const text_t& collection, recptproto* collectproto,
1259 ResultDocInfo_t &docinfo,
1260 displayclass &disp,
1261 const text_t& outstring, text_t& lhs_expr,
1262 text_t& op_expr, text_t& rhs_expr,
1263 text_tmap &options,
1264 ostream& logout)
1265{
1266 // Note: the string may not be of the form: str1 op str2, however
1267 // to deterine this we have to process it on the assumption it is,
1268 // and if at any point an 'erroneous' value is encountered, return
1269 // false and let something else have a go at evaluating it
1270
1271 // Starting at the end of the string and working backwards ..
1272
1273 const int outstring_len = outstring.size();
1274
1275 // skip over white space
1276 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1277
1278 if (rhs_end<=0) {
1279 // no meaningful text or (rhs_end==0) no room for operator
1280 return false;
1281 }
1282
1283 // check for ' or " and then scan over token
1284 const char potential_quote = outstring[rhs_end];
1285 int rhs_start=rhs_end;
1286 bool quoted = false;
1287
1288 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1289 --rhs_end;
1290 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1291 quoted = true;
1292 }
1293 else {
1294 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1295 }
1296
1297 if ((rhs_end-rhs_start)<0) {
1298 // no meaningful rhs expression
1299 return false;
1300 }
1301
1302 // form rhs_expr
1303 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1304
1305 // skip over white space
1306 const int to_whitespace = (quoted) ? 2 : 1;
1307
1308 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1309 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1310
1311 if ((op_end<0) && (op_start<0)) {
1312 // no meaningful expression operator
1313 return false;
1314 }
1315
1316 if (op_end-op_start<0) {
1317 // no meaningful expression operator
1318 return false;
1319 }
1320
1321 op_expr = extract_substr(outstring,op_start,op_end);
1322
1323
1324 // check for operator
1325 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1326 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1327
1328 // not a valid operator
1329 return false;
1330 }
1331
1332 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1333 if (lhs_end<0) {
1334 // no meaningful lhs expression
1335 return false;
1336 }
1337
1338 int lhs_start = scan_over_whitespace(outstring,0);
1339
1340 // form lhs_expr from remainder of string
1341 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1342
1343 // Now we know we have a valid expression, look up any
1344 // metadata terms
1345
1346 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1347 disp,rhs_expr,options,logout);
1348 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1349 disp,lhs_expr,options,logout);
1350
1351 return true;
1352}
1353
1354static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1355 const text_t& rhs_expr, ostream& logout)
1356{
1357 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1358 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1359 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1360 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1361 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1362 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1363 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1364 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1365 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1366 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1367 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1368 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1369 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1370 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1371 else {
1372 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1373 }
1374
1375 return false;
1376}
1377
1378
1379static text_t get_if (const text_t& collection, recptproto* collectproto,
1380 ResultDocInfo_t &docinfo, displayclass &disp,
1381 const decision_t &decision,
1382 format_t *ifptr, format_t *elseptr,
1383 text_tmap &options, ostream& logout)
1384{
1385 // If the decision component is a metadata element, then evaluate it
1386 // to see whether we output the "then" or the "else" clause
1387 if (decision.command == dMeta) {
1388
1389 bool store_spanwrap = spanwrap;
1390 spanwrap = 0;
1391
1392 // temporarily suspend spanwrap (if on) so can test if metadata item really exits or not
1393 bool metadata_exists
1394 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1395 logout) != "");
1396
1397 spanwrap = store_spanwrap;
1398
1399 if (metadata_exists) {
1400 if (ifptr != NULL)
1401 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1402 options, logout);
1403 }
1404 else {
1405 if (elseptr != NULL)
1406 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1407 options, logout);
1408 }
1409 }
1410
1411 // If the decision component is text, then evaluate it (it is probably a
1412 // macro like _cgiargmode_) to decide what to output.
1413 else if (decision.command == dText) {
1414
1415 text_t outstring;
1416 disp.expandstring (decision.text, outstring);
1417
1418 // Check for if expression in form: str1 op str2
1419 // (such as [x] eq "y")
1420 text_t lhs_expr, op_expr, rhs_expr;
1421 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1422 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1423 if (ifptr != NULL) {
1424 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1425 options, logout);
1426 }
1427 else {
1428 return "";
1429 }
1430 } else {
1431 if (elseptr != NULL) {
1432 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1433 options, logout);
1434 }
1435 else {
1436 return "";
1437 }
1438 }
1439 }
1440
1441
1442 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1443 // a cgi argument macro that has not been set, it evaluates to itself.
1444 // Therefore, were have to say that a piece of text evalautes true if
1445 // it is non-empty and if it is a cgi argument evaulating to itself.
1446
1447 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1448 if (ifptr != NULL)
1449 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1450 options, logout);
1451 } else {
1452 if (elseptr != NULL)
1453 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1454 options, logout);
1455 }
1456 }
1457
1458 return "";
1459}
1460
1461bool includes_metadata(const text_t& text)
1462{
1463 text_t::const_iterator here = text.begin();
1464 text_t::const_iterator end = text.end();
1465 while (here != end) {
1466 if (*here == '[') return true;
1467 ++here;
1468 }
1469
1470 return false;
1471}
1472
1473static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1474 recptproto* collectproto,
1475 ResultDocInfo_t &docinfo,
1476 displayclass &disp, text_tmap &options,
1477 ostream &logout) {
1478
1479 if (includes_metadata(metavalue)) {
1480
1481 // text has embedded metadata in it => expand it
1482 FilterRequest_t request;
1483 FilterResponse_t response;
1484
1485 request.getParents = false;
1486
1487 format_t *expanded_formatlistptr = new format_t();
1488 parse_formatstring (metavalue, expanded_formatlistptr,
1489 request.fields, request.getParents);
1490
1491 // retrieve metadata
1492 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1493 collectproto, response, logout);
1494
1495 if (!response.docInfo.empty()) {
1496
1497 text_t expanded_metavalue
1498 = get_formatted_string(collection, collectproto,
1499 response.docInfo[0], disp, expanded_formatlistptr,
1500 options, logout);
1501
1502 return expanded_metavalue;
1503 }
1504 else {
1505 return metavalue;
1506 }
1507 }
1508 else {
1509
1510 return metavalue;
1511 }
1512}
1513
1514text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1515 displayclass &disp,
1516 text_t meta_name, ostream& logout) {
1517
1518 ColInfoResponse_t collectinfo;
1519 comerror_t err;
1520 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1521 text_t meta_value = "";
1522 text_t lang;
1523 disp.expandstring("_cgiargl_",lang);
1524 if (lang.empty()) {
1525 lang = "en";
1526 }
1527
1528 if (err == noError) {
1529 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1530 }
1531 return meta_value;
1532
1533
1534}
1535text_t format_string (const text_t& collection, recptproto* collectproto,
1536 ResultDocInfo_t &docinfo, displayclass &disp,
1537 format_t *formatlistptr, text_tmap &options,
1538 ostream& logout) {
1539
1540 if (formatlistptr == NULL) return "";
1541
1542 switch (formatlistptr->command) {
1543 case comOID:
1544 return docinfo.OID;
1545 case comTopOID:
1546 {
1547 text_t top_id;
1548 get_top(docinfo.OID, top_id);
1549 return top_id;
1550 }
1551 case comRank:
1552 return text_t(docinfo.ranking);
1553 case comText:
1554 return formatlistptr->text;
1555 case comLink:
1556 return options["link"];
1557 case comEndLink:
1558 if (options["link"].empty()) return "";
1559 else return "</a>";
1560 case comHref:
1561 return get_href(options["link"]);
1562 case comIcon:
1563 return options["icon"];
1564 case comNum:
1565 return docinfo.result_num;
1566 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1567 return get_related_docs(collection, collectproto, docinfo, logout);
1568 case comSummary:
1569 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1570 case comMeta:
1571 {
1572 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1573 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1574 }
1575 case comDoc:
1576 return format_text(collection, collectproto, docinfo, disp, options, logout);
1577 //return options["text"];
1578 case comImage:
1579 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1580 case comTOC:
1581 return options["DocTOC"];
1582 case comDocumentButtonDetach:
1583 return options["DocumentButtonDetach"];
1584 case comDocumentButtonHighlight:
1585 return options["DocumentButtonHighlight"];
1586 case comDocumentButtonExpandContents:
1587 return options["DocumentButtonExpandContents"];
1588 case comDocumentButtonExpandText:
1589 return options["DocumentButtonExpandText"];
1590 case comHighlight:
1591 if (options["highlight"] == "1") return "<b>";
1592 break;
1593 case comEndHighlight:
1594 if (options["highlight"] == "1") return "</b>";
1595 break;
1596 case comIf:
1597 return get_if (collection, collectproto, docinfo, disp,
1598 formatlistptr->decision, formatlistptr->ifptr,
1599 formatlistptr->elseptr, options, logout);
1600 case comOr:
1601 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1602 options, logout);
1603 case comDocTermsFreqTotal:
1604 return docinfo.num_terms_matched;
1605 case comCollection:
1606 if (formatlistptr->meta.metaname == g_EmptyText) {
1607 return collection;
1608 }
1609 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1610
1611 }
1612 return "";
1613}
1614
1615text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1616 ResultDocInfo_t &docinfo, displayclass &disp,
1617 format_t *formatlistptr, text_tmap &options,
1618 ostream& logout) {
1619
1620 text_t ft;
1621 while (formatlistptr != NULL)
1622 {
1623 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1624 options, logout);
1625 formatlistptr = formatlistptr->nextptr;
1626 }
1627
1628 return ft;
1629}
1630
1631
1632// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1633text_t format_text (const text_t& collection, recptproto* collectproto,
1634 ResultDocInfo_t &docinfo, displayclass &disp,
1635 text_tmap &options, ostream& logout) {
1636 if(!options["text"].empty()) {
1637 return options["text"];
1638 }
1639 // else get document text here
1640 DocumentRequest_t docrequest;
1641 DocumentResponse_t docresponse;
1642 comerror_t err;
1643 docrequest.OID = docinfo.OID;
1644 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1645 return docresponse.doc;
1646
1647}
1648
1649/* FUNCTION NAME: format_summary
1650 * DESC: this is invoked when a [Summary] special metadata is processed.
1651 * RETURNS: a query-biased summary for the document */
1652
1653text_t format_summary (const text_t& collection, recptproto* collectproto,
1654 ResultDocInfo_t &docinfo, displayclass &disp,
1655 text_tmap &options, ostream& logout) {
1656
1657 // GRB: added code here to ensure that the cstr (and other collections)
1658 // uses the document metadata item Summary, rather than compressing
1659 // the text of the document, processed via the methods in
1660 // summarise.cpp
1661 if (docinfo.metadata.count("Summary") > 0 &&
1662 docinfo.metadata["Summary"].values.size() > 0) {
1663 return docinfo.metadata["Summary"].values[0];
1664 }
1665
1666 text_t textToSummarise, query;
1667 if(options["text"].empty()) { // get document text
1668 DocumentRequest_t docrequest;
1669 DocumentResponse_t docresponse;
1670 comerror_t err;
1671 docrequest.OID = docinfo.OID;
1672 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1673 textToSummarise = docresponse.doc;
1674 } else // in practice, this would not happen, because text is only
1675 // loaded with the [Text] command
1676 textToSummarise = options["text"];
1677 disp.expandstring("_cgiargq_",query);
1678 return summarise(textToSummarise,query,80);
1679}
Note: See TracBrowser for help on using the repository browser.