source: trunk/gsdl/src/recpt/formattools.cpp@ 10361

Last change on this file since 10361 was 10145, checked in by kjdon, 19 years ago

Here's the real log message for the last commit, which happened by accident.
added operators to the {If} statements: gt, ge, lt, le, ==, !=, <, <=, >, >=, sw (starts with), ew (ends with).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 49.2 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "OIDtools.h"
29#include "summarise.h"
30
31#include <assert.h>
32
33// a few function prototypes
34
35static text_t format_string (const text_t& collection, recptproto* collectproto,
36 ResultDocInfo_t &docinfo, displayclass &disp,
37 format_t *formatlistptr, text_tmap &options,
38 ostream& logout);
39
40static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
41 format_t *formatlistptr, text_tset &metadata, bool &getParents);
42
43static text_t format_summary (const text_t& collection, recptproto* collectproto,
44 ResultDocInfo_t &docinfo, displayclass &disp,
45 text_tmap &options, ostream& logout);
46static text_t format_text (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
49
50static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
51 recptproto* collectproto, ResultDocInfo_t &docinfo,
52 displayclass &disp, text_tmap &options,
53 ostream &logout);
54
55
56void metadata_t::clear() {
57 metaname.clear();
58 metacommand = mNone;
59 mqualifier.parent = pNone;
60 mqualifier.sibling = sNone;
61 mqualifier.child = cNone;
62 functionoptions.clear();
63}
64
65void decision_t::clear() {
66 command = dMeta;
67 meta.clear();
68 text.clear();
69}
70
71void format_t::clear() {
72 command = comText;
73 decision.clear();
74 text.clear();
75 meta.clear();
76 nextptr = NULL;
77 ifptr = NULL;
78 elseptr = NULL;
79 orptr = NULL;
80}
81
82void formatinfo_t::clear() {
83 DocumentImages = false;
84 DocumentTitles = true;
85 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
86 DocumentContents = true;
87 DocumentArrowsBottom = true;
88 DocumentArrowsTop = false;
89 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
90 // DocumentButtons.push_back ("Expand Text");
91 // DocumentButtons.push_back ("Expand Contents");
92 DocumentButtons.push_back ("Detach");
93 DocumentButtons.push_back ("Highlight");
94 RelatedDocuments = "";
95 DocumentText = "<center><table width=_pagewidth_><tr><td>[Text]</td></tr></table></center>";
96 formatstrings.erase (formatstrings.begin(), formatstrings.end());
97 DocumentUseHTML = false;
98 AllowExtendedOptions = false;
99}
100
101// simply checks to see if formatstring begins with a <td> tag
102bool is_table_content (const text_t &formatstring) {
103 text_t::const_iterator here = formatstring.begin();
104 text_t::const_iterator end = formatstring.end();
105
106 while (here != end) {
107 if (*here != ' ') {
108 if ((*here == '<') && ((here+3) < end)) {
109 if ((*(here+1) == 't' || *(here+1) == 'T') &&
110 (*(here+2) == 'd' || *(here+2) == 'D') &&
111 (*(here+3) == '>' || *(here+3) == ' '))
112 return true;
113 } else return false;
114 }
115 ++here;
116 }
117 return false;
118}
119
120bool is_table_content (const format_t *formatlistptr) {
121
122 if (formatlistptr == NULL) return false;
123
124 if (formatlistptr->command == comText)
125 return is_table_content (formatlistptr->text);
126
127 return false;
128}
129
130// returns false if key isn't in formatstringmap
131bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
132 text_t &formatstring) {
133
134 formatstring.clear();
135 text_tmap::const_iterator it = formatstringmap.find(key);
136 if (it == formatstringmap.end()) return false;
137 formatstring = (*it).second;
138 return true;
139}
140
141// tries to find "key1key2" then "key1" then "key2"
142bool get_formatstring (const text_t &key1, const text_t &key2,
143 const text_tmap &formatstringmap,
144 text_t &formatstring) {
145
146 formatstring.clear();
147 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
148 if (it != formatstringmap.end()) {
149 formatstring = (*it).second;
150 return true;
151 }
152 it = formatstringmap.find(key1);
153 if (it != formatstringmap.end()) {
154 formatstring = (*it).second;
155 return true;
156 }
157 it = formatstringmap.find(key2);
158 if (it != formatstringmap.end()) {
159 formatstring = (*it).second;
160 return true;
161 }
162 return false;
163}
164
165
166text_t remove_namespace(const text_t &meta_name) {
167 text_t::const_iterator end = meta_name.end();
168 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
169 if (it != end) {
170 return substr(it+1, end);
171 }
172 return meta_name;
173
174}
175// returns a date of form 31 _textmonthnn_ 1999
176// input is date of type 19991231
177// at least the year must be present in date
178text_t format_date (const text_t &date) {
179
180 if (date.size() < 4) return "";
181
182 text_t::const_iterator datebegin = date.begin();
183
184 text_t year = substr (datebegin, datebegin+4);
185
186 if (date.size() < 6) return year;
187
188 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
189 int imonth = month.getint();
190 if (imonth < 0 || imonth > 12) return year;
191
192 if (date.size() < 8) return month + " " + year;
193
194 text_t day = substr (datebegin+6, datebegin+8);
195 if (day[0] == '0') day = substr (day.begin()+1, day.end());
196 int iday = day.getint();
197 if (iday < 0 || iday > 31) return month + " " + year;
198
199 return day + " " + month + " " + year;
200}
201
202// converts an iso639 language code to its English equivalent
203// I realize that this isn't the pretiest or most efficient implementation,
204// hopefully this ugly Language (and Date too) formatting won't survive to
205// see gsdl-3.0
206text_t iso639 (const text_t &langcode) {
207
208 if (langcode == "aa") return "Afar";
209 if (langcode == "ab") return "Abkhazian";
210 if (langcode == "af") return "Afrikaans";
211 if (langcode == "am") return "Amharic";
212 if (langcode == "ar") return "Arabic";
213 if (langcode == "as") return "Assamese";
214 if (langcode == "ay") return "Aymara";
215 if (langcode == "az") return "Azerbaijani";
216
217 if (langcode == "ba") return "Bashkir";
218 if (langcode == "be") return "Byelorussian";
219 if (langcode == "bg") return "Bulgarian";
220 if (langcode == "bh") return "Bihari";
221 if (langcode == "bi") return "Bislama";
222 if (langcode == "bn") return "Bengali; Bangla";
223 if (langcode == "bo") return "Tibetan";
224 if (langcode == "br") return "Breton";
225
226 if (langcode == "ca") return "Catalan";
227 if (langcode == "co") return "Corsican";
228 if (langcode == "cs") return "Czech";
229 if (langcode == "cy") return "Welsh";
230
231 if (langcode == "da") return "Danish";
232 if (langcode == "de") return "German";
233 if (langcode == "dz") return "Bhutani";
234
235 if (langcode == "el") return "Greek";
236 if (langcode == "en") return "English";
237 if (langcode == "eo") return "Esperanto";
238 if (langcode == "es") return "Spanish";
239 if (langcode == "et") return "Estonian";
240 if (langcode == "eu") return "Basque";
241
242 if (langcode == "fa") return "Persian";
243 if (langcode == "fi") return "Finnish";
244 if (langcode == "fj") return "Fiji";
245 if (langcode == "fo") return "Faroese";
246 if (langcode == "fr") return "French";
247 if (langcode == "fy") return "Frisian";
248
249 if (langcode == "ga") return "Irish";
250 if (langcode == "gd") return "Scots Gaelic";
251 if (langcode == "gl") return "Galician";
252 if (langcode == "gn") return "Guarani";
253 if (langcode == "gu") return "Gujarati";
254
255 if (langcode == "ha") return "Hausa";
256 if (langcode == "hi") return "Hindi";
257 if (langcode == "hr") return "Croatian";
258 if (langcode == "hu") return "Hungarian";
259 if (langcode == "hy") return "Armenian";
260
261 if (langcode == "ia") return "Interlingua";
262 if (langcode == "ie") return "Interlingue";
263 if (langcode == "ik") return "Inupiak";
264 if (langcode == "in") return "Indonesian";
265 if (langcode == "is") return "Icelandic";
266 if (langcode == "it") return "Italian";
267 if (langcode == "iw") return "Hebrew";
268
269 if (langcode == "ja") return "Japanese";
270 if (langcode == "ji") return "Yiddish";
271 if (langcode == "jw") return "Javanese";
272
273 if (langcode == "ka") return "Georgian";
274 if (langcode == "kk") return "Kazakh";
275 if (langcode == "kl") return "Greenlandic";
276 if (langcode == "km") return "Cambodian";
277 if (langcode == "kn") return "Kannada";
278 if (langcode == "ko") return "Korean";
279 if (langcode == "ks") return "Kashmiri";
280 if (langcode == "ku") return "Kurdish";
281 if (langcode == "ky") return "Kirghiz";
282
283 if (langcode == "la") return "Latin";
284 if (langcode == "ln") return "Lingala";
285 if (langcode == "lo") return "Laothian";
286 if (langcode == "lt") return "Lithuanian";
287 if (langcode == "lv") return "Latvian, Lettish";
288
289 if (langcode == "mg") return "Malagasy";
290 if (langcode == "mi") return "Maori";
291 if (langcode == "mk") return "Macedonian";
292 if (langcode == "ml") return "Malayalam";
293 if (langcode == "mn") return "Mongolian";
294 if (langcode == "mo") return "Moldavian";
295 if (langcode == "mr") return "Marathi";
296 if (langcode == "ms") return "Malay";
297 if (langcode == "mt") return "Maltese";
298 if (langcode == "my") return "Burmese";
299
300 if (langcode == "na") return "Nauru";
301 if (langcode == "ne") return "Nepali";
302 if (langcode == "nl") return "Dutch";
303 if (langcode == "no") return "Norwegian";
304
305 if (langcode == "oc") return "Occitan";
306 if (langcode == "om") return "(Afan) Oromo";
307 if (langcode == "or") return "Oriya";
308
309 if (langcode == "pa") return "Punjabi";
310 if (langcode == "pl") return "Polish";
311 if (langcode == "ps") return "Pashto, Pushto";
312 if (langcode == "pt") return "Portuguese";
313
314 if (langcode == "qu") return "Quechua";
315 if (langcode == "rm") return "Rhaeto-Romance";
316 if (langcode == "rn") return "Kirundi";
317 if (langcode == "ro") return "Romanian";
318 if (langcode == "ru") return "Russian";
319 if (langcode == "rw") return "Kinyarwanda";
320
321 if (langcode == "sa") return "Sanskrit";
322 if (langcode == "sd") return "Sindhi";
323 if (langcode == "sg") return "Sangro";
324 if (langcode == "sh") return "Serbo-Croatian";
325 if (langcode == "si") return "Singhalese";
326 if (langcode == "sk") return "Slovak";
327 if (langcode == "sl") return "Slovenian";
328 if (langcode == "sm") return "Samoan";
329 if (langcode == "sn") return "Shona";
330 if (langcode == "so") return "Somali";
331 if (langcode == "sq") return "Albanian";
332 if (langcode == "sr") return "Serbian";
333 if (langcode == "ss") return "Siswati";
334 if (langcode == "st") return "Sesotho";
335 if (langcode == "su") return "Sudanese";
336 if (langcode == "sv") return "Swedish";
337 if (langcode == "sw") return "Swahili";
338
339 if (langcode == "ta") return "Tamil";
340 if (langcode == "te") return "Tegulu";
341 if (langcode == "tg") return "Tajik";
342 if (langcode == "th") return "Thai";
343 if (langcode == "ti") return "Tigrinya";
344 if (langcode == "tk") return "Turkmen";
345 if (langcode == "tl") return "Tagalog";
346 if (langcode == "tn") return "Setswana";
347 if (langcode == "to") return "Tonga";
348 if (langcode == "tr") return "Turkish";
349 if (langcode == "ts") return "Tsonga";
350 if (langcode == "tt") return "Tatar";
351 if (langcode == "tw") return "Twi";
352
353 if (langcode == "uk") return "Ukrainian";
354 if (langcode == "ur") return "Urdu";
355 if (langcode == "uz") return "Uzbek";
356
357 if (langcode == "vi") return "Vietnamese";
358 if (langcode == "vo") return "Volapuk";
359
360 if (langcode == "wo") return "Wolof";
361
362 if (langcode == "xh") return "Xhosa";
363
364 if (langcode == "yo") return "Yoruba";
365
366 if (langcode == "zh") return "Chinese";
367 if (langcode == "zu") return "Zulu";
368 return "";
369}
370
371text_t get_href (const text_t &link) {
372
373 text_t href;
374
375 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
376 text_t::const_iterator end = link.end();
377
378 ++here;
379 while (here != end) {
380 if (*here == '"') break;
381 href.push_back(*here);
382 ++here;
383 }
384
385 return href;
386}
387
388//this function gets the information associated with the relation
389//metadata for the document associated with 'docinfo'. This relation
390//metadata consists of a line of pairs containing 'collection, document OID'
391//(this is the OID of the document related to the current document, and
392//the collection the related document belongs to). For each of these pairs
393//the title metadata is obtained and then an html link between the title
394//of the related doc and the document's position (the document will be
395//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
396//(where collection is the related documents collection, and OID is the
397//related documents OID). A list of these html links are made for as many
398//related documents as there are. This list is then returned. If there are
399//no related documents available for the current document then the string
400//'.. no related documents .. ' is returned.
401text_t get_related_docs(const text_t& collection, recptproto* collectproto,
402 ResultDocInfo_t &docinfo, ostream& logout){
403
404 text_tset metadata;
405
406 //insert the metadata we wish to collect
407 metadata.insert("relation");
408 metadata.insert("Title");
409 metadata.insert("Subject"); //for emails, where title data doesn't apply
410
411 FilterResponse_t response;
412 text_t relation = ""; //string for displaying relation metadata
413 text_t relationTitle = ""; //the related documents Title (or subject)
414 text_t relationOID = ""; //the related documents OID
415
416 //get the information associated with the metadata for current doc
417 if (get_info (docinfo.OID, collection, "", metadata,
418 false, collectproto, response, logout)) {
419
420 //if the relation metadata exists, store for displaying
421 if(!response.docInfo[0].metadata["relation"].values.empty()){
422 relationOID += response.docInfo[0].metadata["relation"].values[0];
423
424 //split relation data into pairs of collectionname,ID number
425 text_tarray relationpairs;
426 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
427
428 text_tarray::const_iterator currDoc = relationpairs.begin();
429 text_tarray::const_iterator lastDoc = relationpairs.end();
430
431 //iterate through the pairs to split and display
432 while(currDoc != lastDoc){
433
434 //split pairs into collectionname and ID
435 text_tarray relationdata;
436 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
437
438 //get first element in the array (collection)
439 text_tarray::const_iterator doc_data = relationdata.begin();
440 text_t document_collection = *doc_data;
441 ++doc_data; //increment to get next item in array (oid)
442 text_t document_OID = *doc_data;
443
444 //create html link to related document
445 relation += "<a href=\"_httpdocument_&c=" + document_collection;
446 relation += "&cl=search&d=" + document_OID;
447
448 //get the information associated with the metadata for related doc
449 if (get_info (document_OID, document_collection, "", metadata,
450 false, collectproto, response, logout)) {
451
452 //if title metadata doesn't exist, collect subject metadata
453 //if that doesn't exist, just call it 'related document'
454 if (!response.docInfo[0].metadata["Title"].values[0].empty())
455 relationTitle = response.docInfo[0].metadata["Title"].values[0];
456 else if (!response.docInfo[0].metadata["Subject"].values.empty())
457 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
458 else relationTitle = "RELATED DOCUMENT";
459
460 }
461
462 //link the related document's title to its page
463 relation += "\">" + relationTitle + "</a>";
464 relation += " (" + document_collection + ")<br>";
465
466 ++currDoc;
467 }
468 }
469
470 }
471
472 if(relation.empty()) //no relation data for documnet
473 relation = ".. no related documents .. ";
474
475 return relation;
476}
477
478
479
480static void get_parent_options (text_t &instring, metadata_t &metaoption) {
481
482 assert (instring.size() > 7);
483 if (instring.size() <= 7) return;
484
485 text_t meta, com, op;
486 bool inbraces = false;
487 bool inquotes = false;
488 bool foundcolon = false;
489 text_t::const_iterator here = instring.begin()+6;
490 text_t::const_iterator end = instring.end();
491 while (here != end) {
492 if (*here == '(') inbraces = true;
493 else if (*here == ')') inbraces = false;
494 else if (*here == '\'' && !inquotes) inquotes = true;
495 else if (*here == '\'' && inquotes) inquotes = false;
496 else if (*here == ':' && !inbraces) foundcolon = true;
497 else if (foundcolon) meta.push_back (*here);
498 else if (inquotes) op.push_back (*here);
499 else com.push_back (*here);
500 ++here;
501 }
502
503 instring = meta;
504 if (com.empty())
505 metaoption.mqualifier.parent = pImmediate;
506 else if (com == "Top")
507 metaoption.mqualifier.parent = pTop;
508 else if (com == "All") {
509 metaoption.mqualifier.parent = pAll;
510 metaoption.functionoptions = op;
511 }
512}
513
514
515static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
516
517 assert (instring.size() > 8);
518 if (instring.size() <= 8) return;
519
520 text_t meta, com, op;
521 bool inbraces = false;
522 bool inquotes = false;
523 bool foundcolon = false;
524 text_t::const_iterator here = instring.begin()+7;
525 text_t::const_iterator end = instring.end();
526 while (here != end) {
527 if (*here == '(') inbraces = true;
528 else if (*here == ')') inbraces = false;
529 else if (*here == '\'' && !inquotes) inquotes = true;
530 else if (*here == '\'' && inquotes) inquotes = false;
531 else if (*here == ':' && !inbraces) foundcolon = true;
532 else if (foundcolon) meta.push_back (*here);
533 else if (inquotes) op.push_back (*here);
534 else com.push_back (*here);
535 ++here;
536 }
537
538 instring = meta;
539
540 if (com.empty()) {
541 metaoption.mqualifier.sibling = sAll;
542 metaoption.functionoptions = " ";
543 }
544 else {
545 metaoption.mqualifier.sibling = sAll;
546 metaoption.functionoptions = op;
547 }
548}
549
550static void get_child_options (text_t &instring, metadata_t &metaoption) {
551
552 assert (instring.size() > 6);
553 if (instring.size() <= 6) return;
554
555 text_t meta, com, op;
556 bool inbraces = false;
557 bool inquotes = false;
558 bool foundcolon = false;
559 text_t::const_iterator here = instring.begin()+5;
560 text_t::const_iterator end = instring.end();
561 while (here != end) {
562 if (*here == '(') inbraces = true;
563 else if (*here == ')') inbraces = false;
564 else if (*here == '\'' && !inquotes) inquotes = true;
565 else if (*here == '\'' && inquotes) inquotes = false;
566 else if (*here == ':' && !inbraces) foundcolon = true;
567 else if (foundcolon) meta.push_back (*here);
568 else if (inquotes) op.push_back (*here);
569 else com.push_back (*here);
570 ++here;
571 }
572
573 instring = meta;
574
575 if (com.empty()) {
576 metaoption.mqualifier.child = cAll;
577 metaoption.functionoptions = " ";
578 }
579 else if (com == "first") {
580 metaoption.mqualifier.child = cNum;
581 metaoption.functionoptions = ".fc";
582 }
583 else if (com == "last") {
584 metaoption.mqualifier.child = cNum;
585 metaoption.functionoptions = ".lc";
586 }
587 else if (com.getint()>0) {
588 metaoption.mqualifier.child = cNum;
589 metaoption.functionoptions = com;
590 }
591 else {
592 metaoption.mqualifier.child = cAll;
593 metaoption.functionoptions = op;
594 }
595}
596
597
598
599static void parse_meta (text_t &meta, metadata_t &metaoption,
600 text_tset &metadata, bool &getParents) {
601
602 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
603 metaoption.metacommand |= mCgiSafe;
604 meta = substr (meta.begin()+8, meta.end());
605 }
606
607 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
608 getParents = true;
609 metaoption.metacommand |= mParent;
610 get_parent_options (meta, metaoption);
611 }
612 else if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
613 metaoption.metacommand |= mSibling;
614 get_sibling_options (meta, metaoption);
615 }
616 else if (meta.size() > 6 && (substr (meta.begin(), meta.begin()+5) == "child")) {
617 metaoption.metacommand |= mChild;
618 get_child_options (meta, metaoption);
619 }
620
621 // check for ex. which may occur in format statements
622 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
623 meta = substr (meta.begin()+3, meta.end());
624 }
625 metadata.insert (meta);
626 metaoption.metaname = meta;
627}
628
629static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
630 if (meta == "collection") {
631 // no qualifiers
632 metaoption.metaname = g_EmptyText;
633 return;
634 }
635 meta = substr (meta.begin()+11, meta.end());
636 metaoption.metaname = meta;
637
638}
639
640static void parse_meta (text_t &meta, format_t *formatlistptr,
641 text_tset &metadata, bool &getParents) {
642
643 if (meta == "link")
644 formatlistptr->command = comLink;
645 else if (meta == "/link")
646 formatlistptr->command = comEndLink;
647
648 else if (meta == "href")
649 formatlistptr->command = comHref;
650
651 else if (meta == "num")
652 formatlistptr->command = comNum;
653
654 else if (meta == "icon")
655 formatlistptr->command = comIcon;
656
657 else if (meta == "Text")
658 formatlistptr->command = comDoc;
659
660 else if (meta == "RelatedDocuments")
661 formatlistptr->command = comRel;
662
663 else if (meta == "highlight")
664 formatlistptr->command = comHighlight;
665
666 else if (meta == "/highlight")
667 formatlistptr->command = comEndHighlight;
668
669 else if (meta == "Summary")
670 formatlistptr->command = comSummary;
671
672 else if (meta == "DocImage")
673 formatlistptr->command = comImage;
674
675 else if (meta == "DocTOC")
676 formatlistptr->command = comTOC;
677
678 else if (meta == "DocumentButtonDetach")
679 formatlistptr->command = comDocumentButtonDetach;
680
681 else if (meta == "DocumentButtonHighlight")
682 formatlistptr->command = comDocumentButtonHighlight;
683
684 else if (meta == "DocumentButtonExpandContents")
685 formatlistptr->command = comDocumentButtonExpandContents;
686
687 else if (meta == "DocumentButtonExpandText")
688 formatlistptr->command = comDocumentButtonExpandText;
689
690 else if (meta == "DocOID")
691 formatlistptr->command = comOID;
692 else if (meta == "DocRank")
693 formatlistptr->command = comRank;
694 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
695 formatlistptr->command = comCollection;
696 parse_coll_meta(meta, formatlistptr->meta);
697 }
698 else {
699 formatlistptr->command = comMeta;
700 parse_meta (meta, formatlistptr->meta, metadata, getParents);
701 }
702}
703
704
705static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
706 text_tset &metadata, bool &getParents) {
707
708 text_t text;
709 text_t::const_iterator here = formatstring.begin();
710 text_t::const_iterator end = formatstring.end();
711
712 while (here != end) {
713
714 if (*here == '\\') {
715 ++here;
716 if (here != end) text.push_back (*here);
717
718 } else if (*here == '{') {
719 if (!text.empty()) {
720 formatlistptr->command = comText;
721 formatlistptr->text = text;
722 formatlistptr->nextptr = new format_t();
723 formatlistptr = formatlistptr->nextptr;
724
725 text.clear();
726 }
727 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
728
729 formatlistptr->nextptr = new format_t();
730 formatlistptr = formatlistptr->nextptr;
731 if (here == end) break;
732 }
733 } else if (*here == '[') {
734 if (!text.empty()) {
735 formatlistptr->command = comText;
736 formatlistptr->text = text;
737 formatlistptr->nextptr = new format_t();
738 formatlistptr = formatlistptr->nextptr;
739
740 text.clear();
741 }
742 text_t meta;
743 ++here;
744 while (*here != ']') {
745 if (here == end) return false;
746 meta.push_back (*here);
747 ++here;
748 }
749 parse_meta (meta, formatlistptr, metadata, getParents);
750 formatlistptr->nextptr = new format_t();
751 formatlistptr = formatlistptr->nextptr;
752
753 } else
754 text.push_back (*here);
755
756 if (here != end) ++here;
757 }
758 if (!text.empty()) {
759 formatlistptr->command = comText;
760 formatlistptr->text = text;
761 formatlistptr->nextptr = new format_t();
762 formatlistptr = formatlistptr->nextptr;
763
764 }
765 return true;
766}
767
768
769static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
770 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
771
772 text_t::const_iterator it = findchar (here, end, '}');
773 if (it == end) return false;
774
775 text_t com = substr (here, it);
776 here = findchar (it, end, '{');
777 if (here == end) return false;
778 else ++here;
779
780 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
781 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
782 else return false;
783
784 int commacount = 0;
785 text_t text;
786 while (here != end) {
787
788 if (*here == '\\') {
789 ++here;
790 if (here != end) text.push_back(*here);
791
792 }
793
794 else if (*here == ',' || *here == '}' || *here == '{') {
795
796 if (formatlistptr->command == comOr) {
797 // the {Or}{this, or this, or this, or this} statement
798 format_t *or_ptr;
799
800 // find the next unused orptr
801 if (formatlistptr->orptr == NULL) {
802 formatlistptr->orptr = new format_t();
803 or_ptr = formatlistptr->orptr;
804 } else {
805 or_ptr = formatlistptr->orptr;
806 while (or_ptr->nextptr != NULL)
807 or_ptr = or_ptr->nextptr;
808 or_ptr->nextptr = new format_t();
809 or_ptr = or_ptr->nextptr;
810 }
811
812 if (!text.empty())
813 {
814 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
815 }
816
817 if (*here == '{')
818 {
819 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
820 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
821 // The latter can always be re-written:
822 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
823
824 if (!text.empty()) // already used up allocated format_t
825 {
826 // => allocate new one for detected action
827 or_ptr->nextptr = new format_t();
828 or_ptr = or_ptr->nextptr;
829 }
830 if (!parse_action(++here, end, or_ptr, metadata, getParents))
831 {
832 return false;
833 }
834 }
835 else
836 {
837 if (*here == '}') break;
838 }
839 text.clear();
840
841 }
842
843 // Parse an {If}{decide,do,else} statement
844 else {
845
846 // Read the decision component.
847 if (commacount == 0) {
848 // Decsion can be a metadata element, or a piece of text.
849 // Originally Stefan's code, updated 25/10/2000 by Gordon.
850
851 text_t::const_iterator beginbracket = text.begin();
852 text_t::const_iterator endbracket = (text.end() - 1);
853
854 // Decision is based on a metadata element
855 if ((*beginbracket == '[') && (*endbracket == ']')) {
856 // Ignore the surrounding square brackets
857 text_t meta = substr (beginbracket+1, endbracket);
858 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
859 ++commacount;
860 text.clear();
861 }
862
863 // Decision is a piece of text (probably a macro like _cgiargmode_).
864 else {
865
866 // hunt for any metadata in string, which might be uses in
867 // to test a condition, e.g. [Format] eq 'PDF'
868 format_t* dummyformat = new format_t();
869 // update which metadata fields needed
870 // (not interested in updatng formatlistptr)
871 parse_string (text, dummyformat, metadata, getParents);
872 delete dummyformat;
873
874 formatlistptr->decision.command = dText;
875 formatlistptr->decision.text = text;
876 ++commacount;
877 text.clear();
878 }
879 }
880
881 // Read the "then" and "else" components of the {If} statement.
882 else {
883 format_t** nextlistptr = NULL;
884 if (commacount == 1) {
885 nextlistptr = &formatlistptr->ifptr;
886 } else if (commacount == 2 ) {
887 nextlistptr = &formatlistptr->elseptr;
888 } else {
889 return false;
890 }
891
892 if (!text.empty()) {
893 if (*nextlistptr == NULL) {
894 *nextlistptr = new format_t();
895 } else {
896
897 // skip to the end of any format_t statements already added
898 while ((*nextlistptr)->nextptr != NULL)
899 {
900 nextlistptr = &(*nextlistptr)->nextptr;
901 }
902
903 (*nextlistptr)->nextptr = new format_t();
904 nextlistptr = &(*nextlistptr)->nextptr;
905 }
906
907 if (!parse_string (text, *nextlistptr, metadata, getParents))
908 {
909 return false;
910 }
911 text.clear();
912 }
913
914 if (*here == '{')
915 {
916 if (*nextlistptr == NULL) {
917 *nextlistptr = new format_t();
918 } else {
919 // skip to the end of any format_t statements already added
920 while ((*nextlistptr)->nextptr != NULL)
921 {
922 nextlistptr = &(*nextlistptr)->nextptr;
923 }
924
925 (*nextlistptr)->nextptr = new format_t();
926 nextlistptr = &(*nextlistptr)->nextptr;
927 }
928
929 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
930 {
931 return false;
932 }
933 }
934 else
935 {
936 if (*here == '}') break;
937 ++commacount;
938 }
939 }
940 }
941
942 } else text.push_back(*here);
943
944 if (here != end) ++here;
945 }
946
947 return true;
948}
949
950
951bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
952 text_tset &metadata, bool &getParents) {
953
954 formatlistptr->clear();
955 getParents = false;
956
957 return (parse_string (formatstring, formatlistptr, metadata, getParents));
958}
959
960
961static text_t get_all_meta (MetadataInfo_t &metainfo, const metadata_t &meta)
962{
963 text_t no_ns_metaname = remove_namespace(meta.metaname);
964
965 text_t tmp;
966 bool first = true;
967
968 const int start_i=0;
969 const int end_i = metainfo.values.size()-1;
970
971 for (int i=start_i; i<=end_i; ++i) {
972 if (!first) tmp += meta.functionoptions;
973
974 if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[i]);
975 else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[i]);
976 else tmp += metainfo.values[i];
977 first = false;
978 }
979
980 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
981 else return tmp;
982}
983
984static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta)
985{
986
987 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
988 text_t no_ns_metaname = remove_namespace(meta.metaname);
989 switch (meta.mqualifier.parent) {
990 case pNone:
991 return "Nothing!!";
992 break;
993
994 case pImmediate:
995 if (parent != NULL) {
996 if (no_ns_metaname == "Date")
997 return format_date (parent->values[0]);
998 if (meta.metacommand & mCgiSafe)
999 return cgi_safe (parent->values[0]);
1000 else return parent->values[0];
1001 }
1002 break;
1003
1004 case pTop:
1005 if (parent != NULL) {
1006 while (parent->parent != NULL) parent = parent->parent;
1007
1008 if (no_ns_metaname == "Date")
1009 return format_date (parent->values[0]);
1010 if (meta.metacommand & mCgiSafe)
1011 return cgi_safe (parent->values[0]);
1012 else return parent->values[0];
1013 }
1014 break;
1015
1016 case pAll:
1017 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1018 if (parent != NULL) {
1019 text_tarray tmparray;
1020 while (parent != NULL) {
1021 tmparray.push_back (parent->values[0]);
1022 parent = parent->parent;
1023 }
1024 bool first = true;
1025 text_t tmp;
1026 text_tarray::reverse_iterator here = tmparray.rbegin();
1027 text_tarray::reverse_iterator end = tmparray.rend();
1028 while (here != end) {
1029 if (!first) tmp += meta.functionoptions;
1030 if (no_ns_metaname == "Date") tmp += format_date (*here);
1031 else tmp += *here;
1032 first = false;
1033 ++here;
1034 }
1035 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
1036 else return tmp;
1037 }
1038 }
1039 return "";
1040
1041}
1042
1043static text_t get_child_meta (const text_t& collection,
1044 recptproto* collectproto,
1045 ResultDocInfo_t &docinfo, displayclass &disp,
1046 const metadata_t &meta, text_tmap &options,
1047 ostream& logout)
1048{
1049 if (docinfo.metadata["contains"].values.size()>0) {
1050
1051 text_t& contains = docinfo.metadata["contains"].values[0];
1052
1053 const text_t& child_metaname = meta.metaname;
1054 const text_t& child_field = meta.functionoptions;
1055
1056 text_tset child_metadata;
1057 child_metadata.insert(child_metaname);
1058
1059 FilterResponse_t child_response;
1060
1061 //get the information associated with the metadata for child doc
1062 if (get_info (docinfo.OID+child_field, collection, "", child_metadata,
1063 false, collectproto, child_response, logout)) {
1064
1065 if (!child_response.docInfo.empty()) {
1066 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1067
1068 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1069
1070 text_t child_metavalue = "";
1071
1072 if (meta.mqualifier.child == cNum) {
1073 if (metaname_rec.values.size()>0) {
1074 child_metavalue = metaname_rec.values[0];
1075 }
1076 }
1077 else if (meta.mqualifier.child == cAll) {
1078 child_metavalue = get_all_meta(metaname_rec,meta);
1079 }
1080
1081 return expand_metadata(child_metavalue,collection,collectproto,
1082 child_docinfo,disp,options,logout);
1083 }
1084 }
1085 }
1086
1087 return "";
1088}
1089
1090
1091
1092// note: all the format_date stuff is assuming that all Date metadata is going to
1093// be of the form yyyymmdd, this is of course, crap ;)
1094
1095static text_t get_meta (const text_t& collection, recptproto* collectproto,
1096 ResultDocInfo_t &docinfo, displayclass &disp,
1097 const metadata_t &meta, text_tmap &options,
1098 ostream& logout) {
1099
1100 // make sure we have the requested metadata
1101 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1102 if (it == docinfo.metadata.end()) return "";
1103
1104 text_t no_ns_metaname = remove_namespace(meta.metaname);
1105
1106 if (meta.metacommand & mParent) {
1107 return get_parent_meta(docinfo,meta);
1108 }
1109 else if (meta.metacommand & mChild) {
1110 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1111 options,logout);
1112 }
1113 else if (meta.metacommand & mSibling) {
1114 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1115 return get_all_meta(docinfo.metadata[meta.metaname],meta);
1116 }
1117 else {
1118
1119 // straightforward metadata request (nothing fancy)
1120
1121 text_t classifier_metaname = docinfo.classifier_metadata_type;
1122 int metaname_index
1123 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1124 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
1125
1126 if (no_ns_metaname == "Date")
1127 return format_date (metadata_item);
1128 else if (no_ns_metaname == "Language")
1129 return iso639(metadata_item);
1130 if (meta.metacommand & mCgiSafe)
1131 return cgi_safe (metadata_item);
1132 else return metadata_item;
1133 }
1134
1135 return "";
1136}
1137
1138static text_t get_or (const text_t& collection, recptproto* collectproto,
1139 ResultDocInfo_t &docinfo, displayclass &disp,
1140 format_t *orptr, text_tmap &options,
1141 ostream& logout) {
1142
1143 text_t tmp;
1144 while (orptr != NULL) {
1145
1146 tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1147 options, logout);
1148 if (!tmp.empty()) return tmp;
1149
1150 orptr = orptr->nextptr;
1151 }
1152 return "";
1153}
1154
// True if `c` is a space, horizontal tab, line feed or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1160
1161static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1162{
1163 int pos = start_pos;
1164 while (pos<outstring.size()) {
1165 if (!char_is_whitespace(outstring[pos])) {
1166 break;
1167 }
1168 ++pos;
1169 }
1170
1171 return pos;
1172}
1173
1174static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1175{
1176 int pos = start_pos;
1177 while (pos>=0) {
1178 if (!char_is_whitespace(outstring[pos])) {
1179 break;
1180 }
1181 --pos;
1182 }
1183
1184 return pos;
1185}
1186
1187static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1188{
1189 int pos = start_pos;
1190 while (pos>=0) {
1191 if (char_is_whitespace(outstring[pos])) {
1192 break;
1193 }
1194 --pos;
1195 }
1196
1197 return pos;
1198}
1199
1200
1201static int rscan_for(const text_t& outstring, const int start_pos,
1202 const char find_c)
1203{
1204 int pos = start_pos;
1205 while (pos>=0) {
1206 char c = outstring[pos];
1207 if (outstring[pos] == find_c) {
1208 break;
1209 }
1210 --pos;
1211 }
1212
1213 return pos;
1214}
1215
1216text_t extract_substr(const text_t& outstring, const int start_pos,
1217 const int end_pos)
1218{
1219 text_t extracted_str;
1220 extracted_str.clear();
1221
1222 for (int pos=start_pos; pos<=end_pos; ++pos) {
1223 extracted_str.push_back(outstring[pos]);
1224 }
1225
1226 return extracted_str;
1227}
1228
1229
1230static text_t expand_potential_metadata(const text_t& collection,
1231 recptproto* collectproto,
1232 ResultDocInfo_t &docinfo,
1233 displayclass &disp,
1234 const text_t& intext,
1235 text_tmap &options,
1236 ostream& logout)
1237{
1238 text_t outtext;
1239
1240 // decide if dealing with metadata or text
1241
1242 text_t::const_iterator beginbracket = intext.begin();
1243 text_t::const_iterator endbracket = (intext.end() - 1);
1244
1245 // Decision is based on a metadata element
1246 if ((*beginbracket == '[') && (*endbracket == ']')) {
1247 // Ignore the surrounding square brackets
1248 text_t meta_text = substr (beginbracket+1, endbracket);
1249
1250 text_tset metadata;
1251 bool getParents =false;
1252 metadata_t meta;
1253
1254 parse_meta (meta_text, meta, metadata, getParents);
1255 outtext
1256 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1257 }
1258 else {
1259 outtext = intext;
1260 }
1261
1262 return outtext;
1263}
1264
1265
1266
1267
1268static bool uses_expression(const text_t& collection, recptproto* collectproto,
1269 ResultDocInfo_t &docinfo,
1270 displayclass &disp,
1271 const text_t& outstring, text_t& lhs_expr,
1272 text_t& op_expr, text_t& rhs_expr,
1273 text_tmap &options,
1274 ostream& logout)
1275{
1276 // Note: the string may not be of the form: str1 op str2, however
1277 // to deterine this we have to process it on the assumption it is,
1278 // and if at any point an 'erroneous' value is encountered, return
1279 // false and let something else have a go at evaluating it
1280
1281 // Starting at the end of the string and working backwards ..
1282
1283 const int outstring_len = outstring.size();
1284
1285 // skip over white space
1286 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1287
1288 if (rhs_end<=0) {
1289 // no meaningful text or (rhs_end==0) no room for operator
1290 return false;
1291 }
1292
1293 // check for ' or " and then scan over token
1294 const char potential_quote = outstring[rhs_end];
1295 int rhs_start=rhs_end;
1296 bool quoted = false;
1297
1298 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1299 --rhs_end;
1300 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1301 quoted = true;
1302 }
1303 else {
1304 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1305 }
1306
1307 if ((rhs_end-rhs_start)<0) {
1308 // no meaningful rhs expression
1309 return false;
1310 }
1311
1312 // form rhs_expr
1313 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1314
1315 // skip over white space
1316 const int to_whitespace = (quoted) ? 2 : 1;
1317
1318 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1319 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1320
1321
1322 if (op_end-op_start<0) {
1323 // no meaningful expression operator
1324 return false;
1325 }
1326
1327 op_expr = extract_substr(outstring,op_start,op_end);
1328
1329
1330 // check for operator
1331 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1332 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1333
1334 // not a valid operator
1335 return false;
1336 }
1337
1338 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1339 if (lhs_end<0) {
1340 // no meaningful lhs expression
1341 return false;
1342 }
1343
1344 int lhs_start = scan_over_whitespace(outstring,0);
1345
1346 // form lhs_expr from remainder of string
1347 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1348
1349 // Now we know we have a valid expression, look up any
1350 // metadata terms
1351
1352 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1353 disp,rhs_expr,options,logout);
1354 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1355 disp,lhs_expr,options,logout);
1356
1357 return true;
1358}
1359
1360static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1361 const text_t& rhs_expr, ostream& logout)
1362{
1363 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1364 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1365 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1366 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1367 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1368 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1369 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1370 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1371 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1372 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1373 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1374 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1375 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1376 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1377 else {
1378 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1379 }
1380
1381 return false;
1382}
1383
1384
1385static text_t get_if (const text_t& collection, recptproto* collectproto,
1386 ResultDocInfo_t &docinfo, displayclass &disp,
1387 const decision_t &decision,
1388 format_t *ifptr, format_t *elseptr,
1389 text_tmap &options, ostream& logout)
1390{
1391 // If the decision component is a metadata element, then evaluate it
1392 // to see whether we output the "then" or the "else" clause
1393 if (decision.command == dMeta) {
1394 if (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1395 logout) != "") {
1396 if (ifptr != NULL)
1397 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1398 options, logout);
1399 }
1400 else {
1401 if (elseptr != NULL)
1402 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1403 options, logout);
1404 }
1405 }
1406
1407 // If the decision component is text, then evaluate it (it is probably a
1408 // macro like _cgiargmode_) to decide what to output.
1409 else if (decision.command == dText) {
1410
1411 text_t outstring;
1412 disp.expandstring (decision.text, outstring);
1413
1414 // Check for if expression in form: str1 op str2
1415 // (such as [x] eq "y")
1416 text_t lhs_expr, op_expr, rhs_expr;
1417 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1418 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1419 if (ifptr != NULL) {
1420 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1421 options, logout);
1422 }
1423 else {
1424 return "";
1425 }
1426 } else {
1427 if (elseptr != NULL) {
1428 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1429 options, logout);
1430 }
1431 else {
1432 return "";
1433 }
1434 }
1435 }
1436
1437
1438 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1439 // a cgi argument macro that has not been set, it evaluates to itself.
1440 // Therefore, were have to say that a piece of text evalautes true if
1441 // it is non-empty and if it is a cgi argument evaulating to itself.
1442
1443 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1444 if (ifptr != NULL)
1445 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1446 options, logout);
1447 } else {
1448 if (elseptr != NULL)
1449 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1450 options, logout);
1451 }
1452 }
1453
1454 return "";
1455}
1456
1457bool includes_metadata(const text_t& text)
1458{
1459 text_t::const_iterator here = text.begin();
1460 text_t::const_iterator end = text.end();
1461 while (here != end) {
1462 if (*here == '[') return true;
1463 ++here;
1464 }
1465
1466 return false;
1467}
1468
1469static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1470 recptproto* collectproto,
1471 ResultDocInfo_t &docinfo,
1472 displayclass &disp, text_tmap &options,
1473 ostream &logout) {
1474
1475 if (includes_metadata(metavalue))
1476 {
1477 // text has embedded metadata in it => expand it
1478 FilterRequest_t request;
1479 FilterResponse_t response;
1480
1481 request.getParents = false;
1482
1483 format_t *expanded_formatlistptr = new format_t();
1484 parse_formatstring (metavalue, expanded_formatlistptr,
1485 request.fields, request.getParents);
1486
1487 // retrieve metadata
1488 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1489 collectproto, response, logout);
1490
1491 if (!response.docInfo.empty())
1492 {
1493 text_t expanded_metavalue
1494 = get_formatted_string(collection, collectproto,
1495 response.docInfo[0], disp, expanded_formatlistptr,
1496 options, logout);
1497
1498 return expanded_metavalue;
1499 }
1500 else
1501 {
1502 return metavalue;
1503 }
1504 }
1505 else
1506 {
1507 return metavalue;
1508 }
1509}
1510
1511text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1512 displayclass &disp,
1513 text_t meta_name, ostream& logout) {
1514
1515 ColInfoResponse_t collectinfo;
1516 comerror_t err;
1517 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1518 text_t meta_value = "";
1519 text_t lang;
1520 disp.expandstring("_cgiargl_",lang);
1521 if (lang.empty()) {
1522 lang = "en";
1523 }
1524
1525 if (err == noError) {
1526 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1527 }
1528 return meta_value;
1529
1530
1531}
1532text_t format_string (const text_t& collection, recptproto* collectproto,
1533 ResultDocInfo_t &docinfo, displayclass &disp,
1534 format_t *formatlistptr, text_tmap &options,
1535 ostream& logout) {
1536
1537 if (formatlistptr == NULL) return "";
1538
1539 switch (formatlistptr->command) {
1540 case comOID:
1541 return docinfo.OID;
1542 case comRank:
1543 return text_t(docinfo.ranking);
1544 case comText:
1545 return formatlistptr->text;
1546 case comLink:
1547 return options["link"];
1548 case comEndLink:
1549 if (options["link"].empty()) return "";
1550 else return "</a>";
1551 case comHref:
1552 return get_href(options["link"]);
1553 case comIcon:
1554 return options["icon"];
1555 case comNum:
1556 return docinfo.result_num;
1557 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1558 return get_related_docs(collection, collectproto, docinfo, logout);
1559 case comSummary:
1560 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1561 case comMeta:
1562 {
1563 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1564 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1565 }
1566 case comDoc:
1567 return format_text(collection, collectproto, docinfo, disp, options, logout);
1568 //return options["text"];
1569 case comImage:
1570 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1571 case comTOC:
1572 return options["DocTOC"];
1573 case comDocumentButtonDetach:
1574 return options["DocumentButtonDetach"];
1575 case comDocumentButtonHighlight:
1576 return options["DocumentButtonHighlight"];
1577 case comDocumentButtonExpandContents:
1578 return options["DocumentButtonExpandContents"];
1579 case comDocumentButtonExpandText:
1580 return options["DocumentButtonExpandText"];
1581 case comHighlight:
1582 if (options["highlight"] == "1") return "<b>";
1583 break;
1584 case comEndHighlight:
1585 if (options["highlight"] == "1") return "</b>";
1586 break;
1587 case comIf:
1588 return get_if (collection, collectproto, docinfo, disp,
1589 formatlistptr->decision, formatlistptr->ifptr,
1590 formatlistptr->elseptr, options, logout);
1591 case comOr:
1592 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1593 options, logout);
1594 case comCollection:
1595 if (formatlistptr->meta.metaname == g_EmptyText) {
1596 return collection;
1597 }
1598 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1599
1600 }
1601 return "";
1602}
1603
1604text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1605 ResultDocInfo_t &docinfo, displayclass &disp,
1606 format_t *formatlistptr, text_tmap &options,
1607 ostream& logout) {
1608
1609 text_t ft;
1610 while (formatlistptr != NULL)
1611 {
1612 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1613 options, logout);
1614 formatlistptr = formatlistptr->nextptr;
1615 }
1616
1617 return ft;
1618}
1619
1620
1621// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1622text_t format_text (const text_t& collection, recptproto* collectproto,
1623 ResultDocInfo_t &docinfo, displayclass &disp,
1624 text_tmap &options, ostream& logout) {
1625 if(!options["text"].empty()) {
1626 return options["text"];
1627 }
1628 // else get document text here
1629 DocumentRequest_t docrequest;
1630 DocumentResponse_t docresponse;
1631 comerror_t err;
1632 docrequest.OID = docinfo.OID;
1633 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1634 return docresponse.doc;
1635
1636}
1637
1638/* FUNCTION NAME: format_summary
1639 * DESC: this is invoked when a [Summary] special metadata is processed.
1640 * RETURNS: a query-biased summary for the document */
1641
1642text_t format_summary (const text_t& collection, recptproto* collectproto,
1643 ResultDocInfo_t &docinfo, displayclass &disp,
1644 text_tmap &options, ostream& logout) {
1645
1646 // GRB: added code here to ensure that the cstr (and other collections)
1647 // uses the document metadata item Summary, rather than compressing
1648 // the text of the document, processed via the methods in
1649 // summarise.cpp
1650 if (docinfo.metadata.count("Summary") > 0 &&
1651 docinfo.metadata["Summary"].values.size() > 0) {
1652 return docinfo.metadata["Summary"].values[0];
1653 }
1654
1655 text_t textToSummarise, query;
1656 if(options["text"].empty()) { // get document text
1657 DocumentRequest_t docrequest;
1658 DocumentResponse_t docresponse;
1659 comerror_t err;
1660 docrequest.OID = docinfo.OID;
1661 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1662 textToSummarise = docresponse.doc;
1663 } else // in practice, this would not happen, because text is only
1664 // loaded with the [Text] command
1665 textToSummarise = options["text"];
1666 disp.expandstring("_cgiargq_",query);
1667 return summarise(textToSummarise,query,80);
1668}
Note: See TracBrowser for help on using the repository browser.