source: trunk/gsdl/src/recpt/formattools.cpp@ 10733

Last change on this file since 10733 was 10733, checked in by mdewsnip, 19 years ago

Improved display of dates so values like "19960000" and "19960100" will be displayed correctly (as "1996" and "January 1996", respectively).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 51.7 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "OIDtools.h"
29#include "summarise.h"
30
31#include <assert.h>
32
33// a few function prototypes
34
35static text_t format_string (const text_t& collection, recptproto* collectproto,
36 ResultDocInfo_t &docinfo, displayclass &disp,
37 format_t *formatlistptr, text_tmap &options,
38 ostream& logout);
39
40static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
41 format_t *formatlistptr, text_tset &metadata, bool &getParents);
42
43static text_t format_summary (const text_t& collection, recptproto* collectproto,
44 ResultDocInfo_t &docinfo, displayclass &disp,
45 text_tmap &options, ostream& logout);
46static text_t format_text (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
49
50static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
51 recptproto* collectproto, ResultDocInfo_t &docinfo,
52 displayclass &disp, text_tmap &options,
53 ostream &logout);
54
55
56void metadata_t::clear() {
57 metaname.clear();
58 metacommand = mNone;
59 mqualifier.parent = pNone;
60 mqualifier.sibling = sNone;
61 mqualifier.child = cNone;
62 parentoptions.clear();
63 siblingoptions.clear();
64 childoptions.clear();
65}
66
67void decision_t::clear() {
68 command = dMeta;
69 meta.clear();
70 text.clear();
71}
72
73void format_t::clear() {
74 command = comText;
75 decision.clear();
76 text.clear();
77 meta.clear();
78 nextptr = NULL;
79 ifptr = NULL;
80 elseptr = NULL;
81 orptr = NULL;
82}
83
84void formatinfo_t::clear() {
85 DocumentImages = false;
86 DocumentTitles = true;
87 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
88 DocumentContents = true;
89 DocumentArrowsBottom = true;
90 DocumentArrowsTop = false;
91 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
92 // DocumentButtons.push_back ("Expand Text");
93 // DocumentButtons.push_back ("Expand Contents");
94 DocumentButtons.push_back ("Detach");
95 DocumentButtons.push_back ("Highlight");
96 RelatedDocuments = "";
97 DocumentText = "<center><table width=_pagewidth_><tr><td>[Text]</td></tr></table></center>";
98 formatstrings.erase (formatstrings.begin(), formatstrings.end());
99 DocumentUseHTML = false;
100 AllowExtendedOptions = false;
101}
102
103// simply checks to see if formatstring begins with a <td> tag
104bool is_table_content (const text_t &formatstring) {
105 text_t::const_iterator here = formatstring.begin();
106 text_t::const_iterator end = formatstring.end();
107
108 while (here != end) {
109 if (*here != ' ') {
110 if ((*here == '<') && ((here+3) < end)) {
111 if ((*(here+1) == 't' || *(here+1) == 'T') &&
112 (*(here+2) == 'd' || *(here+2) == 'D') &&
113 (*(here+3) == '>' || *(here+3) == ' '))
114 return true;
115 } else return false;
116 }
117 ++here;
118 }
119 return false;
120}
121
122bool is_table_content (const format_t *formatlistptr) {
123
124 if (formatlistptr == NULL) return false;
125
126 if (formatlistptr->command == comText)
127 return is_table_content (formatlistptr->text);
128
129 return false;
130}
131
132// returns false if key isn't in formatstringmap
133bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
134 text_t &formatstring) {
135
136 formatstring.clear();
137 text_tmap::const_iterator it = formatstringmap.find(key);
138 if (it == formatstringmap.end()) return false;
139 formatstring = (*it).second;
140 return true;
141}
142
143// tries to find "key1key2" then "key1" then "key2"
144bool get_formatstring (const text_t &key1, const text_t &key2,
145 const text_tmap &formatstringmap,
146 text_t &formatstring) {
147
148 formatstring.clear();
149 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
150 if (it != formatstringmap.end()) {
151 formatstring = (*it).second;
152 return true;
153 }
154 it = formatstringmap.find(key1);
155 if (it != formatstringmap.end()) {
156 formatstring = (*it).second;
157 return true;
158 }
159 it = formatstringmap.find(key2);
160 if (it != formatstringmap.end()) {
161 formatstring = (*it).second;
162 return true;
163 }
164 return false;
165}
166
167
168text_t remove_namespace(const text_t &meta_name) {
169 text_t::const_iterator end = meta_name.end();
170 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
171 if (it != end) {
172 return substr(it+1, end);
173 }
174 return meta_name;
175
176}
177// returns a date of form 31 _textmonthnn_ 1999
178// input is date of type 19991231
179// at least the year must be present in date
180text_t format_date (const text_t &date) {
181
182 if (date.size() < 4) return "";
183
184 text_t::const_iterator datebegin = date.begin();
185
186 text_t year = substr (datebegin, datebegin+4);
187
188 if (date.size() < 6) return year;
189
190 text_t month = substr (datebegin+4, datebegin+6);
191 int imonth = month.getint();
192 if (imonth <= 0 || imonth > 12) return year;
193 month = "_textmonth" + month + "_";
194
195 if (date.size() < 8) return month + " " + year;
196
197 text_t day = substr (datebegin+6, datebegin+8);
198 if (day[0] == '0') day = substr (day.begin()+1, day.end());
199 int iday = day.getint();
200 if (iday <= 0 || iday > 31) return month + " " + year;
201
202 return day + " " + month + " " + year;
203}
204
205// converts an iso639 language code to its English equivalent
206// I realize that this isn't the pretiest or most efficient implementation,
207// hopefully this ugly Language (and Date too) formatting won't survive to
208// see gsdl-3.0
209text_t iso639 (const text_t &langcode) {
210
211 if (langcode == "aa") return "Afar";
212 if (langcode == "ab") return "Abkhazian";
213 if (langcode == "af") return "Afrikaans";
214 if (langcode == "am") return "Amharic";
215 if (langcode == "ar") return "Arabic";
216 if (langcode == "as") return "Assamese";
217 if (langcode == "ay") return "Aymara";
218 if (langcode == "az") return "Azerbaijani";
219
220 if (langcode == "ba") return "Bashkir";
221 if (langcode == "be") return "Byelorussian";
222 if (langcode == "bg") return "Bulgarian";
223 if (langcode == "bh") return "Bihari";
224 if (langcode == "bi") return "Bislama";
225 if (langcode == "bn") return "Bengali; Bangla";
226 if (langcode == "bo") return "Tibetan";
227 if (langcode == "br") return "Breton";
228
229 if (langcode == "ca") return "Catalan";
230 if (langcode == "co") return "Corsican";
231 if (langcode == "cs") return "Czech";
232 if (langcode == "cy") return "Welsh";
233
234 if (langcode == "da") return "Danish";
235 if (langcode == "de") return "German";
236 if (langcode == "dz") return "Bhutani";
237
238 if (langcode == "el") return "Greek";
239 if (langcode == "en") return "English";
240 if (langcode == "eo") return "Esperanto";
241 if (langcode == "es") return "Spanish";
242 if (langcode == "et") return "Estonian";
243 if (langcode == "eu") return "Basque";
244
245 if (langcode == "fa") return "Persian";
246 if (langcode == "fi") return "Finnish";
247 if (langcode == "fj") return "Fiji";
248 if (langcode == "fo") return "Faroese";
249 if (langcode == "fr") return "French";
250 if (langcode == "fy") return "Frisian";
251
252 if (langcode == "ga") return "Irish";
253 if (langcode == "gd") return "Scots Gaelic";
254 if (langcode == "gl") return "Galician";
255 if (langcode == "gn") return "Guarani";
256 if (langcode == "gu") return "Gujarati";
257
258 if (langcode == "ha") return "Hausa";
259 if (langcode == "hi") return "Hindi";
260 if (langcode == "hr") return "Croatian";
261 if (langcode == "hu") return "Hungarian";
262 if (langcode == "hy") return "Armenian";
263
264 if (langcode == "ia") return "Interlingua";
265 if (langcode == "ie") return "Interlingue";
266 if (langcode == "ik") return "Inupiak";
267 if (langcode == "in") return "Indonesian";
268 if (langcode == "is") return "Icelandic";
269 if (langcode == "it") return "Italian";
270 if (langcode == "iw") return "Hebrew";
271
272 if (langcode == "ja") return "Japanese";
273 if (langcode == "ji") return "Yiddish";
274 if (langcode == "jw") return "Javanese";
275
276 if (langcode == "ka") return "Georgian";
277 if (langcode == "kk") return "Kazakh";
278 if (langcode == "kl") return "Greenlandic";
279 if (langcode == "km") return "Cambodian";
280 if (langcode == "kn") return "Kannada";
281 if (langcode == "ko") return "Korean";
282 if (langcode == "ks") return "Kashmiri";
283 if (langcode == "ku") return "Kurdish";
284 if (langcode == "ky") return "Kirghiz";
285
286 if (langcode == "la") return "Latin";
287 if (langcode == "ln") return "Lingala";
288 if (langcode == "lo") return "Laothian";
289 if (langcode == "lt") return "Lithuanian";
290 if (langcode == "lv") return "Latvian, Lettish";
291
292 if (langcode == "mg") return "Malagasy";
293 if (langcode == "mi") return "Maori";
294 if (langcode == "mk") return "Macedonian";
295 if (langcode == "ml") return "Malayalam";
296 if (langcode == "mn") return "Mongolian";
297 if (langcode == "mo") return "Moldavian";
298 if (langcode == "mr") return "Marathi";
299 if (langcode == "ms") return "Malay";
300 if (langcode == "mt") return "Maltese";
301 if (langcode == "my") return "Burmese";
302
303 if (langcode == "na") return "Nauru";
304 if (langcode == "ne") return "Nepali";
305 if (langcode == "nl") return "Dutch";
306 if (langcode == "no") return "Norwegian";
307
308 if (langcode == "oc") return "Occitan";
309 if (langcode == "om") return "(Afan) Oromo";
310 if (langcode == "or") return "Oriya";
311
312 if (langcode == "pa") return "Punjabi";
313 if (langcode == "pl") return "Polish";
314 if (langcode == "ps") return "Pashto, Pushto";
315 if (langcode == "pt") return "Portuguese";
316
317 if (langcode == "qu") return "Quechua";
318 if (langcode == "rm") return "Rhaeto-Romance";
319 if (langcode == "rn") return "Kirundi";
320 if (langcode == "ro") return "Romanian";
321 if (langcode == "ru") return "Russian";
322 if (langcode == "rw") return "Kinyarwanda";
323
324 if (langcode == "sa") return "Sanskrit";
325 if (langcode == "sd") return "Sindhi";
326 if (langcode == "sg") return "Sangro";
327 if (langcode == "sh") return "Serbo-Croatian";
328 if (langcode == "si") return "Singhalese";
329 if (langcode == "sk") return "Slovak";
330 if (langcode == "sl") return "Slovenian";
331 if (langcode == "sm") return "Samoan";
332 if (langcode == "sn") return "Shona";
333 if (langcode == "so") return "Somali";
334 if (langcode == "sq") return "Albanian";
335 if (langcode == "sr") return "Serbian";
336 if (langcode == "ss") return "Siswati";
337 if (langcode == "st") return "Sesotho";
338 if (langcode == "su") return "Sudanese";
339 if (langcode == "sv") return "Swedish";
340 if (langcode == "sw") return "Swahili";
341
342 if (langcode == "ta") return "Tamil";
343 if (langcode == "te") return "Tegulu";
344 if (langcode == "tg") return "Tajik";
345 if (langcode == "th") return "Thai";
346 if (langcode == "ti") return "Tigrinya";
347 if (langcode == "tk") return "Turkmen";
348 if (langcode == "tl") return "Tagalog";
349 if (langcode == "tn") return "Setswana";
350 if (langcode == "to") return "Tonga";
351 if (langcode == "tr") return "Turkish";
352 if (langcode == "ts") return "Tsonga";
353 if (langcode == "tt") return "Tatar";
354 if (langcode == "tw") return "Twi";
355
356 if (langcode == "uk") return "Ukrainian";
357 if (langcode == "ur") return "Urdu";
358 if (langcode == "uz") return "Uzbek";
359
360 if (langcode == "vi") return "Vietnamese";
361 if (langcode == "vo") return "Volapuk";
362
363 if (langcode == "wo") return "Wolof";
364
365 if (langcode == "xh") return "Xhosa";
366
367 if (langcode == "yo") return "Yoruba";
368
369 if (langcode == "zh") return "Chinese";
370 if (langcode == "zu") return "Zulu";
371 return "";
372}
373
374text_t get_href (const text_t &link) {
375
376 text_t href;
377
378 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
379 text_t::const_iterator end = link.end();
380
381 ++here;
382 while (here != end) {
383 if (*here == '"') break;
384 href.push_back(*here);
385 ++here;
386 }
387
388 return href;
389}
390
391//this function gets the information associated with the relation
392//metadata for the document associated with 'docinfo'. This relation
393//metadata consists of a line of pairs containing 'collection, document OID'
394//(this is the OID of the document related to the current document, and
395//the collection the related document belongs to). For each of these pairs
396//the title metadata is obtained and then an html link between the title
397//of the related doc and the document's position (the document will be
398//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
399//(where collection is the related documents collection, and OID is the
400//related documents OID). A list of these html links are made for as many
401//related documents as there are. This list is then returned. If there are
402//no related documents available for the current document then the string
403//'.. no related documents .. ' is returned.
404text_t get_related_docs(const text_t& collection, recptproto* collectproto,
405 ResultDocInfo_t &docinfo, ostream& logout){
406
407 text_tset metadata;
408
409 //insert the metadata we wish to collect
410 metadata.insert("relation");
411 metadata.insert("Title");
412 metadata.insert("Subject"); //for emails, where title data doesn't apply
413
414 FilterResponse_t response;
415 text_t relation = ""; //string for displaying relation metadata
416 text_t relationTitle = ""; //the related documents Title (or subject)
417 text_t relationOID = ""; //the related documents OID
418
419 //get the information associated with the metadata for current doc
420 if (get_info (docinfo.OID, collection, "", metadata,
421 false, collectproto, response, logout)) {
422
423 //if the relation metadata exists, store for displaying
424 if(!response.docInfo[0].metadata["relation"].values.empty()){
425 relationOID += response.docInfo[0].metadata["relation"].values[0];
426
427 //split relation data into pairs of collectionname,ID number
428 text_tarray relationpairs;
429 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
430
431 text_tarray::const_iterator currDoc = relationpairs.begin();
432 text_tarray::const_iterator lastDoc = relationpairs.end();
433
434 //iterate through the pairs to split and display
435 while(currDoc != lastDoc){
436
437 //split pairs into collectionname and ID
438 text_tarray relationdata;
439 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
440
441 //get first element in the array (collection)
442 text_tarray::const_iterator doc_data = relationdata.begin();
443 text_t document_collection = *doc_data;
444 ++doc_data; //increment to get next item in array (oid)
445 text_t document_OID = *doc_data;
446
447 //create html link to related document
448 relation += "<a href=\"_httpdocument_&c=" + document_collection;
449 relation += "&cl=search&d=" + document_OID;
450
451 //get the information associated with the metadata for related doc
452 if (get_info (document_OID, document_collection, "", metadata,
453 false, collectproto, response, logout)) {
454
455 //if title metadata doesn't exist, collect subject metadata
456 //if that doesn't exist, just call it 'related document'
457 if (!response.docInfo[0].metadata["Title"].values[0].empty())
458 relationTitle = response.docInfo[0].metadata["Title"].values[0];
459 else if (!response.docInfo[0].metadata["Subject"].values.empty())
460 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
461 else relationTitle = "RELATED DOCUMENT";
462
463 }
464
465 //link the related document's title to its page
466 relation += "\">" + relationTitle + "</a>";
467 relation += " (" + document_collection + ")<br>";
468
469 ++currDoc;
470 }
471 }
472
473 }
474
475 if(relation.empty()) //no relation data for documnet
476 relation = ".. no related documents .. ";
477
478 return relation;
479}
480
481
482
483static void get_parent_options (text_t &instring, metadata_t &metaoption) {
484
485 assert (instring.size() > 7);
486 if (instring.size() <= 7) return;
487
488 text_t meta, com, op;
489 bool inbraces = false;
490 bool inquotes = false;
491 bool foundcolon = false;
492 text_t::const_iterator here = instring.begin()+6;
493 text_t::const_iterator end = instring.end();
494 while (here != end) {
495 if (foundcolon) meta.push_back (*here);
496 else if (*here == '(') inbraces = true;
497 else if (*here == ')') inbraces = false;
498 else if (*here == '\'' && !inquotes) inquotes = true;
499 else if (*here == '\'' && inquotes) inquotes = false;
500 else if (*here == ':' && !inbraces) foundcolon = true;
501 else if (inquotes) op.push_back (*here);
502 else com.push_back (*here);
503 ++here;
504 }
505
506 instring = meta;
507 if (com.empty())
508 metaoption.mqualifier.parent = pImmediate;
509 else if (com == "Top")
510 metaoption.mqualifier.parent = pTop;
511 else if (com == "All") {
512 metaoption.mqualifier.parent = pAll;
513 metaoption.parentoptions = op;
514 }
515}
516
517
518static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
519
520 assert (instring.size() > 8);
521 if (instring.size() <= 8) return;
522 text_t meta, com, op;
523 bool inbraces = false;
524 bool inquotes = false;
525 bool foundcolon = false;
526 text_t::const_iterator here = instring.begin()+7;
527 text_t::const_iterator end = instring.end();
528 while (here != end) {
529 if (foundcolon) meta.push_back (*here);
530 else if (*here == '(') inbraces = true;
531 else if (*here == ')') inbraces = false;
532 else if (*here == '\'' && !inquotes) inquotes = true;
533 else if (*here == '\'' && inquotes) inquotes = false;
534 else if (*here == ':' && !inbraces) foundcolon = true;
535 else if (inquotes) op.push_back (*here);
536 else com.push_back (*here);
537 ++here;
538 }
539
540 instring = meta;
541 metaoption.siblingoptions.clear();
542
543 if (com.empty()) {
544 metaoption.mqualifier.sibling = sAll;
545 metaoption.siblingoptions = " ";
546 }
547 else if (com == "first") {
548 metaoption.mqualifier.sibling = sNum;
549 metaoption.siblingoptions = "0";
550 }
551 else if (com == "last") {
552 metaoption.mqualifier.sibling = sNum;
553 metaoption.siblingoptions = "-2"; // == last
554 }
555 else if (com.getint()>0) {
556 metaoption.mqualifier.sibling = sNum;
557 int pos = com.getint()-1;
558 metaoption.siblingoptions +=pos;
559 }
560 else {
561 metaoption.mqualifier.sibling = sAll;
562 metaoption.siblingoptions = op;
563 }
564}
565
566static void get_child_options (text_t &instring, metadata_t &metaoption) {
567
568 assert (instring.size() > 6);
569 if (instring.size() <= 6) return;
570 text_t meta, com, op;
571 bool inbraces = false;
572 bool inquotes = false;
573 bool foundcolon = false;
574 text_t::const_iterator here = instring.begin()+5;
575 text_t::const_iterator end = instring.end();
576 while (here != end) {
577 if (foundcolon) meta.push_back (*here);
578 else if (*here == '(') inbraces = true;
579 else if (*here == ')') inbraces = false;
580 else if (*here == '\'' && !inquotes) inquotes = true;
581 else if (*here == '\'' && inquotes) inquotes = false;
582 else if (*here == ':' && !inbraces) foundcolon = true;
583 else if (inquotes) op.push_back (*here);
584 else com.push_back (*here);
585 ++here;
586 }
587
588 instring = meta;
589 if (com.empty()) {
590 metaoption.mqualifier.child = cAll;
591 metaoption.childoptions = " ";
592 }
593 else if (com == "first") {
594 metaoption.mqualifier.child = cNum;
595 metaoption.childoptions = ".fc";
596 }
597 else if (com == "last") {
598 metaoption.mqualifier.child = cNum;
599 metaoption.childoptions = ".lc";
600 }
601 else if (com.getint()>0) {
602 metaoption.mqualifier.child = cNum;
603 metaoption.childoptions = "."+com;
604 }
605 else {
606 metaoption.mqualifier.child = cAll;
607 metaoption.childoptions = op;
608 }
609}
610
611
612
613static void parse_meta (text_t &meta, metadata_t &metaoption,
614 text_tset &metadata, bool &getParents) {
615
616 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
617 metaoption.metacommand |= mCgiSafe;
618 meta = substr (meta.begin()+8, meta.end());
619 }
620
621 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
622 getParents = true;
623 metaoption.metacommand |= mParent;
624 get_parent_options (meta, metaoption);
625 }
626 else if (meta.size() > 6 && (substr (meta.begin(), meta.begin()+5) == "child")) {
627 metaoption.metacommand |= mChild;
628 get_child_options (meta, metaoption);
629 metadata.insert("contains");
630 }
631 // parent and child can have sibling also
632 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
633 metaoption.metacommand |= mSibling;
634 get_sibling_options (meta, metaoption);
635 }
636
637 // check for ex. which may occur in format statements
638 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
639 meta = substr (meta.begin()+3, meta.end());
640 }
641 metadata.insert (meta);
642 metaoption.metaname = meta;
643}
644
645static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
646 if (meta == "collection") {
647 // no qualifiers
648 metaoption.metaname = g_EmptyText;
649 return;
650 }
651 meta = substr (meta.begin()+11, meta.end());
652 metaoption.metaname = meta;
653
654}
655
656static void parse_meta (text_t &meta, format_t *formatlistptr,
657 text_tset &metadata, bool &getParents) {
658
659 if (meta == "link")
660 formatlistptr->command = comLink;
661 else if (meta == "/link")
662 formatlistptr->command = comEndLink;
663
664 else if (meta == "href")
665 formatlistptr->command = comHref;
666
667 else if (meta == "num")
668 formatlistptr->command = comNum;
669
670 else if (meta == "icon")
671 formatlistptr->command = comIcon;
672
673 else if (meta == "Text")
674 formatlistptr->command = comDoc;
675
676 else if (meta == "RelatedDocuments")
677 formatlistptr->command = comRel;
678
679 else if (meta == "highlight")
680 formatlistptr->command = comHighlight;
681
682 else if (meta == "/highlight")
683 formatlistptr->command = comEndHighlight;
684
685 else if (meta == "Summary")
686 formatlistptr->command = comSummary;
687
688 else if (meta == "DocImage")
689 formatlistptr->command = comImage;
690
691 else if (meta == "DocTOC")
692 formatlistptr->command = comTOC;
693
694 else if (meta == "DocumentButtonDetach")
695 formatlistptr->command = comDocumentButtonDetach;
696
697 else if (meta == "DocumentButtonHighlight")
698 formatlistptr->command = comDocumentButtonHighlight;
699
700 else if (meta == "DocumentButtonExpandContents")
701 formatlistptr->command = comDocumentButtonExpandContents;
702
703 else if (meta == "DocumentButtonExpandText")
704 formatlistptr->command = comDocumentButtonExpandText;
705
706 else if (meta == "DocOID")
707 formatlistptr->command = comOID;
708 else if (meta == "DocRank")
709 formatlistptr->command = comRank;
710 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
711 formatlistptr->command = comCollection;
712 parse_coll_meta(meta, formatlistptr->meta);
713 }
714 else {
715 formatlistptr->command = comMeta;
716 parse_meta (meta, formatlistptr->meta, metadata, getParents);
717 }
718}
719
720
721static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
722 text_tset &metadata, bool &getParents) {
723
724 text_t text;
725 text_t::const_iterator here = formatstring.begin();
726 text_t::const_iterator end = formatstring.end();
727
728 while (here != end) {
729
730 if (*here == '\\') {
731 ++here;
732 if (here != end) text.push_back (*here);
733
734 } else if (*here == '{') {
735 if (!text.empty()) {
736 formatlistptr->command = comText;
737 formatlistptr->text = text;
738 formatlistptr->nextptr = new format_t();
739 formatlistptr = formatlistptr->nextptr;
740
741 text.clear();
742 }
743 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
744
745 formatlistptr->nextptr = new format_t();
746 formatlistptr = formatlistptr->nextptr;
747 if (here == end) break;
748 }
749 } else if (*here == '[') {
750 if (!text.empty()) {
751 formatlistptr->command = comText;
752 formatlistptr->text = text;
753 formatlistptr->nextptr = new format_t();
754 formatlistptr = formatlistptr->nextptr;
755
756 text.clear();
757 }
758 text_t meta;
759 ++here;
760 while (*here != ']') {
761 if (here == end) return false;
762 meta.push_back (*here);
763 ++here;
764 }
765 parse_meta (meta, formatlistptr, metadata, getParents);
766 formatlistptr->nextptr = new format_t();
767 formatlistptr = formatlistptr->nextptr;
768
769 } else
770 text.push_back (*here);
771
772 if (here != end) ++here;
773 }
774 if (!text.empty()) {
775 formatlistptr->command = comText;
776 formatlistptr->text = text;
777 formatlistptr->nextptr = new format_t();
778 formatlistptr = formatlistptr->nextptr;
779
780 }
781 return true;
782}
783
784
785static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
786 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
787
788 text_t::const_iterator it = findchar (here, end, '}');
789 if (it == end) return false;
790
791 text_t com = substr (here, it);
792 here = findchar (it, end, '{');
793 if (here == end) return false;
794 else ++here;
795
796 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
797 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
798 else return false;
799
800 int commacount = 0;
801 text_t text;
802 while (here != end) {
803
804 if (*here == '\\') {
805 ++here;
806 if (here != end) text.push_back(*here);
807
808 }
809
810 else if (*here == ',' || *here == '}' || *here == '{') {
811
812 if (formatlistptr->command == comOr) {
813 // the {Or}{this, or this, or this, or this} statement
814 format_t *or_ptr;
815
816 // find the next unused orptr
817 if (formatlistptr->orptr == NULL) {
818 formatlistptr->orptr = new format_t();
819 or_ptr = formatlistptr->orptr;
820 } else {
821 or_ptr = formatlistptr->orptr;
822 while (or_ptr->nextptr != NULL)
823 or_ptr = or_ptr->nextptr;
824 or_ptr->nextptr = new format_t();
825 or_ptr = or_ptr->nextptr;
826 }
827
828 if (!text.empty())
829 {
830 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
831 }
832
833 if (*here == '{')
834 {
835 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
836 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
837 // The latter can always be re-written:
838 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
839
840 if (!text.empty()) // already used up allocated format_t
841 {
842 // => allocate new one for detected action
843 or_ptr->nextptr = new format_t();
844 or_ptr = or_ptr->nextptr;
845 }
846 if (!parse_action(++here, end, or_ptr, metadata, getParents))
847 {
848 return false;
849 }
850 }
851 else
852 {
853 if (*here == '}') break;
854 }
855 text.clear();
856
857 }
858
859 // Parse an {If}{decide,do,else} statement
860 else {
861
862 // Read the decision component.
863 if (commacount == 0) {
864 // Decsion can be a metadata element, or a piece of text.
865 // Originally Stefan's code, updated 25/10/2000 by Gordon.
866
867 text_t::const_iterator beginbracket = text.begin();
868 text_t::const_iterator endbracket = (text.end() - 1);
869
870 // Decision is based on a metadata element
871 if ((*beginbracket == '[') && (*endbracket == ']')) {
872 // Ignore the surrounding square brackets
873 text_t meta = substr (beginbracket+1, endbracket);
874 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
875 ++commacount;
876 text.clear();
877 }
878
879 // Decision is a piece of text (probably a macro like _cgiargmode_).
880 else {
881
882 // hunt for any metadata in string, which might be uses in
883 // to test a condition, e.g. [Format] eq 'PDF'
884 format_t* dummyformat = new format_t();
885 // update which metadata fields needed
886 // (not interested in updatng formatlistptr)
887 parse_string (text, dummyformat, metadata, getParents);
888 delete dummyformat;
889
890 formatlistptr->decision.command = dText;
891 formatlistptr->decision.text = text;
892 ++commacount;
893 text.clear();
894 }
895 }
896
897 // Read the "then" and "else" components of the {If} statement.
898 else {
899 format_t** nextlistptr = NULL;
900 if (commacount == 1) {
901 nextlistptr = &formatlistptr->ifptr;
902 } else if (commacount == 2 ) {
903 nextlistptr = &formatlistptr->elseptr;
904 } else {
905 return false;
906 }
907
908 if (!text.empty()) {
909 if (*nextlistptr == NULL) {
910 *nextlistptr = new format_t();
911 } else {
912
913 // skip to the end of any format_t statements already added
914 while ((*nextlistptr)->nextptr != NULL)
915 {
916 nextlistptr = &(*nextlistptr)->nextptr;
917 }
918
919 (*nextlistptr)->nextptr = new format_t();
920 nextlistptr = &(*nextlistptr)->nextptr;
921 }
922
923 if (!parse_string (text, *nextlistptr, metadata, getParents))
924 {
925 return false;
926 }
927 text.clear();
928 }
929
930 if (*here == '{')
931 {
932 if (*nextlistptr == NULL) {
933 *nextlistptr = new format_t();
934 } else {
935 // skip to the end of any format_t statements already added
936 while ((*nextlistptr)->nextptr != NULL)
937 {
938 nextlistptr = &(*nextlistptr)->nextptr;
939 }
940
941 (*nextlistptr)->nextptr = new format_t();
942 nextlistptr = &(*nextlistptr)->nextptr;
943 }
944
945 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
946 {
947 return false;
948 }
949 }
950 else
951 {
952 if (*here == '}') break;
953 ++commacount;
954 }
955 }
956 }
957
958 } else text.push_back(*here);
959
960 if (here != end) ++here;
961 }
962
963 return true;
964}
965
966
967bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
968 text_tset &metadata, bool &getParents) {
969
970 formatlistptr->clear();
971 getParents = false;
972
973 return (parse_string (formatstring, formatlistptr, metadata, getParents));
974}
975
976// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
977// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
978static text_t get_formatted_meta_text(MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
979{
980 text_t no_ns_metaname = remove_namespace(meta.metaname);
981 text_t tmp;
982 bool first = true;
983
984 const int start_i=0;
985 const int end_i = metainfo.values.size()-1;
986
987 if (position == -1) { // all
988 for (int i=start_i; i<=end_i; ++i) {
989 if (!first) tmp += meta.siblingoptions;
990 if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[i]);
991 else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[i]);
992 else tmp += metainfo.values[i];
993 first = false;
994
995 }
996 } else {
997 if (position == -2) { // end
998 position = end_i;
999 } else if (position < start_i || position > end_i) {
1000 return "";
1001 }
1002 if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[position]);
1003 else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[position]);
1004 else tmp += metainfo.values[position];
1005 }
1006 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe (tmp);
1007 else return tmp;
1008}
1009
1010static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1011{
1012
1013 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1014 switch (meta.mqualifier.parent) {
1015 case pNone:
1016 return "Nothing!!";
1017 break;
1018
1019 case pImmediate:
1020 if (parent != NULL) {
1021 return get_formatted_meta_text(*parent, meta, siblings_values);
1022 }
1023 break;
1024
1025 case pTop:
1026 if (parent != NULL) {
1027 while (parent->parent != NULL) parent = parent->parent;
1028 return get_formatted_meta_text(*parent, meta, siblings_values);
1029 }
1030 break;
1031
1032 case pAll:
1033 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1034 if (parent != NULL) {
1035 text_tarray tmparray;
1036 while (parent != NULL) {
1037 tmparray.push_back (get_formatted_meta_text(*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1038 parent = parent->parent;
1039 }
1040 // now join them up - use teh parent separator
1041 bool first = true;
1042 text_t tmp;
1043 text_tarray::reverse_iterator here = tmparray.rbegin();
1044 text_tarray::reverse_iterator end = tmparray.rend();
1045 while (here != end) {
1046 if (!first) tmp += meta.parentoptions;
1047 tmp += *here;
1048 first = false;
1049 ++here;
1050 }
1051 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
1052 else return tmp;
1053 }
1054 }
1055 return "";
1056
1057}
1058
1059static text_t get_child_meta (const text_t& collection,
1060 recptproto* collectproto,
1061 ResultDocInfo_t &docinfo, displayclass &disp,
1062 const metadata_t &meta, text_tmap &options,
1063 ostream& logout, int siblings_values)
1064{
1065 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1066
1067 const text_t& child_metaname = meta.metaname;
1068 const text_t& child_field = meta.childoptions;
1069 text_tset child_metadata;
1070 child_metadata.insert(child_metaname);
1071
1072 FilterResponse_t child_response;
1073 if (meta.mqualifier.child == cNum) {
1074 // just one child
1075 //get the information associated with the metadata for child doc
1076 if (!get_info (docinfo.OID+child_field, collection, "", child_metadata,
1077 false, collectproto, child_response, logout)) return ""; // invalid child number
1078
1079 if (child_response.docInfo.empty()) return false; // no info for the child
1080
1081 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1082 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1083
1084 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
1085 return expand_metadata(child_metavalue,collection,collectproto,
1086 child_docinfo,disp,options,logout);
1087 }
1088
1089
1090 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1091
1092 // we need to get all children
1093 text_t result = "";
1094 text_tarray children;
1095 text_t contains = docinfo.metadata["contains"].values[0];
1096 splitchar (contains.begin(), contains.end(), ';', children);
1097 text_tarray::const_iterator here = children.begin();
1098 text_tarray::const_iterator end = children.end();
1099 bool first = true;
1100 while (here !=end) {
1101 text_t oid = *here;
1102 here++;
1103 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1104
1105
1106 //get the information associated with the metadata for child doc
1107 if (!get_info (oid, collection, "", child_metadata,
1108 false, collectproto, child_response, logout) ||
1109 child_response.docInfo.empty()) {
1110 first = false;
1111 continue;
1112 }
1113
1114
1115 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1116 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1117
1118 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
1119 if (!first) result += child_field;
1120 first = false;
1121 // need to do this here cos otherwise we are in the wrong document
1122 result += expand_metadata(child_metavalue,collection,collectproto,
1123 child_docinfo,disp,options,logout);
1124 }
1125 return result;
1126
1127}
1128
1129
// Note: all the format_date handling assumes that Date metadata is always of
// the form yyyymmdd, which is not guaranteed in practice.
1132
1133static text_t get_meta (const text_t& collection, recptproto* collectproto,
1134 ResultDocInfo_t &docinfo, displayclass &disp,
1135 const metadata_t &meta, text_tmap &options,
1136 ostream& logout) {
1137
1138 // make sure we have the requested metadata
1139 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1140 if (it == docinfo.metadata.end()) return "";
1141
1142 int siblings_values = 0; // default is no siblings, just the first metadata available
1143 if (meta.metacommand & mSibling) {
1144 if (meta.mqualifier.sibling == sAll) {
1145 siblings_values = -1; //all
1146 } else if (meta.mqualifier.sibling == sNum) {
1147 siblings_values = meta.siblingoptions.getint();
1148 }
1149 }
1150 if (meta.metacommand & mParent) {
1151 return get_parent_meta(docinfo,meta,siblings_values);
1152 }
1153
1154 else if (meta.metacommand & mChild) {
1155 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1156 options,logout, siblings_values);
1157 }
1158 else if (meta.metacommand & mSibling) { // only siblings
1159 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1160 return get_formatted_meta_text(docinfo.metadata[meta.metaname],meta, siblings_values);
1161 }
1162 else {
1163
1164 // straightforward metadata request (nothing fancy)
1165
1166 text_t classifier_metaname = docinfo.classifier_metadata_type;
1167 int metaname_index
1168 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1169 return get_formatted_meta_text(docinfo.metadata[meta.metaname], meta, metaname_index);
1170 }
1171
1172 return "";
1173}
1174
1175static text_t get_or (const text_t& collection, recptproto* collectproto,
1176 ResultDocInfo_t &docinfo, displayclass &disp,
1177 format_t *orptr, text_tmap &options,
1178 ostream& logout) {
1179
1180 text_t tmp;
1181 while (orptr != NULL) {
1182
1183 tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1184 options, logout);
1185 if (!tmp.empty()) return tmp;
1186
1187 orptr = orptr->nextptr;
1188 }
1189 return "";
1190}
1191
// Returns true if c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1197
1198static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1199{
1200 int pos = start_pos;
1201 while (pos<outstring.size()) {
1202 if (!char_is_whitespace(outstring[pos])) {
1203 break;
1204 }
1205 ++pos;
1206 }
1207
1208 return pos;
1209}
1210
1211static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1212{
1213 int pos = start_pos;
1214 while (pos>=0) {
1215 if (!char_is_whitespace(outstring[pos])) {
1216 break;
1217 }
1218 --pos;
1219 }
1220
1221 return pos;
1222}
1223
1224static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1225{
1226 int pos = start_pos;
1227 while (pos>=0) {
1228 if (char_is_whitespace(outstring[pos])) {
1229 break;
1230 }
1231 --pos;
1232 }
1233
1234 return pos;
1235}
1236
1237
1238static int rscan_for(const text_t& outstring, const int start_pos,
1239 const char find_c)
1240{
1241 int pos = start_pos;
1242 while (pos>=0) {
1243 char c = outstring[pos];
1244 if (outstring[pos] == find_c) {
1245 break;
1246 }
1247 --pos;
1248 }
1249
1250 return pos;
1251}
1252
1253text_t extract_substr(const text_t& outstring, const int start_pos,
1254 const int end_pos)
1255{
1256 text_t extracted_str;
1257 extracted_str.clear();
1258
1259 for (int pos=start_pos; pos<=end_pos; ++pos) {
1260 extracted_str.push_back(outstring[pos]);
1261 }
1262
1263 return extracted_str;
1264}
1265
1266
1267static text_t expand_potential_metadata(const text_t& collection,
1268 recptproto* collectproto,
1269 ResultDocInfo_t &docinfo,
1270 displayclass &disp,
1271 const text_t& intext,
1272 text_tmap &options,
1273 ostream& logout)
1274{
1275 text_t outtext;
1276
1277 // decide if dealing with metadata or text
1278
1279 text_t::const_iterator beginbracket = intext.begin();
1280 text_t::const_iterator endbracket = (intext.end() - 1);
1281
1282 // Decision is based on a metadata element
1283 if ((*beginbracket == '[') && (*endbracket == ']')) {
1284 // Ignore the surrounding square brackets
1285 text_t meta_text = substr (beginbracket+1, endbracket);
1286
1287 if (meta_text == "Text") {
1288 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1289 } else {
1290
1291 text_tset metadata;
1292 bool getParents =false;
1293 metadata_t meta;
1294
1295 parse_meta (meta_text, meta, metadata, getParents);
1296 outtext
1297 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1298 }
1299
1300 }
1301 else {
1302 outtext = intext;
1303 }
1304
1305 return outtext;
1306}
1307
1308
1309
1310
1311static bool uses_expression(const text_t& collection, recptproto* collectproto,
1312 ResultDocInfo_t &docinfo,
1313 displayclass &disp,
1314 const text_t& outstring, text_t& lhs_expr,
1315 text_t& op_expr, text_t& rhs_expr,
1316 text_tmap &options,
1317 ostream& logout)
1318{
1319 // Note: the string may not be of the form: str1 op str2, however
1320 // to deterine this we have to process it on the assumption it is,
1321 // and if at any point an 'erroneous' value is encountered, return
1322 // false and let something else have a go at evaluating it
1323
1324 // Starting at the end of the string and working backwards ..
1325
1326 const int outstring_len = outstring.size();
1327
1328 // skip over white space
1329 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1330
1331 if (rhs_end<=0) {
1332 // no meaningful text or (rhs_end==0) no room for operator
1333 return false;
1334 }
1335
1336 // check for ' or " and then scan over token
1337 const char potential_quote = outstring[rhs_end];
1338 int rhs_start=rhs_end;
1339 bool quoted = false;
1340
1341 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1342 --rhs_end;
1343 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1344 quoted = true;
1345 }
1346 else {
1347 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1348 }
1349
1350 if ((rhs_end-rhs_start)<0) {
1351 // no meaningful rhs expression
1352 return false;
1353 }
1354
1355 // form rhs_expr
1356 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1357
1358 // skip over white space
1359 const int to_whitespace = (quoted) ? 2 : 1;
1360
1361 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1362 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1363
1364
1365 if (op_end-op_start<0) {
1366 // no meaningful expression operator
1367 return false;
1368 }
1369
1370 op_expr = extract_substr(outstring,op_start,op_end);
1371
1372
1373 // check for operator
1374 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1375 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1376
1377 // not a valid operator
1378 return false;
1379 }
1380
1381 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1382 if (lhs_end<0) {
1383 // no meaningful lhs expression
1384 return false;
1385 }
1386
1387 int lhs_start = scan_over_whitespace(outstring,0);
1388
1389 // form lhs_expr from remainder of string
1390 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1391
1392 // Now we know we have a valid expression, look up any
1393 // metadata terms
1394
1395 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1396 disp,rhs_expr,options,logout);
1397 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1398 disp,lhs_expr,options,logout);
1399
1400 return true;
1401}
1402
1403static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1404 const text_t& rhs_expr, ostream& logout)
1405{
1406 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1407 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1408 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1409 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1410 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1411 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1412 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1413 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1414 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1415 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1416 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1417 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1418 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1419 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1420 else {
1421 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1422 }
1423
1424 return false;
1425}
1426
1427
1428static text_t get_if (const text_t& collection, recptproto* collectproto,
1429 ResultDocInfo_t &docinfo, displayclass &disp,
1430 const decision_t &decision,
1431 format_t *ifptr, format_t *elseptr,
1432 text_tmap &options, ostream& logout)
1433{
1434 // If the decision component is a metadata element, then evaluate it
1435 // to see whether we output the "then" or the "else" clause
1436 if (decision.command == dMeta) {
1437 if (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1438 logout) != "") {
1439 if (ifptr != NULL)
1440 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1441 options, logout);
1442 }
1443 else {
1444 if (elseptr != NULL)
1445 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1446 options, logout);
1447 }
1448 }
1449
1450 // If the decision component is text, then evaluate it (it is probably a
1451 // macro like _cgiargmode_) to decide what to output.
1452 else if (decision.command == dText) {
1453
1454 text_t outstring;
1455 disp.expandstring (decision.text, outstring);
1456
1457 // Check for if expression in form: str1 op str2
1458 // (such as [x] eq "y")
1459 text_t lhs_expr, op_expr, rhs_expr;
1460 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1461 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1462 if (ifptr != NULL) {
1463 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1464 options, logout);
1465 }
1466 else {
1467 return "";
1468 }
1469 } else {
1470 if (elseptr != NULL) {
1471 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1472 options, logout);
1473 }
1474 else {
1475 return "";
1476 }
1477 }
1478 }
1479
1480
1481 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1482 // a cgi argument macro that has not been set, it evaluates to itself.
1483 // Therefore, were have to say that a piece of text evalautes true if
1484 // it is non-empty and if it is a cgi argument evaulating to itself.
1485
1486 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1487 if (ifptr != NULL)
1488 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1489 options, logout);
1490 } else {
1491 if (elseptr != NULL)
1492 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1493 options, logout);
1494 }
1495 }
1496
1497 return "";
1498}
1499
1500bool includes_metadata(const text_t& text)
1501{
1502 text_t::const_iterator here = text.begin();
1503 text_t::const_iterator end = text.end();
1504 while (here != end) {
1505 if (*here == '[') return true;
1506 ++here;
1507 }
1508
1509 return false;
1510}
1511
1512static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1513 recptproto* collectproto,
1514 ResultDocInfo_t &docinfo,
1515 displayclass &disp, text_tmap &options,
1516 ostream &logout) {
1517
1518 if (includes_metadata(metavalue)) {
1519
1520 // text has embedded metadata in it => expand it
1521 FilterRequest_t request;
1522 FilterResponse_t response;
1523
1524 request.getParents = false;
1525
1526 format_t *expanded_formatlistptr = new format_t();
1527 parse_formatstring (metavalue, expanded_formatlistptr,
1528 request.fields, request.getParents);
1529
1530 // retrieve metadata
1531 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1532 collectproto, response, logout);
1533
1534 if (!response.docInfo.empty()) {
1535
1536 text_t expanded_metavalue
1537 = get_formatted_string(collection, collectproto,
1538 response.docInfo[0], disp, expanded_formatlistptr,
1539 options, logout);
1540
1541 return expanded_metavalue;
1542 }
1543 else {
1544 return metavalue;
1545 }
1546 }
1547 else {
1548
1549 return metavalue;
1550 }
1551}
1552
1553text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1554 displayclass &disp,
1555 text_t meta_name, ostream& logout) {
1556
1557 ColInfoResponse_t collectinfo;
1558 comerror_t err;
1559 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1560 text_t meta_value = "";
1561 text_t lang;
1562 disp.expandstring("_cgiargl_",lang);
1563 if (lang.empty()) {
1564 lang = "en";
1565 }
1566
1567 if (err == noError) {
1568 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1569 }
1570 return meta_value;
1571
1572
1573}
1574text_t format_string (const text_t& collection, recptproto* collectproto,
1575 ResultDocInfo_t &docinfo, displayclass &disp,
1576 format_t *formatlistptr, text_tmap &options,
1577 ostream& logout) {
1578
1579 if (formatlistptr == NULL) return "";
1580
1581 switch (formatlistptr->command) {
1582 case comOID:
1583 return docinfo.OID;
1584 case comRank:
1585 return text_t(docinfo.ranking);
1586 case comText:
1587 return formatlistptr->text;
1588 case comLink:
1589 return options["link"];
1590 case comEndLink:
1591 if (options["link"].empty()) return "";
1592 else return "</a>";
1593 case comHref:
1594 return get_href(options["link"]);
1595 case comIcon:
1596 return options["icon"];
1597 case comNum:
1598 return docinfo.result_num;
1599 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1600 return get_related_docs(collection, collectproto, docinfo, logout);
1601 case comSummary:
1602 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1603 case comMeta:
1604 {
1605 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1606 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1607 }
1608 case comDoc:
1609 return format_text(collection, collectproto, docinfo, disp, options, logout);
1610 //return options["text"];
1611 case comImage:
1612 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1613 case comTOC:
1614 return options["DocTOC"];
1615 case comDocumentButtonDetach:
1616 return options["DocumentButtonDetach"];
1617 case comDocumentButtonHighlight:
1618 return options["DocumentButtonHighlight"];
1619 case comDocumentButtonExpandContents:
1620 return options["DocumentButtonExpandContents"];
1621 case comDocumentButtonExpandText:
1622 return options["DocumentButtonExpandText"];
1623 case comHighlight:
1624 if (options["highlight"] == "1") return "<b>";
1625 break;
1626 case comEndHighlight:
1627 if (options["highlight"] == "1") return "</b>";
1628 break;
1629 case comIf:
1630 return get_if (collection, collectproto, docinfo, disp,
1631 formatlistptr->decision, formatlistptr->ifptr,
1632 formatlistptr->elseptr, options, logout);
1633 case comOr:
1634 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1635 options, logout);
1636 case comCollection:
1637 if (formatlistptr->meta.metaname == g_EmptyText) {
1638 return collection;
1639 }
1640 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1641
1642 }
1643 return "";
1644}
1645
1646text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1647 ResultDocInfo_t &docinfo, displayclass &disp,
1648 format_t *formatlistptr, text_tmap &options,
1649 ostream& logout) {
1650
1651 text_t ft;
1652 while (formatlistptr != NULL)
1653 {
1654 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1655 options, logout);
1656 formatlistptr = formatlistptr->nextptr;
1657 }
1658
1659 return ft;
1660}
1661
1662
1663// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1664text_t format_text (const text_t& collection, recptproto* collectproto,
1665 ResultDocInfo_t &docinfo, displayclass &disp,
1666 text_tmap &options, ostream& logout) {
1667 if(!options["text"].empty()) {
1668 return options["text"];
1669 }
1670 // else get document text here
1671 DocumentRequest_t docrequest;
1672 DocumentResponse_t docresponse;
1673 comerror_t err;
1674 docrequest.OID = docinfo.OID;
1675 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1676 return docresponse.doc;
1677
1678}
1679
1680/* FUNCTION NAME: format_summary
1681 * DESC: this is invoked when a [Summary] special metadata is processed.
1682 * RETURNS: a query-biased summary for the document */
1683
1684text_t format_summary (const text_t& collection, recptproto* collectproto,
1685 ResultDocInfo_t &docinfo, displayclass &disp,
1686 text_tmap &options, ostream& logout) {
1687
1688 // GRB: added code here to ensure that the cstr (and other collections)
1689 // uses the document metadata item Summary, rather than compressing
1690 // the text of the document, processed via the methods in
1691 // summarise.cpp
1692 if (docinfo.metadata.count("Summary") > 0 &&
1693 docinfo.metadata["Summary"].values.size() > 0) {
1694 return docinfo.metadata["Summary"].values[0];
1695 }
1696
1697 text_t textToSummarise, query;
1698 if(options["text"].empty()) { // get document text
1699 DocumentRequest_t docrequest;
1700 DocumentResponse_t docresponse;
1701 comerror_t err;
1702 docrequest.OID = docinfo.OID;
1703 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1704 textToSummarise = docresponse.doc;
1705 } else // in practice, this would not happen, because text is only
1706 // loaded with the [Text] command
1707 textToSummarise = options["text"];
1708 disp.expandstring("_cgiargq_",query);
1709 return summarise(textToSummarise,query,80);
1710}
Note: See TracBrowser for help on using the repository browser.