source: trunk/gsdl/src/recpt/formattools.cpp@ 5017

Last change on this file since 5017 was 4972, checked in by sjboddie, 21 years ago

More changes to the new document formatting code. Added an
AllowExtendedOptions format option which must be set to allow other
new options to work.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 33.4 KB
Line 
/**********************************************************************
 *
 * formattools.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "OIDtools.h"
29#include "summarise.h"
30
31#include <assert.h>
32
33// a few function prototypes
34
35static text_t format_string (const text_t& collection, recptproto* collectproto,
36 ResultDocInfo_t &docinfo, displayclass &disp,
37 format_t *formatlistptr, text_tmap &options,
38 ostream& logout);
39
40static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
41 format_t *formatlistptr, text_tset &metadata, bool &getParents);
42
43static text_t format_summary (const text_t& collection, recptproto* collectproto,
44 ResultDocInfo_t &docinfo, displayclass &disp,
45 text_tmap &options, ostream& logout);
46
47
48void metadata_t::clear() {
49 metaname.clear();
50 metacommand = mNone;
51 parentcommand = pNone;
52 parentoptions.clear();
53}
54
55void decision_t::clear() {
56 command = dMeta;
57 meta.clear();
58 text.clear();
59}
60
61void format_t::clear() {
62 command = comText;
63 decision.clear();
64 text.clear();
65 meta.clear();
66 nextptr = NULL;
67 ifptr = NULL;
68 elseptr = NULL;
69 orptr = NULL;
70}
71
72void formatinfo_t::clear() {
73 DocumentImages = false;
74 DocumentTitles = true;
75 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
76 DocumentContents = true;
77 DocumentArrowsBottom = true;
78 DocumentArrowsTop = false;
79 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
80 // DocumentButtons.push_back ("Expand Text");
81 // DocumentButtons.push_back ("Expand Contents");
82 DocumentButtons.push_back ("Detach");
83 DocumentButtons.push_back ("Highlight");
84 RelatedDocuments = "";
85 DocumentText = "<center><table width=_pagewidth_><tr><td>[Text]</td></tr></table></center>";
86 formatstrings.erase (formatstrings.begin(), formatstrings.end());
87 DocumentUseHTML = false;
88 AllowExtendedOptions = false;
89}
90
91// simply checks to see if formatstring begins with a <td> tag
92bool is_table_content (const text_t &formatstring) {
93 text_t::const_iterator here = formatstring.begin();
94 text_t::const_iterator end = formatstring.end();
95
96 while (here != end) {
97 if (*here != ' ') {
98 if ((*here == '<') && ((here+3) < end)) {
99 if ((*(here+1) == 't' || *(here+1) == 'T') &&
100 (*(here+2) == 'd' || *(here+2) == 'D') &&
101 (*(here+3) == '>' || *(here+3) == ' '))
102 return true;
103 } else return false;
104 }
105 here ++;
106 }
107 return false;
108}
109
110bool is_table_content (const format_t *formatlistptr) {
111
112 if (formatlistptr == NULL) return false;
113
114 if (formatlistptr->command == comText)
115 return is_table_content (formatlistptr->text);
116
117 return false;
118}
119
120// returns false if key isn't in formatstringmap
121bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
122 text_t &formatstring) {
123
124 formatstring.clear();
125 text_tmap::const_iterator it = formatstringmap.find(key);
126 if (it == formatstringmap.end()) return false;
127 formatstring = (*it).second;
128 return true;
129}
130
131// tries to find "key1key2" then "key1" then "key2"
132bool get_formatstring (const text_t &key1, const text_t &key2,
133 const text_tmap &formatstringmap,
134 text_t &formatstring) {
135
136 formatstring.clear();
137 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
138 if (it != formatstringmap.end()) {
139 formatstring = (*it).second;
140 return true;
141 }
142 it = formatstringmap.find(key1);
143 if (it != formatstringmap.end()) {
144 formatstring = (*it).second;
145 return true;
146 }
147 it = formatstringmap.find(key2);
148 if (it != formatstringmap.end()) {
149 formatstring = (*it).second;
150 return true;
151 }
152 return false;
153}
154
155
156// returns a date of form 31 _textmonthnn_ 1999
157// input is date of type 19991231
158// at least the year must be present in date
159text_t format_date (const text_t &date) {
160
161 if (date.size() < 4) return "";
162
163 text_t::const_iterator datebegin = date.begin();
164
165 text_t year = substr (datebegin, datebegin+4);
166
167 if (date.size() < 6) return year;
168
169 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
170 int imonth = month.getint();
171 if (imonth < 0 || imonth > 12) return year;
172
173 if (date.size() < 8) return month + " " + year;
174
175 text_t day = substr (datebegin+6, datebegin+8);
176 if (day[0] == '0') day = substr (day.begin()+1, day.end());
177 int iday = day.getint();
178 if (iday < 0 || iday > 31) return month + " " + year;
179
180 return day + " " + month + " " + year;
181}
182
183// converts an iso639 language code to its English equivalent
184// I realize that this isn't the pretiest or most efficient implementation,
185// hopefully this ugly Language (and Date too) formatting won't survive to
186// see gsdl-3.0
187text_t iso639 (const text_t &langcode) {
188
189 if (langcode == "aa") return "Afar";
190 if (langcode == "ab") return "Abkhazian";
191 if (langcode == "af") return "Afrikaans";
192 if (langcode == "am") return "Amharic";
193 if (langcode == "ar") return "Arabic";
194 if (langcode == "as") return "Assamese";
195 if (langcode == "ay") return "Aymara";
196 if (langcode == "az") return "Azerbaijani";
197
198 if (langcode == "ba") return "Bashkir";
199 if (langcode == "be") return "Byelorussian";
200 if (langcode == "bg") return "Bulgarian";
201 if (langcode == "bh") return "Bihari";
202 if (langcode == "bi") return "Bislama";
203 if (langcode == "bn") return "Bengali; Bangla";
204 if (langcode == "bo") return "Tibetan";
205 if (langcode == "br") return "Breton";
206
207 if (langcode == "ca") return "Catalan";
208 if (langcode == "co") return "Corsican";
209 if (langcode == "cs") return "Czech";
210 if (langcode == "cy") return "Welsh";
211
212 if (langcode == "da") return "Danish";
213 if (langcode == "de") return "German";
214 if (langcode == "dz") return "Bhutani";
215
216 if (langcode == "el") return "Greek";
217 if (langcode == "en") return "English";
218 if (langcode == "eo") return "Esperanto";
219 if (langcode == "es") return "Spanish";
220 if (langcode == "et") return "Estonian";
221 if (langcode == "eu") return "Basque";
222
223 if (langcode == "fa") return "Persian";
224 if (langcode == "fi") return "Finnish";
225 if (langcode == "fj") return "Fiji";
226 if (langcode == "fo") return "Faroese";
227 if (langcode == "fr") return "French";
228 if (langcode == "fy") return "Frisian";
229
230 if (langcode == "ga") return "Irish";
231 if (langcode == "gd") return "Scots Gaelic";
232 if (langcode == "gl") return "Galician";
233 if (langcode == "gn") return "Guarani";
234 if (langcode == "gu") return "Gujarati";
235
236 if (langcode == "ha") return "Hausa";
237 if (langcode == "hi") return "Hindi";
238 if (langcode == "hr") return "Croatian";
239 if (langcode == "hu") return "Hungarian";
240 if (langcode == "hy") return "Armenian";
241
242 if (langcode == "ia") return "Interlingua";
243 if (langcode == "ie") return "Interlingue";
244 if (langcode == "ik") return "Inupiak";
245 if (langcode == "in") return "Indonesian";
246 if (langcode == "is") return "Icelandic";
247 if (langcode == "it") return "Italian";
248 if (langcode == "iw") return "Hebrew";
249
250 if (langcode == "ja") return "Japanese";
251 if (langcode == "ji") return "Yiddish";
252 if (langcode == "jw") return "Javanese";
253
254 if (langcode == "ka") return "Georgian";
255 if (langcode == "kk") return "Kazakh";
256 if (langcode == "kl") return "Greenlandic";
257 if (langcode == "km") return "Cambodian";
258 if (langcode == "kn") return "Kannada";
259 if (langcode == "ko") return "Korean";
260 if (langcode == "ks") return "Kashmiri";
261 if (langcode == "ku") return "Kurdish";
262 if (langcode == "ky") return "Kirghiz";
263
264 if (langcode == "la") return "Latin";
265 if (langcode == "ln") return "Lingala";
266 if (langcode == "lo") return "Laothian";
267 if (langcode == "lt") return "Lithuanian";
268 if (langcode == "lv") return "Latvian, Lettish";
269
270 if (langcode == "mg") return "Malagasy";
271 if (langcode == "mi") return "Maori";
272 if (langcode == "mk") return "Macedonian";
273 if (langcode == "ml") return "Malayalam";
274 if (langcode == "mn") return "Mongolian";
275 if (langcode == "mo") return "Moldavian";
276 if (langcode == "mr") return "Marathi";
277 if (langcode == "ms") return "Malay";
278 if (langcode == "mt") return "Maltese";
279 if (langcode == "my") return "Burmese";
280
281 if (langcode == "na") return "Nauru";
282 if (langcode == "ne") return "Nepali";
283 if (langcode == "nl") return "Dutch";
284 if (langcode == "no") return "Norwegian";
285
286 if (langcode == "oc") return "Occitan";
287 if (langcode == "om") return "(Afan) Oromo";
288 if (langcode == "or") return "Oriya";
289
290 if (langcode == "pa") return "Punjabi";
291 if (langcode == "pl") return "Polish";
292 if (langcode == "ps") return "Pashto, Pushto";
293 if (langcode == "pt") return "Portuguese";
294
295 if (langcode == "qu") return "Quechua";
296 if (langcode == "rm") return "Rhaeto-Romance";
297 if (langcode == "rn") return "Kirundi";
298 if (langcode == "ro") return "Romanian";
299 if (langcode == "ru") return "Russian";
300 if (langcode == "rw") return "Kinyarwanda";
301
302 if (langcode == "sa") return "Sanskrit";
303 if (langcode == "sd") return "Sindhi";
304 if (langcode == "sg") return "Sangro";
305 if (langcode == "sh") return "Serbo-Croatian";
306 if (langcode == "si") return "Singhalese";
307 if (langcode == "sk") return "Slovak";
308 if (langcode == "sl") return "Slovenian";
309 if (langcode == "sm") return "Samoan";
310 if (langcode == "sn") return "Shona";
311 if (langcode == "so") return "Somali";
312 if (langcode == "sq") return "Albanian";
313 if (langcode == "sr") return "Serbian";
314 if (langcode == "ss") return "Siswati";
315 if (langcode == "st") return "Sesotho";
316 if (langcode == "su") return "Sudanese";
317 if (langcode == "sv") return "Swedish";
318 if (langcode == "sw") return "Swahili";
319
320 if (langcode == "ta") return "Tamil";
321 if (langcode == "te") return "Tegulu";
322 if (langcode == "tg") return "Tajik";
323 if (langcode == "th") return "Thai";
324 if (langcode == "ti") return "Tigrinya";
325 if (langcode == "tk") return "Turkmen";
326 if (langcode == "tl") return "Tagalog";
327 if (langcode == "tn") return "Setswana";
328 if (langcode == "to") return "Tonga";
329 if (langcode == "tr") return "Turkish";
330 if (langcode == "ts") return "Tsonga";
331 if (langcode == "tt") return "Tatar";
332 if (langcode == "tw") return "Twi";
333
334 if (langcode == "uk") return "Ukrainian";
335 if (langcode == "ur") return "Urdu";
336 if (langcode == "uz") return "Uzbek";
337
338 if (langcode == "vi") return "Vietnamese";
339 if (langcode == "vo") return "Volapuk";
340
341 if (langcode == "wo") return "Wolof";
342
343 if (langcode == "xh") return "Xhosa";
344
345 if (langcode == "yo") return "Yoruba";
346
347 if (langcode == "zh") return "Chinese";
348 if (langcode == "zu") return "Zulu";
349 return "";
350}
351
352text_t get_href (const text_t &link) {
353
354 text_t href;
355
356 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
357 text_t::const_iterator end = link.end();
358
359 here ++;
360 while (here != end) {
361 if (*here == '"') break;
362 href.push_back(*here);
363 here ++;
364 }
365
366 return href;
367}
368
369//this function gets the information associated with the relation
370//metadata for the document associated with 'docinfo'. This relation
371//metadata consists of a line of pairs containing 'collection, document OID'
372//(this is the OID of the document related to the current document, and
373//the collection the related document belongs to). For each of these pairs
374//the title metadata is obtained and then an html link between the title
375//of the related doc and the document's position (the document will be
376//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
377//(where collection is the related documents collection, and OID is the
378//related documents OID). A list of these html links are made for as many
379//related documents as there are. This list is then returned. If there are
380//no related documents available for the current document then the string
381//'.. no related documents .. ' is returned.
382text_t get_related_docs(const text_t& collection, recptproto* collectproto,
383 ResultDocInfo_t &docinfo, ostream& logout){
384
385 text_tset metadata;
386
387 //insert the metadata we wish to collect
388 metadata.insert("relation");
389 metadata.insert("Title");
390 metadata.insert("Subject"); //for emails, where title data doesn't apply
391
392 FilterResponse_t response;
393 text_t relation = ""; //string for displaying relation metadata
394 text_t relationTitle = ""; //the related documents Title (or subject)
395 text_t relationOID = ""; //the related documents OID
396
397 //get the information associated with the metadata for current doc
398 if (get_info (docinfo.OID, collection, metadata,
399 false, collectproto, response, logout)) {
400
401 //if the relation metadata exists, store for displaying
402 if(!response.docInfo[0].metadata["relation"].values.empty()){
403 relationOID += response.docInfo[0].metadata["relation"].values[0];
404
405 //split relation data into pairs of collectionname,ID number
406 text_tarray relationpairs;
407 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
408
409 text_tarray::const_iterator currDoc = relationpairs.begin();
410 text_tarray::const_iterator lastDoc = relationpairs.end();
411
412 //iterate through the pairs to split and display
413 while(currDoc != lastDoc){
414
415 //split pairs into collectionname and ID
416 text_tarray relationdata;
417 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
418
419 //get first element in the array (collection)
420 text_tarray::const_iterator doc_data = relationdata.begin();
421 text_t document_collection = *doc_data;
422 doc_data++; //increment to get next item in array (oid)
423 text_t document_OID = *doc_data;
424
425 //create html link to related document
426 relation += "<a href=\"_httpdocument_&c=" + document_collection;
427 relation += "&cl=search&d=" + document_OID;
428
429 //get the information associated with the metadata for related doc
430 if (get_info (document_OID, document_collection, metadata,
431 false, collectproto, response, logout)) {
432
433 //if title metadata doesn't exist, collect subject metadata
434 //if that doesn't exist, just call it 'related document'
435 if (!response.docInfo[0].metadata["Title"].values[0].empty())
436 relationTitle = response.docInfo[0].metadata["Title"].values[0];
437 else if (!response.docInfo[0].metadata["Subject"].values.empty())
438 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
439 else relationTitle = "RELATED DOCUMENT";
440
441 }
442
443 //link the related document's title to its page
444 relation += "\">" + relationTitle + "</a>";
445 relation += " (" + document_collection + ")<br>";
446
447 currDoc++;
448 }
449 }
450
451 }
452
453 if(relation.empty()) //no relation data for documnet
454 relation = ".. no related documents .. ";
455
456 return relation;
457}
458
459
460
461static void get_parent_options (text_t &instring, metadata_t &metaoption) {
462
463 assert (instring.size() > 7);
464 if (instring.size() <= 7) return;
465
466 text_t meta, com, op;
467 bool inbraces = false;
468 bool inquotes = false;
469 bool foundcolon = false;
470 text_t::const_iterator here = instring.begin()+6;
471 text_t::const_iterator end = instring.end();
472 while (here != end) {
473 if (*here == '(') inbraces = true;
474 else if (*here == ')') inbraces = false;
475 else if (*here == '\'' && !inquotes) inquotes = true;
476 else if (*here == '\'' && inquotes) inquotes = false;
477 else if (*here == ':' && !inbraces) foundcolon = true;
478 else if (foundcolon) meta.push_back (*here);
479 else if (inquotes) op.push_back (*here);
480 else com.push_back (*here);
481 here ++;
482 }
483 instring = meta;
484 if (com.empty())
485 metaoption.parentcommand = pImmediate;
486 else if (com == "Top")
487 metaoption.parentcommand = pTop;
488 else if (com == "All") {
489 metaoption.parentcommand = pAll;
490 metaoption.parentoptions = op;
491 }
492}
493
494static void parse_meta (text_t &meta, metadata_t &metaoption,
495 text_tset &metadata, bool &getParents) {
496
497 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
498 metaoption.metacommand = mCgiSafe;
499 meta = substr (meta.begin()+8, meta.end());
500 }
501
502 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
503 getParents = true;
504 get_parent_options (meta, metaoption);
505 }
506
507 metadata.insert (meta);
508 metaoption.metaname = meta;
509}
510
511static void parse_meta (text_t &meta, format_t *formatlistptr,
512 text_tset &metadata, bool &getParents) {
513
514 if (meta == "link")
515 formatlistptr->command = comLink;
516 else if (meta == "/link")
517 formatlistptr->command = comEndLink;
518
519 else if (meta == "href")
520 formatlistptr->command = comHref;
521
522 else if (meta == "num")
523 formatlistptr->command = comNum;
524
525 else if (meta == "icon")
526 formatlistptr->command = comIcon;
527
528 else if (meta == "Text")
529 formatlistptr->command = comDoc;
530
531 else if (meta == "RelatedDocuments")
532 formatlistptr->command = comRel;
533
534 else if (meta == "highlight")
535 formatlistptr->command = comHighlight;
536
537 else if (meta == "/highlight")
538 formatlistptr->command = comEndHighlight;
539
540 else if (meta == "Summary")
541 formatlistptr->command = comSummary;
542
543 else if (meta == "DocImage")
544 formatlistptr->command = comImage;
545
546 else if (meta == "DocTOC")
547 formatlistptr->command = comTOC;
548
549 else if (meta == "DocumentButtonDetach")
550 formatlistptr->command = comDocumentButtonDetach;
551
552 else if (meta == "DocumentButtonHighlight")
553 formatlistptr->command = comDocumentButtonHighlight;
554
555 else if (meta == "DocumentButtonExpandContents")
556 formatlistptr->command = comDocumentButtonExpandContents;
557
558 else if (meta == "DocumentButtonExpandText")
559 formatlistptr->command = comDocumentButtonExpandText;
560
561 else {
562 formatlistptr->command = comMeta;
563 parse_meta (meta, formatlistptr->meta, metadata, getParents);
564 }
565}
566
567static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
568 text_tset &metadata, bool &getParents) {
569
570 text_t text;
571 text_t::const_iterator here = formatstring.begin();
572 text_t::const_iterator end = formatstring.end();
573
574 while (here != end) {
575
576 if (*here == '\\') {
577 here ++;
578 if (here != end) text.push_back (*here);
579
580 } else if (*here == '{') {
581 if (!text.empty()) {
582 formatlistptr->command = comText;
583 formatlistptr->text = text;
584 formatlistptr->nextptr = new format_t();
585 formatlistptr = formatlistptr->nextptr;
586
587 text.clear();
588 }
589 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
590
591 formatlistptr->nextptr = new format_t();
592 formatlistptr = formatlistptr->nextptr;
593 if (here == end) break;
594 }
595 } else if (*here == '[') {
596 if (!text.empty()) {
597 formatlistptr->command = comText;
598 formatlistptr->text = text;
599 formatlistptr->nextptr = new format_t();
600 formatlistptr = formatlistptr->nextptr;
601
602 text.clear();
603 }
604 text_t meta;
605 here ++;
606 while (*here != ']') {
607 if (here == end) return false;
608 meta.push_back (*here);
609 here ++;
610 }
611 parse_meta (meta, formatlistptr, metadata, getParents);
612 formatlistptr->nextptr = new format_t();
613 formatlistptr = formatlistptr->nextptr;
614
615 } else
616 text.push_back (*here);
617
618 if (here != end) here ++;
619 }
620 if (!text.empty()) {
621 formatlistptr->command = comText;
622 formatlistptr->text = text;
623 formatlistptr->nextptr = new format_t();
624 formatlistptr = formatlistptr->nextptr;
625
626 }
627 return true;
628}
629
630
631static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
632 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
633
634 text_t::const_iterator it = findchar (here, end, '}');
635 if (it == end) return false;
636
637 text_t com = substr (here, it);
638 here = findchar (it, end, '{');
639 if (here == end) return false;
640 else here ++;
641
642 if (com == "If") formatlistptr->command = comIf;
643 else if (com == "Or") formatlistptr->command = comOr;
644 else return false;
645
646 int commacount = 0;
647 text_t text;
648 while (here != end) {
649
650 if (*here == '\\') {
651 here++;
652 if (here != end) text.push_back(*here);
653
654 }
655
656 else if (*here == ',' || *here == '}' || *here == '{') {
657
658 if (formatlistptr->command == comOr) {
659 // the {Or}{this, or this, or this, or this} statement
660 format_t *or_ptr;
661
662 // find the next unused orptr
663 if (formatlistptr->orptr == NULL) {
664 formatlistptr->orptr = new format_t();
665 or_ptr = formatlistptr->orptr;
666 } else {
667 or_ptr = formatlistptr->orptr;
668 while (or_ptr->nextptr != NULL)
669 or_ptr = or_ptr->nextptr;
670 or_ptr->nextptr = new format_t();
671 or_ptr = or_ptr->nextptr;
672 }
673
674 if (!text.empty())
675 {
676 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
677 }
678
679 if (*here == '{')
680 {
681 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
682 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
683 // The latter can always be re-written:
684 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
685
686 if (!text.empty()) // already used up allocated format_t
687 {
688 // => allocate new one for detected action
689 or_ptr->nextptr = new format_t();
690 or_ptr = or_ptr->nextptr;
691 }
692 if (!parse_action(++here, end, or_ptr, metadata, getParents))
693 {
694 return false;
695 }
696 }
697 else
698 {
699 if (*here == '}') break;
700 }
701 text.clear();
702
703 }
704
705 // Parse an {If}{decide,do,else} statement
706 else {
707
708 // Read the decision component.
709 if (commacount == 0) {
710 // Decsion can be a metadata element, or a piece of text.
711 // Originally Stefan's code, updated 25/10/2000 by Gordon.
712
713 text_t::const_iterator beginbracket = text.begin();
714 text_t::const_iterator endbracket = (text.end() - 1);
715
716 // Decision is based on a metadata element
717 if ((*beginbracket == '[') && (*endbracket == ']')) {
718 // Ignore the surrounding square brackets
719 text_t meta = substr (beginbracket+1, endbracket);
720 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
721 commacount ++;
722 text.clear();
723 }
724
725 // Decision is a piece of text (probably a macro like _cgiargmode_).
726 else {
727 formatlistptr->decision.command = dText;
728 formatlistptr->decision.text = text;
729 commacount ++;
730 text.clear();
731 }
732 }
733
734 // Read the "then" and "else" components of the {If} statement.
735 else {
736 format_t** nextlistptr = NULL;
737 if (commacount == 1) {
738 nextlistptr = &formatlistptr->ifptr;
739 } else if (commacount == 2 ) {
740 nextlistptr = &formatlistptr->elseptr;
741 } else {
742 return false;
743 }
744
745 if (!text.empty()) {
746 if (*nextlistptr == NULL) {
747 *nextlistptr = new format_t();
748 } else {
749
750 // skip to the end of any format_t statements already added
751 while ((*nextlistptr)->nextptr != NULL)
752 {
753 nextlistptr = &(*nextlistptr)->nextptr;
754 }
755
756 (*nextlistptr)->nextptr = new format_t();
757 nextlistptr = &(*nextlistptr)->nextptr;
758 }
759
760 if (!parse_string (text, *nextlistptr, metadata, getParents))
761 {
762 return false;
763 }
764 text.clear();
765 }
766
767 if (*here == '{')
768 {
769 if (*nextlistptr == NULL) {
770 *nextlistptr = new format_t();
771 } else {
772 (*nextlistptr)->nextptr = new format_t();
773 nextlistptr = &(*nextlistptr)->nextptr;
774 }
775
776 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
777 {
778 return false;
779 }
780 }
781 else
782 {
783 if (*here == '}') break;
784 commacount ++;
785 }
786 }
787 }
788
789 } else text.push_back(*here);
790
791 if (here != end) here ++;
792 }
793
794 return true;
795}
796
797
798bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
799 text_tset &metadata, bool &getParents) {
800
801 formatlistptr->clear();
802 getParents = false;
803
804 return (parse_string (formatstring, formatlistptr, metadata, getParents));
805}
806
807
808// note: all the format_date stuff is assuming that all Date metadata is going to
809// be of the form yyyymmdd, this is of course, crap ;)
810
811static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
812
813 // make sure we have the requested metadata
814 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
815 if (it == docinfo.metadata.end()) return "";
816
817 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
818
819 switch (meta.parentcommand) {
820 case pNone:
821 {
822 text_t classifier_metaname = docinfo.classifier_metadata_type;
823 int metaname_index
824 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
825 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
826
827 if (meta.metaname == "Date")
828 return format_date (metadata_item);
829 else if (meta.metaname == "Language")
830 return iso639(metadata_item);
831 if (meta.metacommand == mCgiSafe)
832 return cgi_safe (metadata_item);
833 else return metadata_item;
834 }
835
836 case pImmediate:
837 if (parent != NULL) {
838 if (meta.metaname == "Date")
839 return format_date (parent->values[0]);
840 if (meta.metacommand == mCgiSafe)
841 return cgi_safe (parent->values[0]);
842 else return parent->values[0];
843 }
844 break;
845
846 case pTop:
847 if (parent != NULL) {
848 while (parent->parent != NULL) parent = parent->parent;
849
850 if (meta.metaname == "Date")
851 return format_date (parent->values[0]);
852 if (meta.metacommand == mCgiSafe)
853 return cgi_safe (parent->values[0]);
854 else return parent->values[0];
855 }
856 break;
857
858 case pAll:
859 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
860 if (parent != NULL) {
861 text_tarray tmparray;
862 while (parent != NULL) {
863 tmparray.push_back (parent->values[0]);
864 parent = parent->parent;
865 }
866 bool first = true;
867 text_t tmp;
868 text_tarray::reverse_iterator here = tmparray.rbegin();
869 text_tarray::reverse_iterator end = tmparray.rend();
870 while (here != end) {
871 if (!first) tmp += meta.parentoptions;
872 if (meta.metaname == "Date") tmp += format_date (*here);
873 else tmp += *here;
874 first = false;
875 here ++;
876 }
877 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
878 else return tmp;
879 }
880 }
881 return "";
882}
883
884static text_t get_or (const text_t& collection, recptproto* collectproto,
885 ResultDocInfo_t &docinfo, displayclass &disp,
886 format_t *orptr, text_tmap &options,
887 ostream& logout) {
888
889 text_t tmp;
890 while (orptr != NULL) {
891
892 tmp = format_string (collection,collectproto, docinfo, disp, orptr,
893 options, logout);
894 if (!tmp.empty()) return tmp;
895
896 orptr = orptr->nextptr;
897 }
898 return "";
899}
900
901static text_t get_if (const text_t& collection, recptproto* collectproto,
902 ResultDocInfo_t &docinfo, displayclass &disp,
903 const decision_t &decision,
904 format_t *ifptr, format_t *elseptr,
905 text_tmap &options, ostream& logout)
906{
907
908 // If the decision component is a metadata element, then evaluate it
909 // to see whether we output the "then" or the "else" clause
910 if (decision.command == dMeta) {
911 if (get_meta (docinfo, decision.meta) != "") {
912 if (ifptr != NULL)
913 return get_formatted_string (collection,collectproto, docinfo, disp, ifptr,
914 options, logout);
915 }
916 else {
917 if (elseptr != NULL)
918 return get_formatted_string (collection,collectproto, docinfo, disp, elseptr,
919 options, logout);
920 }
921 }
922
923 // If the decision component is text, then evaluate it (it is probably a
924 // macro like _cgiargmode_) to decide what to output.
925 else if (decision.command == dText) {
926
927 text_t outstring;
928 disp.expandstring (decision.text, outstring);
929
930 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
931 // a cgi argument macro that has not been set, it evaluates to itself.
932 // Therefore, were have to say that a piece of text evalautes true if
933 // it is non-empty and if it is a cgi argument evaulating to itself.
934 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
935 if (ifptr != NULL)
936 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
937 options, logout);
938 } else {
939 if (elseptr != NULL)
940 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
941 options, logout);
942 }
943 }
944
945 return "";
946}
947
948bool includes_metadata(const text_t& text)
949{
950 text_t::const_iterator here = text.begin();
951 text_t::const_iterator end = text.end();
952 while (here != end) {
953 if (*here == '[') return true;
954 here ++;
955 }
956
957 return false;
958}
959
960static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
961 recptproto* collectproto, ResultDocInfo_t &docinfo,
962 displayclass &disp, text_tmap &options,
963 ostream &logout) {
964
965 if (includes_metadata(metavalue))
966 {
967 // text has embedded metadata in it => expand it
968 FilterRequest_t request;
969 FilterResponse_t response;
970
971 request.getParents = false;
972
973 format_t *expanded_formatlistptr = new format_t();
974 parse_formatstring (metavalue, expanded_formatlistptr,
975 request.fields, request.getParents);
976
977 // retrieve metadata
978 get_info(docinfo.OID, collection, request.fields, request.getParents,
979 collectproto, response, logout);
980
981 if (!response.docInfo.empty())
982 {
983 text_t expanded_metavalue
984 = get_formatted_string(collection, collectproto,
985 response.docInfo[0], disp, expanded_formatlistptr,
986 options, logout);
987
988 return expanded_metavalue;
989 }
990 else
991 {
992 return metavalue;
993 }
994 }
995 else
996 {
997 return metavalue;
998 }
999}
1000
1001text_t format_string (const text_t& collection, recptproto* collectproto,
1002 ResultDocInfo_t &docinfo, displayclass &disp,
1003 format_t *formatlistptr, text_tmap &options,
1004 ostream& logout) {
1005
1006 if (formatlistptr == NULL) return "";
1007
1008 switch (formatlistptr->command) {
1009 case comText:
1010 return formatlistptr->text;
1011 case comLink:
1012 return options["link"];
1013 case comEndLink:
1014 if (options["link"].empty()) return "";
1015 else return "</a>";
1016 case comHref:
1017 return get_href(options["link"]);
1018 case comIcon:
1019 return options["icon"];
1020 case comNum:
1021 return docinfo.result_num;
1022 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1023 return get_related_docs(collection, collectproto, docinfo, logout);
1024 case comSummary:
1025 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1026 case comMeta:
1027 {
1028 const text_t& metavalue = get_meta (docinfo, formatlistptr->meta);
1029 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1030 }
1031 case comDoc:
1032 return options["text"];
1033 case comImage:
1034 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1035 case comTOC:
1036 return options["DocTOC"];
1037 case comDocumentButtonDetach:
1038 return options["DocumentButtonDetach"];
1039 case comDocumentButtonHighlight:
1040 return options["DocumentButtonHighlight"];
1041 case comDocumentButtonExpandContents:
1042 return options["DocumentButtonExpandContents"];
1043 case comDocumentButtonExpandText:
1044 return options["DocumentButtonExpandText"];
1045 case comHighlight:
1046 if (options["highlight"] == "1") return "<b>";
1047 break;
1048 case comEndHighlight:
1049 if (options["highlight"] == "1") return "</b>";
1050 break;
1051 case comIf:
1052 return get_if (collection, collectproto, docinfo, disp,
1053 formatlistptr->decision, formatlistptr->ifptr,
1054 formatlistptr->elseptr, options, logout);
1055 case comOr:
1056 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1057 options, logout);
1058 }
1059 return "";
1060}
1061
1062text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1063 ResultDocInfo_t &docinfo, displayclass &disp,
1064 format_t *formatlistptr, text_tmap &options,
1065 ostream& logout) {
1066
1067 text_t ft;
1068 while (formatlistptr != NULL)
1069 {
1070 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1071 options, logout);
1072 formatlistptr = formatlistptr->nextptr;
1073 }
1074
1075 return ft;
1076}
1077
1078
1079/* FUNCTION NAME: format_summary
1080 * DESC: this is invoked when a [Summary] special metadata is processed.
1081 * RETURNS: a query-biased summary for the document */
1082
1083text_t format_summary (const text_t& collection, recptproto* collectproto,
1084 ResultDocInfo_t &docinfo, displayclass &disp,
1085 text_tmap &options, ostream& logout) {
1086
1087 // GRB: added code here to ensure that the cstr (and other collections)
1088 // uses the document metadata item Summary, rather than compressing
1089 // the text of the document, processed via the methods in
1090 // summarise.cpp
1091 if (docinfo.metadata.count("Summary") > 0 &&
1092 docinfo.metadata["Summary"].values.size() > 0) {
1093 return docinfo.metadata["Summary"].values[0];
1094 }
1095
1096 text_t textToSummarise, query;
1097 if(options["text"].empty()) { // get document text
1098 DocumentRequest_t docrequest;
1099 DocumentResponse_t docresponse;
1100 comerror_t err;
1101 docrequest.OID = docinfo.OID;
1102 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1103 textToSummarise = docresponse.doc;
1104 } else // in practice, this would not happen, because text is only
1105 // loaded with the [Text] command
1106 textToSummarise = options["text"];
1107 disp.expandstring("_cgiargq_",query);
1108 return summarise(textToSummarise,query,80);
1109}
Note: See TracBrowser for help on using the repository browser.