source: trunk/gsdl/src/recpt/formattools.cpp@ 6645

Last change on this file since 6645 was 6645, checked in by kjdon, 20 years ago

Modified the datelist display so that it can use other metadata (e.g. dc.Date), not just Date. The classifier itself was modified a while ago to take a datemeta argument and to build the classifier on that metadata rather than assuming Date. It now adds an mdtype value to the top-level classifier node, which contains the name of the date metadata. When making the nav bar macros, documentaction looks for mdtype if it comes across a datelist and sets the dm arg for that classifier button. The datelist browser now looks for the dm arg and uses that metadata to create the special years-and-months list down the side. Formattools is also modified: it used to reformat only the Date metadata for display as "xx month YYYY"; it now does that for any metadata whose name ends in .Date, i.e. it ignores namespaces.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 35.5 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "OIDtools.h"
29#include "summarise.h"
30
31#include <assert.h>
32
33// a few function prototypes
34
35static text_t format_string (const text_t& collection, recptproto* collectproto,
36 ResultDocInfo_t &docinfo, displayclass &disp,
37 format_t *formatlistptr, text_tmap &options,
38 ostream& logout);
39
40static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
41 format_t *formatlistptr, text_tset &metadata, bool &getParents);
42
43static text_t format_summary (const text_t& collection, recptproto* collectproto,
44 ResultDocInfo_t &docinfo, displayclass &disp,
45 text_tmap &options, ostream& logout);
46
47
48void metadata_t::clear() {
49 metaname.clear();
50 metacommand = mNone;
51 parentcommand = pNone;
52 functionoptions.clear();
53}
54
55void decision_t::clear() {
56 command = dMeta;
57 meta.clear();
58 text.clear();
59}
60
61void format_t::clear() {
62 command = comText;
63 decision.clear();
64 text.clear();
65 meta.clear();
66 nextptr = NULL;
67 ifptr = NULL;
68 elseptr = NULL;
69 orptr = NULL;
70}
71
72void formatinfo_t::clear() {
73 DocumentImages = false;
74 DocumentTitles = true;
75 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
76 DocumentContents = true;
77 DocumentArrowsBottom = true;
78 DocumentArrowsTop = false;
79 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
80 // DocumentButtons.push_back ("Expand Text");
81 // DocumentButtons.push_back ("Expand Contents");
82 DocumentButtons.push_back ("Detach");
83 DocumentButtons.push_back ("Highlight");
84 RelatedDocuments = "";
85 DocumentText = "<center><table width=_pagewidth_><tr><td>[Text]</td></tr></table></center>";
86 formatstrings.erase (formatstrings.begin(), formatstrings.end());
87 DocumentUseHTML = false;
88 AllowExtendedOptions = false;
89}
90
91// simply checks to see if formatstring begins with a <td> tag
92bool is_table_content (const text_t &formatstring) {
93 text_t::const_iterator here = formatstring.begin();
94 text_t::const_iterator end = formatstring.end();
95
96 while (here != end) {
97 if (*here != ' ') {
98 if ((*here == '<') && ((here+3) < end)) {
99 if ((*(here+1) == 't' || *(here+1) == 'T') &&
100 (*(here+2) == 'd' || *(here+2) == 'D') &&
101 (*(here+3) == '>' || *(here+3) == ' '))
102 return true;
103 } else return false;
104 }
105 here ++;
106 }
107 return false;
108}
109
110bool is_table_content (const format_t *formatlistptr) {
111
112 if (formatlistptr == NULL) return false;
113
114 if (formatlistptr->command == comText)
115 return is_table_content (formatlistptr->text);
116
117 return false;
118}
119
120// returns false if key isn't in formatstringmap
121bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
122 text_t &formatstring) {
123
124 formatstring.clear();
125 text_tmap::const_iterator it = formatstringmap.find(key);
126 if (it == formatstringmap.end()) return false;
127 formatstring = (*it).second;
128 return true;
129}
130
131// tries to find "key1key2" then "key1" then "key2"
132bool get_formatstring (const text_t &key1, const text_t &key2,
133 const text_tmap &formatstringmap,
134 text_t &formatstring) {
135
136 formatstring.clear();
137 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
138 if (it != formatstringmap.end()) {
139 formatstring = (*it).second;
140 return true;
141 }
142 it = formatstringmap.find(key1);
143 if (it != formatstringmap.end()) {
144 formatstring = (*it).second;
145 return true;
146 }
147 it = formatstringmap.find(key2);
148 if (it != formatstringmap.end()) {
149 formatstring = (*it).second;
150 return true;
151 }
152 return false;
153}
154
155
156text_t remove_namespace(const text_t &meta_name) {
157 text_t::const_iterator end = meta_name.end();
158 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
159 if (it != end) {
160 return substr(it+1, end);
161 }
162 return meta_name;
163
164}
165// returns a date of form 31 _textmonthnn_ 1999
166// input is date of type 19991231
167// at least the year must be present in date
168text_t format_date (const text_t &date) {
169
170 if (date.size() < 4) return "";
171
172 text_t::const_iterator datebegin = date.begin();
173
174 text_t year = substr (datebegin, datebegin+4);
175
176 if (date.size() < 6) return year;
177
178 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
179 int imonth = month.getint();
180 if (imonth < 0 || imonth > 12) return year;
181
182 if (date.size() < 8) return month + " " + year;
183
184 text_t day = substr (datebegin+6, datebegin+8);
185 if (day[0] == '0') day = substr (day.begin()+1, day.end());
186 int iday = day.getint();
187 if (iday < 0 || iday > 31) return month + " " + year;
188
189 return day + " " + month + " " + year;
190}
191
192// converts an iso639 language code to its English equivalent
193// I realize that this isn't the pretiest or most efficient implementation,
194// hopefully this ugly Language (and Date too) formatting won't survive to
195// see gsdl-3.0
196text_t iso639 (const text_t &langcode) {
197
198 if (langcode == "aa") return "Afar";
199 if (langcode == "ab") return "Abkhazian";
200 if (langcode == "af") return "Afrikaans";
201 if (langcode == "am") return "Amharic";
202 if (langcode == "ar") return "Arabic";
203 if (langcode == "as") return "Assamese";
204 if (langcode == "ay") return "Aymara";
205 if (langcode == "az") return "Azerbaijani";
206
207 if (langcode == "ba") return "Bashkir";
208 if (langcode == "be") return "Byelorussian";
209 if (langcode == "bg") return "Bulgarian";
210 if (langcode == "bh") return "Bihari";
211 if (langcode == "bi") return "Bislama";
212 if (langcode == "bn") return "Bengali; Bangla";
213 if (langcode == "bo") return "Tibetan";
214 if (langcode == "br") return "Breton";
215
216 if (langcode == "ca") return "Catalan";
217 if (langcode == "co") return "Corsican";
218 if (langcode == "cs") return "Czech";
219 if (langcode == "cy") return "Welsh";
220
221 if (langcode == "da") return "Danish";
222 if (langcode == "de") return "German";
223 if (langcode == "dz") return "Bhutani";
224
225 if (langcode == "el") return "Greek";
226 if (langcode == "en") return "English";
227 if (langcode == "eo") return "Esperanto";
228 if (langcode == "es") return "Spanish";
229 if (langcode == "et") return "Estonian";
230 if (langcode == "eu") return "Basque";
231
232 if (langcode == "fa") return "Persian";
233 if (langcode == "fi") return "Finnish";
234 if (langcode == "fj") return "Fiji";
235 if (langcode == "fo") return "Faroese";
236 if (langcode == "fr") return "French";
237 if (langcode == "fy") return "Frisian";
238
239 if (langcode == "ga") return "Irish";
240 if (langcode == "gd") return "Scots Gaelic";
241 if (langcode == "gl") return "Galician";
242 if (langcode == "gn") return "Guarani";
243 if (langcode == "gu") return "Gujarati";
244
245 if (langcode == "ha") return "Hausa";
246 if (langcode == "hi") return "Hindi";
247 if (langcode == "hr") return "Croatian";
248 if (langcode == "hu") return "Hungarian";
249 if (langcode == "hy") return "Armenian";
250
251 if (langcode == "ia") return "Interlingua";
252 if (langcode == "ie") return "Interlingue";
253 if (langcode == "ik") return "Inupiak";
254 if (langcode == "in") return "Indonesian";
255 if (langcode == "is") return "Icelandic";
256 if (langcode == "it") return "Italian";
257 if (langcode == "iw") return "Hebrew";
258
259 if (langcode == "ja") return "Japanese";
260 if (langcode == "ji") return "Yiddish";
261 if (langcode == "jw") return "Javanese";
262
263 if (langcode == "ka") return "Georgian";
264 if (langcode == "kk") return "Kazakh";
265 if (langcode == "kl") return "Greenlandic";
266 if (langcode == "km") return "Cambodian";
267 if (langcode == "kn") return "Kannada";
268 if (langcode == "ko") return "Korean";
269 if (langcode == "ks") return "Kashmiri";
270 if (langcode == "ku") return "Kurdish";
271 if (langcode == "ky") return "Kirghiz";
272
273 if (langcode == "la") return "Latin";
274 if (langcode == "ln") return "Lingala";
275 if (langcode == "lo") return "Laothian";
276 if (langcode == "lt") return "Lithuanian";
277 if (langcode == "lv") return "Latvian, Lettish";
278
279 if (langcode == "mg") return "Malagasy";
280 if (langcode == "mi") return "Maori";
281 if (langcode == "mk") return "Macedonian";
282 if (langcode == "ml") return "Malayalam";
283 if (langcode == "mn") return "Mongolian";
284 if (langcode == "mo") return "Moldavian";
285 if (langcode == "mr") return "Marathi";
286 if (langcode == "ms") return "Malay";
287 if (langcode == "mt") return "Maltese";
288 if (langcode == "my") return "Burmese";
289
290 if (langcode == "na") return "Nauru";
291 if (langcode == "ne") return "Nepali";
292 if (langcode == "nl") return "Dutch";
293 if (langcode == "no") return "Norwegian";
294
295 if (langcode == "oc") return "Occitan";
296 if (langcode == "om") return "(Afan) Oromo";
297 if (langcode == "or") return "Oriya";
298
299 if (langcode == "pa") return "Punjabi";
300 if (langcode == "pl") return "Polish";
301 if (langcode == "ps") return "Pashto, Pushto";
302 if (langcode == "pt") return "Portuguese";
303
304 if (langcode == "qu") return "Quechua";
305 if (langcode == "rm") return "Rhaeto-Romance";
306 if (langcode == "rn") return "Kirundi";
307 if (langcode == "ro") return "Romanian";
308 if (langcode == "ru") return "Russian";
309 if (langcode == "rw") return "Kinyarwanda";
310
311 if (langcode == "sa") return "Sanskrit";
312 if (langcode == "sd") return "Sindhi";
313 if (langcode == "sg") return "Sangro";
314 if (langcode == "sh") return "Serbo-Croatian";
315 if (langcode == "si") return "Singhalese";
316 if (langcode == "sk") return "Slovak";
317 if (langcode == "sl") return "Slovenian";
318 if (langcode == "sm") return "Samoan";
319 if (langcode == "sn") return "Shona";
320 if (langcode == "so") return "Somali";
321 if (langcode == "sq") return "Albanian";
322 if (langcode == "sr") return "Serbian";
323 if (langcode == "ss") return "Siswati";
324 if (langcode == "st") return "Sesotho";
325 if (langcode == "su") return "Sudanese";
326 if (langcode == "sv") return "Swedish";
327 if (langcode == "sw") return "Swahili";
328
329 if (langcode == "ta") return "Tamil";
330 if (langcode == "te") return "Tegulu";
331 if (langcode == "tg") return "Tajik";
332 if (langcode == "th") return "Thai";
333 if (langcode == "ti") return "Tigrinya";
334 if (langcode == "tk") return "Turkmen";
335 if (langcode == "tl") return "Tagalog";
336 if (langcode == "tn") return "Setswana";
337 if (langcode == "to") return "Tonga";
338 if (langcode == "tr") return "Turkish";
339 if (langcode == "ts") return "Tsonga";
340 if (langcode == "tt") return "Tatar";
341 if (langcode == "tw") return "Twi";
342
343 if (langcode == "uk") return "Ukrainian";
344 if (langcode == "ur") return "Urdu";
345 if (langcode == "uz") return "Uzbek";
346
347 if (langcode == "vi") return "Vietnamese";
348 if (langcode == "vo") return "Volapuk";
349
350 if (langcode == "wo") return "Wolof";
351
352 if (langcode == "xh") return "Xhosa";
353
354 if (langcode == "yo") return "Yoruba";
355
356 if (langcode == "zh") return "Chinese";
357 if (langcode == "zu") return "Zulu";
358 return "";
359}
360
361text_t get_href (const text_t &link) {
362
363 text_t href;
364
365 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
366 text_t::const_iterator end = link.end();
367
368 here ++;
369 while (here != end) {
370 if (*here == '"') break;
371 href.push_back(*here);
372 here ++;
373 }
374
375 return href;
376}
377
378//this function gets the information associated with the relation
379//metadata for the document associated with 'docinfo'. This relation
380//metadata consists of a line of pairs containing 'collection, document OID'
381//(this is the OID of the document related to the current document, and
382//the collection the related document belongs to). For each of these pairs
383//the title metadata is obtained and then an html link between the title
384//of the related doc and the document's position (the document will be
385//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
386//(where collection is the related documents collection, and OID is the
387//related documents OID). A list of these html links are made for as many
388//related documents as there are. This list is then returned. If there are
389//no related documents available for the current document then the string
390//'.. no related documents .. ' is returned.
391text_t get_related_docs(const text_t& collection, recptproto* collectproto,
392 ResultDocInfo_t &docinfo, ostream& logout){
393
394 text_tset metadata;
395
396 //insert the metadata we wish to collect
397 metadata.insert("relation");
398 metadata.insert("Title");
399 metadata.insert("Subject"); //for emails, where title data doesn't apply
400
401 FilterResponse_t response;
402 text_t relation = ""; //string for displaying relation metadata
403 text_t relationTitle = ""; //the related documents Title (or subject)
404 text_t relationOID = ""; //the related documents OID
405
406 //get the information associated with the metadata for current doc
407 if (get_info (docinfo.OID, collection, metadata,
408 false, collectproto, response, logout)) {
409
410 //if the relation metadata exists, store for displaying
411 if(!response.docInfo[0].metadata["relation"].values.empty()){
412 relationOID += response.docInfo[0].metadata["relation"].values[0];
413
414 //split relation data into pairs of collectionname,ID number
415 text_tarray relationpairs;
416 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
417
418 text_tarray::const_iterator currDoc = relationpairs.begin();
419 text_tarray::const_iterator lastDoc = relationpairs.end();
420
421 //iterate through the pairs to split and display
422 while(currDoc != lastDoc){
423
424 //split pairs into collectionname and ID
425 text_tarray relationdata;
426 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
427
428 //get first element in the array (collection)
429 text_tarray::const_iterator doc_data = relationdata.begin();
430 text_t document_collection = *doc_data;
431 doc_data++; //increment to get next item in array (oid)
432 text_t document_OID = *doc_data;
433
434 //create html link to related document
435 relation += "<a href=\"_httpdocument_&c=" + document_collection;
436 relation += "&cl=search&d=" + document_OID;
437
438 //get the information associated with the metadata for related doc
439 if (get_info (document_OID, document_collection, metadata,
440 false, collectproto, response, logout)) {
441
442 //if title metadata doesn't exist, collect subject metadata
443 //if that doesn't exist, just call it 'related document'
444 if (!response.docInfo[0].metadata["Title"].values[0].empty())
445 relationTitle = response.docInfo[0].metadata["Title"].values[0];
446 else if (!response.docInfo[0].metadata["Subject"].values.empty())
447 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
448 else relationTitle = "RELATED DOCUMENT";
449
450 }
451
452 //link the related document's title to its page
453 relation += "\">" + relationTitle + "</a>";
454 relation += " (" + document_collection + ")<br>";
455
456 currDoc++;
457 }
458 }
459
460 }
461
462 if(relation.empty()) //no relation data for documnet
463 relation = ".. no related documents .. ";
464
465 return relation;
466}
467
468
469
470static void get_parent_options (text_t &instring, metadata_t &metaoption) {
471
472 assert (instring.size() > 7);
473 if (instring.size() <= 7) return;
474
475 text_t meta, com, op;
476 bool inbraces = false;
477 bool inquotes = false;
478 bool foundcolon = false;
479 text_t::const_iterator here = instring.begin()+6;
480 text_t::const_iterator end = instring.end();
481 while (here != end) {
482 if (*here == '(') inbraces = true;
483 else if (*here == ')') inbraces = false;
484 else if (*here == '\'' && !inquotes) inquotes = true;
485 else if (*here == '\'' && inquotes) inquotes = false;
486 else if (*here == ':' && !inbraces) foundcolon = true;
487 else if (foundcolon) meta.push_back (*here);
488 else if (inquotes) op.push_back (*here);
489 else com.push_back (*here);
490 here ++;
491 }
492 instring = meta;
493 if (com.empty())
494 metaoption.parentcommand = pImmediate;
495 else if (com == "Top")
496 metaoption.parentcommand = pTop;
497 else if (com == "All") {
498 metaoption.parentcommand = pAll;
499 metaoption.functionoptions = op;
500 }
501}
502
503
504static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
505
506 assert (instring.size() > 8);
507 if (instring.size() <= 8) return;
508
509 text_t meta, com, op;
510 bool inbraces = false;
511 bool inquotes = false;
512 bool foundcolon = false;
513 text_t::const_iterator here = instring.begin()+7;
514 text_t::const_iterator end = instring.end();
515 while (here != end) {
516 if (*here == '(') inbraces = true;
517 else if (*here == ')') inbraces = false;
518 else if (*here == '\'' && !inquotes) inquotes = true;
519 else if (*here == '\'' && inquotes) inquotes = false;
520 else if (*here == ':' && !inbraces) foundcolon = true;
521 else if (foundcolon) meta.push_back (*here);
522 else if (inquotes) op.push_back (*here);
523 else com.push_back (*here);
524 here ++;
525 }
526
527 instring = meta;
528
529 if (com.empty()) {
530 metaoption.functionoptions = " ";
531 }
532 else {
533 metaoption.functionoptions = op;
534 }
535}
536
537
538static void parse_meta (text_t &meta, metadata_t &metaoption,
539 text_tset &metadata, bool &getParents) {
540
541 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
542 metaoption.metacommand |= mCgiSafe;
543 meta = substr (meta.begin()+8, meta.end());
544 }
545
546 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
547 getParents = true;
548 get_parent_options (meta, metaoption);
549 }
550 else if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
551 metaoption.metacommand |= mSibling;
552 get_sibling_options (meta, metaoption);
553 }
554
555 metadata.insert (meta);
556 metaoption.metaname = meta;
557}
558
559static void parse_meta (text_t &meta, format_t *formatlistptr,
560 text_tset &metadata, bool &getParents) {
561
562 if (meta == "link")
563 formatlistptr->command = comLink;
564 else if (meta == "/link")
565 formatlistptr->command = comEndLink;
566
567 else if (meta == "href")
568 formatlistptr->command = comHref;
569
570 else if (meta == "num")
571 formatlistptr->command = comNum;
572
573 else if (meta == "icon")
574 formatlistptr->command = comIcon;
575
576 else if (meta == "Text")
577 formatlistptr->command = comDoc;
578
579 else if (meta == "RelatedDocuments")
580 formatlistptr->command = comRel;
581
582 else if (meta == "highlight")
583 formatlistptr->command = comHighlight;
584
585 else if (meta == "/highlight")
586 formatlistptr->command = comEndHighlight;
587
588 else if (meta == "Summary")
589 formatlistptr->command = comSummary;
590
591 else if (meta == "DocImage")
592 formatlistptr->command = comImage;
593
594 else if (meta == "DocTOC")
595 formatlistptr->command = comTOC;
596
597 else if (meta == "DocumentButtonDetach")
598 formatlistptr->command = comDocumentButtonDetach;
599
600 else if (meta == "DocumentButtonHighlight")
601 formatlistptr->command = comDocumentButtonHighlight;
602
603 else if (meta == "DocumentButtonExpandContents")
604 formatlistptr->command = comDocumentButtonExpandContents;
605
606 else if (meta == "DocumentButtonExpandText")
607 formatlistptr->command = comDocumentButtonExpandText;
608
609 else if (meta == "DocOID")
610 formatlistptr->command = comOID;
611
612 else {
613 formatlistptr->command = comMeta;
614 parse_meta (meta, formatlistptr->meta, metadata, getParents);
615 }
616}
617
618static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
619 text_tset &metadata, bool &getParents) {
620
621 text_t text;
622 text_t::const_iterator here = formatstring.begin();
623 text_t::const_iterator end = formatstring.end();
624
625 while (here != end) {
626
627 if (*here == '\\') {
628 here ++;
629 if (here != end) text.push_back (*here);
630
631 } else if (*here == '{') {
632 if (!text.empty()) {
633 formatlistptr->command = comText;
634 formatlistptr->text = text;
635 formatlistptr->nextptr = new format_t();
636 formatlistptr = formatlistptr->nextptr;
637
638 text.clear();
639 }
640 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
641
642 formatlistptr->nextptr = new format_t();
643 formatlistptr = formatlistptr->nextptr;
644 if (here == end) break;
645 }
646 } else if (*here == '[') {
647 if (!text.empty()) {
648 formatlistptr->command = comText;
649 formatlistptr->text = text;
650 formatlistptr->nextptr = new format_t();
651 formatlistptr = formatlistptr->nextptr;
652
653 text.clear();
654 }
655 text_t meta;
656 here ++;
657 while (*here != ']') {
658 if (here == end) return false;
659 meta.push_back (*here);
660 here ++;
661 }
662 parse_meta (meta, formatlistptr, metadata, getParents);
663 formatlistptr->nextptr = new format_t();
664 formatlistptr = formatlistptr->nextptr;
665
666 } else
667 text.push_back (*here);
668
669 if (here != end) here ++;
670 }
671 if (!text.empty()) {
672 formatlistptr->command = comText;
673 formatlistptr->text = text;
674 formatlistptr->nextptr = new format_t();
675 formatlistptr = formatlistptr->nextptr;
676
677 }
678 return true;
679}
680
681
682static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
683 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
684
685 text_t::const_iterator it = findchar (here, end, '}');
686 if (it == end) return false;
687
688 text_t com = substr (here, it);
689 here = findchar (it, end, '{');
690 if (here == end) return false;
691 else here ++;
692
693 if (com == "If") formatlistptr->command = comIf;
694 else if (com == "Or") formatlistptr->command = comOr;
695 else return false;
696
697 int commacount = 0;
698 text_t text;
699 while (here != end) {
700
701 if (*here == '\\') {
702 here++;
703 if (here != end) text.push_back(*here);
704
705 }
706
707 else if (*here == ',' || *here == '}' || *here == '{') {
708
709 if (formatlistptr->command == comOr) {
710 // the {Or}{this, or this, or this, or this} statement
711 format_t *or_ptr;
712
713 // find the next unused orptr
714 if (formatlistptr->orptr == NULL) {
715 formatlistptr->orptr = new format_t();
716 or_ptr = formatlistptr->orptr;
717 } else {
718 or_ptr = formatlistptr->orptr;
719 while (or_ptr->nextptr != NULL)
720 or_ptr = or_ptr->nextptr;
721 or_ptr->nextptr = new format_t();
722 or_ptr = or_ptr->nextptr;
723 }
724
725 if (!text.empty())
726 {
727 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
728 }
729
730 if (*here == '{')
731 {
732 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
733 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
734 // The latter can always be re-written:
735 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
736
737 if (!text.empty()) // already used up allocated format_t
738 {
739 // => allocate new one for detected action
740 or_ptr->nextptr = new format_t();
741 or_ptr = or_ptr->nextptr;
742 }
743 if (!parse_action(++here, end, or_ptr, metadata, getParents))
744 {
745 return false;
746 }
747 }
748 else
749 {
750 if (*here == '}') break;
751 }
752 text.clear();
753
754 }
755
756 // Parse an {If}{decide,do,else} statement
757 else {
758
759 // Read the decision component.
760 if (commacount == 0) {
761 // Decsion can be a metadata element, or a piece of text.
762 // Originally Stefan's code, updated 25/10/2000 by Gordon.
763
764 text_t::const_iterator beginbracket = text.begin();
765 text_t::const_iterator endbracket = (text.end() - 1);
766
767 // Decision is based on a metadata element
768 if ((*beginbracket == '[') && (*endbracket == ']')) {
769 // Ignore the surrounding square brackets
770 text_t meta = substr (beginbracket+1, endbracket);
771 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
772 commacount ++;
773 text.clear();
774 }
775
776 // Decision is a piece of text (probably a macro like _cgiargmode_).
777 else {
778 formatlistptr->decision.command = dText;
779 formatlistptr->decision.text = text;
780 commacount ++;
781 text.clear();
782 }
783 }
784
785 // Read the "then" and "else" components of the {If} statement.
786 else {
787 format_t** nextlistptr = NULL;
788 if (commacount == 1) {
789 nextlistptr = &formatlistptr->ifptr;
790 } else if (commacount == 2 ) {
791 nextlistptr = &formatlistptr->elseptr;
792 } else {
793 return false;
794 }
795
796 if (!text.empty()) {
797 if (*nextlistptr == NULL) {
798 *nextlistptr = new format_t();
799 } else {
800
801 // skip to the end of any format_t statements already added
802 while ((*nextlistptr)->nextptr != NULL)
803 {
804 nextlistptr = &(*nextlistptr)->nextptr;
805 }
806
807 (*nextlistptr)->nextptr = new format_t();
808 nextlistptr = &(*nextlistptr)->nextptr;
809 }
810
811 if (!parse_string (text, *nextlistptr, metadata, getParents))
812 {
813 return false;
814 }
815 text.clear();
816 }
817
818 if (*here == '{')
819 {
820 if (*nextlistptr == NULL) {
821 *nextlistptr = new format_t();
822 } else {
823 (*nextlistptr)->nextptr = new format_t();
824 nextlistptr = &(*nextlistptr)->nextptr;
825 }
826
827 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
828 {
829 return false;
830 }
831 }
832 else
833 {
834 if (*here == '}') break;
835 commacount ++;
836 }
837 }
838 }
839
840 } else text.push_back(*here);
841
842 if (here != end) here ++;
843 }
844
845 return true;
846}
847
848
849bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
850 text_tset &metadata, bool &getParents) {
851
852 formatlistptr->clear();
853 getParents = false;
854
855 return (parse_string (formatstring, formatlistptr, metadata, getParents));
856}
857
858
859// note: all the format_date stuff is assuming that all Date metadata is going to
860// be of the form yyyymmdd, this is of course, crap ;)
861
862static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
863
864 // make sure we have the requested metadata
865 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
866 if (it == docinfo.metadata.end()) return "";
867
868 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
869 text_t no_ns_metaname = remove_namespace(meta.metaname);
870 switch (meta.parentcommand) {
871 case pNone:
872 {
873 if (meta.metacommand & mSibling) {
874 text_t tmp;
875 bool first = true;
876
877 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
878
879 const int start_i=0;
880 const int end_i = metaname_rec.values.size()-1;
881
882 for (int i=start_i; i<=end_i; i++) {
883 if (!first) tmp += meta.functionoptions;
884
885 if (no_ns_metaname == "Date") tmp += format_date (metaname_rec.values[i]);
886 else if (no_ns_metaname == "Language") tmp += iso639(metaname_rec.values[i]);
887 else tmp += metaname_rec.values[i];
888 first = false;
889 }
890
891 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
892 else return tmp;
893
894 }
895 else {
896
897 text_t classifier_metaname = docinfo.classifier_metadata_type;
898 int metaname_index
899 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
900 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
901
902 if (no_ns_metaname == "Date")
903 return format_date (metadata_item);
904 else if (no_ns_metaname == "Language")
905 return iso639(metadata_item);
906 if (meta.metacommand & mCgiSafe)
907 return cgi_safe (metadata_item);
908 else return metadata_item;
909 }
910 }
911
912 case pImmediate:
913 if (parent != NULL) {
914 if (no_ns_metaname == "Date")
915 return format_date (parent->values[0]);
916 if (meta.metacommand & mCgiSafe)
917 return cgi_safe (parent->values[0]);
918 else return parent->values[0];
919 }
920 break;
921
922 case pTop:
923 if (parent != NULL) {
924 while (parent->parent != NULL) parent = parent->parent;
925
926 if (no_ns_metaname == "Date")
927 return format_date (parent->values[0]);
928 if (meta.metacommand & mCgiSafe)
929 return cgi_safe (parent->values[0]);
930 else return parent->values[0];
931 }
932 break;
933
934 case pAll:
935 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
936 if (parent != NULL) {
937 text_tarray tmparray;
938 while (parent != NULL) {
939 tmparray.push_back (parent->values[0]);
940 parent = parent->parent;
941 }
942 bool first = true;
943 text_t tmp;
944 text_tarray::reverse_iterator here = tmparray.rbegin();
945 text_tarray::reverse_iterator end = tmparray.rend();
946 while (here != end) {
947 if (!first) tmp += meta.functionoptions;
948 if (no_ns_metaname == "Date") tmp += format_date (*here);
949 else tmp += *here;
950 first = false;
951 here ++;
952 }
953 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
954 else return tmp;
955 }
956 }
957 return "";
958}
959
960static text_t get_or (const text_t& collection, recptproto* collectproto,
961 ResultDocInfo_t &docinfo, displayclass &disp,
962 format_t *orptr, text_tmap &options,
963 ostream& logout) {
964
965 text_t tmp;
966 while (orptr != NULL) {
967
968 tmp = format_string (collection,collectproto, docinfo, disp, orptr,
969 options, logout);
970 if (!tmp.empty()) return tmp;
971
972 orptr = orptr->nextptr;
973 }
974 return "";
975}
976
977static text_t get_if (const text_t& collection, recptproto* collectproto,
978 ResultDocInfo_t &docinfo, displayclass &disp,
979 const decision_t &decision,
980 format_t *ifptr, format_t *elseptr,
981 text_tmap &options, ostream& logout)
982{
983
984 // If the decision component is a metadata element, then evaluate it
985 // to see whether we output the "then" or the "else" clause
986 if (decision.command == dMeta) {
987 if (get_meta (docinfo, decision.meta) != "") {
988 if (ifptr != NULL)
989 return get_formatted_string (collection,collectproto, docinfo, disp, ifptr,
990 options, logout);
991 }
992 else {
993 if (elseptr != NULL)
994 return get_formatted_string (collection,collectproto, docinfo, disp, elseptr,
995 options, logout);
996 }
997 }
998
999 // If the decision component is text, then evaluate it (it is probably a
1000 // macro like _cgiargmode_) to decide what to output.
1001 else if (decision.command == dText) {
1002
1003 text_t outstring;
1004 disp.expandstring (decision.text, outstring);
1005
1006 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1007 // a cgi argument macro that has not been set, it evaluates to itself.
1008 // Therefore, were have to say that a piece of text evalautes true if
1009 // it is non-empty and if it is a cgi argument evaulating to itself.
1010 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1011 if (ifptr != NULL)
1012 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1013 options, logout);
1014 } else {
1015 if (elseptr != NULL)
1016 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1017 options, logout);
1018 }
1019 }
1020
1021 return "";
1022}
1023
1024bool includes_metadata(const text_t& text)
1025{
1026 text_t::const_iterator here = text.begin();
1027 text_t::const_iterator end = text.end();
1028 while (here != end) {
1029 if (*here == '[') return true;
1030 here ++;
1031 }
1032
1033 return false;
1034}
1035
1036static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1037 recptproto* collectproto, ResultDocInfo_t &docinfo,
1038 displayclass &disp, text_tmap &options,
1039 ostream &logout) {
1040
1041 if (includes_metadata(metavalue))
1042 {
1043 // text has embedded metadata in it => expand it
1044 FilterRequest_t request;
1045 FilterResponse_t response;
1046
1047 request.getParents = false;
1048
1049 format_t *expanded_formatlistptr = new format_t();
1050 parse_formatstring (metavalue, expanded_formatlistptr,
1051 request.fields, request.getParents);
1052
1053 // retrieve metadata
1054 get_info(docinfo.OID, collection, request.fields, request.getParents,
1055 collectproto, response, logout);
1056
1057 if (!response.docInfo.empty())
1058 {
1059 text_t expanded_metavalue
1060 = get_formatted_string(collection, collectproto,
1061 response.docInfo[0], disp, expanded_formatlistptr,
1062 options, logout);
1063
1064 return expanded_metavalue;
1065 }
1066 else
1067 {
1068 return metavalue;
1069 }
1070 }
1071 else
1072 {
1073 return metavalue;
1074 }
1075}
1076
1077text_t format_string (const text_t& collection, recptproto* collectproto,
1078 ResultDocInfo_t &docinfo, displayclass &disp,
1079 format_t *formatlistptr, text_tmap &options,
1080 ostream& logout) {
1081
1082 if (formatlistptr == NULL) return "";
1083
1084 switch (formatlistptr->command) {
1085 case comOID:
1086 return docinfo.OID;
1087 case comText:
1088 return formatlistptr->text;
1089 case comLink:
1090 return options["link"];
1091 case comEndLink:
1092 if (options["link"].empty()) return "";
1093 else return "</a>";
1094 case comHref:
1095 return get_href(options["link"]);
1096 case comIcon:
1097 return options["icon"];
1098 case comNum:
1099 return docinfo.result_num;
1100 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1101 return get_related_docs(collection, collectproto, docinfo, logout);
1102 case comSummary:
1103 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1104 case comMeta:
1105 {
1106 const text_t& metavalue = get_meta (docinfo, formatlistptr->meta);
1107 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1108 }
1109 case comDoc:
1110 return options["text"];
1111 case comImage:
1112 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1113 case comTOC:
1114 return options["DocTOC"];
1115 case comDocumentButtonDetach:
1116 return options["DocumentButtonDetach"];
1117 case comDocumentButtonHighlight:
1118 return options["DocumentButtonHighlight"];
1119 case comDocumentButtonExpandContents:
1120 return options["DocumentButtonExpandContents"];
1121 case comDocumentButtonExpandText:
1122 return options["DocumentButtonExpandText"];
1123 case comHighlight:
1124 if (options["highlight"] == "1") return "<b>";
1125 break;
1126 case comEndHighlight:
1127 if (options["highlight"] == "1") return "</b>";
1128 break;
1129 case comIf:
1130 return get_if (collection, collectproto, docinfo, disp,
1131 formatlistptr->decision, formatlistptr->ifptr,
1132 formatlistptr->elseptr, options, logout);
1133 case comOr:
1134 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1135 options, logout);
1136 }
1137 return "";
1138}
1139
1140text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1141 ResultDocInfo_t &docinfo, displayclass &disp,
1142 format_t *formatlistptr, text_tmap &options,
1143 ostream& logout) {
1144
1145 text_t ft;
1146 while (formatlistptr != NULL)
1147 {
1148 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1149 options, logout);
1150 formatlistptr = formatlistptr->nextptr;
1151 }
1152
1153 return ft;
1154}
1155
1156
1157/* FUNCTION NAME: format_summary
1158 * DESC: this is invoked when a [Summary] special metadata is processed.
1159 * RETURNS: a query-biased summary for the document */
1160
1161text_t format_summary (const text_t& collection, recptproto* collectproto,
1162 ResultDocInfo_t &docinfo, displayclass &disp,
1163 text_tmap &options, ostream& logout) {
1164
1165 // GRB: added code here to ensure that the cstr (and other collections)
1166 // uses the document metadata item Summary, rather than compressing
1167 // the text of the document, processed via the methods in
1168 // summarise.cpp
1169 if (docinfo.metadata.count("Summary") > 0 &&
1170 docinfo.metadata["Summary"].values.size() > 0) {
1171 return docinfo.metadata["Summary"].values[0];
1172 }
1173
1174 text_t textToSummarise, query;
1175 if(options["text"].empty()) { // get document text
1176 DocumentRequest_t docrequest;
1177 DocumentResponse_t docresponse;
1178 comerror_t err;
1179 docrequest.OID = docinfo.OID;
1180 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1181 textToSummarise = docresponse.doc;
1182 } else // in practice, this would not happen, because text is only
1183 // loaded with the [Text] command
1184 textToSummarise = options["text"];
1185 disp.expandstring("_cgiargq_",query);
1186 return summarise(textToSummarise,query,80);
1187}
Note: See TracBrowser for help on using the repository browser.