source: trunk/gsdl/src/recpt/formattools.cpp@ 7389

Last change on this file since 7389 was 7389, checked in by davidb, 20 years ago

Format statement 'if' syntax extended to support operators such as
'eq' and 'ne'. Can now say things like: {If}{[format] eq 'ab cd',X,Y}
Two main changes in code. The first is where the text of the test condition
of the if statement is handled. This now needs to be parsed, to ensure
any metadata present is recorded, so when 'docinfo' is set up the necessary
metadata is there. The second change is that, when the test condition
of the if statement is evaluated, it is parsed 'on the fly' to look
for the presence of an operator such as 'eq' or 'ne' (in fact these are
currently the only ones implemented; others, such as 'gt' for greater
than etc would be straightforward to add). If there is an operator
detected, then the left and right hand sides of the expression are
determined (any metadata looked up) and the operator applied.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 41.2 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "OIDtools.h"
29#include "summarise.h"
30
31#include <assert.h>
32
33// a few function prototypes
// These are forward declarations of file-local (static) functions that are
// called before their definitions appear: parse_action is called from
// parse_string, and format_string is called from get_or.
34
// Called by get_or once per alternative with the alternative's format_t node.
// NOTE(review): the definition is not visible from here; confirm its exact
// contract at the definition.
35static text_t format_string (const text_t& collection, recptproto* collectproto,
36 ResultDocInfo_t &docinfo, displayclass &disp,
37 format_t *formatlistptr, text_tmap &options,
38 ostream& logout);
39
// Parses an "{If}{...}" / "{Or}{...}" action starting at 'here' (which is
// advanced as input is consumed) into formatlistptr; returns false on a
// malformed action.
40static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
41 format_t *formatlistptr, text_tset &metadata, bool &getParents);
42
// NOTE(review): neither the definition nor a caller of format_summary is
// visible from here; presumably it produces the output for the [Summary]
// format command -- confirm at the definition.
43static text_t format_summary (const text_t& collection, recptproto* collectproto,
44 ResultDocInfo_t &docinfo, displayclass &disp,
45 text_tmap &options, ostream& logout);
46
47
48void metadata_t::clear() {
49 metaname.clear();
50 metacommand = mNone;
51 parentcommand = pNone;
52 functionoptions.clear();
53}
54
55void decision_t::clear() {
56 command = dMeta;
57 meta.clear();
58 text.clear();
59}
60
61void format_t::clear() {
62 command = comText;
63 decision.clear();
64 text.clear();
65 meta.clear();
66 nextptr = NULL;
67 ifptr = NULL;
68 elseptr = NULL;
69 orptr = NULL;
70}
71
72void formatinfo_t::clear() {
73 DocumentImages = false;
74 DocumentTitles = true;
75 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
76 DocumentContents = true;
77 DocumentArrowsBottom = true;
78 DocumentArrowsTop = false;
79 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
80 // DocumentButtons.push_back ("Expand Text");
81 // DocumentButtons.push_back ("Expand Contents");
82 DocumentButtons.push_back ("Detach");
83 DocumentButtons.push_back ("Highlight");
84 RelatedDocuments = "";
85 DocumentText = "<center><table width=_pagewidth_><tr><td>[Text]</td></tr></table></center>";
86 formatstrings.erase (formatstrings.begin(), formatstrings.end());
87 DocumentUseHTML = false;
88 AllowExtendedOptions = false;
89}
90
91// simply checks to see if formatstring begins with a <td> tag
92bool is_table_content (const text_t &formatstring) {
93 text_t::const_iterator here = formatstring.begin();
94 text_t::const_iterator end = formatstring.end();
95
96 while (here != end) {
97 if (*here != ' ') {
98 if ((*here == '<') && ((here+3) < end)) {
99 if ((*(here+1) == 't' || *(here+1) == 'T') &&
100 (*(here+2) == 'd' || *(here+2) == 'D') &&
101 (*(here+3) == '>' || *(here+3) == ' '))
102 return true;
103 } else return false;
104 }
105 here ++;
106 }
107 return false;
108}
109
110bool is_table_content (const format_t *formatlistptr) {
111
112 if (formatlistptr == NULL) return false;
113
114 if (formatlistptr->command == comText)
115 return is_table_content (formatlistptr->text);
116
117 return false;
118}
119
120// returns false if key isn't in formatstringmap
121bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
122 text_t &formatstring) {
123
124 formatstring.clear();
125 text_tmap::const_iterator it = formatstringmap.find(key);
126 if (it == formatstringmap.end()) return false;
127 formatstring = (*it).second;
128 return true;
129}
130
131// tries to find "key1key2" then "key1" then "key2"
132bool get_formatstring (const text_t &key1, const text_t &key2,
133 const text_tmap &formatstringmap,
134 text_t &formatstring) {
135
136 formatstring.clear();
137 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
138 if (it != formatstringmap.end()) {
139 formatstring = (*it).second;
140 return true;
141 }
142 it = formatstringmap.find(key1);
143 if (it != formatstringmap.end()) {
144 formatstring = (*it).second;
145 return true;
146 }
147 it = formatstringmap.find(key2);
148 if (it != formatstringmap.end()) {
149 formatstring = (*it).second;
150 return true;
151 }
152 return false;
153}
154
155
156text_t remove_namespace(const text_t &meta_name) {
157 text_t::const_iterator end = meta_name.end();
158 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
159 if (it != end) {
160 return substr(it+1, end);
161 }
162 return meta_name;
163
164}
165// returns a date of form 31 _textmonthnn_ 1999
166// input is date of type 19991231
167// at least the year must be present in date
168text_t format_date (const text_t &date) {
169
170 if (date.size() < 4) return "";
171
172 text_t::const_iterator datebegin = date.begin();
173
174 text_t year = substr (datebegin, datebegin+4);
175
176 if (date.size() < 6) return year;
177
178 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
179 int imonth = month.getint();
180 if (imonth < 0 || imonth > 12) return year;
181
182 if (date.size() < 8) return month + " " + year;
183
184 text_t day = substr (datebegin+6, datebegin+8);
185 if (day[0] == '0') day = substr (day.begin()+1, day.end());
186 int iday = day.getint();
187 if (iday < 0 || iday > 31) return month + " " + year;
188
189 return day + " " + month + " " + year;
190}
191
192// converts an iso639 language code to its English equivalent
193// I realize that this isn't the pretiest or most efficient implementation,
194// hopefully this ugly Language (and Date too) formatting won't survive to
195// see gsdl-3.0
196text_t iso639 (const text_t &langcode) {
197
198 if (langcode == "aa") return "Afar";
199 if (langcode == "ab") return "Abkhazian";
200 if (langcode == "af") return "Afrikaans";
201 if (langcode == "am") return "Amharic";
202 if (langcode == "ar") return "Arabic";
203 if (langcode == "as") return "Assamese";
204 if (langcode == "ay") return "Aymara";
205 if (langcode == "az") return "Azerbaijani";
206
207 if (langcode == "ba") return "Bashkir";
208 if (langcode == "be") return "Byelorussian";
209 if (langcode == "bg") return "Bulgarian";
210 if (langcode == "bh") return "Bihari";
211 if (langcode == "bi") return "Bislama";
212 if (langcode == "bn") return "Bengali; Bangla";
213 if (langcode == "bo") return "Tibetan";
214 if (langcode == "br") return "Breton";
215
216 if (langcode == "ca") return "Catalan";
217 if (langcode == "co") return "Corsican";
218 if (langcode == "cs") return "Czech";
219 if (langcode == "cy") return "Welsh";
220
221 if (langcode == "da") return "Danish";
222 if (langcode == "de") return "German";
223 if (langcode == "dz") return "Bhutani";
224
225 if (langcode == "el") return "Greek";
226 if (langcode == "en") return "English";
227 if (langcode == "eo") return "Esperanto";
228 if (langcode == "es") return "Spanish";
229 if (langcode == "et") return "Estonian";
230 if (langcode == "eu") return "Basque";
231
232 if (langcode == "fa") return "Persian";
233 if (langcode == "fi") return "Finnish";
234 if (langcode == "fj") return "Fiji";
235 if (langcode == "fo") return "Faroese";
236 if (langcode == "fr") return "French";
237 if (langcode == "fy") return "Frisian";
238
239 if (langcode == "ga") return "Irish";
240 if (langcode == "gd") return "Scots Gaelic";
241 if (langcode == "gl") return "Galician";
242 if (langcode == "gn") return "Guarani";
243 if (langcode == "gu") return "Gujarati";
244
245 if (langcode == "ha") return "Hausa";
246 if (langcode == "hi") return "Hindi";
247 if (langcode == "hr") return "Croatian";
248 if (langcode == "hu") return "Hungarian";
249 if (langcode == "hy") return "Armenian";
250
251 if (langcode == "ia") return "Interlingua";
252 if (langcode == "ie") return "Interlingue";
253 if (langcode == "ik") return "Inupiak";
254 if (langcode == "in") return "Indonesian";
255 if (langcode == "is") return "Icelandic";
256 if (langcode == "it") return "Italian";
257 if (langcode == "iw") return "Hebrew";
258
259 if (langcode == "ja") return "Japanese";
260 if (langcode == "ji") return "Yiddish";
261 if (langcode == "jw") return "Javanese";
262
263 if (langcode == "ka") return "Georgian";
264 if (langcode == "kk") return "Kazakh";
265 if (langcode == "kl") return "Greenlandic";
266 if (langcode == "km") return "Cambodian";
267 if (langcode == "kn") return "Kannada";
268 if (langcode == "ko") return "Korean";
269 if (langcode == "ks") return "Kashmiri";
270 if (langcode == "ku") return "Kurdish";
271 if (langcode == "ky") return "Kirghiz";
272
273 if (langcode == "la") return "Latin";
274 if (langcode == "ln") return "Lingala";
275 if (langcode == "lo") return "Laothian";
276 if (langcode == "lt") return "Lithuanian";
277 if (langcode == "lv") return "Latvian, Lettish";
278
279 if (langcode == "mg") return "Malagasy";
280 if (langcode == "mi") return "Maori";
281 if (langcode == "mk") return "Macedonian";
282 if (langcode == "ml") return "Malayalam";
283 if (langcode == "mn") return "Mongolian";
284 if (langcode == "mo") return "Moldavian";
285 if (langcode == "mr") return "Marathi";
286 if (langcode == "ms") return "Malay";
287 if (langcode == "mt") return "Maltese";
288 if (langcode == "my") return "Burmese";
289
290 if (langcode == "na") return "Nauru";
291 if (langcode == "ne") return "Nepali";
292 if (langcode == "nl") return "Dutch";
293 if (langcode == "no") return "Norwegian";
294
295 if (langcode == "oc") return "Occitan";
296 if (langcode == "om") return "(Afan) Oromo";
297 if (langcode == "or") return "Oriya";
298
299 if (langcode == "pa") return "Punjabi";
300 if (langcode == "pl") return "Polish";
301 if (langcode == "ps") return "Pashto, Pushto";
302 if (langcode == "pt") return "Portuguese";
303
304 if (langcode == "qu") return "Quechua";
305 if (langcode == "rm") return "Rhaeto-Romance";
306 if (langcode == "rn") return "Kirundi";
307 if (langcode == "ro") return "Romanian";
308 if (langcode == "ru") return "Russian";
309 if (langcode == "rw") return "Kinyarwanda";
310
311 if (langcode == "sa") return "Sanskrit";
312 if (langcode == "sd") return "Sindhi";
313 if (langcode == "sg") return "Sangro";
314 if (langcode == "sh") return "Serbo-Croatian";
315 if (langcode == "si") return "Singhalese";
316 if (langcode == "sk") return "Slovak";
317 if (langcode == "sl") return "Slovenian";
318 if (langcode == "sm") return "Samoan";
319 if (langcode == "sn") return "Shona";
320 if (langcode == "so") return "Somali";
321 if (langcode == "sq") return "Albanian";
322 if (langcode == "sr") return "Serbian";
323 if (langcode == "ss") return "Siswati";
324 if (langcode == "st") return "Sesotho";
325 if (langcode == "su") return "Sudanese";
326 if (langcode == "sv") return "Swedish";
327 if (langcode == "sw") return "Swahili";
328
329 if (langcode == "ta") return "Tamil";
330 if (langcode == "te") return "Tegulu";
331 if (langcode == "tg") return "Tajik";
332 if (langcode == "th") return "Thai";
333 if (langcode == "ti") return "Tigrinya";
334 if (langcode == "tk") return "Turkmen";
335 if (langcode == "tl") return "Tagalog";
336 if (langcode == "tn") return "Setswana";
337 if (langcode == "to") return "Tonga";
338 if (langcode == "tr") return "Turkish";
339 if (langcode == "ts") return "Tsonga";
340 if (langcode == "tt") return "Tatar";
341 if (langcode == "tw") return "Twi";
342
343 if (langcode == "uk") return "Ukrainian";
344 if (langcode == "ur") return "Urdu";
345 if (langcode == "uz") return "Uzbek";
346
347 if (langcode == "vi") return "Vietnamese";
348 if (langcode == "vo") return "Volapuk";
349
350 if (langcode == "wo") return "Wolof";
351
352 if (langcode == "xh") return "Xhosa";
353
354 if (langcode == "yo") return "Yoruba";
355
356 if (langcode == "zh") return "Chinese";
357 if (langcode == "zu") return "Zulu";
358 return "";
359}
360
361text_t get_href (const text_t &link) {
362
363 text_t href;
364
365 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
366 text_t::const_iterator end = link.end();
367
368 here ++;
369 while (here != end) {
370 if (*here == '"') break;
371 href.push_back(*here);
372 here ++;
373 }
374
375 return href;
376}
377
378//this function gets the information associated with the relation
379//metadata for the document associated with 'docinfo'. This relation
380//metadata consists of a line of pairs containing 'collection, document OID'
381//(this is the OID of the document related to the current document, and
382//the collection the related document belongs to). For each of these pairs
383//the title metadata is obtained and then an html link between the title
384//of the related doc and the document's position (the document will be
385//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
386//(where collection is the related documents collection, and OID is the
387//related documents OID). A list of these html links are made for as many
388//related documents as there are. This list is then returned. If there are
389//no related documents available for the current document then the string
390//'.. no related documents .. ' is returned.
391text_t get_related_docs(const text_t& collection, recptproto* collectproto,
392 ResultDocInfo_t &docinfo, ostream& logout){
393
394 text_tset metadata;
395
396 //insert the metadata we wish to collect
397 metadata.insert("relation");
398 metadata.insert("Title");
399 metadata.insert("Subject"); //for emails, where title data doesn't apply
400
401 FilterResponse_t response;
402 text_t relation = ""; //string for displaying relation metadata
403 text_t relationTitle = ""; //the related documents Title (or subject)
404 text_t relationOID = ""; //the related documents OID
405
406 //get the information associated with the metadata for current doc
407 if (get_info (docinfo.OID, collection, metadata,
408 false, collectproto, response, logout)) {
409
410 //if the relation metadata exists, store for displaying
411 if(!response.docInfo[0].metadata["relation"].values.empty()){
412 relationOID += response.docInfo[0].metadata["relation"].values[0];
413
414 //split relation data into pairs of collectionname,ID number
415 text_tarray relationpairs;
416 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
417
418 text_tarray::const_iterator currDoc = relationpairs.begin();
419 text_tarray::const_iterator lastDoc = relationpairs.end();
420
421 //iterate through the pairs to split and display
422 while(currDoc != lastDoc){
423
424 //split pairs into collectionname and ID
425 text_tarray relationdata;
426 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
427
428 //get first element in the array (collection)
429 text_tarray::const_iterator doc_data = relationdata.begin();
430 text_t document_collection = *doc_data;
431 doc_data++; //increment to get next item in array (oid)
432 text_t document_OID = *doc_data;
433
434 //create html link to related document
435 relation += "<a href=\"_httpdocument_&c=" + document_collection;
436 relation += "&cl=search&d=" + document_OID;
437
438 //get the information associated with the metadata for related doc
439 if (get_info (document_OID, document_collection, metadata,
440 false, collectproto, response, logout)) {
441
442 //if title metadata doesn't exist, collect subject metadata
443 //if that doesn't exist, just call it 'related document'
444 if (!response.docInfo[0].metadata["Title"].values[0].empty())
445 relationTitle = response.docInfo[0].metadata["Title"].values[0];
446 else if (!response.docInfo[0].metadata["Subject"].values.empty())
447 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
448 else relationTitle = "RELATED DOCUMENT";
449
450 }
451
452 //link the related document's title to its page
453 relation += "\">" + relationTitle + "</a>";
454 relation += " (" + document_collection + ")<br>";
455
456 currDoc++;
457 }
458 }
459
460 }
461
462 if(relation.empty()) //no relation data for documnet
463 relation = ".. no related documents .. ";
464
465 return relation;
466}
467
468
469
470static void get_parent_options (text_t &instring, metadata_t &metaoption) {
471
472 assert (instring.size() > 7);
473 if (instring.size() <= 7) return;
474
475 text_t meta, com, op;
476 bool inbraces = false;
477 bool inquotes = false;
478 bool foundcolon = false;
479 text_t::const_iterator here = instring.begin()+6;
480 text_t::const_iterator end = instring.end();
481 while (here != end) {
482 if (*here == '(') inbraces = true;
483 else if (*here == ')') inbraces = false;
484 else if (*here == '\'' && !inquotes) inquotes = true;
485 else if (*here == '\'' && inquotes) inquotes = false;
486 else if (*here == ':' && !inbraces) foundcolon = true;
487 else if (foundcolon) meta.push_back (*here);
488 else if (inquotes) op.push_back (*here);
489 else com.push_back (*here);
490 here ++;
491 }
492 instring = meta;
493 if (com.empty())
494 metaoption.parentcommand = pImmediate;
495 else if (com == "Top")
496 metaoption.parentcommand = pTop;
497 else if (com == "All") {
498 metaoption.parentcommand = pAll;
499 metaoption.functionoptions = op;
500 }
501}
502
503
504static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
505
506 assert (instring.size() > 8);
507 if (instring.size() <= 8) return;
508
509 text_t meta, com, op;
510 bool inbraces = false;
511 bool inquotes = false;
512 bool foundcolon = false;
513 text_t::const_iterator here = instring.begin()+7;
514 text_t::const_iterator end = instring.end();
515 while (here != end) {
516 if (*here == '(') inbraces = true;
517 else if (*here == ')') inbraces = false;
518 else if (*here == '\'' && !inquotes) inquotes = true;
519 else if (*here == '\'' && inquotes) inquotes = false;
520 else if (*here == ':' && !inbraces) foundcolon = true;
521 else if (foundcolon) meta.push_back (*here);
522 else if (inquotes) op.push_back (*here);
523 else com.push_back (*here);
524 here ++;
525 }
526
527 instring = meta;
528
529 if (com.empty()) {
530 metaoption.functionoptions = " ";
531 }
532 else {
533 metaoption.functionoptions = op;
534 }
535}
536
537
538static void parse_meta (text_t &meta, metadata_t &metaoption,
539 text_tset &metadata, bool &getParents) {
540
541 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
542 metaoption.metacommand |= mCgiSafe;
543 meta = substr (meta.begin()+8, meta.end());
544 }
545
546 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
547 getParents = true;
548 get_parent_options (meta, metaoption);
549 }
550 else if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
551 metaoption.metacommand |= mSibling;
552 get_sibling_options (meta, metaoption);
553 }
554
555 metadata.insert (meta);
556 metaoption.metaname = meta;
557}
558
559static void parse_meta (text_t &meta, format_t *formatlistptr,
560 text_tset &metadata, bool &getParents) {
561
562 if (meta == "link")
563 formatlistptr->command = comLink;
564 else if (meta == "/link")
565 formatlistptr->command = comEndLink;
566
567 else if (meta == "href")
568 formatlistptr->command = comHref;
569
570 else if (meta == "num")
571 formatlistptr->command = comNum;
572
573 else if (meta == "icon")
574 formatlistptr->command = comIcon;
575
576 else if (meta == "Text")
577 formatlistptr->command = comDoc;
578
579 else if (meta == "RelatedDocuments")
580 formatlistptr->command = comRel;
581
582 else if (meta == "highlight")
583 formatlistptr->command = comHighlight;
584
585 else if (meta == "/highlight")
586 formatlistptr->command = comEndHighlight;
587
588 else if (meta == "Summary")
589 formatlistptr->command = comSummary;
590
591 else if (meta == "DocImage")
592 formatlistptr->command = comImage;
593
594 else if (meta == "DocTOC")
595 formatlistptr->command = comTOC;
596
597 else if (meta == "DocumentButtonDetach")
598 formatlistptr->command = comDocumentButtonDetach;
599
600 else if (meta == "DocumentButtonHighlight")
601 formatlistptr->command = comDocumentButtonHighlight;
602
603 else if (meta == "DocumentButtonExpandContents")
604 formatlistptr->command = comDocumentButtonExpandContents;
605
606 else if (meta == "DocumentButtonExpandText")
607 formatlistptr->command = comDocumentButtonExpandText;
608
609 else if (meta == "DocOID")
610 formatlistptr->command = comOID;
611 else if (meta == "DocRank")
612 formatlistptr->command = comRank;
613 else {
614 formatlistptr->command = comMeta;
615 parse_meta (meta, formatlistptr->meta, metadata, getParents);
616 }
617}
618
619static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
620 text_tset &metadata, bool &getParents) {
621
622 text_t text;
623 text_t::const_iterator here = formatstring.begin();
624 text_t::const_iterator end = formatstring.end();
625
626 while (here != end) {
627
628 if (*here == '\\') {
629 here ++;
630 if (here != end) text.push_back (*here);
631
632 } else if (*here == '{') {
633 if (!text.empty()) {
634 formatlistptr->command = comText;
635 formatlistptr->text = text;
636 formatlistptr->nextptr = new format_t();
637 formatlistptr = formatlistptr->nextptr;
638
639 text.clear();
640 }
641 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
642
643 formatlistptr->nextptr = new format_t();
644 formatlistptr = formatlistptr->nextptr;
645 if (here == end) break;
646 }
647 } else if (*here == '[') {
648 if (!text.empty()) {
649 formatlistptr->command = comText;
650 formatlistptr->text = text;
651 formatlistptr->nextptr = new format_t();
652 formatlistptr = formatlistptr->nextptr;
653
654 text.clear();
655 }
656 text_t meta;
657 here ++;
658 while (*here != ']') {
659 if (here == end) return false;
660 meta.push_back (*here);
661 here ++;
662 }
663 parse_meta (meta, formatlistptr, metadata, getParents);
664 formatlistptr->nextptr = new format_t();
665 formatlistptr = formatlistptr->nextptr;
666
667 } else
668 text.push_back (*here);
669
670 if (here != end) here ++;
671 }
672 if (!text.empty()) {
673 formatlistptr->command = comText;
674 formatlistptr->text = text;
675 formatlistptr->nextptr = new format_t();
676 formatlistptr = formatlistptr->nextptr;
677
678 }
679 return true;
680}
681
682
683static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
684 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
685
686 text_t::const_iterator it = findchar (here, end, '}');
687 if (it == end) return false;
688
689 text_t com = substr (here, it);
690 here = findchar (it, end, '{');
691 if (here == end) return false;
692 else here ++;
693
694 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
695 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
696 else return false;
697
698 int commacount = 0;
699 text_t text;
700 while (here != end) {
701
702 if (*here == '\\') {
703 here++;
704 if (here != end) text.push_back(*here);
705
706 }
707
708 else if (*here == ',' || *here == '}' || *here == '{') {
709
710 if (formatlistptr->command == comOr) {
711 // the {Or}{this, or this, or this, or this} statement
712 format_t *or_ptr;
713
714 // find the next unused orptr
715 if (formatlistptr->orptr == NULL) {
716 formatlistptr->orptr = new format_t();
717 or_ptr = formatlistptr->orptr;
718 } else {
719 or_ptr = formatlistptr->orptr;
720 while (or_ptr->nextptr != NULL)
721 or_ptr = or_ptr->nextptr;
722 or_ptr->nextptr = new format_t();
723 or_ptr = or_ptr->nextptr;
724 }
725
726 if (!text.empty())
727 {
728 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
729 }
730
731 if (*here == '{')
732 {
733 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
734 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
735 // The latter can always be re-written:
736 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
737
738 if (!text.empty()) // already used up allocated format_t
739 {
740 // => allocate new one for detected action
741 or_ptr->nextptr = new format_t();
742 or_ptr = or_ptr->nextptr;
743 }
744 if (!parse_action(++here, end, or_ptr, metadata, getParents))
745 {
746 return false;
747 }
748 }
749 else
750 {
751 if (*here == '}') break;
752 }
753 text.clear();
754
755 }
756
757 // Parse an {If}{decide,do,else} statement
758 else {
759
760 // Read the decision component.
761 if (commacount == 0) {
762 // Decsion can be a metadata element, or a piece of text.
763 // Originally Stefan's code, updated 25/10/2000 by Gordon.
764
765 text_t::const_iterator beginbracket = text.begin();
766 text_t::const_iterator endbracket = (text.end() - 1);
767
768 // Decision is based on a metadata element
769 if ((*beginbracket == '[') && (*endbracket == ']')) {
770 // Ignore the surrounding square brackets
771 text_t meta = substr (beginbracket+1, endbracket);
772 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
773 commacount ++;
774 text.clear();
775 }
776
777 // Decision is a piece of text (probably a macro like _cgiargmode_).
778 else {
779
780 // hunt for any metadata in string, which might be uses in
781 // to test a condition, e.g. [Format] eq 'PDF'
782 format_t* dummyformat = new format_t();
783 // update which metadata fields needed
784 // (not interested in updatng formatlistptr)
785 parse_string (text, dummyformat, metadata, getParents);
786 delete dummyformat;
787
788 formatlistptr->decision.command = dText;
789 formatlistptr->decision.text = text;
790 commacount ++;
791 text.clear();
792 }
793 }
794
795 // Read the "then" and "else" components of the {If} statement.
796 else {
797 format_t** nextlistptr = NULL;
798 if (commacount == 1) {
799 nextlistptr = &formatlistptr->ifptr;
800 } else if (commacount == 2 ) {
801 nextlistptr = &formatlistptr->elseptr;
802 } else {
803 return false;
804 }
805
806 if (!text.empty()) {
807 if (*nextlistptr == NULL) {
808 *nextlistptr = new format_t();
809 } else {
810
811 // skip to the end of any format_t statements already added
812 while ((*nextlistptr)->nextptr != NULL)
813 {
814 nextlistptr = &(*nextlistptr)->nextptr;
815 }
816
817 (*nextlistptr)->nextptr = new format_t();
818 nextlistptr = &(*nextlistptr)->nextptr;
819 }
820
821 if (!parse_string (text, *nextlistptr, metadata, getParents))
822 {
823 return false;
824 }
825 text.clear();
826 }
827
828 if (*here == '{')
829 {
830 if (*nextlistptr == NULL) {
831 *nextlistptr = new format_t();
832 } else {
833 (*nextlistptr)->nextptr = new format_t();
834 nextlistptr = &(*nextlistptr)->nextptr;
835 }
836
837 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
838 {
839 return false;
840 }
841 }
842 else
843 {
844 if (*here == '}') break;
845 commacount ++;
846 }
847 }
848 }
849
850 } else text.push_back(*here);
851
852 if (here != end) here ++;
853 }
854
855 return true;
856}
857
858
859bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
860 text_tset &metadata, bool &getParents) {
861
862 formatlistptr->clear();
863 getParents = false;
864
865 return (parse_string (formatstring, formatlistptr, metadata, getParents));
866}
867
868
869// note: all the format_date stuff is assuming that all Date metadata is going to
870// be of the form yyyymmdd, this is of course, crap ;)
871
872static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
873
874 // make sure we have the requested metadata
875 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
876 if (it == docinfo.metadata.end()) return "";
877
878 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
879 text_t no_ns_metaname = remove_namespace(meta.metaname);
880 switch (meta.parentcommand) {
881 case pNone:
882 {
883 if (meta.metacommand & mSibling) {
884 text_t tmp;
885 bool first = true;
886
887 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
888
889 const int start_i=0;
890 const int end_i = metaname_rec.values.size()-1;
891
892 for (int i=start_i; i<=end_i; i++) {
893 if (!first) tmp += meta.functionoptions;
894
895 if (no_ns_metaname == "Date") tmp += format_date (metaname_rec.values[i]);
896 else if (no_ns_metaname == "Language") tmp += iso639(metaname_rec.values[i]);
897 else tmp += metaname_rec.values[i];
898 first = false;
899 }
900
901 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
902 else return tmp;
903
904 }
905 else {
906
907 text_t classifier_metaname = docinfo.classifier_metadata_type;
908 int metaname_index
909 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
910 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
911
912 if (no_ns_metaname == "Date")
913 return format_date (metadata_item);
914 else if (no_ns_metaname == "Language")
915 return iso639(metadata_item);
916 if (meta.metacommand & mCgiSafe)
917 return cgi_safe (metadata_item);
918 else return metadata_item;
919 }
920 }
921
922 case pImmediate:
923 if (parent != NULL) {
924 if (no_ns_metaname == "Date")
925 return format_date (parent->values[0]);
926 if (meta.metacommand & mCgiSafe)
927 return cgi_safe (parent->values[0]);
928 else return parent->values[0];
929 }
930 break;
931
932 case pTop:
933 if (parent != NULL) {
934 while (parent->parent != NULL) parent = parent->parent;
935
936 if (no_ns_metaname == "Date")
937 return format_date (parent->values[0]);
938 if (meta.metacommand & mCgiSafe)
939 return cgi_safe (parent->values[0]);
940 else return parent->values[0];
941 }
942 break;
943
944 case pAll:
945 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
946 if (parent != NULL) {
947 text_tarray tmparray;
948 while (parent != NULL) {
949 tmparray.push_back (parent->values[0]);
950 parent = parent->parent;
951 }
952 bool first = true;
953 text_t tmp;
954 text_tarray::reverse_iterator here = tmparray.rbegin();
955 text_tarray::reverse_iterator end = tmparray.rend();
956 while (here != end) {
957 if (!first) tmp += meta.functionoptions;
958 if (no_ns_metaname == "Date") tmp += format_date (*here);
959 else tmp += *here;
960 first = false;
961 here ++;
962 }
963 if (meta.metacommand & mCgiSafe) return cgi_safe (tmp);
964 else return tmp;
965 }
966 }
967 return "";
968}
969
970static text_t get_or (const text_t& collection, recptproto* collectproto,
971 ResultDocInfo_t &docinfo, displayclass &disp,
972 format_t *orptr, text_tmap &options,
973 ostream& logout) {
974
975 text_t tmp;
976 while (orptr != NULL) {
977
978 tmp = format_string (collection,collectproto, docinfo, disp, orptr,
979 options, logout);
980 if (!tmp.empty()) return tmp;
981
982 orptr = orptr->nextptr;
983 }
984 return "";
985}
986
// Returns true when c is a space, a horizontal tab, a newline or a
// carriage return; false for every other character.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
992
993static int scan_over_whitespace(const text_t& outstring, const int start_pos)
994{
995 int pos = start_pos;
996 while (pos<outstring.size()) {
997 if (!char_is_whitespace(outstring[pos])) {
998 break;
999 }
1000 pos++;
1001 }
1002
1003 return pos;
1004}
1005
1006static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1007{
1008 int pos = start_pos;
1009 while (pos>=0) {
1010 if (!char_is_whitespace(outstring[pos])) {
1011 break;
1012 }
1013 pos--;
1014 }
1015
1016 return pos;
1017}
1018
1019static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1020{
1021 int pos = start_pos;
1022 while (pos>=0) {
1023 if (char_is_whitespace(outstring[pos])) {
1024 break;
1025 }
1026 pos--;
1027 }
1028
1029 return pos;
1030}
1031
1032
1033static int rscan_for(const text_t& outstring, const int start_pos,
1034 const char find_c)
1035{
1036 int pos = start_pos;
1037 while (pos>=0) {
1038 char c = outstring[pos];
1039 if (outstring[pos] == find_c) {
1040 break;
1041 }
1042 pos--;
1043 }
1044
1045 return pos;
1046}
1047
1048text_t extract_substr(const text_t& outstring, const int start_pos,
1049 const int end_pos)
1050{
1051 text_t extracted_str;
1052 extracted_str.clear();
1053
1054 for (int pos=start_pos; pos<=end_pos; pos++) {
1055 extracted_str.push_back(outstring[pos]);
1056 }
1057
1058 return extracted_str;
1059}
1060
1061
1062static text_t expand_potential_metadata(ResultDocInfo_t &docinfo,
1063 const text_t& intext)
1064{
1065 text_t outtext;
1066
1067 // decide if dealing with metadata or text
1068
1069 text_t::const_iterator beginbracket = intext.begin();
1070 text_t::const_iterator endbracket = (intext.end() - 1);
1071
1072 // Decision is based on a metadata element
1073 if ((*beginbracket == '[') && (*endbracket == ']')) {
1074 // Ignore the surrounding square brackets
1075 text_t meta_text = substr (beginbracket+1, endbracket);
1076
1077 metadata_t meta;
1078 meta.metaname = meta_text;
1079 meta.parentcommand = pNone;
1080 meta.metacommand = mSibling;
1081
1082 bool getParents =false;
1083 outtext = get_meta (docinfo,meta);
1084 }
1085 else {
1086 outtext = intext;
1087 }
1088
1089 return outtext;
1090}
1091
1092
1093
1094
1095static bool uses_expression(ResultDocInfo_t &docinfo,
1096 const text_t& outstring, text_t& lhs_expr,
1097 text_t& op_expr, text_t& rhs_expr)
1098{
1099 // Note: the string may not be of the form: str1 op str2, however
1100 // to deterine this we have to process it on the assumption it is,
1101 // and if at any point an 'erroneous' value is encountered, return
1102 // false and let something else have a go at evaluating it
1103
1104 // Starting at the end of the string and working backwards ..
1105
1106 const int outstring_len = outstring.size();
1107
1108 // skip over white space
1109 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1110
1111 if (rhs_end<=0) {
1112 // no meaningful text or (rhs_end==0) no room for operator
1113 return false;
1114 }
1115
1116 // check for ' or " and then scan over token
1117 const char potential_quote = outstring[rhs_end];
1118 int rhs_start=rhs_end;
1119 bool quoted = false;
1120
1121 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1122 rhs_end--;
1123 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1124 quoted = true;
1125 }
1126 else {
1127 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1128 }
1129
1130 if ((rhs_end-rhs_start)<=0) {
1131 // no meaningful rhs expression
1132 return false;
1133 }
1134
1135 // form rhs_expr
1136 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1137
1138 // skip over white space
1139
1140 const int to_whitespace = (quoted) ? 2 : 1;
1141
1142 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1143 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1144
1145
1146 if (op_end-op_start<=0) {
1147 // no meaningful expression operator
1148 return false;
1149 }
1150
1151 op_expr = extract_substr(outstring,op_start,op_end);
1152
1153
1154 // check for operator
1155 if ((op_expr != "eq") && (op_expr != "ne")) {
1156 // not a valid operator
1157 return false;
1158 }
1159
1160 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1161 if (lhs_end<=0) {
1162 // no meaningful lhs expression
1163 return false;
1164 }
1165
1166 int lhs_start = scan_over_whitespace(outstring,0);
1167
1168 // form lhs_expr from remainder of string
1169 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1170
1171 // Now we know we have a valid expression, look up any
1172 // metadata terms
1173
1174 rhs_expr = expand_potential_metadata(docinfo,rhs_expr);
1175 lhs_expr = expand_potential_metadata(docinfo,lhs_expr);
1176
1177 return true;
1178}
1179
1180static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1181 const text_t& rhs_expr, ostream& logout)
1182{
1183 if (op_expr == "eq") {
1184 return (lhs_expr == rhs_expr);
1185 }
1186 else if (op_expr == "ne" ) {
1187 return (lhs_expr != rhs_expr);
1188 }
1189 else {
1190 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1191 }
1192
1193 return false;
1194}
1195
1196
1197static text_t get_if (const text_t& collection, recptproto* collectproto,
1198 ResultDocInfo_t &docinfo, displayclass &disp,
1199 const decision_t &decision,
1200 format_t *ifptr, format_t *elseptr,
1201 text_tmap &options, ostream& logout)
1202{
1203 // If the decision component is a metadata element, then evaluate it
1204 // to see whether we output the "then" or the "else" clause
1205 if (decision.command == dMeta) {
1206 if (get_meta (docinfo, decision.meta) != "") {
1207 if (ifptr != NULL)
1208 return get_formatted_string (collection,collectproto, docinfo, disp, ifptr,
1209 options, logout);
1210 }
1211 else {
1212 if (elseptr != NULL)
1213 return get_formatted_string (collection,collectproto, docinfo, disp, elseptr,
1214 options, logout);
1215 }
1216 }
1217
1218 // If the decision component is text, then evaluate it (it is probably a
1219 // macro like _cgiargmode_) to decide what to output.
1220 else if (decision.command == dText) {
1221
1222 text_t outstring;
1223 disp.expandstring (decision.text, outstring);
1224
1225 // Check for if expression in form: str1 op str2
1226 // (such as [x] eq "y")
1227 text_t lhs_expr, op_expr, rhs_expr;
1228 if (uses_expression(docinfo, outstring,lhs_expr,op_expr,rhs_expr)) {
1229 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1230 if (ifptr != NULL) {
1231 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1232 options, logout);
1233 }
1234 else {
1235 return "";
1236 }
1237 } else {
1238 if (elseptr != NULL) {
1239 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1240 options, logout);
1241 }
1242 else {
1243 return "";
1244 }
1245 }
1246 }
1247
1248
1249 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1250 // a cgi argument macro that has not been set, it evaluates to itself.
1251 // Therefore, were have to say that a piece of text evalautes true if
1252 // it is non-empty and if it is a cgi argument evaulating to itself.
1253
1254 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1255 if (ifptr != NULL)
1256 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1257 options, logout);
1258 } else {
1259 if (elseptr != NULL)
1260 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1261 options, logout);
1262 }
1263 }
1264
1265 return "";
1266}
1267
1268bool includes_metadata(const text_t& text)
1269{
1270 text_t::const_iterator here = text.begin();
1271 text_t::const_iterator end = text.end();
1272 while (here != end) {
1273 if (*here == '[') return true;
1274 here ++;
1275 }
1276
1277 return false;
1278}
1279
1280static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1281 recptproto* collectproto, ResultDocInfo_t &docinfo,
1282 displayclass &disp, text_tmap &options,
1283 ostream &logout) {
1284
1285 if (includes_metadata(metavalue))
1286 {
1287 // text has embedded metadata in it => expand it
1288 FilterRequest_t request;
1289 FilterResponse_t response;
1290
1291 request.getParents = false;
1292
1293 format_t *expanded_formatlistptr = new format_t();
1294 parse_formatstring (metavalue, expanded_formatlistptr,
1295 request.fields, request.getParents);
1296
1297 // retrieve metadata
1298 get_info(docinfo.OID, collection, request.fields, request.getParents,
1299 collectproto, response, logout);
1300
1301 if (!response.docInfo.empty())
1302 {
1303 text_t expanded_metavalue
1304 = get_formatted_string(collection, collectproto,
1305 response.docInfo[0], disp, expanded_formatlistptr,
1306 options, logout);
1307
1308 return expanded_metavalue;
1309 }
1310 else
1311 {
1312 return metavalue;
1313 }
1314 }
1315 else
1316 {
1317 return metavalue;
1318 }
1319}
1320
1321text_t format_string (const text_t& collection, recptproto* collectproto,
1322 ResultDocInfo_t &docinfo, displayclass &disp,
1323 format_t *formatlistptr, text_tmap &options,
1324 ostream& logout) {
1325
1326 if (formatlistptr == NULL) return "";
1327
1328 switch (formatlistptr->command) {
1329 case comOID:
1330 return docinfo.OID;
1331 case comRank:
1332 return text_t(docinfo.ranking);
1333 case comText:
1334 return formatlistptr->text;
1335 case comLink:
1336 return options["link"];
1337 case comEndLink:
1338 if (options["link"].empty()) return "";
1339 else return "</a>";
1340 case comHref:
1341 return get_href(options["link"]);
1342 case comIcon:
1343 return options["icon"];
1344 case comNum:
1345 return docinfo.result_num;
1346 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1347 return get_related_docs(collection, collectproto, docinfo, logout);
1348 case comSummary:
1349 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1350 case comMeta:
1351 {
1352 const text_t& metavalue = get_meta (docinfo, formatlistptr->meta);
1353 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1354 }
1355 case comDoc:
1356 return options["text"];
1357 case comImage:
1358 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1359 case comTOC:
1360 return options["DocTOC"];
1361 case comDocumentButtonDetach:
1362 return options["DocumentButtonDetach"];
1363 case comDocumentButtonHighlight:
1364 return options["DocumentButtonHighlight"];
1365 case comDocumentButtonExpandContents:
1366 return options["DocumentButtonExpandContents"];
1367 case comDocumentButtonExpandText:
1368 return options["DocumentButtonExpandText"];
1369 case comHighlight:
1370 if (options["highlight"] == "1") return "<b>";
1371 break;
1372 case comEndHighlight:
1373 if (options["highlight"] == "1") return "</b>";
1374 break;
1375 case comIf:
1376 return get_if (collection, collectproto, docinfo, disp,
1377 formatlistptr->decision, formatlistptr->ifptr,
1378 formatlistptr->elseptr, options, logout);
1379 case comOr:
1380 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1381 options, logout);
1382 }
1383 return "";
1384}
1385
1386text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1387 ResultDocInfo_t &docinfo, displayclass &disp,
1388 format_t *formatlistptr, text_tmap &options,
1389 ostream& logout) {
1390
1391 text_t ft;
1392 while (formatlistptr != NULL)
1393 {
1394 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1395 options, logout);
1396 formatlistptr = formatlistptr->nextptr;
1397 }
1398
1399 return ft;
1400}
1401
1402
1403/* FUNCTION NAME: format_summary
1404 * DESC: this is invoked when a [Summary] special metadata is processed.
1405 * RETURNS: a query-biased summary for the document */
1406
1407text_t format_summary (const text_t& collection, recptproto* collectproto,
1408 ResultDocInfo_t &docinfo, displayclass &disp,
1409 text_tmap &options, ostream& logout) {
1410
1411 // GRB: added code here to ensure that the cstr (and other collections)
1412 // uses the document metadata item Summary, rather than compressing
1413 // the text of the document, processed via the methods in
1414 // summarise.cpp
1415 if (docinfo.metadata.count("Summary") > 0 &&
1416 docinfo.metadata["Summary"].values.size() > 0) {
1417 return docinfo.metadata["Summary"].values[0];
1418 }
1419
1420 text_t textToSummarise, query;
1421 if(options["text"].empty()) { // get document text
1422 DocumentRequest_t docrequest;
1423 DocumentResponse_t docresponse;
1424 comerror_t err;
1425 docrequest.OID = docinfo.OID;
1426 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1427 textToSummarise = docresponse.doc;
1428 } else // in practice, this would not happen, because text is only
1429 // loaded with the [Text] command
1430 textToSummarise = options["text"];
1431 disp.expandstring("_cgiargq_",query);
1432 return summarise(textToSummarise,query,80);
1433}
Note: See TracBrowser for help on using the repository browser.