source: trunk/gsdl/src/recpt/formattools.cpp@ 1266

Last change on this file since 1266 was 1257, checked in by sjboddie, 24 years ago

Tidied up a bit. Fixed bug in formattools. Nested If/Or's should now
work within formatstrings.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: formattools.cpp 1257 2000-06-30 00:40:39Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.21 2000/06/30 00:40:39 sjboddie
31 Tidied up a bit. Fixed bug in formattools. Nested If/Or's should now
32 work within formatstrings.
33
34 Revision 1.20 2000/04/07 04:40:44 sjboddie
35 Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc.
36 from DocumentColumns stuff. I'll move the DocumentColumns stuff to a
37 separate development branch (New_Config_Format-branch) for now. The plan
38 is to redesign the configuration file format a bit and limit the number of
39 distributions floating around that take different configuration formats).
40
41 Revision 1.17 2000/01/26 20:10:31 sjboddie
42 changed the default order of detach/expand/highlight buttons
43
44 Revision 1.16 2000/01/25 22:33:31 sjboddie
45 added DocumentUseHTML
46
47 Revision 1.15 1999/12/13 02:45:16 davidb
48 Support for more than one metavalue for the same metadata name
49
50 Revision 1.14 1999/10/30 22:23:11 sjboddie
51 moved table functions from browsetools
52
53 Revision 1.13 1999/10/14 23:01:24 sjboddie
54 changes for new browsing support
55
56 Revision 1.12 1999/10/10 08:14:07 sjboddie
57 - metadata now returns mp rather than array
58 - redesigned browsing support (although it's not finished so
59 won't currently work ;-)
60
61 Revision 1.11 1999/09/28 20:38:19 rjmcnab
62 fixed a couple of bugs
63
64 Revision 1.10 1999/09/07 04:56:55 sjboddie
65 added GPL notice
66
67 Revision 1.9 1999/09/02 00:31:25 rjmcnab
68 fixed small error.
69
70 Revision 1.8 1999/08/20 00:56:38 sjboddie
71 added cgisafe option - you can now do something like [cgisafe:Title] if
72 you want Title to be entered safely into a url
73
74 Revision 1.7 1999/08/10 22:38:08 sjboddie
75 added some more format options
76
77 Revision 1.6 1999/07/30 02:25:42 sjboddie
78 made format_date function global
79
80 Revision 1.5 1999/07/21 05:00:00 sjboddie
81 added some date formatting
82
83 Revision 1.4 1999/07/20 03:02:15 sjboddie
84 added an [icon] option, added ability to call get_formatted_string
85 with icon and link arguments set
86
87 Revision 1.3 1999/07/09 02:44:35 sjboddie
88 fixed parent(All) function so it only outputs parents and not current
89 level meta
90
91 Revision 1.2 1999/07/08 20:48:33 rjmcnab
92 Added ability to print the result number
93
94 Revision 1.1 1999/07/07 05:49:34 sjboddie
95 had another crack at the format string code - created a new formattools
96 module. It can now handle {If} and {Or} statements although there's a
97 bug preventing nested if's and or's.
98
99 */
100
101
102#include "formattools.h"
103#include "cgiutils.h"
104#include <assert.h>
105
106// a few function prototypes
107static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
108 const text_t &link, const text_t &icon,
109 const text_t &text, bool highlight);
110
111static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
112 format_t *formatlistptr, text_tset &metadata, bool &getParents);
113
114void metadata_t::clear() {
115 metaname.clear();
116 metacommand = mNone;
117 parentcommand = pNone;
118 parentoptions.clear();
119}
120
121void decision_t::clear() {
122 command = dMeta;
123 meta.clear();
124}
125
126void format_t::clear() {
127 command = comText;
128 decision.clear();
129 text.clear();
130 meta.clear();
131 nextptr = NULL;
132 ifptr = NULL;
133 elseptr = NULL;
134 orptr = NULL;
135}
136
137void formatinfo_t::clear() {
138 DocumentImages = false;
139 DocumentTitles = true;
140 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
141 DocumentContents = true;
142 DocumentArrowsBottom = true;
143 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
144 DocumentButtons.push_back ("Expand Text");
145 DocumentButtons.push_back ("Expand Contents");
146 DocumentButtons.push_back ("Detach");
147 DocumentButtons.push_back ("Highlight");
148 DocumentText = "[Text]";
149 formatstrings.erase (formatstrings.begin(), formatstrings.end());
150 DocumentUseHTML = false;
151}
152
153// simply checks to see if formatstring begins with a <td> tag
154bool is_table_content (const text_t &formatstring) {
155 text_t::const_iterator here = formatstring.begin();
156 text_t::const_iterator end = formatstring.end();
157
158 while (here != end) {
159 if (*here != ' ') {
160 if ((*here == '<') && ((here+3) < end)) {
161 if ((*(here+1) == 't' || *(here+1) == 'T') &&
162 (*(here+2) == 'd' || *(here+2) == 'D') &&
163 (*(here+3) == '>' || *(here+3) == ' '))
164 return true;
165 } else return false;
166 }
167 here ++;
168 }
169 return false;
170}
171
172bool is_table_content (const format_t *formatlistptr) {
173
174 if (formatlistptr == NULL) return false;
175
176 if (formatlistptr->command == comText)
177 return is_table_content (formatlistptr->text);
178
179 return false;
180}
181
182// returns false if key isn't in formatstringmap
183bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
184 text_t &formatstring) {
185
186 formatstring.clear();
187 text_tmap::const_iterator it = formatstringmap.find(key);
188 if (it == formatstringmap.end()) return false;
189 formatstring = (*it).second;
190 return true;
191}
192
193// tries to find "key1key2" then "key1" then "key2"
194bool get_formatstring (const text_t &key1, const text_t &key2,
195 const text_tmap &formatstringmap,
196 text_t &formatstring) {
197
198 formatstring.clear();
199 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
200 if (it != formatstringmap.end()) {
201 formatstring = (*it).second;
202 return true;
203 }
204 it = formatstringmap.find(key1);
205 if (it != formatstringmap.end()) {
206 formatstring = (*it).second;
207 return true;
208 }
209 it = formatstringmap.find(key2);
210 if (it != formatstringmap.end()) {
211 formatstring = (*it).second;
212 return true;
213 }
214 return false;
215}
216
217
218// returns a date of form _textmonthnn_ 31, 1999
219// input is date of type 19991231
220// at least the year must be present in date
221text_t format_date (const text_t &date) {
222
223 if (date.size() < 4) return "";
224
225 text_t::const_iterator datebegin = date.begin();
226
227 text_t year = substr (datebegin, datebegin+4);
228
229 if (date.size() < 6) return year;
230
231 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
232 int imonth = month.getint();
233 if (imonth < 0 || imonth > 12) return year;
234
235 if (date.size() < 8) return month + ", " + year;
236
237 text_t day = substr (datebegin+6, datebegin+8);
238 if (day[0] == '0') day = substr (day.begin()+1, day.end());
239 int iday = day.getint();
240 if (iday < 0 || iday > 31) return month + ", " + year;
241
242 return month + " " + day + ", " + year;
243}
244
245static void get_parent_options (text_t &instring, metadata_t &metaoption) {
246
247 assert (instring.size() > 7);
248 if (instring.size() <= 7) return;
249
250 text_t meta, com, op;
251 bool inbraces = false;
252 bool inquotes = false;
253 bool foundcolon = false;
254 text_t::const_iterator here = instring.begin()+6;
255 text_t::const_iterator end = instring.end();
256 while (here != end) {
257 if (*here == '(') inbraces = true;
258 else if (*here == ')') inbraces = false;
259 else if (*here == '\'' && !inquotes) inquotes = true;
260 else if (*here == '\'' && inquotes) inquotes = false;
261 else if (*here == ':' && !inbraces) foundcolon = true;
262 else if (foundcolon) meta.push_back (*here);
263 else if (inquotes) op.push_back (*here);
264 else com.push_back (*here);
265 here ++;
266 }
267 instring = meta;
268 if (com.empty())
269 metaoption.parentcommand = pImmediate;
270 else if (com == "Top")
271 metaoption.parentcommand = pTop;
272 else if (com == "All") {
273 metaoption.parentcommand = pAll;
274 metaoption.parentoptions = op;
275 }
276}
277
278static void parse_meta (text_t &meta, metadata_t &metaoption,
279 text_tset &metadata, bool &getParents) {
280
281 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
282 metaoption.metacommand = mCgiSafe;
283 meta = substr (meta.begin()+8, meta.end());
284 }
285
286 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
287 getParents = true;
288 get_parent_options (meta, metaoption);
289 }
290
291 metadata.insert (meta);
292 metaoption.metaname = meta;
293}
294
295static void parse_meta (text_t &meta, format_t *formatlistptr,
296 text_tset &metadata, bool &getParents) {
297
298 if (meta == "link")
299 formatlistptr->command = comLink;
300 else if (meta == "/link")
301 formatlistptr->command = comEndLink;
302
303 else if (meta == "num")
304 formatlistptr->command = comNum;
305
306 else if (meta == "icon")
307 formatlistptr->command = comIcon;
308
309 else if (meta == "Text")
310 formatlistptr->command = comDoc;
311
312 else if (meta == "highlight")
313 formatlistptr->command = comHighlight;
314
315 else if (meta == "/highlight")
316 formatlistptr->command = comEndHighlight;
317
318 else {
319 formatlistptr->command = comMeta;
320 parse_meta (meta, formatlistptr->meta, metadata, getParents);
321 }
322}
323
324static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
325 text_tset &metadata, bool &getParents) {
326
327 text_t text;
328 text_t::const_iterator here = formatstring.begin();
329 text_t::const_iterator end = formatstring.end();
330
331 while (here != end) {
332
333 if (*here == '\\') {
334 here ++;
335 if (here != end) text.push_back (*here);
336
337 } else if (*here == '{') {
338 if (!text.empty()) {
339 formatlistptr->command = comText;
340 formatlistptr->text = text;
341 formatlistptr->nextptr = new format_t();
342 formatlistptr = formatlistptr->nextptr;
343
344 text.clear();
345 }
346 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
347 formatlistptr->nextptr = new format_t();
348 formatlistptr = formatlistptr->nextptr;
349 if (here == end) break;
350 }
351 } else if (*here == '[') {
352 if (!text.empty()) {
353 formatlistptr->command = comText;
354 formatlistptr->text = text;
355 formatlistptr->nextptr = new format_t();
356 formatlistptr = formatlistptr->nextptr;
357
358 text.clear();
359 }
360 text_t meta;
361 here ++;
362 while (*here != ']') {
363 if (here == end) return false;
364 meta.push_back (*here);
365 here ++;
366 }
367 parse_meta (meta, formatlistptr, metadata, getParents);
368 formatlistptr->nextptr = new format_t();
369 formatlistptr = formatlistptr->nextptr;
370
371 } else
372 text.push_back (*here);
373
374 if (here != end) here ++;
375 }
376 if (!text.empty()) {
377 formatlistptr->command = comText;
378 formatlistptr->text = text;
379 formatlistptr->nextptr = new format_t();
380 formatlistptr = formatlistptr->nextptr;
381
382 }
383 return true;
384}
385
386
387static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
388 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
389
390 text_t::const_iterator it = findchar (here, end, '}');
391 if (it == end) return false;
392
393 text_t com = substr (here, it);
394 here = findchar (it, end, '{');
395 if (here == end) return false;
396 else here ++;
397
398 if (com == "If") formatlistptr->command = comIf;
399 else if (com == "Or") formatlistptr->command = comOr;
400 else return false;
401
402 int curlycount = 0;
403 int commacount = 0;
404 text_t text;
405 while (here != end) {
406
407 if (*here == '\\') {
408 here++;
409 if (here != end) text.push_back(*here);
410
411 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
412 else if (*here == '}' && curlycount > 0) {
413 curlycount --;
414 text.push_back(*here);
415 }
416
417 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
418
419 if (formatlistptr->command == comOr) {
420 // the {Or}{this, or this, or this, or this} statement
421 // or'ed statements may be either [metadata] or plain text
422 format_t *or_ptr;
423
424 // find the next unused orptr
425 if (formatlistptr->orptr == NULL) {
426 formatlistptr->orptr = new format_t();
427 or_ptr = formatlistptr->orptr;
428 } else {
429 or_ptr = formatlistptr->orptr;
430 while (or_ptr->nextptr != NULL)
431 or_ptr = or_ptr->nextptr;
432 or_ptr->nextptr = new format_t();
433 or_ptr = or_ptr->nextptr;
434 }
435
436 text_t::const_iterator beginbracket = text.begin();
437 text_t::const_iterator endbracket = (text.end() - 1);
438 if ((*beginbracket == '[') && (*endbracket == ']')) {
439 // it's metadata
440 text_t meta = substr (beginbracket+1, endbracket);
441 parse_meta (meta, or_ptr, metadata, getParents);
442
443 } else {
444 parse_string (text, or_ptr, metadata, getParents);
445 }
446 text.clear();
447
448 } else {
449 // the {If}{decide,do,else} statement
450 if (commacount == 0) {
451 // If decision only supports metadata at present
452
453 // remove the surrounding square brackets
454 text_t::const_iterator beginbracket = text.begin();
455 text_t::const_iterator endbracket = (text.end() - 1);
456 if ((*beginbracket == '[') && (*endbracket == ']')) {
457 text_t meta = substr (beginbracket+1, endbracket);
458 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
459 commacount ++;
460 text.clear();
461 }
462
463 } else if (commacount == 1) {
464 formatlistptr->ifptr = new format_t();
465 parse_string (text, formatlistptr->ifptr, metadata, getParents);
466 commacount ++;
467 text.clear();
468
469 } else if (commacount == 2) {
470 formatlistptr->elseptr = new format_t();
471 parse_string (text, formatlistptr->elseptr, metadata, getParents);
472 commacount ++;
473 text.clear();
474 }
475 }
476 if (*here == '}') break;
477
478 } else text.push_back(*here);
479
480 if (here != end) here ++;
481 }
482
483 return true;
484}
485
486
487bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
488 text_tset &metadata, bool &getParents) {
489
490 formatlistptr->clear();
491 getParents = false;
492
493 return (parse_string (formatstring, formatlistptr, metadata, getParents));
494}
495
496
497// note: all the format_date stuff is assuming that all Date metadata is going to
498// be of the form yyyymmdd, this is of course, crap ;)
499
500static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
501
502 // make sure we have the requested metadata
503 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
504 if (it == docinfo.metadata.end()) return "";
505
506 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
507
508 switch (meta.parentcommand) {
509 case pNone:
510 {
511 text_t classifier_metaname = docinfo.classifier_metadata_type;
512 int metaname_index
513 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
514 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
515
516 if (meta.metaname == "Date")
517 return format_date (metadata_item);
518 if (meta.metacommand == mCgiSafe)
519 return cgi_safe (metadata_item);
520 else return metadata_item;
521 }
522
523 case pImmediate:
524 if (parent != NULL) {
525 if (meta.metaname == "Date")
526 return format_date (parent->values[0]);
527 if (meta.metacommand == mCgiSafe)
528 return cgi_safe (parent->values[0]);
529 else return parent->values[0];
530 }
531 break;
532
533 case pTop:
534 if (parent != NULL) {
535 while (parent->parent != NULL) parent = parent->parent;
536
537 if (meta.metaname == "Date")
538 return format_date (parent->values[0]);
539 if (meta.metacommand == mCgiSafe)
540 return cgi_safe (parent->values[0]);
541 else return parent->values[0];
542 }
543 break;
544
545 case pAll:
546 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
547 if (parent != NULL) {
548 text_tarray tmparray;
549 while (parent != NULL) {
550 tmparray.push_back (parent->values[0]);
551 parent = parent->parent;
552 }
553 bool first = true;
554 text_t tmp;
555 text_tarray::reverse_iterator here = tmparray.rbegin();
556 text_tarray::reverse_iterator end = tmparray.rend();
557 while (here != end) {
558 if (!first) tmp += meta.parentoptions;
559 if (meta.metaname == "Date") tmp += format_date (*here);
560 else tmp += *here;
561 first = false;
562 here ++;
563 }
564 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
565 else return tmp;
566 }
567 }
568 return "";
569}
570
571static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
572 const text_t &link, const text_t &icon,
573 const text_t &text, bool highlight) {
574
575 text_t tmp;
576 while (orptr != NULL) {
577
578 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
579 if (!tmp.empty()) return tmp;
580
581 orptr = orptr->nextptr;
582 }
583 return "";
584}
585
586static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
587 format_t *ifptr, format_t *elseptr, const text_t &link,
588 const text_t &icon, const text_t &text, bool highlight) {
589
590 // not much of a choice yet ...
591 if (decision.command == dMeta) {
592 if (get_meta (docinfo, decision.meta) != "") {
593 if (ifptr != NULL)
594 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
595 }
596 else {
597 if (elseptr != NULL)
598 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
599 }
600 }
601 return "";
602}
603
604text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
605 const text_t &link, const text_t &icon,
606 const text_t &text, bool highlight) {
607
608 if (formatlistptr == NULL) return "";
609
610 switch (formatlistptr->command) {
611 case comText:
612 return formatlistptr->text;
613 case comLink:
614 return link;
615 case comEndLink:
616 if (link.empty()) return "";
617 else return "</a>";
618 case comIcon:
619 return icon;
620 case comNum:
621 return docinfo.result_num;
622 case comMeta:
623 return get_meta (docinfo, formatlistptr->meta);
624 case comDoc:
625 return text;
626 case comHighlight:
627 if (highlight) return "<b>";
628 break;
629 case comEndHighlight:
630 if (highlight) return "</b>";
631 break;
632 case comIf:
633 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
634 formatlistptr->elseptr, link, icon, text, highlight);
635 case comOr:
636 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
637 }
638 return "";
639}
640
641
642text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
643 const text_t &link, const text_t &icon) {
644
645 text_t text;
646
647 text_t ft;
648 while (formatlistptr != NULL) {
649 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
650 formatlistptr = formatlistptr->nextptr;
651 }
652 return ft;
653}
654
655
656text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
657
658 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
659 text_t icon = "_icontext_";
660 text_t text;
661
662 text_t ft;
663 while (formatlistptr != NULL) {
664 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
665 formatlistptr = formatlistptr->nextptr;
666 }
667 return ft;
668}
669
670
671text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
672 const text_t &text) {
673
674 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
675 text_t icon = "_icontext_";
676
677 text_t ft;
678 while (formatlistptr != NULL) {
679 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
680 formatlistptr = formatlistptr->nextptr;
681 }
682 return ft;
683}
684
685
686text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
687 const text_t &link, const text_t &icon, const text_t &text) {
688
689 text_t ft;
690 while (formatlistptr != NULL) {
691 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
692 formatlistptr = formatlistptr->nextptr;
693 }
694 return ft;
695}
696
697text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
698 const text_t &link, const text_t &icon, bool highlight) {
699
700 text_t text, ft;
701 while (formatlistptr != NULL) {
702 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
703 formatlistptr = formatlistptr->nextptr;
704 }
705 return ft;
706}
707
708text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
709 const text_t &link, const text_t &icon,
710 const text_t &text, bool highlight) {
711
712 text_t ft;
713 while (formatlistptr != NULL) {
714 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
715 formatlistptr = formatlistptr->nextptr;
716 }
717 return ft;
718}
719
720
Note: See TracBrowser for help on using the repository browser.