source: branches/New_Config_Format-branch/gsdl/src/recpt/formattools.cpp@ 1258

Last change on this file since 1258 was 1258, checked in by nzdl, 24 years ago

caught New_Config_Format-branch up with changes to trunk

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 20.8 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: formattools.cpp 1258 2000-06-30 00:46:17Z nzdl $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.20.2.2 2000/06/30 00:46:17 nzdl
31 caught New_Config_Format-branch up with changes to trunk
32
33
34 Revision 1.21 2000/06/30 00:40:39 sjboddie
35 Tidied up a bit. Fixed bug in formattools. Nested If/Or's should now
36 work within formatstrings.
37
38 Revision 1.20.2.1 2000/04/09 23:16:48 sjboddie
39 Added DocumentColumns stuff to New_Config_Format-branch branch
40
41 Revision 1.20 2000/04/07 04:40:44 sjboddie
42 Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc.
43 from DocumentColumns stuff. I'll move the DocumentColumns stuff to a
44 separate development branch (New_Config_Format-branch) for now. The plan
45 is to redesign the configuration file format a bit and limit the number of
46 distributions floating around that take different configuration formats).
47
48 Revision 1.19 2000/04/03 07:28:24 sjboddie
49 replaced old DocumentIcon and DocumentContents format options
50 with DocumentColumn stuff
51
52 Revision 1.18 2000/03/31 03:04:31 nzdl
53 tidied up some of the browsing code - replaced DocumentImages,
54 DocumentTitles and DocumentHeading with DocumentIcon
55
56 Revision 1.17 2000/01/26 20:10:31 sjboddie
57 changed the default order of detach/expand/highlight buttons
58
59 Revision 1.16 2000/01/25 22:33:31 sjboddie
60 added DocumentUseHTML
61
62 Revision 1.15 1999/12/13 02:45:16 davidb
63 Support for more than one metavalue for the same metadata name
64
65 Revision 1.14 1999/10/30 22:23:11 sjboddie
66 moved table functions from browsetools
67
68 Revision 1.13 1999/10/14 23:01:24 sjboddie
69 changes for new browsing support
70
71 Revision 1.12 1999/10/10 08:14:07 sjboddie
72 - metadata now returns mp rather than array
73 - redesigned browsing support (although it's not finished so
74 won't currently work ;-)
75
76 Revision 1.11 1999/09/28 20:38:19 rjmcnab
77 fixed a couple of bugs
78
79 Revision 1.10 1999/09/07 04:56:55 sjboddie
80 added GPL notice
81
82 Revision 1.9 1999/09/02 00:31:25 rjmcnab
83 fixed small error.
84
85 Revision 1.8 1999/08/20 00:56:38 sjboddie
86 added cgisafe option - you can now do something like [cgisafe:Title] if
87 you want Title to be entered safely into a url
88
89 Revision 1.7 1999/08/10 22:38:08 sjboddie
90 added some more format options
91
92 Revision 1.6 1999/07/30 02:25:42 sjboddie
93 made format_date function global
94
95 Revision 1.5 1999/07/21 05:00:00 sjboddie
96 added some date formatting
97
98 Revision 1.4 1999/07/20 03:02:15 sjboddie
99 added an [icon] option, added ability to call get_formatted_string
100 with icon and link arguments set
101
102 Revision 1.3 1999/07/09 02:44:35 sjboddie
103 fixed parent(All) function so it only outputs parents and not current
104 level meta
105
106 Revision 1.2 1999/07/08 20:48:33 rjmcnab
107 Added ability to print the result number
108
109 Revision 1.1 1999/07/07 05:49:34 sjboddie
110 had another crack at the format string code - created a new formattools
111 module. It can now handle {If} and {Or} statements although there's a
112 bug preventing nested if's and or's.
113
114 */
115
116
117#include "formattools.h"
118#include "cgiutils.h"
119#include <assert.h>
120
121// a few function prototypes
122static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
123 const text_t &link, const text_t &icon,
124 const text_t &text, bool highlight);
125
126static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
127 format_t *formatlistptr, text_tset &metadata, bool &getParents);
128
129void metadata_t::clear() {
130 metaname.clear();
131 metacommand = mNone;
132 parentcommand = pNone;
133 parentoptions.clear();
134}
135
136void decision_t::clear() {
137 command = dMeta;
138 meta.clear();
139}
140
141void format_t::clear() {
142 command = comText;
143 decision.clear();
144 text.clear();
145 meta.clear();
146 nextptr = NULL;
147 ifptr = NULL;
148 elseptr = NULL;
149 orptr = NULL;
150}
151
152void formatinfo_t::clear() {
153 DocumentColumns = "2";
154 DocumentColumnsTotalWidth = "_pagewidth_";
155 DocumentColumnLeft = "[Title]<br>[Buttons]";
156 DocumentColumnLeftWidth = "200";
157 DocumentColumnRight = "[TOC]";
158 DocumentArrowsBottom = true;
159 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
160 DocumentButtons.push_back ("Expand Text");
161 DocumentButtons.push_back ("Expand Contents");
162 DocumentButtons.push_back ("Detach");
163 DocumentButtons.push_back ("Highlight");
164 DocumentText = "[Text]";
165 formatstrings.erase (formatstrings.begin(), formatstrings.end());
166 DocumentUseHTML = false;
167}
168
169// simply checks to see if formatstring begins with a <td> tag
170bool is_table_content (const text_t &formatstring) {
171 text_t::const_iterator here = formatstring.begin();
172 text_t::const_iterator end = formatstring.end();
173
174 while (here != end) {
175 if (*here != ' ') {
176 if ((*here == '<') && ((here+3) < end)) {
177 if ((*(here+1) == 't' || *(here+1) == 'T') &&
178 (*(here+2) == 'd' || *(here+2) == 'D') &&
179 (*(here+3) == '>' || *(here+3) == ' '))
180 return true;
181 } else return false;
182 }
183 here ++;
184 }
185 return false;
186}
187
188bool is_table_content (const format_t *formatlistptr) {
189
190 if (formatlistptr == NULL) return false;
191
192 if (formatlistptr->command == comText)
193 return is_table_content (formatlistptr->text);
194
195 return false;
196}
197
198// returns false if key isn't in formatstringmap
199bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
200 text_t &formatstring) {
201
202 formatstring.clear();
203 text_tmap::const_iterator it = formatstringmap.find(key);
204 if (it == formatstringmap.end()) return false;
205 formatstring = (*it).second;
206 return true;
207}
208
209// tries to find "key1key2" then "key1" then "key2"
210bool get_formatstring (const text_t &key1, const text_t &key2,
211 const text_tmap &formatstringmap,
212 text_t &formatstring) {
213
214 formatstring.clear();
215 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
216 if (it != formatstringmap.end()) {
217 formatstring = (*it).second;
218 return true;
219 }
220 it = formatstringmap.find(key1);
221 if (it != formatstringmap.end()) {
222 formatstring = (*it).second;
223 return true;
224 }
225 it = formatstringmap.find(key2);
226 if (it != formatstringmap.end()) {
227 formatstring = (*it).second;
228 return true;
229 }
230 return false;
231}
232
233
234// returns a date of form _textmonthnn_ 31, 1999
235// input is date of type 19991231
236// at least the year must be present in date
237text_t format_date (const text_t &date) {
238
239 if (date.size() < 4) return "";
240
241 text_t::const_iterator datebegin = date.begin();
242
243 text_t year = substr (datebegin, datebegin+4);
244
245 if (date.size() < 6) return year;
246
247 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
248 int imonth = month.getint();
249 if (imonth < 0 || imonth > 12) return year;
250
251 if (date.size() < 8) return month + ", " + year;
252
253 text_t day = substr (datebegin+6, datebegin+8);
254 if (day[0] == '0') day = substr (day.begin()+1, day.end());
255 int iday = day.getint();
256 if (iday < 0 || iday > 31) return month + ", " + year;
257
258 return month + " " + day + ", " + year;
259}
260
261static void get_parent_options (text_t &instring, metadata_t &metaoption) {
262
263 assert (instring.size() > 7);
264 if (instring.size() <= 7) return;
265
266 text_t meta, com, op;
267 bool inbraces = false;
268 bool inquotes = false;
269 bool foundcolon = false;
270 text_t::const_iterator here = instring.begin()+6;
271 text_t::const_iterator end = instring.end();
272 while (here != end) {
273 if (*here == '(') inbraces = true;
274 else if (*here == ')') inbraces = false;
275 else if (*here == '\'' && !inquotes) inquotes = true;
276 else if (*here == '\'' && inquotes) inquotes = false;
277 else if (*here == ':' && !inbraces) foundcolon = true;
278 else if (foundcolon) meta.push_back (*here);
279 else if (inquotes) op.push_back (*here);
280 else com.push_back (*here);
281 here ++;
282 }
283 instring = meta;
284 if (com.empty())
285 metaoption.parentcommand = pImmediate;
286 else if (com == "Top")
287 metaoption.parentcommand = pTop;
288 else if (com == "All") {
289 metaoption.parentcommand = pAll;
290 metaoption.parentoptions = op;
291 }
292}
293
294static void parse_meta (text_t &meta, metadata_t &metaoption,
295 text_tset &metadata, bool &getParents) {
296
297 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
298 metaoption.metacommand = mCgiSafe;
299 meta = substr (meta.begin()+8, meta.end());
300 }
301
302 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
303 getParents = true;
304 get_parent_options (meta, metaoption);
305 }
306
307 metadata.insert (meta);
308 metaoption.metaname = meta;
309}
310
311static void parse_meta (text_t &meta, format_t *formatlistptr,
312 text_tset &metadata, bool &getParents) {
313
314 if (meta == "link")
315 formatlistptr->command = comLink;
316 else if (meta == "/link")
317 formatlistptr->command = comEndLink;
318
319 else if (meta == "num")
320 formatlistptr->command = comNum;
321
322 else if (meta == "icon")
323 formatlistptr->command = comIcon;
324
325 else if (meta == "Text")
326 formatlistptr->command = comDoc;
327
328 else if (meta == "highlight")
329 formatlistptr->command = comHighlight;
330
331 else if (meta == "/highlight")
332 formatlistptr->command = comEndHighlight;
333
334 else {
335 formatlistptr->command = comMeta;
336 parse_meta (meta, formatlistptr->meta, metadata, getParents);
337 }
338}
339
340static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
341 text_tset &metadata, bool &getParents) {
342
343 text_t text;
344 text_t::const_iterator here = formatstring.begin();
345 text_t::const_iterator end = formatstring.end();
346
347 while (here != end) {
348
349 if (*here == '\\') {
350 here ++;
351 if (here != end) text.push_back (*here);
352
353 } else if (*here == '{') {
354 if (!text.empty()) {
355 formatlistptr->command = comText;
356 formatlistptr->text = text;
357 formatlistptr->nextptr = new format_t();
358 formatlistptr = formatlistptr->nextptr;
359
360 text.clear();
361 }
362 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
363 formatlistptr->nextptr = new format_t();
364 formatlistptr = formatlistptr->nextptr;
365 if (here == end) break;
366 }
367 } else if (*here == '[') {
368 if (!text.empty()) {
369 formatlistptr->command = comText;
370 formatlistptr->text = text;
371 formatlistptr->nextptr = new format_t();
372 formatlistptr = formatlistptr->nextptr;
373
374 text.clear();
375 }
376 text_t meta;
377 here ++;
378 while (*here != ']') {
379 if (here == end) return false;
380 meta.push_back (*here);
381 here ++;
382 }
383 parse_meta (meta, formatlistptr, metadata, getParents);
384 formatlistptr->nextptr = new format_t();
385 formatlistptr = formatlistptr->nextptr;
386
387 } else
388 text.push_back (*here);
389
390 if (here != end) here ++;
391 }
392 if (!text.empty()) {
393 formatlistptr->command = comText;
394 formatlistptr->text = text;
395 formatlistptr->nextptr = new format_t();
396 formatlistptr = formatlistptr->nextptr;
397
398 }
399 return true;
400}
401
402
403static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
404 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
405
406 text_t::const_iterator it = findchar (here, end, '}');
407 if (it == end) return false;
408
409 text_t com = substr (here, it);
410 here = findchar (it, end, '{');
411 if (here == end) return false;
412 else here ++;
413
414 if (com == "If") formatlistptr->command = comIf;
415 else if (com == "Or") formatlistptr->command = comOr;
416 else return false;
417
418 int curlycount = 0;
419 int commacount = 0;
420 text_t text;
421 while (here != end) {
422
423 if (*here == '\\') {
424 here++;
425 if (here != end) text.push_back(*here);
426
427 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
428 else if (*here == '}' && curlycount > 0) {
429 curlycount --;
430 text.push_back(*here);
431 }
432
433 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
434
435 if (formatlistptr->command == comOr) {
436 // the {Or}{this, or this, or this, or this} statement
437 // or'ed statements may be either [metadata] or plain text
438 format_t *or_ptr;
439
440 // find the next unused orptr
441 if (formatlistptr->orptr == NULL) {
442 formatlistptr->orptr = new format_t();
443 or_ptr = formatlistptr->orptr;
444 } else {
445 or_ptr = formatlistptr->orptr;
446 while (or_ptr->nextptr != NULL)
447 or_ptr = or_ptr->nextptr;
448 or_ptr->nextptr = new format_t();
449 or_ptr = or_ptr->nextptr;
450 }
451
452 text_t::const_iterator beginbracket = text.begin();
453 text_t::const_iterator endbracket = (text.end() - 1);
454 if ((*beginbracket == '[') && (*endbracket == ']')) {
455 // it's metadata
456 text_t meta = substr (beginbracket+1, endbracket);
457 parse_meta (meta, or_ptr, metadata, getParents);
458
459 } else {
460 parse_string (text, or_ptr, metadata, getParents);
461 }
462 text.clear();
463
464 } else {
465 // the {If}{decide,do,else} statement
466 if (commacount == 0) {
467 // If decision only supports metadata at present
468
469 // remove the surrounding square brackets
470 text_t::const_iterator beginbracket = text.begin();
471 text_t::const_iterator endbracket = (text.end() - 1);
472 if ((*beginbracket == '[') && (*endbracket == ']')) {
473 text_t meta = substr (beginbracket+1, endbracket);
474 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
475 commacount ++;
476 text.clear();
477 }
478
479 } else if (commacount == 1) {
480 formatlistptr->ifptr = new format_t();
481 parse_string (text, formatlistptr->ifptr, metadata, getParents);
482 commacount ++;
483 text.clear();
484
485 } else if (commacount == 2) {
486 formatlistptr->elseptr = new format_t();
487 parse_string (text, formatlistptr->elseptr, metadata, getParents);
488 commacount ++;
489 text.clear();
490 }
491 }
492 if (*here == '}') break;
493
494 } else text.push_back(*here);
495
496 if (here != end) here ++;
497 }
498
499 return true;
500}
501
502
503bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
504 text_tset &metadata, bool &getParents) {
505
506 formatlistptr->clear();
507 getParents = false;
508
509 return (parse_string (formatstring, formatlistptr, metadata, getParents));
510}
511
512
513// note: all the format_date stuff is assuming that all Date metadata is going to
514// be of the form yyyymmdd, this is of course, crap ;)
515
516static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
517
518 // make sure we have the requested metadata
519 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
520 if (it == docinfo.metadata.end()) return "";
521
522 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
523
524 switch (meta.parentcommand) {
525 case pNone:
526 {
527 text_t classifier_metaname = docinfo.classifier_metadata_type;
528 int metaname_index
529 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
530 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
531
532 if (meta.metaname == "Date")
533 return format_date (metadata_item);
534 if (meta.metacommand == mCgiSafe)
535 return cgi_safe (metadata_item);
536 else return metadata_item;
537 }
538
539 case pImmediate:
540 if (parent != NULL) {
541 if (meta.metaname == "Date")
542 return format_date (parent->values[0]);
543 if (meta.metacommand == mCgiSafe)
544 return cgi_safe (parent->values[0]);
545 else return parent->values[0];
546 }
547 break;
548
549 case pTop:
550 if (parent != NULL) {
551 while (parent->parent != NULL) parent = parent->parent;
552
553 if (meta.metaname == "Date")
554 return format_date (parent->values[0]);
555 if (meta.metacommand == mCgiSafe)
556 return cgi_safe (parent->values[0]);
557 else return parent->values[0];
558 }
559 break;
560
561 case pAll:
562 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
563 if (parent != NULL) {
564 text_tarray tmparray;
565 while (parent != NULL) {
566 tmparray.push_back (parent->values[0]);
567 parent = parent->parent;
568 }
569 bool first = true;
570 text_t tmp;
571 text_tarray::reverse_iterator here = tmparray.rbegin();
572 text_tarray::reverse_iterator end = tmparray.rend();
573 while (here != end) {
574 if (!first) tmp += meta.parentoptions;
575 if (meta.metaname == "Date") tmp += format_date (*here);
576 else tmp += *here;
577 first = false;
578 here ++;
579 }
580 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
581 else return tmp;
582 }
583 }
584 return "";
585}
586
587static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
588 const text_t &link, const text_t &icon,
589 const text_t &text, bool highlight) {
590
591 text_t tmp;
592 while (orptr != NULL) {
593
594 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
595 if (!tmp.empty()) return tmp;
596
597 orptr = orptr->nextptr;
598 }
599 return "";
600}
601
602static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
603 format_t *ifptr, format_t *elseptr, const text_t &link,
604 const text_t &icon, const text_t &text, bool highlight) {
605
606 // not much of a choice yet ...
607 if (decision.command == dMeta) {
608 if (get_meta (docinfo, decision.meta) != "") {
609 if (ifptr != NULL)
610 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
611 }
612 else {
613 if (elseptr != NULL)
614 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
615 }
616 }
617 return "";
618}
619
620text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
621 const text_t &link, const text_t &icon,
622 const text_t &text, bool highlight) {
623
624 if (formatlistptr == NULL) return "";
625
626 switch (formatlistptr->command) {
627 case comText:
628 return formatlistptr->text;
629 case comLink:
630 return link;
631 case comEndLink:
632 if (link.empty()) return "";
633 else return "</a>";
634 case comIcon:
635 return icon;
636 case comNum:
637 return docinfo.result_num;
638 case comMeta:
639 return get_meta (docinfo, formatlistptr->meta);
640 case comDoc:
641 return text;
642 case comHighlight:
643 if (highlight) return "<b>";
644 break;
645 case comEndHighlight:
646 if (highlight) return "</b>";
647 break;
648 case comIf:
649 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
650 formatlistptr->elseptr, link, icon, text, highlight);
651 case comOr:
652 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
653 }
654 return "";
655}
656
657
658text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
659 const text_t &link, const text_t &icon) {
660
661 text_t text;
662
663 text_t ft;
664 while (formatlistptr != NULL) {
665 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
666 formatlistptr = formatlistptr->nextptr;
667 }
668 return ft;
669}
670
671
672text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
673
674 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
675 text_t icon = "_icontext_";
676 text_t text;
677
678 text_t ft;
679 while (formatlistptr != NULL) {
680 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
681 formatlistptr = formatlistptr->nextptr;
682 }
683 return ft;
684}
685
686
687text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
688 const text_t &text) {
689
690 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
691 text_t icon = "_icontext_";
692
693 text_t ft;
694 while (formatlistptr != NULL) {
695 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
696 formatlistptr = formatlistptr->nextptr;
697 }
698 return ft;
699}
700
701
702text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
703 const text_t &link, const text_t &icon, const text_t &text) {
704
705 text_t ft;
706 while (formatlistptr != NULL) {
707 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
708 formatlistptr = formatlistptr->nextptr;
709 }
710 return ft;
711}
712
713text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
714 const text_t &link, const text_t &icon, bool highlight) {
715
716 text_t text, ft;
717 while (formatlistptr != NULL) {
718 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
719 formatlistptr = formatlistptr->nextptr;
720 }
721 return ft;
722}
723
724text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
725 const text_t &link, const text_t &icon,
726 const text_t &text, bool highlight) {
727
728 text_t ft;
729 while (formatlistptr != NULL) {
730 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
731 formatlistptr = formatlistptr->nextptr;
732 }
733 return ft;
734}
735
736
Note: See TracBrowser for help on using the repository browser.