source: trunk/gsdl/src/recpt/formattools.cpp@ 1079

Last change on this file since 1079 was 1079, checked in by sjboddie, 24 years ago

Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc.
from DocumentColumns stuff. I'll move the DocumentColumns stuff to a
separate development branch (New_Config_Format-branch) for now. The plan
is to redesign the configuration file format a bit and limit the number of
distributions floating around that take different configuration formats).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 20.0 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: formattools.cpp 1079 2000-04-07 04:40:45Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.20 2000/04/07 04:40:44 sjboddie
31 Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc.
32 from DocumentColumns stuff. I'll move the DocumentColumns stuff to a
33 separate development branch (New_Config_Format-branch) for now. The plan
34 is to redesign the configuration file format a bit and limit the number of
35 distributions floating around that take different configuration formats).
36
37 Revision 1.17 2000/01/26 20:10:31 sjboddie
38 changed the default order of detach/expand/highlight buttons
39
40 Revision 1.16 2000/01/25 22:33:31 sjboddie
41 added DocumentUseHTML
42
43 Revision 1.15 1999/12/13 02:45:16 davidb
44 Support for more than one metavalue for the same metadata name
45
46 Revision 1.14 1999/10/30 22:23:11 sjboddie
47 moved table functions from browsetools
48
49 Revision 1.13 1999/10/14 23:01:24 sjboddie
50 changes for new browsing support
51
52 Revision 1.12 1999/10/10 08:14:07 sjboddie
53 - metadata now returns mp rather than array
54 - redesigned browsing support (although it's not finished so
55 won't currently work ;-)
56
57 Revision 1.11 1999/09/28 20:38:19 rjmcnab
58 fixed a couple of bugs
59
60 Revision 1.10 1999/09/07 04:56:55 sjboddie
61 added GPL notice
62
63 Revision 1.9 1999/09/02 00:31:25 rjmcnab
64 fixed small error.
65
66 Revision 1.8 1999/08/20 00:56:38 sjboddie
67 added cgisafe option - you can now do something like [cgisafe:Title] if
68 you want Title to be entered safely into a url
69
70 Revision 1.7 1999/08/10 22:38:08 sjboddie
71 added some more format options
72
73 Revision 1.6 1999/07/30 02:25:42 sjboddie
74 made format_date function global
75
76 Revision 1.5 1999/07/21 05:00:00 sjboddie
77 added some date formatting
78
79 Revision 1.4 1999/07/20 03:02:15 sjboddie
80 added an [icon] option, added ability to call get_formatted_string
81 with icon and link arguments set
82
83 Revision 1.3 1999/07/09 02:44:35 sjboddie
84 fixed parent(All) function so it only outputs parents and not current
85 level meta
86
87 Revision 1.2 1999/07/08 20:48:33 rjmcnab
88 Added ability to print the result number
89
90 Revision 1.1 1999/07/07 05:49:34 sjboddie
91 had another crack at the format string code - created a new formattools
92 module. It can now handle {If} and {Or} statements although there's a
93 bug preventing nested if's and or's.
94
95 */
96
97
98#include "formattools.h"
99#include "cgiutils.h"
100
101// a few function prototypes
102static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
103 const text_t &link, const text_t &icon,
104 const text_t &text, bool highlight);
105
106static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
107 format_t *formatlistptr, text_tset &metadata, bool &getParents);
108
109void metadata_t::clear() {
110 metaname.clear();
111 metacommand = mNone;
112 parentcommand = pNone;
113 parentoptions.clear();
114}
115
116void decision_t::clear() {
117 command = dMeta;
118 meta.clear();
119}
120
121void format_t::clear() {
122 command = comText;
123 decision.clear();
124 text.clear();
125 meta.clear();
126 nextptr = NULL;
127 ifptr = NULL;
128 elseptr = NULL;
129 orptr = NULL;
130}
131
132void formatinfo_t::clear() {
133 DocumentImages = false;
134 DocumentTitles = true;
135 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
136 DocumentContents = true;
137 DocumentArrowsBottom = true;
138 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
139 DocumentButtons.push_back ("Expand Text");
140 DocumentButtons.push_back ("Expand Contents");
141 DocumentButtons.push_back ("Detach");
142 DocumentButtons.push_back ("Highlight");
143 DocumentText = "[Text]";
144 formatstrings.erase (formatstrings.begin(), formatstrings.end());
145 DocumentUseHTML = false;
146}
147
148// simply checks to see if formatstring begins with a <td> tag
149bool is_table_content (const text_t &formatstring) {
150 text_t::const_iterator here = formatstring.begin();
151 text_t::const_iterator end = formatstring.end();
152
153 while (here != end) {
154 if (*here != ' ') {
155 if (*here == '<') {
156 if ((*(here+1) == 't' || *(here+1) == 'T') &&
157 (*(here+2) == 'd' || *(here+2) == 'D') &&
158 (*(here+3) == '>' || *(here+3) == ' '))
159 return true;
160 } else return false;
161 }
162 here ++;
163 }
164 return false;
165}
166
167bool is_table_content (const format_t *formatlistptr) {
168
169 if (formatlistptr == NULL) return false;
170
171 if (formatlistptr->command == comText)
172 return is_table_content (formatlistptr->text);
173
174 return false;
175}
176
177// returns false if key isn't in formatstringmap
178bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
179 text_t &formatstring) {
180
181 formatstring.clear();
182 text_tmap::const_iterator it = formatstringmap.find(key);
183 if (it == formatstringmap.end()) return false;
184 formatstring = (*it).second;
185 return true;
186}
187
188// tries to find "key1key2" then "key1" then "key2"
189bool get_formatstring (const text_t &key1, const text_t &key2,
190 const text_tmap &formatstringmap,
191 text_t &formatstring) {
192
193 formatstring.clear();
194 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
195 if (it != formatstringmap.end()) {
196 formatstring = (*it).second;
197 return true;
198 }
199 it = formatstringmap.find(key1);
200 if (it != formatstringmap.end()) {
201 formatstring = (*it).second;
202 return true;
203 }
204 it = formatstringmap.find(key2);
205 if (it != formatstringmap.end()) {
206 formatstring = (*it).second;
207 return true;
208 }
209 return false;
210}
211
212
213// returns a date of form _textmonthnn_ 31, 1999
214// input is date of type 19991231
215// at least the year must be present in date
216text_t format_date (const text_t &date) {
217
218 if (date.size() < 4) return "";
219
220 text_t::const_iterator datebegin = date.begin();
221
222 text_t year = substr (datebegin, datebegin+4);
223
224 if (date.size() < 6) return year;
225
226 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
227 int imonth = month.getint();
228 if (imonth < 0 || imonth > 12) return year;
229
230 if (date.size() < 8) return month + ", " + year;
231
232 text_t day = substr (datebegin+6, datebegin+8);
233 if (day[0] == '0') day = substr (day.begin()+1, day.end());
234 int iday = day.getint();
235 if (iday < 0 || iday > 31) return month + ", " + year;
236
237 return month + " " + day + ", " + year;
238}
239
240static void get_parent_options (text_t &instring, metadata_t &metaoption) {
241
242 text_t meta, com, op;
243 bool inbraces = false;
244 bool inquotes = false;
245 bool foundcolon = false;
246 text_t::const_iterator here = instring.begin()+6;
247 text_t::const_iterator end = instring.end();
248 while (here != end) {
249 if (*here == '(') inbraces = true;
250 else if (*here == ')') inbraces = false;
251 else if (*here == '\'' && !inquotes) inquotes = true;
252 else if (*here == '\'' && inquotes) inquotes = false;
253 else if (*here == ':' && !inbraces) foundcolon = true;
254 else if (foundcolon) meta.push_back (*here);
255 else if (inquotes) op.push_back (*here);
256 else com.push_back (*here);
257 here ++;
258 }
259 instring = meta;
260 if (com.empty())
261 metaoption.parentcommand = pImmediate;
262 else if (com == "Top")
263 metaoption.parentcommand = pTop;
264 else if (com == "All") {
265 metaoption.parentcommand = pAll;
266 metaoption.parentoptions = op;
267 }
268}
269
270static void parse_meta (text_t &meta, metadata_t &metaoption,
271 text_tset &metadata, bool &getParents) {
272
273 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
274 metaoption.metacommand = mCgiSafe;
275 meta = substr (meta.begin()+8, meta.end());
276 }
277
278 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
279 getParents = true;
280 get_parent_options (meta, metaoption);
281 }
282
283 metadata.insert (meta);
284 metaoption.metaname = meta;
285}
286
287static void parse_meta (text_t &meta, format_t *formatlistptr,
288 text_tset &metadata, bool &getParents) {
289
290 if (meta == "link")
291 formatlistptr->command = comLink;
292 else if (meta == "/link")
293 formatlistptr->command = comEndLink;
294
295 else if (meta == "num")
296 formatlistptr->command = comNum;
297
298 else if (meta == "icon")
299 formatlistptr->command = comIcon;
300
301 else if (meta == "Text")
302 formatlistptr->command = comDoc;
303
304 else if (meta == "highlight")
305 formatlistptr->command = comHighlight;
306
307 else if (meta == "/highlight")
308 formatlistptr->command = comEndHighlight;
309
310 else {
311 formatlistptr->command = comMeta;
312 parse_meta (meta, formatlistptr->meta, metadata, getParents);
313 }
314}
315
316static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
317 text_tset &metadata, bool &getParents) {
318
319 text_t text;
320 text_t::const_iterator here = formatstring.begin();
321 text_t::const_iterator end = formatstring.end();
322
323 while (here != end) {
324
325 if (*here == '\\')
326 text.push_back (*(++here));
327
328 else if (*here == '{') {
329 if (!text.empty()) {
330 formatlistptr->command = comText;
331 formatlistptr->text = text;
332 formatlistptr->nextptr = new format_t();
333 formatlistptr = formatlistptr->nextptr;
334
335 text.clear();
336 }
337 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
338 formatlistptr->nextptr = new format_t();
339 formatlistptr = formatlistptr->nextptr;
340 if (here == end) break;
341 }
342 } else if (*here == '[') {
343 if (!text.empty()) {
344 formatlistptr->command = comText;
345 formatlistptr->text = text;
346 formatlistptr->nextptr = new format_t();
347 formatlistptr = formatlistptr->nextptr;
348
349 text.clear();
350 }
351 text_t meta;
352 here ++;
353 while (*here != ']') {
354 if (here == end) return false;
355 meta.push_back (*here);
356 here ++;
357 }
358 parse_meta (meta, formatlistptr, metadata, getParents);
359 formatlistptr->nextptr = new format_t();
360 formatlistptr = formatlistptr->nextptr;
361
362 } else
363 text.push_back (*here);
364
365 if (here != end) here ++;
366 }
367 if (!text.empty()) {
368 formatlistptr->command = comText;
369 formatlistptr->text = text;
370 formatlistptr->nextptr = new format_t();
371 formatlistptr = formatlistptr->nextptr;
372
373 }
374 return true;
375}
376
377
378static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
379 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
380
381 text_t::const_iterator it = findchar (here, end, '}');
382 if (it == end) return false;
383
384 text_t com = substr (here, it);
385 here = findchar (it, end, '{');
386 if (here == end) return false;
387 else here ++;
388
389 if (com == "If") formatlistptr->command = comIf;
390 else if (com == "Or") formatlistptr->command = comOr;
391 else return false;
392
393 int curlycount = 0;
394 int commacount = 0;
395 text_t text;
396 while (here != end) {
397
398 if (*here == '\\') {
399 here++;
400 if (here != end) text.push_back(*here);
401
402 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
403 else if (*here == '}' && curlycount > 0) {
404 curlycount --;
405 text.push_back(*here);
406 }
407
408 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
409
410 if (formatlistptr->command == comOr) {
411 // the {Or}{this, or this, or this, or this} statement
412 // or'ed statements may be either [metadata] or plain text
413 format_t *or_ptr;
414
415 // find the next unused orptr
416 if (formatlistptr->orptr == NULL) {
417 formatlistptr->orptr = new format_t();
418 or_ptr = formatlistptr->orptr;
419 } else {
420 or_ptr = formatlistptr->orptr;
421 while (or_ptr->nextptr != NULL)
422 or_ptr = or_ptr->nextptr;
423 or_ptr->nextptr = new format_t();
424 or_ptr = or_ptr->nextptr;
425 }
426
427 text_t::const_iterator beginbracket = text.begin();
428 text_t::const_iterator endbracket = (text.end() - 1);
429 if ((*beginbracket == '[') && (*endbracket == ']')) {
430 // it's metadata
431 text_t meta = substr (beginbracket+1, endbracket);
432 parse_meta (meta, or_ptr, metadata, getParents);
433
434 } else {
435 // assume it's plain text
436 or_ptr->command = comText;
437 or_ptr->text = text;
438 }
439 text.clear();
440
441 } else {
442 // the {If}{decide,do,else} statement
443 if (commacount == 0) {
444 // If decision only supports metadata at present
445
446 // remove the surrounding square brackets
447 text_t::const_iterator beginbracket = text.begin();
448 text_t::const_iterator endbracket = (text.end() - 1);
449 if ((*beginbracket == '[') && (*endbracket == ']')) {
450 text_t meta = substr (beginbracket+1, endbracket);
451 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
452 commacount ++;
453 text.clear();
454 }
455
456 } else if (commacount == 1) {
457 formatlistptr->ifptr = new format_t();
458 parse_string (text, formatlistptr->ifptr, metadata, getParents);
459 commacount ++;
460 text.clear();
461
462 } else if (commacount == 2) {
463 formatlistptr->elseptr = new format_t();
464 parse_string (text, formatlistptr->elseptr, metadata, getParents);
465 commacount ++;
466 text.clear();
467 }
468 }
469 if (*here == '}') break;
470
471 } else text.push_back(*here);
472
473 if (here != end) here ++;
474 }
475
476 return true;
477}
478
479
480bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
481 text_tset &metadata, bool &getParents) {
482
483 formatlistptr->clear();
484 getParents = false;
485
486 return (parse_string (formatstring, formatlistptr, metadata, getParents));
487}
488
489
490// note: all the format_date stuff is assuming that all Date metadata is going to
491// be of the form yyyymmdd, this is of course, crap ;)
492
493static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
494
495 // make sure we have the requested metadata
496 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
497 if (it == docinfo.metadata.end()) return "";
498
499 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
500
501 switch (meta.parentcommand) {
502 case pNone:
503 {
504 text_t classifier_metaname = docinfo.classifier_metadata_type;
505 int metaname_index
506 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
507 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
508
509 if (meta.metaname == "Date")
510 return format_date (metadata_item);
511 if (meta.metacommand == mCgiSafe)
512 return cgi_safe (metadata_item);
513 else return metadata_item;
514 }
515
516 case pImmediate:
517 if (parent != NULL) {
518 if (meta.metaname == "Date")
519 return format_date (parent->values[0]);
520 if (meta.metacommand == mCgiSafe)
521 return cgi_safe (parent->values[0]);
522 else return parent->values[0];
523 }
524 break;
525
526 case pTop:
527 if (parent != NULL) {
528 while (parent->parent != NULL) parent = parent->parent;
529
530 if (meta.metaname == "Date")
531 return format_date (parent->values[0]);
532 if (meta.metacommand == mCgiSafe)
533 return cgi_safe (parent->values[0]);
534 else return parent->values[0];
535 }
536 break;
537
538 case pAll:
539 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
540 if (parent != NULL) {
541 text_tarray tmparray;
542 while (parent != NULL) {
543 tmparray.push_back (parent->values[0]);
544 parent = parent->parent;
545 }
546 bool first = true;
547 text_t tmp;
548 text_tarray::reverse_iterator here = tmparray.rbegin();
549 text_tarray::reverse_iterator end = tmparray.rend();
550 while (here != end) {
551 if (!first) tmp += meta.parentoptions;
552 if (meta.metaname == "Date") tmp += format_date (*here);
553 else tmp += *here;
554 first = false;
555 here ++;
556 }
557 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
558 else return tmp;
559 }
560 }
561 return "";
562}
563
564static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
565 const text_t &link, const text_t &icon,
566 const text_t &text, bool highlight) {
567
568 text_t tmp;
569 while (orptr != NULL) {
570
571 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
572 if (!tmp.empty()) return tmp;
573
574 orptr = orptr->nextptr;
575 }
576 return "";
577}
578
579static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
580 format_t *ifptr, format_t *elseptr, const text_t &link,
581 const text_t &icon, const text_t &text, bool highlight) {
582
583 // not much of a choice yet ...
584 if (decision.command == dMeta) {
585 if (get_meta (docinfo, decision.meta) != "") {
586 if (ifptr != NULL)
587 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
588 }
589 else {
590 if (elseptr != NULL)
591 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
592 }
593 }
594 return "";
595}
596
597text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
598 const text_t &link, const text_t &icon,
599 const text_t &text, bool highlight) {
600
601 if (formatlistptr == NULL) return "";
602
603 switch (formatlistptr->command) {
604 case comText:
605 return formatlistptr->text;
606 case comLink:
607 return link;
608 case comEndLink:
609 if (link.empty()) return "";
610 else return "</a>";
611 case comIcon:
612 return icon;
613 case comNum:
614 return docinfo.result_num;
615 case comMeta:
616 return get_meta (docinfo, formatlistptr->meta);
617 case comDoc:
618 return text;
619 case comHighlight:
620 if (highlight) return "<b>";
621 break;
622 case comEndHighlight:
623 if (highlight) return "</b>";
624 break;
625 case comIf:
626 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
627 formatlistptr->elseptr, link, icon, text, highlight);
628 case comOr:
629 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
630 }
631 return "";
632}
633
634
635text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
636 const text_t &link, const text_t &icon) {
637
638 text_t text;
639
640 text_t ft;
641 while (formatlistptr != NULL) {
642 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
643 formatlistptr = formatlistptr->nextptr;
644 }
645 return ft;
646}
647
648
649text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
650
651 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
652 text_t icon = "_icontext_";
653 text_t text;
654
655 text_t ft;
656 while (formatlistptr != NULL) {
657 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
658 formatlistptr = formatlistptr->nextptr;
659 }
660 return ft;
661}
662
663
664text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
665 const text_t &text) {
666
667 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
668 text_t icon = "_icontext_";
669
670 text_t ft;
671 while (formatlistptr != NULL) {
672 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
673 formatlistptr = formatlistptr->nextptr;
674 }
675 return ft;
676}
677
678
679text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
680 const text_t &link, const text_t &icon, const text_t &text) {
681
682 text_t ft;
683 while (formatlistptr != NULL) {
684 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
685 formatlistptr = formatlistptr->nextptr;
686 }
687 return ft;
688}
689
690text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
691 const text_t &link, const text_t &icon, bool highlight) {
692
693 text_t text, ft;
694 while (formatlistptr != NULL) {
695 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
696 formatlistptr = formatlistptr->nextptr;
697 }
698 return ft;
699}
700
701text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
702 const text_t &link, const text_t &icon,
703 const text_t &text, bool highlight) {
704
705 text_t ft;
706 while (formatlistptr != NULL) {
707 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
708 formatlistptr = formatlistptr->nextptr;
709 }
710 return ft;
711}
712
713
Note: See TracBrowser for help on using the repository browser.