source: trunk/gsdl/src/recpt/formattools.cpp@ 1048

Last change on this file since 1048 was 1048, checked in by nzdl, 24 years ago

tidied up some of the browsing code - replaced DocumentImages,
DocumentTitles and DocumentHeading with DocumentIcon

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 19.7 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: formattools.cpp 1048 2000-03-31 03:04:32Z nzdl $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.18 2000/03/31 03:04:31 nzdl
31 tidied up some of the browsing code - replaced DocumentImages,
32 DocumentTitles and DocumentHeading with DocumentIcon
33
34 Revision 1.17 2000/01/26 20:10:31 sjboddie
35 changed the default order of detach/expand/highlight buttons
36
37 Revision 1.16 2000/01/25 22:33:31 sjboddie
38 added DocumentUseHTML
39
40 Revision 1.15 1999/12/13 02:45:16 davidb
41 Support for more than one metavalue for the same metadata name
42
43 Revision 1.14 1999/10/30 22:23:11 sjboddie
44 moved table functions from browsetools
45
46 Revision 1.13 1999/10/14 23:01:24 sjboddie
47 changes for new browsing support
48
49 Revision 1.12 1999/10/10 08:14:07 sjboddie
50 - metadata now returns mp rather than array
51 - redesigned browsing support (although it's not finished so
52 won't currently work ;-)
53
54 Revision 1.11 1999/09/28 20:38:19 rjmcnab
55 fixed a couple of bugs
56
57 Revision 1.10 1999/09/07 04:56:55 sjboddie
58 added GPL notice
59
60 Revision 1.9 1999/09/02 00:31:25 rjmcnab
61 fixed small error.
62
63 Revision 1.8 1999/08/20 00:56:38 sjboddie
64 added cgisafe option - you can now do something like [cgisafe:Title] if
65 you want Title to be entered safely into a url
66
67 Revision 1.7 1999/08/10 22:38:08 sjboddie
68 added some more format options
69
70 Revision 1.6 1999/07/30 02:25:42 sjboddie
71 made format_date function global
72
73 Revision 1.5 1999/07/21 05:00:00 sjboddie
74 added some date formatting
75
76 Revision 1.4 1999/07/20 03:02:15 sjboddie
77 added an [icon] option, added ability to call get_formatted_string
78 with icon and link arguments set
79
80 Revision 1.3 1999/07/09 02:44:35 sjboddie
81 fixed parent(All) function so it only outputs parents and not current
82 level meta
83
84 Revision 1.2 1999/07/08 20:48:33 rjmcnab
85 Added ability to print the result number
86
87 Revision 1.1 1999/07/07 05:49:34 sjboddie
88 had another crack at the format string code - created a new formattools
89 module. It can now handle {If} and {Or} statements although there's a
90 bug preventing nested if's and or's.
91
92 */
93
94
95#include "formattools.h"
96#include "cgiutils.h"
97
98// a few function prototypes
99static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
100 const text_t &link, const text_t &icon,
101 const text_t &text, bool highlight);
102
103static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
104 format_t *formatlistptr, text_tset &metadata, bool &getParents);
105
106void metadata_t::clear() {
107 metaname.clear();
108 metacommand = mNone;
109 parentcommand = pNone;
110 parentoptions.clear();
111}
112
113void decision_t::clear() {
114 command = dMeta;
115 meta.clear();
116}
117
118void format_t::clear() {
119 command = comText;
120 decision.clear();
121 text.clear();
122 meta.clear();
123 nextptr = NULL;
124 ifptr = NULL;
125 elseptr = NULL;
126 orptr = NULL;
127}
128
129void formatinfo_t::clear() {
130 DocumentIcon = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
131 DocumentContents = true;
132 DocumentArrowsBottom = true;
133 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
134 DocumentButtons.push_back ("Expand Text");
135 DocumentButtons.push_back ("Expand Contents");
136 DocumentButtons.push_back ("Detach");
137 DocumentButtons.push_back ("Highlight");
138 DocumentText = "[Text]";
139 formatstrings.erase (formatstrings.begin(), formatstrings.end());
140 DocumentUseHTML = false;
141}
142
143// simply checks to see if formatstring begins with a <td> tag
144bool is_table_content (const text_t &formatstring) {
145 text_t::const_iterator here = formatstring.begin();
146 text_t::const_iterator end = formatstring.end();
147
148 while (here != end) {
149 if (*here != ' ') {
150 if (*here == '<') {
151 if ((*(here+1) == 't' || *(here+1) == 'T') &&
152 (*(here+2) == 'd' || *(here+2) == 'D') &&
153 (*(here+3) == '>' || *(here+3) == ' '))
154 return true;
155 } else return false;
156 }
157 here ++;
158 }
159 return false;
160}
161
162bool is_table_content (const format_t *formatlistptr) {
163
164 if (formatlistptr == NULL) return false;
165
166 if (formatlistptr->command == comText)
167 return is_table_content (formatlistptr->text);
168
169 return false;
170}
171
172// returns false if key isn't in formatstringmap
173bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
174 text_t &formatstring) {
175
176 formatstring.clear();
177 text_tmap::const_iterator it = formatstringmap.find(key);
178 if (it == formatstringmap.end()) return false;
179 formatstring = (*it).second;
180 return true;
181}
182
183// tries to find "key1key2" then "key1" then "key2"
184bool get_formatstring (const text_t &key1, const text_t &key2,
185 const text_tmap &formatstringmap,
186 text_t &formatstring) {
187
188 formatstring.clear();
189 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
190 if (it != formatstringmap.end()) {
191 formatstring = (*it).second;
192 return true;
193 }
194 it = formatstringmap.find(key1);
195 if (it != formatstringmap.end()) {
196 formatstring = (*it).second;
197 return true;
198 }
199 it = formatstringmap.find(key2);
200 if (it != formatstringmap.end()) {
201 formatstring = (*it).second;
202 return true;
203 }
204 return false;
205}
206
207
208// returns a date of form _textmonthnn_ 31, 1999
209// input is date of type 19991231
210// at least the year must be present in date
211text_t format_date (const text_t &date) {
212
213 if (date.size() < 4) return "";
214
215 text_t::const_iterator datebegin = date.begin();
216
217 text_t year = substr (datebegin, datebegin+4);
218
219 if (date.size() < 6) return year;
220
221 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
222 int imonth = month.getint();
223 if (imonth < 0 || imonth > 12) return year;
224
225 if (date.size() < 8) return month + ", " + year;
226
227 text_t day = substr (datebegin+6, datebegin+8);
228 if (day[0] == '0') day = substr (day.begin()+1, day.end());
229 int iday = day.getint();
230 if (iday < 0 || iday > 31) return month + ", " + year;
231
232 return month + " " + day + ", " + year;
233}
234
235static void get_parent_options (text_t &instring, metadata_t &metaoption) {
236
237 text_t meta, com, op;
238 bool inbraces = false;
239 bool inquotes = false;
240 bool foundcolon = false;
241 text_t::const_iterator here = instring.begin()+6;
242 text_t::const_iterator end = instring.end();
243 while (here != end) {
244 if (*here == '(') inbraces = true;
245 else if (*here == ')') inbraces = false;
246 else if (*here == '\'' && !inquotes) inquotes = true;
247 else if (*here == '\'' && inquotes) inquotes = false;
248 else if (*here == ':' && !inbraces) foundcolon = true;
249 else if (foundcolon) meta.push_back (*here);
250 else if (inquotes) op.push_back (*here);
251 else com.push_back (*here);
252 here ++;
253 }
254 instring = meta;
255 if (com.empty())
256 metaoption.parentcommand = pImmediate;
257 else if (com == "Top")
258 metaoption.parentcommand = pTop;
259 else if (com == "All") {
260 metaoption.parentcommand = pAll;
261 metaoption.parentoptions = op;
262 }
263}
264
265static void parse_meta (text_t &meta, metadata_t &metaoption,
266 text_tset &metadata, bool &getParents) {
267
268 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
269 metaoption.metacommand = mCgiSafe;
270 meta = substr (meta.begin()+8, meta.end());
271 }
272
273 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
274 getParents = true;
275 get_parent_options (meta, metaoption);
276 }
277
278 metadata.insert (meta);
279 metaoption.metaname = meta;
280}
281
282static void parse_meta (text_t &meta, format_t *formatlistptr,
283 text_tset &metadata, bool &getParents) {
284
285 if (meta == "link")
286 formatlistptr->command = comLink;
287 else if (meta == "/link")
288 formatlistptr->command = comEndLink;
289
290 else if (meta == "num")
291 formatlistptr->command = comNum;
292
293 else if (meta == "icon")
294 formatlistptr->command = comIcon;
295
296 else if (meta == "Text")
297 formatlistptr->command = comDoc;
298
299 else if (meta == "highlight")
300 formatlistptr->command = comHighlight;
301
302 else if (meta == "/highlight")
303 formatlistptr->command = comEndHighlight;
304
305 else {
306 formatlistptr->command = comMeta;
307 parse_meta (meta, formatlistptr->meta, metadata, getParents);
308 }
309}
310
311static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
312 text_tset &metadata, bool &getParents) {
313
314 text_t text;
315 text_t::const_iterator here = formatstring.begin();
316 text_t::const_iterator end = formatstring.end();
317
318 while (here != end) {
319
320 if (*here == '\\')
321 text.push_back (*(++here));
322
323 else if (*here == '{') {
324 if (!text.empty()) {
325 formatlistptr->command = comText;
326 formatlistptr->text = text;
327 formatlistptr->nextptr = new format_t();
328 formatlistptr = formatlistptr->nextptr;
329
330 text.clear();
331 }
332 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
333 formatlistptr->nextptr = new format_t();
334 formatlistptr = formatlistptr->nextptr;
335 if (here == end) break;
336 }
337 } else if (*here == '[') {
338 if (!text.empty()) {
339 formatlistptr->command = comText;
340 formatlistptr->text = text;
341 formatlistptr->nextptr = new format_t();
342 formatlistptr = formatlistptr->nextptr;
343
344 text.clear();
345 }
346 text_t meta;
347 here ++;
348 while (*here != ']') {
349 if (here == end) return false;
350 meta.push_back (*here);
351 here ++;
352 }
353 parse_meta (meta, formatlistptr, metadata, getParents);
354 formatlistptr->nextptr = new format_t();
355 formatlistptr = formatlistptr->nextptr;
356
357 } else
358 text.push_back (*here);
359
360 if (here != end) here ++;
361 }
362 if (!text.empty()) {
363 formatlistptr->command = comText;
364 formatlistptr->text = text;
365 formatlistptr->nextptr = new format_t();
366 formatlistptr = formatlistptr->nextptr;
367
368 }
369 return true;
370}
371
372
373static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
374 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
375
376 text_t::const_iterator it = findchar (here, end, '}');
377 if (it == end) return false;
378
379 text_t com = substr (here, it);
380 here = findchar (it, end, '{');
381 if (here == end) return false;
382 else here ++;
383
384 if (com == "If") formatlistptr->command = comIf;
385 else if (com == "Or") formatlistptr->command = comOr;
386 else return false;
387
388 int curlycount = 0;
389 int commacount = 0;
390 text_t text;
391 while (here != end) {
392
393 if (*here == '\\') {
394 here++;
395 if (here != end) text.push_back(*here);
396
397 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
398 else if (*here == '}' && curlycount > 0) {
399 curlycount --;
400 text.push_back(*here);
401 }
402
403 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
404
405 if (formatlistptr->command == comOr) {
406 // the {Or}{this, or this, or this, or this} statement
407 // or'ed statements may be either [metadata] or plain text
408 format_t *or_ptr;
409
410 // find the next unused orptr
411 if (formatlistptr->orptr == NULL) {
412 formatlistptr->orptr = new format_t();
413 or_ptr = formatlistptr->orptr;
414 } else {
415 or_ptr = formatlistptr->orptr;
416 while (or_ptr->nextptr != NULL)
417 or_ptr = or_ptr->nextptr;
418 or_ptr->nextptr = new format_t();
419 or_ptr = or_ptr->nextptr;
420 }
421
422 text_t::const_iterator beginbracket = text.begin();
423 text_t::const_iterator endbracket = (text.end() - 1);
424 if ((*beginbracket == '[') && (*endbracket == ']')) {
425 // it's metadata
426 text_t meta = substr (beginbracket+1, endbracket);
427 parse_meta (meta, or_ptr, metadata, getParents);
428
429 } else {
430 // assume it's plain text
431 or_ptr->command = comText;
432 or_ptr->text = text;
433 }
434 text.clear();
435
436 } else {
437 // the {If}{decide,do,else} statement
438 if (commacount == 0) {
439 // If decision only supports metadata at present
440
441 // remove the surrounding square brackets
442 text_t::const_iterator beginbracket = text.begin();
443 text_t::const_iterator endbracket = (text.end() - 1);
444 if ((*beginbracket == '[') && (*endbracket == ']')) {
445 text_t meta = substr (beginbracket+1, endbracket);
446 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
447 commacount ++;
448 text.clear();
449 }
450
451 } else if (commacount == 1) {
452 formatlistptr->ifptr = new format_t();
453 parse_string (text, formatlistptr->ifptr, metadata, getParents);
454 commacount ++;
455 text.clear();
456
457 } else if (commacount == 2) {
458 formatlistptr->elseptr = new format_t();
459 parse_string (text, formatlistptr->elseptr, metadata, getParents);
460 commacount ++;
461 text.clear();
462 }
463 }
464 if (*here == '}') break;
465
466 } else text.push_back(*here);
467
468 if (here != end) here ++;
469 }
470
471 return true;
472}
473
474
475bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
476 text_tset &metadata, bool &getParents) {
477
478 formatlistptr->clear();
479 getParents = false;
480
481 return (parse_string (formatstring, formatlistptr, metadata, getParents));
482}
483
484
485// note: all the format_date stuff is assuming that all Date metadata is going to
486// be of the form yyyymmdd, this is of course, crap ;)
487
488static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
489
490 // make sure we have the requested metadata
491 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
492 if (it == docinfo.metadata.end()) return "";
493
494 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
495
496 switch (meta.parentcommand) {
497 case pNone:
498 {
499 text_t classifier_metaname = docinfo.classifier_metadata_type;
500 int metaname_index
501 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
502 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
503
504 if (meta.metaname == "Date")
505 return format_date (metadata_item);
506 if (meta.metacommand == mCgiSafe)
507 return cgi_safe (metadata_item);
508 else return metadata_item;
509 }
510
511 case pImmediate:
512 if (parent != NULL) {
513 if (meta.metaname == "Date")
514 return format_date (parent->values[0]);
515 if (meta.metacommand == mCgiSafe)
516 return cgi_safe (parent->values[0]);
517 else return parent->values[0];
518 }
519 break;
520
521 case pTop:
522 if (parent != NULL) {
523 while (parent->parent != NULL) parent = parent->parent;
524
525 if (meta.metaname == "Date")
526 return format_date (parent->values[0]);
527 if (meta.metacommand == mCgiSafe)
528 return cgi_safe (parent->values[0]);
529 else return parent->values[0];
530 }
531 break;
532
533 case pAll:
534 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
535 if (parent != NULL) {
536 text_tarray tmparray;
537 while (parent != NULL) {
538 tmparray.push_back (parent->values[0]);
539 parent = parent->parent;
540 }
541 bool first = true;
542 text_t tmp;
543 text_tarray::reverse_iterator here = tmparray.rbegin();
544 text_tarray::reverse_iterator end = tmparray.rend();
545 while (here != end) {
546 if (!first) tmp += meta.parentoptions;
547 if (meta.metaname == "Date") tmp += format_date (*here);
548 else tmp += *here;
549 first = false;
550 here ++;
551 }
552 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
553 else return tmp;
554 }
555 }
556 return "";
557}
558
559static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
560 const text_t &link, const text_t &icon,
561 const text_t &text, bool highlight) {
562
563 text_t tmp;
564 while (orptr != NULL) {
565
566 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
567 if (!tmp.empty()) return tmp;
568
569 orptr = orptr->nextptr;
570 }
571 return "";
572}
573
574static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
575 format_t *ifptr, format_t *elseptr, const text_t &link,
576 const text_t &icon, const text_t &text, bool highlight) {
577
578 // not much of a choice yet ...
579 if (decision.command == dMeta) {
580 if (get_meta (docinfo, decision.meta) != "") {
581 if (ifptr != NULL)
582 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
583 }
584 else {
585 if (elseptr != NULL)
586 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
587 }
588 }
589 return "";
590}
591
592text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
593 const text_t &link, const text_t &icon,
594 const text_t &text, bool highlight) {
595
596 if (formatlistptr == NULL) return "";
597
598 switch (formatlistptr->command) {
599 case comText:
600 return formatlistptr->text;
601 case comLink:
602 return link;
603 case comEndLink:
604 if (link.empty()) return "";
605 else return "</a>";
606 case comIcon:
607 return icon;
608 case comNum:
609 return docinfo.result_num;
610 case comMeta:
611 return get_meta (docinfo, formatlistptr->meta);
612 case comDoc:
613 return text;
614 case comHighlight:
615 if (highlight) return "<b>";
616 break;
617 case comEndHighlight:
618 if (highlight) return "</b>";
619 break;
620 case comIf:
621 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
622 formatlistptr->elseptr, link, icon, text, highlight);
623 case comOr:
624 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
625 }
626 return "";
627}
628
629
630text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
631 const text_t &link, const text_t &icon) {
632
633 text_t text;
634
635 text_t ft;
636 while (formatlistptr != NULL) {
637 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
638 formatlistptr = formatlistptr->nextptr;
639 }
640 return ft;
641}
642
643
644text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
645
646 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
647 text_t icon = "_icontext_";
648 text_t text;
649
650 text_t ft;
651 while (formatlistptr != NULL) {
652 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
653 formatlistptr = formatlistptr->nextptr;
654 }
655 return ft;
656}
657
658
659text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
660 const text_t &text) {
661
662 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
663 text_t icon = "_icontext_";
664
665 text_t ft;
666 while (formatlistptr != NULL) {
667 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
668 formatlistptr = formatlistptr->nextptr;
669 }
670 return ft;
671}
672
673
674text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
675 const text_t &link, const text_t &icon, const text_t &text) {
676
677 text_t ft;
678 while (formatlistptr != NULL) {
679 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
680 formatlistptr = formatlistptr->nextptr;
681 }
682 return ft;
683}
684
685text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
686 const text_t &link, const text_t &icon, bool highlight) {
687
688 text_t text, ft;
689 while (formatlistptr != NULL) {
690 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
691 formatlistptr = formatlistptr->nextptr;
692 }
693 return ft;
694}
695
696text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
697 const text_t &link, const text_t &icon,
698 const text_t &text, bool highlight) {
699
700 text_t ft;
701 while (formatlistptr != NULL) {
702 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
703 formatlistptr = formatlistptr->nextptr;
704 }
705 return ft;
706}
707
708
Note: See TracBrowser for help on using the repository browser.