source: trunk/gsdl/src/recpt/formattools.cpp@ 868

Last change on this file since 868 was 868, checked in by sjboddie, 24 years ago

added DocumentUseHTML

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 19.5 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: formattools.cpp 868 2000-01-25 22:33:31Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.16 2000/01/25 22:33:31 sjboddie
31 added DocumentUseHTML
32
33 Revision 1.15 1999/12/13 02:45:16 davidb
34 Support for more than one metavalue for the same metadata name
35
36 Revision 1.14 1999/10/30 22:23:11 sjboddie
37 moved table functions from browsetools
38
39 Revision 1.13 1999/10/14 23:01:24 sjboddie
40 changes for new browsing support
41
42 Revision 1.12 1999/10/10 08:14:07 sjboddie
43 - metadata now returns mp rather than array
44 - redesigned browsing support (although it's not finished so
45 won't currently work ;-)
46
47 Revision 1.11 1999/09/28 20:38:19 rjmcnab
48 fixed a couple of bugs
49
50 Revision 1.10 1999/09/07 04:56:55 sjboddie
51 added GPL notice
52
53 Revision 1.9 1999/09/02 00:31:25 rjmcnab
54 fixed small error.
55
56 Revision 1.8 1999/08/20 00:56:38 sjboddie
57 added cgisafe option - you can now do something like [cgisafe:Title] if
58 you want Title to be entered safely into a url
59
60 Revision 1.7 1999/08/10 22:38:08 sjboddie
61 added some more format options
62
63 Revision 1.6 1999/07/30 02:25:42 sjboddie
64 made format_date function global
65
66 Revision 1.5 1999/07/21 05:00:00 sjboddie
67 added some date formatting
68
69 Revision 1.4 1999/07/20 03:02:15 sjboddie
70 added an [icon] option, added ability to call get_formatted_string
71 with icon and link arguments set
72
73 Revision 1.3 1999/07/09 02:44:35 sjboddie
74 fixed parent(All) function so it only outputs parents and not current
75 level meta
76
77 Revision 1.2 1999/07/08 20:48:33 rjmcnab
78 Added ability to print the result number
79
80 Revision 1.1 1999/07/07 05:49:34 sjboddie
81 had another crack at the format string code - created a new formattools
82 module. It can now handle {If} and {Or} statements although there's a
83 bug preventing nested if's and or's.
84
85 */
86
87
88#include "formattools.h"
89#include "cgiutils.h"
90
91// a few function prototypes
92static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
93 const text_t &link, const text_t &icon,
94 const text_t &text, bool highlight);
95
96static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
97 format_t *formatlistptr, text_tset &metadata, bool &getParents);
98
99void metadata_t::clear() {
100 metaname.clear();
101 metacommand = mNone;
102 parentcommand = pNone;
103 parentoptions.clear();
104}
105
106void decision_t::clear() {
107 command = dMeta;
108 meta.clear();
109}
110
111void format_t::clear() {
112 command = comText;
113 decision.clear();
114 text.clear();
115 meta.clear();
116 nextptr = NULL;
117 ifptr = NULL;
118 elseptr = NULL;
119 orptr = NULL;
120}
121
122void formatinfo_t::clear() {
123 DocumentImages = false;
124 DocumentTitles = true;
125 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
126 DocumentContents = true;
127 DocumentArrowsBottom = true;
128 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
129 DocumentButtons.push_back ("Detach");
130 DocumentButtons.push_back ("Highlight");
131 DocumentButtons.push_back ("Expand Text");
132 DocumentButtons.push_back ("Expand Contents");
133 DocumentText = "[Text]";
134 formatstrings.erase (formatstrings.begin(), formatstrings.end());
135 DocumentUseHTML = false;
136}
137
138// simply checks to see if formatstring begins with a <td> tag
139bool is_table_content (const text_t &formatstring) {
140 text_t::const_iterator here = formatstring.begin();
141 text_t::const_iterator end = formatstring.end();
142
143 while (here != end) {
144 if (*here != ' ') {
145 if (*here == '<') {
146 if ((*(here+1) == 't' || *(here+1) == 'T') &&
147 (*(here+2) == 'd' || *(here+2) == 'D') &&
148 (*(here+3) == '>' || *(here+3) == ' '))
149 return true;
150 } else return false;
151 }
152 here ++;
153 }
154 return false;
155}
156
157bool is_table_content (const format_t *formatlistptr) {
158
159 if (formatlistptr == NULL) return false;
160
161 if (formatlistptr->command == comText)
162 return is_table_content (formatlistptr->text);
163
164 return false;
165}
166
167// returns false if key isn't in formatstringmap
168bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
169 text_t &formatstring) {
170
171 formatstring.clear();
172 text_tmap::const_iterator it = formatstringmap.find(key);
173 if (it == formatstringmap.end()) return false;
174 formatstring = (*it).second;
175 return true;
176}
177
178// tries to find "key1key2" then "key1" then "key2"
179bool get_formatstring (const text_t &key1, const text_t &key2,
180 const text_tmap &formatstringmap,
181 text_t &formatstring) {
182
183 formatstring.clear();
184 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
185 if (it != formatstringmap.end()) {
186 formatstring = (*it).second;
187 return true;
188 }
189 it = formatstringmap.find(key1);
190 if (it != formatstringmap.end()) {
191 formatstring = (*it).second;
192 return true;
193 }
194 it = formatstringmap.find(key2);
195 if (it != formatstringmap.end()) {
196 formatstring = (*it).second;
197 return true;
198 }
199 return false;
200}
201
202
203// returns a date of form _textmonthnn_ 31, 1999
204// input is date of type 19991231
205// at least the year must be present in date
206text_t format_date (const text_t &date) {
207
208 if (date.size() < 4) return "";
209
210 text_t::const_iterator datebegin = date.begin();
211
212 text_t year = substr (datebegin, datebegin+4);
213
214 if (date.size() < 6) return year;
215
216 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
217 int imonth = month.getint();
218 if (imonth < 0 || imonth > 12) return year;
219
220 if (date.size() < 8) return month + ", " + year;
221
222 text_t day = substr (datebegin+6, datebegin+8);
223 if (day[0] == '0') day = substr (day.begin()+1, day.end());
224 int iday = day.getint();
225 if (iday < 0 || iday > 31) return month + ", " + year;
226
227 return month + " " + day + ", " + year;
228}
229
230static void get_parent_options (text_t &instring, metadata_t &metaoption) {
231
232 text_t meta, com, op;
233 bool inbraces = false;
234 bool inquotes = false;
235 bool foundcolon = false;
236 text_t::const_iterator here = instring.begin()+6;
237 text_t::const_iterator end = instring.end();
238 while (here != end) {
239 if (*here == '(') inbraces = true;
240 else if (*here == ')') inbraces = false;
241 else if (*here == '\'' && !inquotes) inquotes = true;
242 else if (*here == '\'' && inquotes) inquotes = false;
243 else if (*here == ':' && !inbraces) foundcolon = true;
244 else if (foundcolon) meta.push_back (*here);
245 else if (inquotes) op.push_back (*here);
246 else com.push_back (*here);
247 here ++;
248 }
249 instring = meta;
250 if (com.empty())
251 metaoption.parentcommand = pImmediate;
252 else if (com == "Top")
253 metaoption.parentcommand = pTop;
254 else if (com == "All") {
255 metaoption.parentcommand = pAll;
256 metaoption.parentoptions = op;
257 }
258}
259
260static void parse_meta (text_t &meta, metadata_t &metaoption,
261 text_tset &metadata, bool &getParents) {
262
263 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
264 metaoption.metacommand = mCgiSafe;
265 meta = substr (meta.begin()+8, meta.end());
266 }
267
268 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
269 getParents = true;
270 get_parent_options (meta, metaoption);
271 }
272
273 metadata.insert (meta);
274 metaoption.metaname = meta;
275}
276
277static void parse_meta (text_t &meta, format_t *formatlistptr,
278 text_tset &metadata, bool &getParents) {
279
280 if (meta == "link")
281 formatlistptr->command = comLink;
282 else if (meta == "/link")
283 formatlistptr->command = comEndLink;
284
285 else if (meta == "num")
286 formatlistptr->command = comNum;
287
288 else if (meta == "icon")
289 formatlistptr->command = comIcon;
290
291 else if (meta == "Text")
292 formatlistptr->command = comDoc;
293
294 else if (meta == "highlight")
295 formatlistptr->command = comHighlight;
296
297 else if (meta == "/highlight")
298 formatlistptr->command = comEndHighlight;
299
300 else {
301 formatlistptr->command = comMeta;
302 parse_meta (meta, formatlistptr->meta, metadata, getParents);
303 }
304}
305
306static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
307 text_tset &metadata, bool &getParents) {
308
309 text_t text;
310 text_t::const_iterator here = formatstring.begin();
311 text_t::const_iterator end = formatstring.end();
312
313 while (here != end) {
314
315 if (*here == '\\')
316 text.push_back (*(++here));
317
318 else if (*here == '{') {
319 if (!text.empty()) {
320 formatlistptr->command = comText;
321 formatlistptr->text = text;
322 formatlistptr->nextptr = new format_t();
323 formatlistptr = formatlistptr->nextptr;
324
325 text.clear();
326 }
327 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
328 formatlistptr->nextptr = new format_t();
329 formatlistptr = formatlistptr->nextptr;
330 if (here == end) break;
331 }
332 } else if (*here == '[') {
333 if (!text.empty()) {
334 formatlistptr->command = comText;
335 formatlistptr->text = text;
336 formatlistptr->nextptr = new format_t();
337 formatlistptr = formatlistptr->nextptr;
338
339 text.clear();
340 }
341 text_t meta;
342 here ++;
343 while (*here != ']') {
344 if (here == end) return false;
345 meta.push_back (*here);
346 here ++;
347 }
348 parse_meta (meta, formatlistptr, metadata, getParents);
349 formatlistptr->nextptr = new format_t();
350 formatlistptr = formatlistptr->nextptr;
351
352 } else
353 text.push_back (*here);
354
355 if (here != end) here ++;
356 }
357 if (!text.empty()) {
358 formatlistptr->command = comText;
359 formatlistptr->text = text;
360 formatlistptr->nextptr = new format_t();
361 formatlistptr = formatlistptr->nextptr;
362
363 }
364 return true;
365}
366
367
368static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
369 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
370
371 text_t::const_iterator it = findchar (here, end, '}');
372 if (it == end) return false;
373
374 text_t com = substr (here, it);
375 here = findchar (it, end, '{');
376 if (here == end) return false;
377 else here ++;
378
379 if (com == "If") formatlistptr->command = comIf;
380 else if (com == "Or") formatlistptr->command = comOr;
381 else return false;
382
383 int curlycount = 0;
384 int commacount = 0;
385 text_t text;
386 while (here != end) {
387
388 if (*here == '\\') {
389 here++;
390 if (here != end) text.push_back(*here);
391
392 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
393 else if (*here == '}' && curlycount > 0) {
394 curlycount --;
395 text.push_back(*here);
396 }
397
398 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
399
400 if (formatlistptr->command == comOr) {
401 // the {Or}{this, or this, or this, or this} statement
402 // or'ed statements may be either [metadata] or plain text
403 format_t *or_ptr;
404
405 // find the next unused orptr
406 if (formatlistptr->orptr == NULL) {
407 formatlistptr->orptr = new format_t();
408 or_ptr = formatlistptr->orptr;
409 } else {
410 or_ptr = formatlistptr->orptr;
411 while (or_ptr->nextptr != NULL)
412 or_ptr = or_ptr->nextptr;
413 or_ptr->nextptr = new format_t();
414 or_ptr = or_ptr->nextptr;
415 }
416
417 text_t::const_iterator beginbracket = text.begin();
418 text_t::const_iterator endbracket = (text.end() - 1);
419 if ((*beginbracket == '[') && (*endbracket == ']')) {
420 // it's metadata
421 text_t meta = substr (beginbracket+1, endbracket);
422 parse_meta (meta, or_ptr, metadata, getParents);
423
424 } else {
425 // assume it's plain text
426 or_ptr->command = comText;
427 or_ptr->text = text;
428 }
429 text.clear();
430
431 } else {
432 // the {If}{decide,do,else} statement
433 if (commacount == 0) {
434 // If decision only supports metadata at present
435
436 // remove the surrounding square brackets
437 text_t::const_iterator beginbracket = text.begin();
438 text_t::const_iterator endbracket = (text.end() - 1);
439 if ((*beginbracket == '[') && (*endbracket == ']')) {
440 text_t meta = substr (beginbracket+1, endbracket);
441 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
442 commacount ++;
443 text.clear();
444 }
445
446 } else if (commacount == 1) {
447 formatlistptr->ifptr = new format_t();
448 parse_string (text, formatlistptr->ifptr, metadata, getParents);
449 commacount ++;
450 text.clear();
451
452 } else if (commacount == 2) {
453 formatlistptr->elseptr = new format_t();
454 parse_string (text, formatlistptr->elseptr, metadata, getParents);
455 commacount ++;
456 text.clear();
457 }
458 }
459 if (*here == '}') break;
460
461 } else text.push_back(*here);
462
463 if (here != end) here ++;
464 }
465
466 return true;
467}
468
469
470bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
471 text_tset &metadata, bool &getParents) {
472
473 formatlistptr->clear();
474 getParents = false;
475
476 return (parse_string (formatstring, formatlistptr, metadata, getParents));
477}
478
479
480// note: all the format_date stuff is assuming that all Date metadata is going to
481// be of the form yyyymmdd, this is of course, crap ;)
482
483static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
484
485 // make sure we have the requested metadata
486 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
487 if (it == docinfo.metadata.end()) return "";
488
489 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
490
491 switch (meta.parentcommand) {
492 case pNone:
493 {
494 text_t classifier_metaname = docinfo.classifier_metadata_type;
495 int metaname_index
496 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
497 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
498
499 if (meta.metaname == "Date")
500 return format_date (metadata_item);
501 if (meta.metacommand == mCgiSafe)
502 return cgi_safe (metadata_item);
503 else return metadata_item;
504 }
505
506 case pImmediate:
507 if (parent != NULL) {
508 if (meta.metaname == "Date")
509 return format_date (parent->values[0]);
510 if (meta.metacommand == mCgiSafe)
511 return cgi_safe (parent->values[0]);
512 else return parent->values[0];
513 }
514 break;
515
516 case pTop:
517 if (parent != NULL) {
518 while (parent->parent != NULL) parent = parent->parent;
519
520 if (meta.metaname == "Date")
521 return format_date (parent->values[0]);
522 if (meta.metacommand == mCgiSafe)
523 return cgi_safe (parent->values[0]);
524 else return parent->values[0];
525 }
526 break;
527
528 case pAll:
529 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
530 if (parent != NULL) {
531 text_tarray tmparray;
532 while (parent != NULL) {
533 tmparray.push_back (parent->values[0]);
534 parent = parent->parent;
535 }
536 bool first = true;
537 text_t tmp;
538 text_tarray::reverse_iterator here = tmparray.rbegin();
539 text_tarray::reverse_iterator end = tmparray.rend();
540 while (here != end) {
541 if (!first) tmp += meta.parentoptions;
542 if (meta.metaname == "Date") tmp += format_date (*here);
543 else tmp += *here;
544 first = false;
545 here ++;
546 }
547 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
548 else return tmp;
549 }
550 }
551 return "";
552}
553
554static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
555 const text_t &link, const text_t &icon,
556 const text_t &text, bool highlight) {
557
558 text_t tmp;
559 while (orptr != NULL) {
560
561 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
562 if (!tmp.empty()) return tmp;
563
564 orptr = orptr->nextptr;
565 }
566 return "";
567}
568
569static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
570 format_t *ifptr, format_t *elseptr, const text_t &link,
571 const text_t &icon, const text_t &text, bool highlight) {
572
573 // not much of a choice yet ...
574 if (decision.command == dMeta) {
575 if (get_meta (docinfo, decision.meta) != "") {
576 if (ifptr != NULL)
577 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
578 }
579 else {
580 if (elseptr != NULL)
581 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
582 }
583 }
584 return "";
585}
586
587text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
588 const text_t &link, const text_t &icon,
589 const text_t &text, bool highlight) {
590
591 if (formatlistptr == NULL) return "";
592
593 switch (formatlistptr->command) {
594 case comText:
595 return formatlistptr->text;
596 case comLink:
597 return link;
598 case comEndLink:
599 if (link.empty()) return "";
600 else return "</a>";
601 case comIcon:
602 return icon;
603 case comNum:
604 return docinfo.result_num;
605 case comMeta:
606 return get_meta (docinfo, formatlistptr->meta);
607 case comDoc:
608 return text;
609 case comHighlight:
610 if (highlight) return "<b>";
611 break;
612 case comEndHighlight:
613 if (highlight) return "</b>";
614 break;
615 case comIf:
616 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
617 formatlistptr->elseptr, link, icon, text, highlight);
618 case comOr:
619 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
620 }
621 return "";
622}
623
624
625text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
626 const text_t &link, const text_t &icon) {
627
628 text_t text;
629
630 text_t ft;
631 while (formatlistptr != NULL) {
632 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
633 formatlistptr = formatlistptr->nextptr;
634 }
635 return ft;
636}
637
638
639text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
640
641 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
642 text_t icon = "_icontext_";
643 text_t text;
644
645 text_t ft;
646 while (formatlistptr != NULL) {
647 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
648 formatlistptr = formatlistptr->nextptr;
649 }
650 return ft;
651}
652
653
654text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
655 const text_t &text) {
656
657 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
658 text_t icon = "_icontext_";
659
660 text_t ft;
661 while (formatlistptr != NULL) {
662 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
663 formatlistptr = formatlistptr->nextptr;
664 }
665 return ft;
666}
667
668
669text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
670 const text_t &link, const text_t &icon, const text_t &text) {
671
672 text_t ft;
673 while (formatlistptr != NULL) {
674 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
675 formatlistptr = formatlistptr->nextptr;
676 }
677 return ft;
678}
679
680text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
681 const text_t &link, const text_t &icon, bool highlight) {
682
683 text_t text, ft;
684 while (formatlistptr != NULL) {
685 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
686 formatlistptr = formatlistptr->nextptr;
687 }
688 return ft;
689}
690
691text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
692 const text_t &link, const text_t &icon,
693 const text_t &text, bool highlight) {
694
695 text_t ft;
696 while (formatlistptr != NULL) {
697 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
698 formatlistptr = formatlistptr->nextptr;
699 }
700 return ft;
701}
702
703
Note: See TracBrowser for help on using the repository browser.