source: branches/z3950-branch/gsdl/src/recpt/formattools.cpp@ 1342

Last change on this file since 1342 was 1342, checked in by johnmcp, 24 years ago

Relatively stable z39.50 implementation now, merged with the mgpp source.
(Still needs a decent interface and query language though...)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 17.8 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include <assert.h>
29
30// a few function prototypes
// format_string is defined near the end of this file but is called earlier
// (by get_or), so it is declared here with internal linkage.
31static text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
32 const text_t &link, const text_t &icon,
33 const text_t &text, bool highlight);
34
// parse_action is defined after parse_string, which calls it when it meets
// a '{' in a format string.
35static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
36 format_t *formatlistptr, text_tset &metadata, bool &getParents);
37
38void metadata_t::clear() {
39 metaname.clear();
40 metacommand = mNone;
41 parentcommand = pNone;
42 parentoptions.clear();
43}
44
45void decision_t::clear() {
46 command = dMeta;
47 meta.clear();
48}
49
50void format_t::clear() {
51 command = comText;
52 decision.clear();
53 text.clear();
54 meta.clear();
55 nextptr = NULL;
56 ifptr = NULL;
57 elseptr = NULL;
58 orptr = NULL;
59}
60
61void formatinfo_t::clear() {
62 DocumentImages = false;
63 DocumentTitles = true;
64 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
65 DocumentContents = true;
66 DocumentArrowsBottom = true;
67 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
68 DocumentButtons.push_back ("Expand Text");
69 DocumentButtons.push_back ("Expand Contents");
70 DocumentButtons.push_back ("Detach");
71 DocumentButtons.push_back ("Highlight");
72 DocumentText = "[Text]";
73 formatstrings.erase (formatstrings.begin(), formatstrings.end());
74 DocumentUseHTML = false;
75}
76
77// simply checks to see if formatstring begins with a <td> tag
78bool is_table_content (const text_t &formatstring) {
79 text_t::const_iterator here = formatstring.begin();
80 text_t::const_iterator end = formatstring.end();
81
82 while (here != end) {
83 if (*here != ' ') {
84 if ((*here == '<') && ((here+3) < end)) {
85 if ((*(here+1) == 't' || *(here+1) == 'T') &&
86 (*(here+2) == 'd' || *(here+2) == 'D') &&
87 (*(here+3) == '>' || *(here+3) == ' '))
88 return true;
89 } else return false;
90 }
91 here ++;
92 }
93 return false;
94}
95
96bool is_table_content (const format_t *formatlistptr) {
97
98 if (formatlistptr == NULL) return false;
99
100 if (formatlistptr->command == comText)
101 return is_table_content (formatlistptr->text);
102
103 return false;
104}
105
106// returns false if key isn't in formatstringmap
107bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
108 text_t &formatstring) {
109
110 formatstring.clear();
111 text_tmap::const_iterator it = formatstringmap.find(key);
112 if (it == formatstringmap.end()) return false;
113 formatstring = (*it).second;
114 return true;
115}
116
117// tries to find "key1key2" then "key1" then "key2"
118bool get_formatstring (const text_t &key1, const text_t &key2,
119 const text_tmap &formatstringmap,
120 text_t &formatstring) {
121
122 formatstring.clear();
123 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
124 if (it != formatstringmap.end()) {
125 formatstring = (*it).second;
126 return true;
127 }
128 it = formatstringmap.find(key1);
129 if (it != formatstringmap.end()) {
130 formatstring = (*it).second;
131 return true;
132 }
133 it = formatstringmap.find(key2);
134 if (it != formatstringmap.end()) {
135 formatstring = (*it).second;
136 return true;
137 }
138 return false;
139}
140
141
142// returns a date of form _textmonthnn_ 31, 1999
143// input is date of type 19991231
144// at least the year must be present in date
145text_t format_date (const text_t &date) {
146
147 if (date.size() < 4) return "";
148
149 text_t::const_iterator datebegin = date.begin();
150
151 text_t year = substr (datebegin, datebegin+4);
152
153 if (date.size() < 6) return year;
154
155 text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
156 int imonth = month.getint();
157 if (imonth < 0 || imonth > 12) return year;
158
159 if (date.size() < 8) return month + ", " + year;
160
161 text_t day = substr (datebegin+6, datebegin+8);
162 if (day[0] == '0') day = substr (day.begin()+1, day.end());
163 int iday = day.getint();
164 if (iday < 0 || iday > 31) return month + ", " + year;
165
166 return month + " " + day + ", " + year;
167}
168
169static void get_parent_options (text_t &instring, metadata_t &metaoption) {
170
171 assert (instring.size() > 7);
172 if (instring.size() <= 7) return;
173
174 text_t meta, com, op;
175 bool inbraces = false;
176 bool inquotes = false;
177 bool foundcolon = false;
178 text_t::const_iterator here = instring.begin()+6;
179 text_t::const_iterator end = instring.end();
180 while (here != end) {
181 if (*here == '(') inbraces = true;
182 else if (*here == ')') inbraces = false;
183 else if (*here == '\'' && !inquotes) inquotes = true;
184 else if (*here == '\'' && inquotes) inquotes = false;
185 else if (*here == ':' && !inbraces) foundcolon = true;
186 else if (foundcolon) meta.push_back (*here);
187 else if (inquotes) op.push_back (*here);
188 else com.push_back (*here);
189 here ++;
190 }
191 instring = meta;
192 if (com.empty())
193 metaoption.parentcommand = pImmediate;
194 else if (com == "Top")
195 metaoption.parentcommand = pTop;
196 else if (com == "All") {
197 metaoption.parentcommand = pAll;
198 metaoption.parentoptions = op;
199 }
200}
201
202static void parse_meta (text_t &meta, metadata_t &metaoption,
203 text_tset &metadata, bool &getParents) {
204
205 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
206 metaoption.metacommand = mCgiSafe;
207 meta = substr (meta.begin()+8, meta.end());
208 }
209
210 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
211 getParents = true;
212 get_parent_options (meta, metaoption);
213 }
214
215 metadata.insert (meta);
216 metaoption.metaname = meta;
217}
218
219static void parse_meta (text_t &meta, format_t *formatlistptr,
220 text_tset &metadata, bool &getParents) {
221
222 if (meta == "link")
223 formatlistptr->command = comLink;
224 else if (meta == "/link")
225 formatlistptr->command = comEndLink;
226
227 else if (meta == "num")
228 formatlistptr->command = comNum;
229
230 else if (meta == "icon")
231 formatlistptr->command = comIcon;
232
233 else if (meta == "Text")
234 formatlistptr->command = comDoc;
235
236 else if (meta == "highlight")
237 formatlistptr->command = comHighlight;
238
239 else if (meta == "/highlight")
240 formatlistptr->command = comEndHighlight;
241
242 else {
243 formatlistptr->command = comMeta;
244 parse_meta (meta, formatlistptr->meta, metadata, getParents);
245 }
246}
247
248static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
249 text_tset &metadata, bool &getParents) {
250
251 text_t text;
252 text_t::const_iterator here = formatstring.begin();
253 text_t::const_iterator end = formatstring.end();
254
255 while (here != end) {
256
257 if (*here == '\\') {
258 here ++;
259 if (here != end) text.push_back (*here);
260
261 } else if (*here == '{') {
262 if (!text.empty()) {
263 formatlistptr->command = comText;
264 formatlistptr->text = text;
265 formatlistptr->nextptr = new format_t();
266 formatlistptr = formatlistptr->nextptr;
267
268 text.clear();
269 }
270 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
271 formatlistptr->nextptr = new format_t();
272 formatlistptr = formatlistptr->nextptr;
273 if (here == end) break;
274 }
275 } else if (*here == '[') {
276 if (!text.empty()) {
277 formatlistptr->command = comText;
278 formatlistptr->text = text;
279 formatlistptr->nextptr = new format_t();
280 formatlistptr = formatlistptr->nextptr;
281
282 text.clear();
283 }
284 text_t meta;
285 here ++;
286 while (*here != ']') {
287 if (here == end) return false;
288 meta.push_back (*here);
289 here ++;
290 }
291 parse_meta (meta, formatlistptr, metadata, getParents);
292 formatlistptr->nextptr = new format_t();
293 formatlistptr = formatlistptr->nextptr;
294
295 } else
296 text.push_back (*here);
297
298 if (here != end) here ++;
299 }
300 if (!text.empty()) {
301 formatlistptr->command = comText;
302 formatlistptr->text = text;
303 formatlistptr->nextptr = new format_t();
304 formatlistptr = formatlistptr->nextptr;
305
306 }
307 return true;
308}
309
310
311static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
312 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
313
314 text_t::const_iterator it = findchar (here, end, '}');
315 if (it == end) return false;
316
317 text_t com = substr (here, it);
318 here = findchar (it, end, '{');
319 if (here == end) return false;
320 else here ++;
321
322 if (com == "If") formatlistptr->command = comIf;
323 else if (com == "Or") formatlistptr->command = comOr;
324 else return false;
325
326 int curlycount = 0;
327 int commacount = 0;
328 text_t text;
329 while (here != end) {
330
331 if (*here == '\\') {
332 here++;
333 if (here != end) text.push_back(*here);
334
335 } else if (*here == '{') {curlycount ++; text.push_back(*here);}
336 else if (*here == '}' && curlycount > 0) {
337 curlycount --;
338 text.push_back(*here);
339 }
340
341 else if ((*here == ',' || *here == '}') && curlycount <= 0) {
342
343 if (formatlistptr->command == comOr) {
344 // the {Or}{this, or this, or this, or this} statement
345 // or'ed statements may be either [metadata] or plain text
346 format_t *or_ptr;
347
348 // find the next unused orptr
349 if (formatlistptr->orptr == NULL) {
350 formatlistptr->orptr = new format_t();
351 or_ptr = formatlistptr->orptr;
352 } else {
353 or_ptr = formatlistptr->orptr;
354 while (or_ptr->nextptr != NULL)
355 or_ptr = or_ptr->nextptr;
356 or_ptr->nextptr = new format_t();
357 or_ptr = or_ptr->nextptr;
358 }
359
360 text_t::const_iterator beginbracket = text.begin();
361 text_t::const_iterator endbracket = (text.end() - 1);
362 if ((*beginbracket == '[') && (*endbracket == ']')) {
363 // it's metadata
364 text_t meta = substr (beginbracket+1, endbracket);
365 parse_meta (meta, or_ptr, metadata, getParents);
366
367 } else {
368 parse_string (text, or_ptr, metadata, getParents);
369 }
370 text.clear();
371
372 } else {
373 // the {If}{decide,do,else} statement
374 if (commacount == 0) {
375 // If decision only supports metadata at present
376
377 // remove the surrounding square brackets
378 text_t::const_iterator beginbracket = text.begin();
379 text_t::const_iterator endbracket = (text.end() - 1);
380 if ((*beginbracket == '[') && (*endbracket == ']')) {
381 text_t meta = substr (beginbracket+1, endbracket);
382 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
383 commacount ++;
384 text.clear();
385 }
386
387 } else if (commacount == 1) {
388 formatlistptr->ifptr = new format_t();
389 parse_string (text, formatlistptr->ifptr, metadata, getParents);
390 commacount ++;
391 text.clear();
392
393 } else if (commacount == 2) {
394 formatlistptr->elseptr = new format_t();
395 parse_string (text, formatlistptr->elseptr, metadata, getParents);
396 commacount ++;
397 text.clear();
398 }
399 }
400 if (*here == '}') break;
401
402 } else text.push_back(*here);
403
404 if (here != end) here ++;
405 }
406
407 return true;
408}
409
410
411bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
412 text_tset &metadata, bool &getParents) {
413
414 formatlistptr->clear();
415 getParents = false;
416
417 return (parse_string (formatstring, formatlistptr, metadata, getParents));
418}
419
420
421// note: all the format_date stuff is assuming that all Date metadata is going to
422// be of the form yyyymmdd, this is of course, crap ;)
423
424static text_t get_meta (ResultDocInfo_t &docinfo, const metadata_t &meta) {
425
426 // make sure we have the requested metadata
427 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
428 if (it == docinfo.metadata.end()) return "";
429
430 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
431
432 switch (meta.parentcommand) {
433 case pNone:
434 {
435 text_t classifier_metaname = docinfo.classifier_metadata_type;
436 int metaname_index
437 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
438 text_t metadata_item = docinfo.metadata[meta.metaname].values[metaname_index];
439
440 if (meta.metaname == "Date")
441 return format_date (metadata_item);
442 if (meta.metacommand == mCgiSafe)
443 return cgi_safe (metadata_item);
444 else return metadata_item;
445 }
446
447 case pImmediate:
448 if (parent != NULL) {
449 if (meta.metaname == "Date")
450 return format_date (parent->values[0]);
451 if (meta.metacommand == mCgiSafe)
452 return cgi_safe (parent->values[0]);
453 else return parent->values[0];
454 }
455 break;
456
457 case pTop:
458 if (parent != NULL) {
459 while (parent->parent != NULL) parent = parent->parent;
460
461 if (meta.metaname == "Date")
462 return format_date (parent->values[0]);
463 if (meta.metacommand == mCgiSafe)
464 return cgi_safe (parent->values[0]);
465 else return parent->values[0];
466 }
467 break;
468
469 case pAll:
470 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
471 if (parent != NULL) {
472 text_tarray tmparray;
473 while (parent != NULL) {
474 tmparray.push_back (parent->values[0]);
475 parent = parent->parent;
476 }
477 bool first = true;
478 text_t tmp;
479 text_tarray::reverse_iterator here = tmparray.rbegin();
480 text_tarray::reverse_iterator end = tmparray.rend();
481 while (here != end) {
482 if (!first) tmp += meta.parentoptions;
483 if (meta.metaname == "Date") tmp += format_date (*here);
484 else tmp += *here;
485 first = false;
486 here ++;
487 }
488 if (meta.metacommand == mCgiSafe) return cgi_safe (tmp);
489 else return tmp;
490 }
491 }
492 return "";
493}
494
495static text_t get_or (ResultDocInfo_t &docinfo, format_t *orptr,
496 const text_t &link, const text_t &icon,
497 const text_t &text, bool highlight) {
498
499 text_t tmp;
500 while (orptr != NULL) {
501
502 tmp = format_string (docinfo, orptr, link, icon, text, highlight);
503 if (!tmp.empty()) return tmp;
504
505 orptr = orptr->nextptr;
506 }
507 return "";
508}
509
510static text_t get_if (ResultDocInfo_t &docinfo, const decision_t &decision,
511 format_t *ifptr, format_t *elseptr, const text_t &link,
512 const text_t &icon, const text_t &text, bool highlight) {
513
514 // not much of a choice yet ...
515 if (decision.command == dMeta) {
516 if (get_meta (docinfo, decision.meta) != "") {
517 if (ifptr != NULL)
518 return get_formatted_string (docinfo, ifptr, link, icon, text, highlight);
519 }
520 else {
521 if (elseptr != NULL)
522 return get_formatted_string (docinfo, elseptr, link, icon, text, highlight);
523 }
524 }
525 return "";
526}
527
528text_t format_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
529 const text_t &link, const text_t &icon,
530 const text_t &text, bool highlight) {
531
532 if (formatlistptr == NULL) return "";
533
534 switch (formatlistptr->command) {
535 case comText:
536 return formatlistptr->text;
537 case comLink:
538 return link;
539 case comEndLink:
540 if (link.empty()) return "";
541 else return "</a>";
542 case comIcon:
543 return icon;
544 case comNum:
545 return docinfo.result_num;
546 case comMeta:
547 return get_meta (docinfo, formatlistptr->meta);
548 case comDoc:
549 return text;
550 case comHighlight:
551 if (highlight) return "<b>";
552 break;
553 case comEndHighlight:
554 if (highlight) return "</b>";
555 break;
556 case comIf:
557 return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
558 formatlistptr->elseptr, link, icon, text, highlight);
559 case comOr:
560 return get_or (docinfo, formatlistptr->orptr, link, icon, text, highlight);
561 }
562 return "";
563}
564
565
566text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
567 const text_t &link, const text_t &icon) {
568
569 text_t text;
570
571 text_t ft;
572 while (formatlistptr != NULL) {
573 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
574 formatlistptr = formatlistptr->nextptr;
575 }
576 return ft;
577}
578
579
580text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr) {
581
582 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
583 text_t icon = "_icontext_";
584 text_t text;
585
586 text_t ft;
587 while (formatlistptr != NULL) {
588 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
589 formatlistptr = formatlistptr->nextptr;
590 }
591 return ft;
592}
593
594
595text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
596 const text_t &text) {
597
598 text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
599 text_t icon = "_icontext_";
600
601 text_t ft;
602 while (formatlistptr != NULL) {
603 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
604 formatlistptr = formatlistptr->nextptr;
605 }
606 return ft;
607}
608
609
610text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
611 const text_t &link, const text_t &icon, const text_t &text) {
612
613 text_t ft;
614 while (formatlistptr != NULL) {
615 ft += format_string (docinfo, formatlistptr, link, icon, text, false);
616 formatlistptr = formatlistptr->nextptr;
617 }
618 return ft;
619}
620
621text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
622 const text_t &link, const text_t &icon, bool highlight) {
623
624 text_t text, ft;
625 while (formatlistptr != NULL) {
626 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
627 formatlistptr = formatlistptr->nextptr;
628 }
629 return ft;
630}
631
632text_t get_formatted_string (ResultDocInfo_t &docinfo, format_t *formatlistptr,
633 const text_t &link, const text_t &icon,
634 const text_t &text, bool highlight) {
635
636 text_t ft;
637 while (formatlistptr != NULL) {
638 ft += format_string (docinfo, formatlistptr, link, icon, text, highlight);
639 formatlistptr = formatlistptr->nextptr;
640 }
641 return ft;
642}
Note: See TracBrowser for help on using the repository browser.