source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 23379

Last change on this file since 23379 was 23305, checked in by davidb, 13 years ago

htmlsafe: expanded to also protect commas and single apostrophes, as these can cause problems in the HTML when embedded in format or macro statements

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 60.1 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
// File-scope flag, initially off.
// NOTE(review): presumably switched by the [metadata-spanwrap] and
// [/metadata-spanwrap] format commands recognised in parse_meta -- its
// readers and writers are not in this part of the file; confirm.
35static bool metadata_spanwrap = false;
36
37// a few function prototypes
// Forward declarations of file-local (static) helpers so they can be called
// before their definitions; parse_action, for instance, is called from
// parse_string, which is defined ahead of it.
38
39static text_t format_string (const text_t& collection, recptproto* collectproto,
40 ResultDocInfo_t &docinfo, displayclass &disp,
41 format_t *formatlistptr, text_tmap &options,
42 ostream& logout);
43
44static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
45 format_t *formatlistptr, text_tset &metadata, bool &getParents);
46
47static text_t format_summary (const text_t& collection, recptproto* collectproto,
48 ResultDocInfo_t &docinfo, displayclass &disp,
49 text_tmap &options, ostream& logout);
50static text_t format_text (const text_t& collection, recptproto* collectproto,
51 ResultDocInfo_t &docinfo, displayclass &disp,
52 text_tmap &options, ostream& logout);
53
54static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
55 recptproto* collectproto, ResultDocInfo_t &docinfo,
56 displayclass &disp, text_tmap &options,
57 ostream &logout);
58
59
60void metadata_t::clear() {
61 metaname.clear();
62 metacommand = mNone;
63 mqualifier.parent = pNone;
64 mqualifier.sibling = sNone;
65 mqualifier.child = cNone;
66 pre_tree_traverse.clear();
67 parentoptions.clear();
68 siblingoptions.clear();
69 childoptions.clear();
70}
71
72void decision_t::clear() {
73 command = dMeta;
74 meta.clear();
75 text.clear();
76}
77
78format_t::~format_t()
79{
80 if (nextptr != NULL) delete nextptr;
81 if (ifptr != NULL) delete ifptr;
82 if (elseptr != NULL) delete elseptr;
83 if (orptr != NULL) delete orptr;
84}
85
86void format_t::clear() {
87 command = comText;
88 decision.clear();
89 text.clear();
90 meta.clear();
91 nextptr = NULL;
92 ifptr = NULL;
93 elseptr = NULL;
94 orptr = NULL;
95}
96
97void formatinfo_t::clear() {
98 DocumentImages = false;
99 DocumentTitles = true;
100 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
101 DocumentContents = true;
102 DocumentArrowsBottom = true;
103 DocumentArrowsTop = false;
104 DocumentSearchResultLinks = false;
105 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
106 // DocumentButtons.push_back ("Expand Text");
107 // DocumentButtons.push_back ("Expand Contents");
108 DocumentButtons.push_back ("Detach");
109 DocumentButtons.push_back ("Highlight");
110 RelatedDocuments = "";
111 DocumentText = "[Text]";
112 formatstrings.erase (formatstrings.begin(), formatstrings.end());
113 DocumentUseHTML = false;
114 AllowExtendedOptions = false;
115}
116
117// simply checks to see if formatstring begins with a <td> tag
118bool is_table_content (const text_t &formatstring) {
119 text_t::const_iterator here = formatstring.begin();
120 text_t::const_iterator end = formatstring.end();
121
122 while (here != end) {
123 if (*here != ' ') {
124 if ((*here == '<') && ((here+3) < end)) {
125 if ((*(here+1) == 't' || *(here+1) == 'T') &&
126 (*(here+2) == 'd' || *(here+2) == 'D') &&
127 (*(here+3) == '>' || *(here+3) == ' '))
128 return true;
129 } else return false;
130 }
131 ++here;
132 }
133 return false;
134}
135
136bool is_table_content (const format_t *formatlistptr) {
137
138 if (formatlistptr == NULL) return false;
139
140 if (formatlistptr->command == comText)
141 return is_table_content (formatlistptr->text);
142
143 return false;
144}
145
146// returns false if key isn't in formatstringmap
147bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
148 text_t &formatstring) {
149
150 formatstring.clear();
151 text_tmap::const_iterator it = formatstringmap.find(key);
152 if (it == formatstringmap.end()) return false;
153 formatstring = (*it).second;
154 return true;
155}
156
157// tries to find "key1key2" then "key1" then "key2"
158bool get_formatstring (const text_t &key1, const text_t &key2,
159 const text_tmap &formatstringmap,
160 text_t &formatstring) {
161
162 formatstring.clear();
163 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
164 if (it != formatstringmap.end()) {
165 formatstring = (*it).second;
166 return true;
167 }
168 it = formatstringmap.find(key1);
169 if (it != formatstringmap.end()) {
170 formatstring = (*it).second;
171 return true;
172 }
173 it = formatstringmap.find(key2);
174 if (it != formatstringmap.end()) {
175 formatstring = (*it).second;
176 return true;
177 }
178 return false;
179}
180
181
182text_t remove_namespace(const text_t &meta_name) {
183 text_t::const_iterator end = meta_name.end();
184 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
185 if (it != end) {
186 return substr(it+1, end);
187 }
188
189 return meta_name;
190
191}
192// returns a date of form _format:date_(year, month, day)
193// input is date of type yyyy-?mm-?dd
194// at least the year must be present in date
195text_t format_date (const text_t &date) {
196
197 if (date.size() < 4) return "";
198
199 text_t::const_iterator datebegin = date.begin();
200
201 text_t year = substr (datebegin, datebegin+4);
202 int chars_seen_so_far = 4;
203 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
204
205 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
206 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
207
208 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
209 int imonth = month.getint();
210 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
211
212 chars_seen_so_far += 2;
213 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
214
215 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
216 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
217
218 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
219 if (day[0] == '0') day = substr (day.begin()+1, day.end());
220 int iday = day.getint();
221 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
222
223 return "_format:date_("+year+","+month+","+day+")";
224}
225
226// converts an iso639 language code to its English equivalent
227// should we be checking that the macro exists??
228text_t iso639 (const text_t &langcode) {
229 if (langcode.empty()) return "";
230 return "_iso639:iso639"+langcode+"_";
231}
232
233
234text_t get_href (const text_t &link) {
235
236 text_t href;
237
238 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
239 text_t::const_iterator end = link.end();
240 if (here == end) return g_EmptyText;
241
242 ++here;
243 while (here != end) {
244 if (*here == '"') break;
245 href.push_back(*here);
246 ++here;
247 }
248
249 return href;
250}
251
252//this function gets the information associated with the relation
253//metadata for the document associated with 'docinfo'. This relation
254//metadata consists of a line of pairs containing 'collection, document OID'
255//(this is the OID of the document related to the current document, and
256//the collection the related document belongs to). For each of these pairs
257//the title metadata is obtained and then an html link between the title
258//of the related doc and the document's position (the document will be
259//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
260//(where collection is the related documents collection, and OID is the
261//related documents OID). A list of these html links are made for as many
262//related documents as there are. This list is then returned. If there are
263//no related documents available for the current document then the string
264//'.. no related documents .. ' is returned.
265text_t get_related_docs(const text_t& collection, recptproto* collectproto,
266 ResultDocInfo_t &docinfo, ostream& logout){
267
268 text_tset metadata;
269
270 //insert the metadata we wish to collect
271 metadata.insert("dc.Relation");
272 metadata.insert("Title");
273 metadata.insert("Subject"); //for emails, where title data doesn't apply
274
275 FilterResponse_t response;
276 text_t relation = ""; //string for displaying relation metadata
277 text_t relationTitle = ""; //the related documents Title (or subject)
278 text_t relationOID = ""; //the related documents OID
279
280 //get the information associated with the metadata for current doc
281 if (get_info (docinfo.OID, collection, "", metadata,
282 false, collectproto, response, logout)) {
283
284 //if the relation metadata exists, store for displaying
285 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
286 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
287
288 //split relation data into pairs of collectionname,ID number
289 text_tarray relationpairs;
290 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
291
292 text_tarray::const_iterator currDoc = relationpairs.begin();
293 text_tarray::const_iterator lastDoc = relationpairs.end();
294
295 //iterate through the pairs to split and display
296 while(currDoc != lastDoc){
297
298 //split pairs into collectionname and ID
299 text_tarray relationdata;
300 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
301
302 //get first element in the array (collection)
303 text_tarray::const_iterator doc_data = relationdata.begin();
304 text_t document_collection = *doc_data;
305 ++doc_data; //increment to get next item in array (oid)
306 text_t document_OID = *doc_data;
307
308 //create html link to related document
309 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
310 relation += "&amp;cl=search&amp;d=" + document_OID;
311
312 //get the information associated with the metadata for related doc
313 if (get_info (document_OID, document_collection, "", metadata,
314 false, collectproto, response, logout)) {
315
316 //if title metadata doesn't exist, collect subject metadata
317 //if that doesn't exist, just call it 'related document'
318 if (!response.docInfo[0].metadata["Title"].values[0].empty())
319 relationTitle = response.docInfo[0].metadata["Title"].values[0];
320 else if (!response.docInfo[0].metadata["Subject"].values.empty())
321 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
322 else relationTitle = "RELATED DOCUMENT";
323
324 }
325
326 //link the related document's title to its page
327 relation += "\">" + relationTitle + "</a>";
328 relation += " (" + document_collection + ")<br>";
329
330 ++currDoc;
331 }
332 }
333
334 }
335
336 if(relation.empty()) //no relation data for documnet
337 relation = ".. no related documents .. ";
338
339 return relation;
340}
341
342
343
344static void get_parent_options (text_t &instring, metadata_t &metaoption) {
345
346 assert (instring.size() > 7);
347 if (instring.size() <= 7) return;
348
349 text_t meta, com, op;
350 bool inbraces = false;
351 bool inquotes = false;
352 bool foundcolon = false;
353 text_t::const_iterator here = instring.begin()+6;
354 text_t::const_iterator end = instring.end();
355 while (here != end) {
356 if (foundcolon) meta.push_back (*here);
357 else if (*here == '(') inbraces = true;
358 else if (*here == ')') inbraces = false;
359 else if (*here == '\'' && !inquotes) inquotes = true;
360 else if (*here == '\'' && inquotes) inquotes = false;
361 else if (*here == ':' && !inbraces) foundcolon = true;
362 else if (inquotes) op.push_back (*here);
363 else com.push_back (*here);
364 ++here;
365 }
366
367 instring = meta;
368 if (com.empty())
369 metaoption.mqualifier.parent = pImmediate;
370 else if (com == "Top")
371 metaoption.mqualifier.parent = pTop;
372 else if (com == "All") {
373 metaoption.mqualifier.parent = pAll;
374 metaoption.parentoptions = op;
375 }
376}
377
378
379static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
380
381 assert (instring.size() > 8);
382 if (instring.size() <= 8) return;
383 text_t meta, com, op;
384 bool inbraces = false;
385 bool inquotes = false;
386 bool foundcolon = false;
387 text_t::const_iterator here = instring.begin()+7;
388 text_t::const_iterator end = instring.end();
389 while (here != end) {
390 if (foundcolon) meta.push_back (*here);
391 else if (*here == '(') inbraces = true;
392 else if (*here == ')') inbraces = false;
393 else if (*here == '\'' && !inquotes) inquotes = true;
394 else if (*here == '\'' && inquotes) inquotes = false;
395 else if (*here == ':' && !inbraces) foundcolon = true;
396 else if (inquotes) op.push_back (*here);
397 else com.push_back (*here);
398 ++here;
399 }
400
401 instring = meta;
402 metaoption.siblingoptions.clear();
403
404 if (com.empty()) {
405 metaoption.mqualifier.sibling = sAll;
406 metaoption.siblingoptions = " ";
407 }
408 else if (com == "first") {
409 metaoption.mqualifier.sibling = sNum;
410 metaoption.siblingoptions = "0";
411 }
412 else if (com == "last") {
413 metaoption.mqualifier.sibling = sNum;
414 metaoption.siblingoptions = "-2"; // == last
415 }
416 else if (com.getint()>0) {
417 metaoption.mqualifier.sibling = sNum;
418 int pos = com.getint()-1;
419 metaoption.siblingoptions +=pos;
420 }
421 else {
422 metaoption.mqualifier.sibling = sAll;
423 metaoption.siblingoptions = op;
424 }
425}
426
427static void get_child_options (text_t &instring, metadata_t &metaoption) {
428
429 assert (instring.size() > 6);
430 if (instring.size() <= 6) return;
431 text_t meta, com, op;
432 bool inbraces = false;
433 bool inquotes = false;
434 bool foundcolon = false;
435 text_t::const_iterator here = instring.begin()+5;
436 text_t::const_iterator end = instring.end();
437 while (here != end) {
438 if (foundcolon) meta.push_back (*here);
439 else if (*here == '(') inbraces = true;
440 else if (*here == ')') inbraces = false;
441 else if (*here == '\'' && !inquotes) inquotes = true;
442 else if (*here == '\'' && inquotes) inquotes = false;
443 else if (*here == ':' && !inbraces) foundcolon = true;
444 else if (inquotes) op.push_back (*here);
445 else com.push_back (*here);
446 ++here;
447 }
448
449 instring = meta;
450 if (com.empty()) {
451 metaoption.mqualifier.child = cAll;
452 metaoption.childoptions = " ";
453 }
454 else if (com == "first") {
455 metaoption.mqualifier.child = cNum;
456 metaoption.childoptions = ".fc";
457 }
458 else if (com == "last") {
459 metaoption.mqualifier.child = cNum;
460 metaoption.childoptions = ".lc";
461 }
462 else if (com.getint()>0) {
463 metaoption.mqualifier.child = cNum;
464 metaoption.childoptions = "."+com;
465 }
466 else {
467 metaoption.mqualifier.child = cAll;
468 metaoption.childoptions = op;
469 }
470}
471
472
473static void get_truncate_options (text_t &instring, metadata_t &metaoption)
474{
475 assert (instring.size() > ((text_t) "truncate").size());
476 if (instring.size() <= ((text_t) "truncate").size()) return;
477 text_t meta, com;
478 bool inbraces = false;
479 bool foundcolon = false;
480 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
481 text_t::const_iterator end = instring.end();
482 while (here != end) {
483 if (foundcolon) meta.push_back (*here);
484 else if (*here == '(') inbraces = true;
485 else if (*here == ')') inbraces = false;
486 else if (*here == ':' && !inbraces) foundcolon = true;
487 else com.push_back (*here);
488 ++here;
489 }
490
491 instring = meta;
492
493 if (!com.empty())
494 {
495 metaoption.siblingoptions = com;
496 }
497 else
498 {
499 // Default is 100 characters if not specified
500 metaoption.siblingoptions = "100";
501 }
502}
503
504
505
506static void parse_meta (text_t &meta, metadata_t &metaoption,
507 text_tset &metadata, bool &getParents) {
508
509 // Look for the various format statement modifiers
510 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
511 // is irrelevant because this is not stored in metaoption.metacommand anyway
512 bool keep_trying = true;
513 while (keep_trying)
514 {
515 keep_trying = false;
516
517 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
518 {
519 metaoption.metacommand |= mCgiSafe;
520 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
521 keep_trying = true;
522 }
523 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
524 {
525 metaoption.metacommand |= mSpecial;
526 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
527 keep_trying = true;
528 }
529
530 // New "truncate" special formatting option
531 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
532 {
533 metaoption.metacommand |= mTruncate;
534 get_truncate_options (meta, metaoption);
535 keep_trying = true;
536 }
537 // New "htmlsafe" special formatting option
538 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
539 {
540 metaoption.metacommand |= mHTMLSafe;
541 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
542 keep_trying = true;
543 }
544 // New "xmlsafe" special formatting option
545 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
546 {
547 metaoption.metacommand |= mXMLSafe;
548 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
549 keep_trying = true;
550 }
551 // New "dmsafe" special formatting option
552 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
553 {
554 metaoption.metacommand |= mDMSafe;
555 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
556 keep_trying = true;
557 }
558 }
559
560 bool had_parent_or_child = true;
561 bool prev_was_parent = false;
562 bool prev_was_child = false;
563
564 while (had_parent_or_child) {
565 if (meta.size() > 7
566 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
567
568 // clear out sibling and child (cmd and options)
569 metaoption.metacommand &= ~(mChild|mSibling);
570 metaoption.childoptions.clear();
571 metaoption.siblingoptions.clear();
572
573 getParents = true;
574 metaoption.metacommand |= mParent;
575 get_parent_options (meta, metaoption);
576
577 if (prev_was_parent) {
578 metaoption.pre_tree_traverse += ".pr";
579 }
580 else if (prev_was_child) {
581 metaoption.pre_tree_traverse += ".fc";
582 }
583
584 prev_was_parent = true;
585 prev_was_child = false;
586 }
587 else if (meta.size() > 6
588 && (substr (meta.begin(), meta.begin()+5) == "child")) {
589
590 // clear out sibling and parent (cmd and options)
591 metaoption.metacommand &= ~(mParent|mSibling);
592 metaoption.parentoptions.clear();
593 metaoption.siblingoptions.clear();
594
595 metaoption.metacommand |= mChild;
596 get_child_options (meta, metaoption);
597 metadata.insert("contains");
598
599 if (prev_was_parent) {
600 metaoption.pre_tree_traverse += ".pr";
601 }
602 else if (prev_was_child) {
603 metaoption.pre_tree_traverse += ".fc";
604 }
605
606 prev_was_child = true;
607 prev_was_parent = false;
608 }
609 else {
610 prev_was_child = false;
611 prev_was_parent = false;
612 had_parent_or_child = false;
613 }
614 }
615
616 // parent/child can have sibling tacked on end also
617 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
618 metaoption.metacommand |= mSibling;
619 get_sibling_options (meta, metaoption);
620 }
621
622 // check for ex. which may occur in format statements
623 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
624 meta = substr (meta.begin()+3, meta.end());
625 }
626 metadata.insert (meta);
627 metaoption.metaname = meta;
628}
629
630static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
631 if (meta == "collection") {
632 // no qualifiers
633 metaoption.metaname = g_EmptyText;
634 return;
635 }
636 meta = substr (meta.begin()+11, meta.end());
637 metaoption.metaname = meta;
638
639}
640
641static void parse_meta (text_t &meta, format_t *formatlistptr,
642 text_tset &metadata, bool &getParents) {
643
644 // check for ex. which may occur in format statements
645 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
646 meta = substr (meta.begin()+3, meta.end());
647 }
648 if (meta == "link")
649 formatlistptr->command = comLink;
650 else if (meta == "/link")
651 formatlistptr->command = comEndLink;
652
653 else if (meta == "srclink") {
654 formatlistptr->command = comAssocLink;
655 formatlistptr->meta.metaname = "srclink_file";
656 metadata.insert("srclink_file");
657 }
658 else if (meta == "srchref") {
659 formatlistptr->command = comAssocLink;
660 formatlistptr->text = "href";
661 formatlistptr->meta.metaname = "srclink_file";
662 metadata.insert("srclink_file");
663 }
664 else if (meta == "/srclink") {
665 formatlistptr->command = comEndAssocLink;
666 formatlistptr->meta.metaname = "srclink_file";
667 }
668 // and weblink etc
669 else if (meta == "href")
670 formatlistptr->command = comHref;
671
672 else if (meta == "num")
673 formatlistptr->command = comNum;
674
675 else if (meta == "icon")
676 formatlistptr->command = comIcon;
677
678 else if (meta == "Text")
679 formatlistptr->command = comDoc;
680
681 else if (meta == "RelatedDocuments")
682 formatlistptr->command = comRel;
683
684 else if (meta == "highlight")
685 formatlistptr->command = comHighlight;
686
687 else if (meta == "/highlight")
688 formatlistptr->command = comEndHighlight;
689
690 else if (meta == "metadata-spanwrap")
691 formatlistptr->command = comMetadataSpanWrap;
692
693 else if (meta == "/metadata-spanwrap")
694 formatlistptr->command = comEndMetadataSpanWrap;
695
696 else if (meta == "Summary")
697 formatlistptr->command = comSummary;
698
699 else if (meta == "DocImage")
700 formatlistptr->command = comImage;
701
702 else if (meta == "DocTOC")
703 formatlistptr->command = comTOC;
704
705 else if (meta == "DocumentButtonDetach")
706 formatlistptr->command = comDocumentButtonDetach;
707
708 else if (meta == "DocumentButtonHighlight")
709 formatlistptr->command = comDocumentButtonHighlight;
710
711 else if (meta == "DocumentButtonExpandContents")
712 formatlistptr->command = comDocumentButtonExpandContents;
713
714 else if (meta == "DocumentButtonExpandText")
715 formatlistptr->command = comDocumentButtonExpandText;
716
717 else if (meta == "DocOID")
718 formatlistptr->command = comOID;
719 else if (meta == "DocTopOID")
720 formatlistptr->command = comTopOID;
721 else if (meta == "DocRank")
722 formatlistptr->command = comRank;
723 else if (meta == "DocTermsFreqTotal")
724 formatlistptr->command = comDocTermsFreqTotal;
725 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
726 formatlistptr->command = comCollection;
727 parse_coll_meta(meta, formatlistptr->meta);
728 }
729 else {
730 formatlistptr->command = comMeta;
731 parse_meta (meta, formatlistptr->meta, metadata, getParents);
732 }
733}
734
735
736static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
737 text_tset &metadata, bool &getParents) {
738
739 text_t text;
740 text_t::const_iterator here = formatstring.begin();
741 text_t::const_iterator end = formatstring.end();
742
743 while (here != end) {
744
745 if (*here == '\\') {
746 ++here;
747 if (here != end) text.push_back (*here);
748
749 } else if (*here == '{') {
750 if (!text.empty()) {
751 formatlistptr->command = comText;
752 formatlistptr->text = text;
753 formatlistptr->nextptr = new format_t();
754 formatlistptr = formatlistptr->nextptr;
755
756 text.clear();
757 }
758 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
759
760 formatlistptr->nextptr = new format_t();
761 formatlistptr = formatlistptr->nextptr;
762 if (here == end) break;
763 }
764 } else if (*here == '[') {
765 if (!text.empty()) {
766 formatlistptr->command = comText;
767 formatlistptr->text = text;
768 formatlistptr->nextptr = new format_t();
769 formatlistptr = formatlistptr->nextptr;
770
771 text.clear();
772 }
773 text_t meta;
774 ++here;
775 while (*here != ']') {
776 if (here == end) return false;
777 meta.push_back (*here);
778 ++here;
779 }
780 parse_meta (meta, formatlistptr, metadata, getParents);
781 formatlistptr->nextptr = new format_t();
782 formatlistptr = formatlistptr->nextptr;
783
784 } else
785 text.push_back (*here);
786
787 if (here != end) ++here;
788 }
789 if (!text.empty()) {
790 formatlistptr->command = comText;
791 formatlistptr->text = text;
792 formatlistptr->nextptr = new format_t();
793 formatlistptr = formatlistptr->nextptr;
794
795 }
796 return true;
797}
798
799
800static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
801 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
802
803 text_t::const_iterator it = findchar (here, end, '}');
804 if (it == end) return false;
805
806 text_t com = substr (here, it);
807 here = findchar (it, end, '{');
808 if (here == end) return false;
809 else ++here;
810
811 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
812 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
813 else return false;
814
815 int commacount = 0;
816 text_t text;
817 while (here != end) {
818
819 if (*here == '\\') {
820 ++here;
821 if (here != end) text.push_back(*here);
822
823 }
824
825 else if (*here == ',' || *here == '}' || *here == '{') {
826
827 if (formatlistptr->command == comOr) {
828 // the {Or}{this, or this, or this, or this} statement
829 format_t *or_ptr;
830
831 // find the next unused orptr
832 if (formatlistptr->orptr == NULL) {
833 formatlistptr->orptr = new format_t();
834 or_ptr = formatlistptr->orptr;
835 } else {
836 or_ptr = formatlistptr->orptr;
837 while (or_ptr->nextptr != NULL)
838 or_ptr = or_ptr->nextptr;
839 or_ptr->nextptr = new format_t();
840 or_ptr = or_ptr->nextptr;
841 }
842
843 if (!text.empty())
844 {
845 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
846 }
847
848 if (*here == '{')
849 {
850 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
851 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
852 // The latter can always be re-written:
853 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
854
855 if (!text.empty()) // already used up allocated format_t
856 {
857 // => allocate new one for detected action
858 or_ptr->nextptr = new format_t();
859 or_ptr = or_ptr->nextptr;
860 }
861 if (!parse_action(++here, end, or_ptr, metadata, getParents))
862 {
863 return false;
864 }
865 }
866 else
867 {
868 if (*here == '}') break;
869 }
870 text.clear();
871
872 }
873
874 // Parse an {If}{decide,do,else} statement
875 else {
876
877 // Read the decision component.
878 if (commacount == 0) {
879 // Decsion can be a metadata element, or a piece of text.
880 // Originally Stefan's code, updated 25/10/2000 by Gordon.
881
882 text_t::const_iterator beginbracket = text.begin();
883 text_t::const_iterator endbracket = (text.end() - 1);
884
885 // Decision is based on a metadata element
886 if ((*beginbracket == '[') && (*endbracket == ']')) {
887 // Ignore the surrounding square brackets
888 text_t meta = substr (beginbracket+1, endbracket);
889 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
890 ++commacount;
891 text.clear();
892 }
893
894 // Decision is a piece of text (probably a macro like _cgiargmode_).
895 else {
896
897 // hunt for any metadata in string, which might be uses in
898 // to test a condition, e.g. [Format] eq 'PDF'
899 format_t* dummyformat = new format_t();
900 // update which metadata fields needed
901 // (not interested in updatng formatlistptr)
902 parse_string (text, dummyformat, metadata, getParents);
903 delete dummyformat;
904
905 formatlistptr->decision.command = dText;
906 formatlistptr->decision.text = text;
907 ++commacount;
908 text.clear();
909 }
910 }
911
912 // Read the "then" and "else" components of the {If} statement.
913 else {
914 format_t** nextlistptr = NULL;
915 if (commacount == 1) {
916 nextlistptr = &formatlistptr->ifptr;
917 } else if (commacount == 2 ) {
918 nextlistptr = &formatlistptr->elseptr;
919 } else {
920 return false;
921 }
922
923 if (!text.empty()) {
924 if (*nextlistptr == NULL) {
925 *nextlistptr = new format_t();
926 } else {
927
928 // skip to the end of any format_t statements already added
929 while ((*nextlistptr)->nextptr != NULL)
930 {
931 nextlistptr = &(*nextlistptr)->nextptr;
932 }
933
934 (*nextlistptr)->nextptr = new format_t();
935 nextlistptr = &(*nextlistptr)->nextptr;
936 }
937
938 if (!parse_string (text, *nextlistptr, metadata, getParents))
939 {
940 return false;
941 }
942 text.clear();
943 }
944
945 if (*here == '{')
946 {
947 if (*nextlistptr == NULL) {
948 *nextlistptr = new format_t();
949 } else {
950 // skip to the end of any format_t statements already added
951 while ((*nextlistptr)->nextptr != NULL)
952 {
953 nextlistptr = &(*nextlistptr)->nextptr;
954 }
955
956 (*nextlistptr)->nextptr = new format_t();
957 nextlistptr = &(*nextlistptr)->nextptr;
958 }
959
960 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
961 {
962 return false;
963 }
964 }
965 else
966 {
967 if (*here == '}') break;
968 ++commacount;
969 }
970 }
971 }
972
973 } else text.push_back(*here);
974
975 if (here != end) ++here;
976 }
977
978 return true;
979}
980
981
982static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
983 const text_t metaname, int metapos=-1)
984{
985
986 text_t tag_type = (metaname == "Text") ? "div" : "span";
987 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
988
989 text_t wrapped_metatext = "<" + tag_type + " ";
990 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
991
992 wrapped_metatext += "docoid=\"" + OID + "\" ";
993 wrapped_metatext += "metaname=\"" + metaname + "\"";
994
995 if (metapos>=0) {
996 text_t metapos_str = metapos;
997 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
998 }
999
1000 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1001
1002 return wrapped_metatext;
1003}
1004
1005
1006
1007bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1008 text_tset &metadata, bool &getParents) {
1009
1010 formatlistptr->clear();
1011 getParents = false;
1012
1013 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1014}
1015
1016// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1017// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1018
1019static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1020{
1021 text_t no_ns_metaname = remove_namespace(meta.metaname);
1022 text_t formatted_metatext;
1023 bool first = true;
1024
1025 const int start_i=0;
1026 const int end_i = metainfo.values.size()-1;
1027
1028 if (position == -1) { // all
1029 for (int i=start_i; i<=end_i; ++i) {
1030 if (!first) formatted_metatext += meta.siblingoptions;
1031
1032 text_t fresh_metatext;
1033
1034 if (meta.metacommand & mSpecial) {
1035 // special formatting
1036 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1037 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1038 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1039 }
1040 else fresh_metatext = metainfo.values[i];
1041
1042 // New "truncate" special formatting option
1043 if (meta.metacommand & mTruncate)
1044 {
1045 int truncate_length = meta.siblingoptions.getint();
1046 text_t truncated_value = fresh_metatext;
1047 if (truncated_value.size() > truncate_length)
1048 {
1049 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1050 }
1051 fresh_metatext = truncated_value;
1052 }
1053 // New "xmlsafe" special formatting option
1054 if (meta.metacommand & mXMLSafe)
1055 {
1056 // Make it XML-safe
1057 text_t text_xml_safe = "";
1058 text_t::const_iterator text_iterator = fresh_metatext.begin();
1059 while (text_iterator != fresh_metatext.end())
1060 {
1061 if (*text_iterator == '&') text_xml_safe += "&amp;";
1062 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1063 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1064 else text_xml_safe.push_back(*text_iterator);
1065 text_iterator++;
1066 }
1067 fresh_metatext = text_xml_safe;
1068 }
1069 // New "htmlsafe" special formatting option
1070 if (meta.metacommand & mHTMLSafe)
1071 {
1072 // Make it HTML-safe
1073 text_t text_html_safe = "";
1074 text_t::const_iterator text_iterator = fresh_metatext.begin();
1075 while (text_iterator != fresh_metatext.end())
1076 {
1077 if (*text_iterator == '&') text_html_safe += "&amp;";
1078 else if (*text_iterator == '<') text_html_safe += "&lt;";
1079 else if (*text_iterator == '>') text_html_safe += "&gt;";
1080 else if (*text_iterator == '"') text_html_safe += "&quot;";
1081 else text_html_safe.push_back(*text_iterator);
1082 text_iterator++;
1083 }
1084 fresh_metatext = text_html_safe;
1085 }
1086 // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1087 if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1088 {
1089 // Make it macro-safe
1090 text_t text_dm_safe = dm_safe(fresh_metatext);
1091 fresh_metatext = text_dm_safe;
1092 }
1093
1094 if (metadata_spanwrap) {
1095 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
1096 }
1097 formatted_metatext += fresh_metatext;
1098
1099 first = false;
1100
1101 }
1102 } else {
1103 if (position == -2) { // end
1104 position = end_i;
1105 } else if (position < start_i || position > end_i) {
1106 return "";
1107 }
1108
1109 text_t fresh_metatext;
1110 if (meta.metacommand & mSpecial) {
1111
1112 // special formatting
1113 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1114 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1115 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1116 }
1117 else fresh_metatext = metainfo.values[position];
1118
1119 // New "truncate" special formatting option
1120 if (meta.metacommand & mTruncate)
1121 {
1122 int truncate_length = meta.siblingoptions.getint();
1123 text_t truncated_value = fresh_metatext;
1124 if (truncated_value.size() > truncate_length)
1125 {
1126 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1127 }
1128 fresh_metatext = truncated_value;
1129 }
1130 // New "xmlsafe" special formatting option
1131 if (meta.metacommand & mXMLSafe)
1132 {
1133 // Make it XML-safe
1134 text_t text_xml_safe = "";
1135 text_t::const_iterator text_iterator = fresh_metatext.begin();
1136 while (text_iterator != fresh_metatext.end())
1137 {
1138 if (*text_iterator == '&') text_xml_safe += "&amp;";
1139 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1140 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1141 else text_xml_safe.push_back(*text_iterator);
1142 text_iterator++;
1143 }
1144 fresh_metatext = text_xml_safe;
1145 }
1146 // New "htmlsafe" special formatting option
1147 if (meta.metacommand & mHTMLSafe)
1148 {
1149 // Make it HTML-safe
1150 text_t text_html_safe = "";
1151 text_t::const_iterator text_iterator = fresh_metatext.begin();
1152 while (text_iterator != fresh_metatext.end())
1153 {
1154 if (*text_iterator == '&') text_html_safe += "&amp;";
1155 else if (*text_iterator == '<') text_html_safe += "&lt;";
1156 else if (*text_iterator == '>') text_html_safe += "&gt;";
1157 else if (*text_iterator == '"') text_html_safe += "&quot;";
1158 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1159 else if (*text_iterator == ',') text_html_safe += "&#44;";
1160 else text_html_safe.push_back(*text_iterator);
1161 text_iterator++;
1162 }
1163 fresh_metatext = text_html_safe;
1164 }
1165 // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1166 if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
1167 {
1168 // Make it macro-safe
1169 text_t text_dm_safe = dm_safe(fresh_metatext);
1170 fresh_metatext = text_dm_safe;
1171 }
1172
1173 if (metadata_spanwrap) {
1174 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
1175 }
1176
1177 formatted_metatext += fresh_metatext;
1178 }
1179
1180 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1181 else return formatted_metatext;
1182}
1183
1184static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1185{
1186
1187 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1188
1189 switch (meta.mqualifier.parent) {
1190 case pNone:
1191 return "Nothing!!";
1192 break;
1193
1194 case pImmediate:
1195 if (parent != NULL) {
1196 text_t parent_oid = get_parent(docinfo.OID);
1197 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1198 }
1199 break;
1200
1201 case pTop:
1202 if (parent != NULL) {
1203 text_t parent_oid = get_parent(docinfo.OID);
1204
1205 while (parent->parent != NULL) {
1206 parent = parent->parent;
1207 parent_oid = get_parent(parent_oid);
1208 }
1209 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1210 }
1211 break;
1212
1213 case pAll:
1214 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1215 if (parent != NULL) {
1216 text_t parent_oid = get_parent(docinfo.OID);
1217
1218 text_tarray tmparray;
1219 while (parent != NULL) {
1220 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1221 parent = parent->parent;
1222 parent_oid = get_parent(parent_oid);
1223
1224 }
1225 // now join them up - use teh parent separator
1226 bool first = true;
1227 text_t tmp;
1228 text_tarray::reverse_iterator here = tmparray.rbegin();
1229 text_tarray::reverse_iterator end = tmparray.rend();
1230 while (here != end) {
1231 if (!first) tmp += meta.parentoptions;
1232 tmp += *here;
1233 first = false;
1234 ++here;
1235 }
1236 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1237 else return tmp;
1238 }
1239 }
1240 return "";
1241
1242}
1243
1244static text_t get_child_meta (const text_t& collection,
1245 recptproto* collectproto,
1246 ResultDocInfo_t &docinfo, displayclass &disp,
1247 const metadata_t &meta, text_tmap &options,
1248 ostream& logout, int siblings_values)
1249{
1250 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1251
1252 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1253 const text_t& child_metaname = meta.metaname;
1254 const text_t& child_field = meta.childoptions;
1255 text_tset child_metadata;
1256 child_metadata.insert(child_metaname);
1257
1258 FilterResponse_t child_response;
1259 if (meta.mqualifier.child == cNum) {
1260 // just one child
1261 //get the information associated with the metadata for child doc
1262 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1263 child_metadata, false, collectproto, child_response,
1264 logout)) return ""; // invalid child number
1265
1266 if (child_response.docInfo.empty()) return false; // no info for the child
1267
1268 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1269 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1270
1271 text_t child_metavalue
1272 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1273 return expand_metadata(child_metavalue,collection,collectproto,
1274 child_docinfo,disp,options,logout);
1275 }
1276
1277
1278 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1279
1280
1281 if (!pre_tree_trav.empty()) {
1282 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1283 FilterResponse_t trav_response;
1284
1285 text_tset trav_metadata;
1286 trav_metadata.insert("contains");
1287
1288 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1289 trav_metadata, false, collectproto, trav_response,
1290 logout)) return ""; // invalid pre_tree_trav
1291
1292 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1293
1294 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1295
1296 // use this for rest of routine
1297 docinfo = trav_docinfo;
1298 }
1299
1300 // we need to get all children
1301 text_t result = "";
1302 text_tarray children;
1303 text_t contains = docinfo.metadata["contains"].values[0];
1304 splitchar (contains.begin(), contains.end(), ';', children);
1305 text_tarray::const_iterator here = children.begin();
1306 text_tarray::const_iterator end = children.end();
1307 bool first = true;
1308 while (here !=end) {
1309 text_t oid = *here;
1310 here++;
1311 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1312
1313 //get the information associated with the metadata for child doc
1314 if (!get_info (oid, collection, "", child_metadata,
1315 false, collectproto, child_response, logout) ||
1316 child_response.docInfo.empty()) {
1317 first = false;
1318 continue;
1319 }
1320
1321
1322 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1323 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1324
1325 text_t child_metavalue
1326 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1327
1328
1329 if (!first) result += child_field;
1330 first = false;
1331 // need to do this here cos otherwise we are in the wrong document
1332 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1333 child_docinfo,disp,options,logout);
1334
1335 result += em;
1336 }
1337 return result;
1338
1339}
1340
1341static text_t get_meta (const text_t& collection, recptproto* collectproto,
1342 ResultDocInfo_t &docinfo, displayclass &disp,
1343 const metadata_t &meta, text_tmap &options,
1344 ostream& logout) {
1345
1346 // make sure we have the requested metadata
1347 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1348 if (it == docinfo.metadata.end()) return "";
1349
1350 int siblings_values = 0; // default is no siblings, just the first metadata available
1351 if (meta.metacommand & mSibling) {
1352 if (meta.mqualifier.sibling == sAll) {
1353 siblings_values = -1; //all
1354 } else if (meta.mqualifier.sibling == sNum) {
1355 siblings_values = meta.siblingoptions.getint();
1356 }
1357 }
1358 if (meta.metacommand & mParent) {
1359 return get_parent_meta(docinfo,meta,siblings_values);
1360 }
1361
1362 else if (meta.metacommand & mChild) {
1363 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1364 options,logout, siblings_values);
1365 }
1366 else if (meta.metacommand & mSibling) { // only siblings
1367 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1368 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1369 }
1370 else {
1371
1372 // straightforward metadata request (nothing fancy)
1373
1374 text_t classifier_metaname = docinfo.classifier_metadata_type;
1375 int metaname_index
1376 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1377 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1378 }
1379
1380 return "";
1381}
1382
1383static text_t get_or (const text_t& collection, recptproto* collectproto,
1384 ResultDocInfo_t &docinfo, displayclass &disp,
1385 format_t *orptr, text_tmap &options,
1386 ostream& logout) {
1387
1388 while (orptr != NULL) {
1389
1390 if (metadata_spanwrap) {
1391 // need to be a bit more careful about this
1392 // => test for it *without* spanwrap, and if defined, then
1393 // got back and generate it again, this time with spanwrap on
1394
1395 metadata_spanwrap = false;
1396 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1397 options, logout);
1398 metadata_spanwrap = true;
1399 if (!test_tmp.empty()) {
1400
1401 return format_string (collection,collectproto,docinfo, disp, orptr,
1402 options, logout);
1403 }
1404 }
1405 else {
1406 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1407 options, logout);
1408 if (!tmp.empty()) return tmp;
1409 }
1410
1411 orptr = orptr->nextptr;
1412 }
1413 return "";
1414}
1415
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1421
1422static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1423{
1424 int pos = start_pos;
1425 while (pos<outstring.size()) {
1426 if (!char_is_whitespace(outstring[pos])) {
1427 break;
1428 }
1429 ++pos;
1430 }
1431
1432 return pos;
1433}
1434
1435static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1436{
1437 int pos = start_pos;
1438 while (pos>=0) {
1439 if (!char_is_whitespace(outstring[pos])) {
1440 break;
1441 }
1442 --pos;
1443 }
1444
1445 return pos;
1446}
1447
1448static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1449{
1450 int pos = start_pos;
1451 while (pos>=0) {
1452 if (char_is_whitespace(outstring[pos])) {
1453 break;
1454 }
1455 --pos;
1456 }
1457
1458 return pos;
1459}
1460
1461
1462static int rscan_for(const text_t& outstring, const int start_pos,
1463 const char find_c)
1464{
1465 int pos = start_pos;
1466 while (pos>=0) {
1467 char c = outstring[pos];
1468 if (outstring[pos] == find_c) {
1469 break;
1470 }
1471 --pos;
1472 }
1473
1474 return pos;
1475}
1476
1477text_t extract_substr(const text_t& outstring, const int start_pos,
1478 const int end_pos)
1479{
1480 text_t extracted_str;
1481 extracted_str.clear();
1482
1483 for (int pos=start_pos; pos<=end_pos; ++pos) {
1484 extracted_str.push_back(outstring[pos]);
1485 }
1486
1487 return extracted_str;
1488}
1489
1490
1491static text_t expand_potential_metadata(const text_t& collection,
1492 recptproto* collectproto,
1493 ResultDocInfo_t &docinfo,
1494 displayclass &disp,
1495 const text_t& intext,
1496 text_tmap &options,
1497 ostream& logout)
1498{
1499 text_t outtext;
1500
1501 // decide if dealing with metadata or text
1502
1503 text_t::const_iterator beginbracket = intext.begin();
1504 text_t::const_iterator endbracket = (intext.end() - 1);
1505
1506 // Decision is based on a metadata element
1507 if ((*beginbracket == '[') && (*endbracket == ']')) {
1508 // Ignore the surrounding square brackets
1509 text_t meta_text = substr (beginbracket+1, endbracket);
1510
1511 if (meta_text == "Text") {
1512 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1513 }
1514 else {
1515
1516 text_tset metadata;
1517 bool getParents =false;
1518 metadata_t meta;
1519
1520 parse_meta (meta_text, meta, metadata, getParents);
1521 outtext
1522 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1523 }
1524
1525 }
1526 else {
1527 outtext = intext;
1528 }
1529
1530 return outtext;
1531}
1532
1533
1534
1535
1536static bool uses_expression(const text_t& collection, recptproto* collectproto,
1537 ResultDocInfo_t &docinfo,
1538 displayclass &disp,
1539 const text_t& outstring, text_t& lhs_expr,
1540 text_t& op_expr, text_t& rhs_expr,
1541 text_tmap &options,
1542 ostream& logout)
1543{
1544 // Note: the string may not be of the form: str1 op str2, however
1545 // to deterine this we have to process it on the assumption it is,
1546 // and if at any point an 'erroneous' value is encountered, return
1547 // false and let something else have a go at evaluating it
1548
1549 // Starting at the end of the string and working backwards ..
1550
1551 const int outstring_len = outstring.size();
1552
1553 // skip over white space
1554 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1555
1556 if (rhs_end<=0) {
1557 // no meaningful text or (rhs_end==0) no room for operator
1558 return false;
1559 }
1560
1561 // check for ' or " and then scan over token
1562 const char potential_quote = outstring[rhs_end];
1563 int rhs_start=rhs_end;
1564 bool quoted = false;
1565
1566 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1567 --rhs_end;
1568 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1569 quoted = true;
1570 }
1571 else {
1572 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1573 }
1574
1575 if ((rhs_end-rhs_start)<0) {
1576 // no meaningful rhs expression
1577 return false;
1578 }
1579
1580 // form rhs_expr
1581 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1582
1583 // skip over white space
1584 const int to_whitespace = (quoted) ? 2 : 1;
1585
1586 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1587 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1588
1589 if ((op_end<0) && (op_start<0)) {
1590 // no meaningful expression operator
1591 return false;
1592 }
1593
1594 if (op_end-op_start<0) {
1595 // no meaningful expression operator
1596 return false;
1597 }
1598
1599 op_expr = extract_substr(outstring,op_start,op_end);
1600
1601
1602 // check for operator
1603 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1604 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1605
1606 // not a valid operator
1607 return false;
1608 }
1609
1610 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1611 if (lhs_end<0) {
1612 // no meaningful lhs expression
1613 return false;
1614 }
1615
1616 int lhs_start = scan_over_whitespace(outstring,0);
1617
1618 // form lhs_expr from remainder of string
1619 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1620
1621 // Now we know we have a valid expression, look up any
1622 // metadata terms
1623
1624 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1625 disp,rhs_expr,options,logout);
1626 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1627 disp,lhs_expr,options,logout);
1628
1629 return true;
1630}
1631
1632static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1633 const text_t& rhs_expr, ostream& logout)
1634{
1635 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1636 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1637 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1638 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1639 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1640 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1641 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1642 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1643 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1644 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1645 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1646 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1647 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1648 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1649 else {
1650 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1651 }
1652
1653 return false;
1654}
1655
1656
1657static text_t get_if (const text_t& collection, recptproto* collectproto,
1658 ResultDocInfo_t &docinfo, displayclass &disp,
1659 const decision_t &decision,
1660 format_t *ifptr, format_t *elseptr,
1661 text_tmap &options, ostream& logout)
1662{
1663 // If the decision component is a metadata element, then evaluate it
1664 // to see whether we output the "then" or the "else" clause
1665 if (decision.command == dMeta) {
1666
1667 bool store_metadata_spanwrap = metadata_spanwrap;
1668 metadata_spanwrap = 0;
1669
1670 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1671 bool metadata_exists
1672 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1673 logout) != "");
1674
1675 metadata_spanwrap = store_metadata_spanwrap;
1676
1677 if (metadata_exists) {
1678 if (ifptr != NULL)
1679 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1680 options, logout);
1681 }
1682 else {
1683 if (elseptr != NULL)
1684 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1685 options, logout);
1686 }
1687 }
1688
1689 // If the decision component is text, then evaluate it (it is probably a
1690 // macro like _cgiargmode_) to decide what to output.
1691 else if (decision.command == dText) {
1692
1693 text_t outstring;
1694 disp.expandstring (decision.text, outstring);
1695
1696 // Check for if expression in form: str1 op str2
1697 // (such as [x] eq "y")
1698 text_t lhs_expr, op_expr, rhs_expr;
1699 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1700 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1701 if (ifptr != NULL) {
1702 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1703 options, logout);
1704 }
1705 else {
1706 return "";
1707 }
1708 } else {
1709 if (elseptr != NULL) {
1710 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1711 options, logout);
1712 }
1713 else {
1714 return "";
1715 }
1716 }
1717 }
1718
1719
1720 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1721 // a cgi argument macro that has not been set, it evaluates to itself.
1722 // Therefore, were have to say that a piece of text evalautes true if
1723 // it is non-empty and if it is a cgi argument evaulating to itself.
1724
1725 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1726 if (ifptr != NULL)
1727 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1728 options, logout);
1729 } else {
1730 if (elseptr != NULL)
1731 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1732 options, logout);
1733 }
1734 }
1735
1736 return "";
1737}
1738
1739bool includes_metadata(const text_t& text)
1740{
1741 text_t::const_iterator here = text.begin();
1742 text_t::const_iterator end = text.end();
1743 while (here != end) {
1744 if (*here == '[') return true;
1745 ++here;
1746 }
1747
1748 return false;
1749}
1750
1751static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1752 recptproto* collectproto,
1753 ResultDocInfo_t &docinfo,
1754 displayclass &disp, text_tmap &options,
1755 ostream &logout) {
1756
1757 if (includes_metadata(metavalue)) {
1758
1759 // text has embedded metadata in it => expand it
1760 FilterRequest_t request;
1761 FilterResponse_t response;
1762
1763 request.getParents = false;
1764
1765 format_t *expanded_formatlistptr = new format_t();
1766 parse_formatstring (metavalue, expanded_formatlistptr,
1767 request.fields, request.getParents);
1768
1769 // retrieve metadata
1770 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1771 collectproto, response, logout);
1772
1773 if (!response.docInfo.empty()) {
1774
1775 text_t expanded_metavalue
1776 = get_formatted_string(collection, collectproto,
1777 response.docInfo[0], disp, expanded_formatlistptr,
1778 options, logout);
1779
1780 return expanded_metavalue;
1781 }
1782 else {
1783 return metavalue;
1784 }
1785 }
1786 else {
1787
1788 return metavalue;
1789 }
1790}
1791
1792text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1793 displayclass &disp,
1794 text_t meta_name, ostream& logout) {
1795
1796 ColInfoResponse_t collectinfo;
1797 comerror_t err;
1798 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1799 text_t meta_value = "";
1800 text_t lang;
1801 disp.expandstring("_cgiargl_",lang);
1802 if (lang.empty()) {
1803 lang = "en";
1804 }
1805
1806 if (err == noError) {
1807 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1808 }
1809 return meta_value;
1810
1811
1812}
1813text_t format_string (const text_t& collection, recptproto* collectproto,
1814 ResultDocInfo_t &docinfo, displayclass &disp,
1815 format_t *formatlistptr, text_tmap &options,
1816 ostream& logout) {
1817
1818 if (formatlistptr == NULL) return "";
1819
1820 switch (formatlistptr->command) {
1821 case comOID:
1822 return docinfo.OID;
1823 case comTopOID:
1824 {
1825 text_t top_id;
1826 get_top(docinfo.OID, top_id);
1827 return top_id;
1828 }
1829 case comRank:
1830 return text_t(docinfo.ranking);
1831 case comText:
1832 return formatlistptr->text;
1833 case comLink:
1834 return options["link"];
1835 case comEndLink:
1836 {
1837 if (options["link"].empty()) return "";
1838 else return "</a>";
1839 }
1840 case comHref:
1841 return get_href(options["link"]);
1842 case comIcon:
1843 return options["icon"];
1844 case comNum:
1845 return docinfo.result_num;
1846 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1847 return get_related_docs(collection, collectproto, docinfo, logout);
1848
1849 case comSummary:
1850 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1851 case comAssocLink:
1852 {
1853 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1854 if (!link_filename.empty()) {
1855 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1856 if (formatlistptr->text == "href") {
1857 return href;
1858 }
1859 return "<a href=\""+ href + "\">";
1860 }
1861 return "";
1862 }
1863 case comEndAssocLink:
1864 {
1865 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1866 if (!link_filename.empty()) {
1867 return "</a>";
1868 }
1869 return "";
1870 }
1871 case comMeta:
1872 {
1873 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1874 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1875 }
1876
1877 case comDoc:
1878 return format_text(collection, collectproto, docinfo, disp, options, logout);
1879
1880 case comImage:
1881 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1882 case comTOC:
1883 return options["DocTOC"];
1884 case comDocumentButtonDetach:
1885 return options["DocumentButtonDetach"];
1886 case comDocumentButtonHighlight:
1887 return options["DocumentButtonHighlight"];
1888 case comDocumentButtonExpandContents:
1889 return options["DocumentButtonExpandContents"];
1890 case comDocumentButtonExpandText:
1891 return options["DocumentButtonExpandText"];
1892 case comHighlight:
1893 if (options["highlight"] == "1") return "<b>";
1894 break;
1895 case comEndHighlight:
1896 if (options["highlight"] == "1") return "</b>";
1897 break;
1898 case comMetadataSpanWrap:
1899 metadata_spanwrap=true; return "";
1900 break;
1901 case comEndMetadataSpanWrap:
1902 metadata_spanwrap=false; return "";
1903 break;
1904 case comIf:
1905 return get_if (collection, collectproto, docinfo, disp,
1906 formatlistptr->decision, formatlistptr->ifptr,
1907 formatlistptr->elseptr, options, logout);
1908 case comOr:
1909 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1910 options, logout);
1911 case comDocTermsFreqTotal:
1912 return docinfo.num_terms_matched;
1913 case comCollection:
1914 if (formatlistptr->meta.metaname == g_EmptyText) {
1915 return collection;
1916 }
1917 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1918
1919 }
1920 return "";
1921}
1922
1923text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1924 ResultDocInfo_t &docinfo, displayclass &disp,
1925 format_t *formatlistptr, text_tmap &options,
1926 ostream& logout) {
1927
1928 text_t ft;
1929 while (formatlistptr != NULL)
1930 {
1931 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1932 options, logout);
1933 formatlistptr = formatlistptr->nextptr;
1934 }
1935
1936 return ft;
1937}
1938
1939
1940// we have only preloaded the text in DocumentAction. But you may want
1941// to get the text in query, so copy what we have done with
1942// format_summary and get the text here. Probably is quite expensive?
1943text_t format_text (const text_t& collection, recptproto* collectproto,
1944 ResultDocInfo_t &docinfo, displayclass &disp,
1945 text_tmap &options, ostream& logout)
1946{
1947 text_t text;
1948
1949 if (!options["text"].empty()) {
1950 text = options["text"];
1951 }
1952 else {
1953 // get document text here
1954 DocumentRequest_t docrequest;
1955 DocumentResponse_t docresponse;
1956 comerror_t err;
1957 docrequest.OID = docinfo.OID;
1958 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1959 text = docresponse.doc;
1960 }
1961
1962 if (metadata_spanwrap) {
1963 text = spanwrap_metatext(text,docinfo.OID,"Text");
1964 }
1965
1966 return text;
1967}
1968
1969/* FUNCTION NAME: format_summary
1970 * DESC: this is invoked when a [Summary] special metadata is processed.
1971 * RETURNS: a query-biased summary for the document */
1972
1973text_t format_summary (const text_t& collection, recptproto* collectproto,
1974 ResultDocInfo_t &docinfo, displayclass &disp,
1975 text_tmap &options, ostream& logout) {
1976
1977 // GRB: added code here to ensure that the cstr (and other collections)
1978 // uses the document metadata item Summary, rather than compressing
1979 // the text of the document, processed via the methods in
1980 // summarise.cpp
1981
1982 text_t summary;
1983
1984 if (docinfo.metadata.count("Summary") > 0 &&
1985 docinfo.metadata["Summary"].values.size() > 0) {
1986 summary = docinfo.metadata["Summary"].values[0];
1987 }
1988 else {
1989
1990 text_t textToSummarise, query;
1991
1992 if(options["text"].empty()) { // get document text
1993 DocumentRequest_t docrequest;
1994 DocumentResponse_t docresponse;
1995 comerror_t err;
1996 docrequest.OID = docinfo.OID;
1997 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1998 textToSummarise = docresponse.doc;
1999 }
2000 else {
2001 // in practice, this would not happen, because text is only
2002 // loaded with the [Text] command
2003 textToSummarise = options["text"];
2004 }
2005
2006 disp.expandstring("_cgiargq_",query);
2007 summary = summarise(textToSummarise,query,80);
2008 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2009 }
2010
2011 summary.replace("'","&#039;");
2012 summary.replace("\n","&#013;");
2013
2014 if (metadata_spanwrap) {
2015 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
2016 }
2017
2018 return summary;
2019}
Note: See TracBrowser for help on using the repository browser.