source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 21758

Last change on this file since 21758 was 21758, checked in by kjdon, 14 years ago

now we dynamically generate srclink (and /srclink, and new srchref which is the link without the a tag), using srclink_file metadata. This is to get gs2 receptionist stuff out of metadata and into the source code where it belongs

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.6 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
34static bool metadata_spanwrap = false;
35
36// a few function prototypes
37
38static text_t format_string (const text_t& collection, recptproto* collectproto,
39 ResultDocInfo_t &docinfo, displayclass &disp,
40 format_t *formatlistptr, text_tmap &options,
41 ostream& logout);
42
43static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
44 format_t *formatlistptr, text_tset &metadata, bool &getParents);
45
46static text_t format_summary (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
49static text_t format_text (const text_t& collection, recptproto* collectproto,
50 ResultDocInfo_t &docinfo, displayclass &disp,
51 text_tmap &options, ostream& logout);
52
53static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
54 recptproto* collectproto, ResultDocInfo_t &docinfo,
55 displayclass &disp, text_tmap &options,
56 ostream &logout);
57
58
59void metadata_t::clear() {
60 metaname.clear();
61 metacommand = mNone;
62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
65 pre_tree_traverse.clear();
66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
69}
70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
74 text.clear();
75}
76
77void format_t::clear() {
78 command = comText;
79 decision.clear();
80 text.clear();
81 meta.clear();
82 nextptr = NULL;
83 ifptr = NULL;
84 elseptr = NULL;
85 orptr = NULL;
86}
87
88void formatinfo_t::clear() {
89 DocumentImages = false;
90 DocumentTitles = true;
91 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
92 DocumentContents = true;
93 DocumentArrowsBottom = true;
94 DocumentArrowsTop = false;
95 DocumentSearchResultLinks = false;
96 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
97 // DocumentButtons.push_back ("Expand Text");
98 // DocumentButtons.push_back ("Expand Contents");
99 DocumentButtons.push_back ("Detach");
100 DocumentButtons.push_back ("Highlight");
101 RelatedDocuments = "";
102 DocumentText = "[Text]";
103 formatstrings.erase (formatstrings.begin(), formatstrings.end());
104 DocumentUseHTML = false;
105 AllowExtendedOptions = false;
106}
107
108// simply checks to see if formatstring begins with a <td> tag
109bool is_table_content (const text_t &formatstring) {
110 text_t::const_iterator here = formatstring.begin();
111 text_t::const_iterator end = formatstring.end();
112
113 while (here != end) {
114 if (*here != ' ') {
115 if ((*here == '<') && ((here+3) < end)) {
116 if ((*(here+1) == 't' || *(here+1) == 'T') &&
117 (*(here+2) == 'd' || *(here+2) == 'D') &&
118 (*(here+3) == '>' || *(here+3) == ' '))
119 return true;
120 } else return false;
121 }
122 ++here;
123 }
124 return false;
125}
126
127bool is_table_content (const format_t *formatlistptr) {
128
129 if (formatlistptr == NULL) return false;
130
131 if (formatlistptr->command == comText)
132 return is_table_content (formatlistptr->text);
133
134 return false;
135}
136
137// returns false if key isn't in formatstringmap
138bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
139 text_t &formatstring) {
140
141 formatstring.clear();
142 text_tmap::const_iterator it = formatstringmap.find(key);
143 if (it == formatstringmap.end()) return false;
144 formatstring = (*it).second;
145 return true;
146}
147
148// tries to find "key1key2" then "key1" then "key2"
149bool get_formatstring (const text_t &key1, const text_t &key2,
150 const text_tmap &formatstringmap,
151 text_t &formatstring) {
152
153 formatstring.clear();
154 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
155 if (it != formatstringmap.end()) {
156 formatstring = (*it).second;
157 return true;
158 }
159 it = formatstringmap.find(key1);
160 if (it != formatstringmap.end()) {
161 formatstring = (*it).second;
162 return true;
163 }
164 it = formatstringmap.find(key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 return false;
170}
171
172
173text_t remove_namespace(const text_t &meta_name) {
174 text_t::const_iterator end = meta_name.end();
175 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
176 if (it != end) {
177 return substr(it+1, end);
178 }
179
180 return meta_name;
181
182}
183// returns a date of form _format:date_(year, month, day)
184// input is date of type yyyy-?mm-?dd
185// at least the year must be present in date
186text_t format_date (const text_t &date) {
187
188 if (date.size() < 4) return "";
189
190 text_t::const_iterator datebegin = date.begin();
191
192 text_t year = substr (datebegin, datebegin+4);
193 int chars_seen_so_far = 4;
194 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
195
196 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
197 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
198
199 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
200 int imonth = month.getint();
201 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
202
203 chars_seen_so_far += 2;
204 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
205
206 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
208
209 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210 if (day[0] == '0') day = substr (day.begin()+1, day.end());
211 int iday = day.getint();
212 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
213
214 return "_format:date_("+year+","+month+","+day+")";
215}
216
217// converts an iso639 language code to its English equivalent
218// should we be checking that the macro exists??
219text_t iso639 (const text_t &langcode) {
220 if (langcode.empty()) return "";
221 return "_iso639:iso639"+langcode+"_";
222}
223
224
225text_t get_href (const text_t &link) {
226
227 text_t href;
228
229 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
230 text_t::const_iterator end = link.end();
231 if (here == end) return g_EmptyText;
232
233 ++here;
234 while (here != end) {
235 if (*here == '"') break;
236 href.push_back(*here);
237 ++here;
238 }
239
240 return href;
241}
242
243//this function gets the information associated with the relation
244//metadata for the document associated with 'docinfo'. This relation
245//metadata consists of a line of pairs containing 'collection, document OID'
246//(this is the OID of the document related to the current document, and
247//the collection the related document belongs to). For each of these pairs
248//the title metadata is obtained and then an html link between the title
249//of the related doc and the document's position (the document will be
250//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
251//(where collection is the related documents collection, and OID is the
252//related documents OID). A list of these html links are made for as many
253//related documents as there are. This list is then returned. If there are
254//no related documents available for the current document then the string
255//'.. no related documents .. ' is returned.
256text_t get_related_docs(const text_t& collection, recptproto* collectproto,
257 ResultDocInfo_t &docinfo, ostream& logout){
258
259 text_tset metadata;
260
261 //insert the metadata we wish to collect
262 metadata.insert("dc.Relation");
263 metadata.insert("Title");
264 metadata.insert("Subject"); //for emails, where title data doesn't apply
265
266 FilterResponse_t response;
267 text_t relation = ""; //string for displaying relation metadata
268 text_t relationTitle = ""; //the related documents Title (or subject)
269 text_t relationOID = ""; //the related documents OID
270
271 //get the information associated with the metadata for current doc
272 if (get_info (docinfo.OID, collection, "", metadata,
273 false, collectproto, response, logout)) {
274
275 //if the relation metadata exists, store for displaying
276 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
277 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
278
279 //split relation data into pairs of collectionname,ID number
280 text_tarray relationpairs;
281 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
282
283 text_tarray::const_iterator currDoc = relationpairs.begin();
284 text_tarray::const_iterator lastDoc = relationpairs.end();
285
286 //iterate through the pairs to split and display
287 while(currDoc != lastDoc){
288
289 //split pairs into collectionname and ID
290 text_tarray relationdata;
291 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
292
293 //get first element in the array (collection)
294 text_tarray::const_iterator doc_data = relationdata.begin();
295 text_t document_collection = *doc_data;
296 ++doc_data; //increment to get next item in array (oid)
297 text_t document_OID = *doc_data;
298
299 //create html link to related document
300 relation += "<a href=\"_httpdocument_&c=" + document_collection;
301 relation += "&cl=search&d=" + document_OID;
302
303 //get the information associated with the metadata for related doc
304 if (get_info (document_OID, document_collection, "", metadata,
305 false, collectproto, response, logout)) {
306
307 //if title metadata doesn't exist, collect subject metadata
308 //if that doesn't exist, just call it 'related document'
309 if (!response.docInfo[0].metadata["Title"].values[0].empty())
310 relationTitle = response.docInfo[0].metadata["Title"].values[0];
311 else if (!response.docInfo[0].metadata["Subject"].values.empty())
312 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
313 else relationTitle = "RELATED DOCUMENT";
314
315 }
316
317 //link the related document's title to its page
318 relation += "\">" + relationTitle + "</a>";
319 relation += " (" + document_collection + ")<br>";
320
321 ++currDoc;
322 }
323 }
324
325 }
326
327 if(relation.empty()) //no relation data for documnet
328 relation = ".. no related documents .. ";
329
330 return relation;
331}
332
333
334
335static void get_parent_options (text_t &instring, metadata_t &metaoption) {
336
337 assert (instring.size() > 7);
338 if (instring.size() <= 7) return;
339
340 text_t meta, com, op;
341 bool inbraces = false;
342 bool inquotes = false;
343 bool foundcolon = false;
344 text_t::const_iterator here = instring.begin()+6;
345 text_t::const_iterator end = instring.end();
346 while (here != end) {
347 if (foundcolon) meta.push_back (*here);
348 else if (*here == '(') inbraces = true;
349 else if (*here == ')') inbraces = false;
350 else if (*here == '\'' && !inquotes) inquotes = true;
351 else if (*here == '\'' && inquotes) inquotes = false;
352 else if (*here == ':' && !inbraces) foundcolon = true;
353 else if (inquotes) op.push_back (*here);
354 else com.push_back (*here);
355 ++here;
356 }
357
358 instring = meta;
359 if (com.empty())
360 metaoption.mqualifier.parent = pImmediate;
361 else if (com == "Top")
362 metaoption.mqualifier.parent = pTop;
363 else if (com == "All") {
364 metaoption.mqualifier.parent = pAll;
365 metaoption.parentoptions = op;
366 }
367}
368
369
370static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
371
372 assert (instring.size() > 8);
373 if (instring.size() <= 8) return;
374 text_t meta, com, op;
375 bool inbraces = false;
376 bool inquotes = false;
377 bool foundcolon = false;
378 text_t::const_iterator here = instring.begin()+7;
379 text_t::const_iterator end = instring.end();
380 while (here != end) {
381 if (foundcolon) meta.push_back (*here);
382 else if (*here == '(') inbraces = true;
383 else if (*here == ')') inbraces = false;
384 else if (*here == '\'' && !inquotes) inquotes = true;
385 else if (*here == '\'' && inquotes) inquotes = false;
386 else if (*here == ':' && !inbraces) foundcolon = true;
387 else if (inquotes) op.push_back (*here);
388 else com.push_back (*here);
389 ++here;
390 }
391
392 instring = meta;
393 metaoption.siblingoptions.clear();
394
395 if (com.empty()) {
396 metaoption.mqualifier.sibling = sAll;
397 metaoption.siblingoptions = " ";
398 }
399 else if (com == "first") {
400 metaoption.mqualifier.sibling = sNum;
401 metaoption.siblingoptions = "0";
402 }
403 else if (com == "last") {
404 metaoption.mqualifier.sibling = sNum;
405 metaoption.siblingoptions = "-2"; // == last
406 }
407 else if (com.getint()>0) {
408 metaoption.mqualifier.sibling = sNum;
409 int pos = com.getint()-1;
410 metaoption.siblingoptions +=pos;
411 }
412 else {
413 metaoption.mqualifier.sibling = sAll;
414 metaoption.siblingoptions = op;
415 }
416}
417
418static void get_child_options (text_t &instring, metadata_t &metaoption) {
419
420 assert (instring.size() > 6);
421 if (instring.size() <= 6) return;
422 text_t meta, com, op;
423 bool inbraces = false;
424 bool inquotes = false;
425 bool foundcolon = false;
426 text_t::const_iterator here = instring.begin()+5;
427 text_t::const_iterator end = instring.end();
428 while (here != end) {
429 if (foundcolon) meta.push_back (*here);
430 else if (*here == '(') inbraces = true;
431 else if (*here == ')') inbraces = false;
432 else if (*here == '\'' && !inquotes) inquotes = true;
433 else if (*here == '\'' && inquotes) inquotes = false;
434 else if (*here == ':' && !inbraces) foundcolon = true;
435 else if (inquotes) op.push_back (*here);
436 else com.push_back (*here);
437 ++here;
438 }
439
440 instring = meta;
441 if (com.empty()) {
442 metaoption.mqualifier.child = cAll;
443 metaoption.childoptions = " ";
444 }
445 else if (com == "first") {
446 metaoption.mqualifier.child = cNum;
447 metaoption.childoptions = ".fc";
448 }
449 else if (com == "last") {
450 metaoption.mqualifier.child = cNum;
451 metaoption.childoptions = ".lc";
452 }
453 else if (com.getint()>0) {
454 metaoption.mqualifier.child = cNum;
455 metaoption.childoptions = "."+com;
456 }
457 else {
458 metaoption.mqualifier.child = cAll;
459 metaoption.childoptions = op;
460 }
461}
462
463
464static void get_truncate_options (text_t &instring, metadata_t &metaoption)
465{
466 assert (instring.size() > ((text_t) "truncate").size());
467 if (instring.size() <= ((text_t) "truncate").size()) return;
468 text_t meta, com;
469 bool inbraces = false;
470 bool foundcolon = false;
471 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
472 text_t::const_iterator end = instring.end();
473 while (here != end) {
474 if (foundcolon) meta.push_back (*here);
475 else if (*here == '(') inbraces = true;
476 else if (*here == ')') inbraces = false;
477 else if (*here == ':' && !inbraces) foundcolon = true;
478 else com.push_back (*here);
479 ++here;
480 }
481
482 instring = meta;
483
484 if (!com.empty())
485 {
486 metaoption.siblingoptions = com;
487 }
488 else
489 {
490 // Default is 100 characters if not specified
491 metaoption.siblingoptions = "100";
492 }
493}
494
495
496
497static void parse_meta (text_t &meta, metadata_t &metaoption,
498 text_tset &metadata, bool &getParents) {
499
500 // Look for the various format statement modifiers
501 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
502 // is irrelevant because this is not stored in metaoption.metacommand anyway
503 bool keep_trying = true;
504 while (keep_trying)
505 {
506 keep_trying = false;
507
508 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
509 {
510 metaoption.metacommand |= mCgiSafe;
511 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
512 keep_trying = true;
513 }
514 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
515 {
516 metaoption.metacommand |= mSpecial;
517 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
518 keep_trying = true;
519 }
520
521 // New "truncate" special formatting option
522 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
523 {
524 metaoption.metacommand |= mTruncate;
525 get_truncate_options (meta, metaoption);
526 keep_trying = true;
527 }
528 // New "htmlsafe" special formatting option
529 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
530 {
531 metaoption.metacommand |= mHTMLSafe;
532 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
533 keep_trying = true;
534 }
535 // New "xmlsafe" special formatting option
536 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
537 {
538 metaoption.metacommand |= mXMLSafe;
539 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
540 keep_trying = true;
541 }
542 }
543
544 bool had_parent_or_child = true;
545 bool prev_was_parent = false;
546 bool prev_was_child = false;
547
548 while (had_parent_or_child) {
549 if (meta.size() > 7
550 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
551
552 // clear out sibling and child (cmd and options)
553 metaoption.metacommand &= ~(mChild|mSibling);
554 metaoption.childoptions.clear();
555 metaoption.siblingoptions.clear();
556
557 getParents = true;
558 metaoption.metacommand |= mParent;
559 get_parent_options (meta, metaoption);
560
561 if (prev_was_parent) {
562 metaoption.pre_tree_traverse += ".pr";
563 }
564 else if (prev_was_child) {
565 metaoption.pre_tree_traverse += ".fc";
566 }
567
568 prev_was_parent = true;
569 prev_was_child = false;
570 }
571 else if (meta.size() > 6
572 && (substr (meta.begin(), meta.begin()+5) == "child")) {
573
574 // clear out sibling and parent (cmd and options)
575 metaoption.metacommand &= ~(mParent|mSibling);
576 metaoption.parentoptions.clear();
577 metaoption.siblingoptions.clear();
578
579 metaoption.metacommand |= mChild;
580 get_child_options (meta, metaoption);
581 metadata.insert("contains");
582
583 if (prev_was_parent) {
584 metaoption.pre_tree_traverse += ".pr";
585 }
586 else if (prev_was_child) {
587 metaoption.pre_tree_traverse += ".fc";
588 }
589
590 prev_was_child = true;
591 prev_was_parent = false;
592 }
593 else {
594 prev_was_child = false;
595 prev_was_parent = false;
596 had_parent_or_child = false;
597 }
598 }
599
600 // parent/child can have sibling tacked on end also
601 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
602 metaoption.metacommand |= mSibling;
603 get_sibling_options (meta, metaoption);
604 }
605
606 // check for ex. which may occur in format statements
607 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
608 meta = substr (meta.begin()+3, meta.end());
609 }
610 metadata.insert (meta);
611 metaoption.metaname = meta;
612}
613
614static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
615 if (meta == "collection") {
616 // no qualifiers
617 metaoption.metaname = g_EmptyText;
618 return;
619 }
620 meta = substr (meta.begin()+11, meta.end());
621 metaoption.metaname = meta;
622
623}
624
625static void parse_meta (text_t &meta, format_t *formatlistptr,
626 text_tset &metadata, bool &getParents) {
627
628 if (meta == "link")
629 formatlistptr->command = comLink;
630 else if (meta == "/link")
631 formatlistptr->command = comEndLink;
632
633 else if (meta == "srclink") {
634 formatlistptr->command = comAssocLink;
635 formatlistptr->meta.metaname = "srclink_file";
636 metadata.insert("srclink_file");
637 }
638 else if (meta == "srchref") {
639 formatlistptr->command = comAssocLink;
640 formatlistptr->text = "href";
641 formatlistptr->meta.metaname = "srclink_file";
642 metadata.insert("srclink_file");
643 }
644 else if (meta == "/srclink") {
645 formatlistptr->command = comEndAssocLink;
646 formatlistptr->meta.metaname = "srclink_file";
647 }
648 // and weblink etc
649 else if (meta == "href")
650 formatlistptr->command = comHref;
651
652 else if (meta == "num")
653 formatlistptr->command = comNum;
654
655 else if (meta == "icon")
656 formatlistptr->command = comIcon;
657
658 else if (meta == "Text")
659 formatlistptr->command = comDoc;
660
661 else if (meta == "RelatedDocuments")
662 formatlistptr->command = comRel;
663
664 else if (meta == "highlight")
665 formatlistptr->command = comHighlight;
666
667 else if (meta == "/highlight")
668 formatlistptr->command = comEndHighlight;
669
670 else if (meta == "metadata-spanwrap")
671 formatlistptr->command = comMetadataSpanWrap;
672
673 else if (meta == "/metadata-spanwrap")
674 formatlistptr->command = comEndMetadataSpanWrap;
675
676 else if (meta == "Summary")
677 formatlistptr->command = comSummary;
678
679 else if (meta == "DocImage")
680 formatlistptr->command = comImage;
681
682 else if (meta == "DocTOC")
683 formatlistptr->command = comTOC;
684
685 else if (meta == "DocumentButtonDetach")
686 formatlistptr->command = comDocumentButtonDetach;
687
688 else if (meta == "DocumentButtonHighlight")
689 formatlistptr->command = comDocumentButtonHighlight;
690
691 else if (meta == "DocumentButtonExpandContents")
692 formatlistptr->command = comDocumentButtonExpandContents;
693
694 else if (meta == "DocumentButtonExpandText")
695 formatlistptr->command = comDocumentButtonExpandText;
696
697 else if (meta == "DocOID")
698 formatlistptr->command = comOID;
699 else if (meta == "DocTopOID")
700 formatlistptr->command = comTopOID;
701 else if (meta == "DocRank")
702 formatlistptr->command = comRank;
703 else if (meta == "DocTermsFreqTotal")
704 formatlistptr->command = comDocTermsFreqTotal;
705 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
706 formatlistptr->command = comCollection;
707 parse_coll_meta(meta, formatlistptr->meta);
708 }
709 else {
710 formatlistptr->command = comMeta;
711 parse_meta (meta, formatlistptr->meta, metadata, getParents);
712 }
713}
714
715
716static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
717 text_tset &metadata, bool &getParents) {
718
719 text_t text;
720 text_t::const_iterator here = formatstring.begin();
721 text_t::const_iterator end = formatstring.end();
722
723 while (here != end) {
724
725 if (*here == '\\') {
726 ++here;
727 if (here != end) text.push_back (*here);
728
729 } else if (*here == '{') {
730 if (!text.empty()) {
731 formatlistptr->command = comText;
732 formatlistptr->text = text;
733 formatlistptr->nextptr = new format_t();
734 formatlistptr = formatlistptr->nextptr;
735
736 text.clear();
737 }
738 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
739
740 formatlistptr->nextptr = new format_t();
741 formatlistptr = formatlistptr->nextptr;
742 if (here == end) break;
743 }
744 } else if (*here == '[') {
745 if (!text.empty()) {
746 formatlistptr->command = comText;
747 formatlistptr->text = text;
748 formatlistptr->nextptr = new format_t();
749 formatlistptr = formatlistptr->nextptr;
750
751 text.clear();
752 }
753 text_t meta;
754 ++here;
755 while (*here != ']') {
756 if (here == end) return false;
757 meta.push_back (*here);
758 ++here;
759 }
760 parse_meta (meta, formatlistptr, metadata, getParents);
761 formatlistptr->nextptr = new format_t();
762 formatlistptr = formatlistptr->nextptr;
763
764 } else
765 text.push_back (*here);
766
767 if (here != end) ++here;
768 }
769 if (!text.empty()) {
770 formatlistptr->command = comText;
771 formatlistptr->text = text;
772 formatlistptr->nextptr = new format_t();
773 formatlistptr = formatlistptr->nextptr;
774
775 }
776 return true;
777}
778
779
780static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
781 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
782
783 text_t::const_iterator it = findchar (here, end, '}');
784 if (it == end) return false;
785
786 text_t com = substr (here, it);
787 here = findchar (it, end, '{');
788 if (here == end) return false;
789 else ++here;
790
791 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
792 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
793 else return false;
794
795 int commacount = 0;
796 text_t text;
797 while (here != end) {
798
799 if (*here == '\\') {
800 ++here;
801 if (here != end) text.push_back(*here);
802
803 }
804
805 else if (*here == ',' || *here == '}' || *here == '{') {
806
807 if (formatlistptr->command == comOr) {
808 // the {Or}{this, or this, or this, or this} statement
809 format_t *or_ptr;
810
811 // find the next unused orptr
812 if (formatlistptr->orptr == NULL) {
813 formatlistptr->orptr = new format_t();
814 or_ptr = formatlistptr->orptr;
815 } else {
816 or_ptr = formatlistptr->orptr;
817 while (or_ptr->nextptr != NULL)
818 or_ptr = or_ptr->nextptr;
819 or_ptr->nextptr = new format_t();
820 or_ptr = or_ptr->nextptr;
821 }
822
823 if (!text.empty())
824 {
825 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
826 }
827
828 if (*here == '{')
829 {
830 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
831 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
832 // The latter can always be re-written:
833 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
834
835 if (!text.empty()) // already used up allocated format_t
836 {
837 // => allocate new one for detected action
838 or_ptr->nextptr = new format_t();
839 or_ptr = or_ptr->nextptr;
840 }
841 if (!parse_action(++here, end, or_ptr, metadata, getParents))
842 {
843 return false;
844 }
845 }
846 else
847 {
848 if (*here == '}') break;
849 }
850 text.clear();
851
852 }
853
854 // Parse an {If}{decide,do,else} statement
855 else {
856
857 // Read the decision component.
858 if (commacount == 0) {
859 // Decsion can be a metadata element, or a piece of text.
860 // Originally Stefan's code, updated 25/10/2000 by Gordon.
861
862 text_t::const_iterator beginbracket = text.begin();
863 text_t::const_iterator endbracket = (text.end() - 1);
864
865 // Decision is based on a metadata element
866 if ((*beginbracket == '[') && (*endbracket == ']')) {
867 // Ignore the surrounding square brackets
868 text_t meta = substr (beginbracket+1, endbracket);
869 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
870 ++commacount;
871 text.clear();
872 }
873
874 // Decision is a piece of text (probably a macro like _cgiargmode_).
875 else {
876
877 // hunt for any metadata in string, which might be uses in
878 // to test a condition, e.g. [Format] eq 'PDF'
879 format_t* dummyformat = new format_t();
880 // update which metadata fields needed
881 // (not interested in updatng formatlistptr)
882 parse_string (text, dummyformat, metadata, getParents);
883 delete dummyformat;
884
885 formatlistptr->decision.command = dText;
886 formatlistptr->decision.text = text;
887 ++commacount;
888 text.clear();
889 }
890 }
891
892 // Read the "then" and "else" components of the {If} statement.
893 else {
894 format_t** nextlistptr = NULL;
895 if (commacount == 1) {
896 nextlistptr = &formatlistptr->ifptr;
897 } else if (commacount == 2 ) {
898 nextlistptr = &formatlistptr->elseptr;
899 } else {
900 return false;
901 }
902
903 if (!text.empty()) {
904 if (*nextlistptr == NULL) {
905 *nextlistptr = new format_t();
906 } else {
907
908 // skip to the end of any format_t statements already added
909 while ((*nextlistptr)->nextptr != NULL)
910 {
911 nextlistptr = &(*nextlistptr)->nextptr;
912 }
913
914 (*nextlistptr)->nextptr = new format_t();
915 nextlistptr = &(*nextlistptr)->nextptr;
916 }
917
918 if (!parse_string (text, *nextlistptr, metadata, getParents))
919 {
920 return false;
921 }
922 text.clear();
923 }
924
925 if (*here == '{')
926 {
927 if (*nextlistptr == NULL) {
928 *nextlistptr = new format_t();
929 } else {
930 // skip to the end of any format_t statements already added
931 while ((*nextlistptr)->nextptr != NULL)
932 {
933 nextlistptr = &(*nextlistptr)->nextptr;
934 }
935
936 (*nextlistptr)->nextptr = new format_t();
937 nextlistptr = &(*nextlistptr)->nextptr;
938 }
939
940 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
941 {
942 return false;
943 }
944 }
945 else
946 {
947 if (*here == '}') break;
948 ++commacount;
949 }
950 }
951 }
952
953 } else text.push_back(*here);
954
955 if (here != end) ++here;
956 }
957
958 return true;
959}
960
961
962static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
963 const text_t metaname, int metapos=-1)
964{
965
966 text_t tag_type = (metaname == "Text") ? "div" : "span";
967 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
968
969 text_t wrapped_metatext = "<" + tag_type + " ";
970 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
971
972 wrapped_metatext += "docoid=\"" + OID + "\" ";
973 wrapped_metatext += "metaname=\"" + metaname + "\"";
974
975 if (metapos>=0) {
976 text_t metapos_str = metapos;
977 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
978 }
979
980 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
981
982 return wrapped_metatext;
983}
984
985
986
987bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
988 text_tset &metadata, bool &getParents) {
989
990 formatlistptr->clear();
991 getParents = false;
992
993 return (parse_string (formatstring, formatlistptr, metadata, getParents));
994}
995
996// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
997// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
998
999static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1000{
1001 text_t no_ns_metaname = remove_namespace(meta.metaname);
1002 text_t formatted_metatext;
1003 bool first = true;
1004
1005 const int start_i=0;
1006 const int end_i = metainfo.values.size()-1;
1007
1008 if (position == -1) { // all
1009 for (int i=start_i; i<=end_i; ++i) {
1010 if (!first) formatted_metatext += meta.siblingoptions;
1011
1012 text_t fresh_metatext;
1013
1014 if (meta.metacommand & mSpecial) {
1015 // special formatting
1016 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1017 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1018 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1019 }
1020 else fresh_metatext = metainfo.values[i];
1021
1022 // New "truncate" special formatting option
1023 if (meta.metacommand & mTruncate)
1024 {
1025 int truncate_length = meta.siblingoptions.getint();
1026 text_t truncated_value = fresh_metatext;
1027 if (truncated_value.size() > truncate_length)
1028 {
1029 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1030 }
1031 fresh_metatext = truncated_value;
1032 }
1033 // New "xmlsafe" special formatting option
1034 if (meta.metacommand & mXMLSafe)
1035 {
1036 // Make it XML-safe
1037 text_t text_xml_safe = "";
1038 text_t::const_iterator text_iterator = fresh_metatext.begin();
1039 while (text_iterator != fresh_metatext.end())
1040 {
1041 if (*text_iterator == '&') text_xml_safe += "&amp;";
1042 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1043 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1044 else text_xml_safe.push_back(*text_iterator);
1045 text_iterator++;
1046 }
1047 fresh_metatext = text_xml_safe;
1048 }
1049 // New "htmlsafe" special formatting option
1050 if (meta.metacommand & mHTMLSafe)
1051 {
1052 // Make it HTML-safe
1053 text_t text_html_safe = "";
1054 text_t::const_iterator text_iterator = fresh_metatext.begin();
1055 while (text_iterator != fresh_metatext.end())
1056 {
1057 if (*text_iterator == '&') text_html_safe += "&amp;";
1058 else if (*text_iterator == '<') text_html_safe += "&lt;";
1059 else if (*text_iterator == '>') text_html_safe += "&gt;";
1060 else if (*text_iterator == '"') text_html_safe += "&quot;";
1061 else text_html_safe.push_back(*text_iterator);
1062 text_iterator++;
1063 }
1064 fresh_metatext = text_html_safe;
1065 }
1066
1067 if (metadata_spanwrap) {
1068 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
1069 }
1070 formatted_metatext += fresh_metatext;
1071
1072 first = false;
1073
1074 }
1075 } else {
1076 if (position == -2) { // end
1077 position = end_i;
1078 } else if (position < start_i || position > end_i) {
1079 return "";
1080 }
1081
1082 text_t fresh_metatext;
1083 if (meta.metacommand & mSpecial) {
1084
1085 // special formatting
1086 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1087 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1088 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1089 }
1090 else fresh_metatext = metainfo.values[position];
1091
1092 // New "truncate" special formatting option
1093 if (meta.metacommand & mTruncate)
1094 {
1095 int truncate_length = meta.siblingoptions.getint();
1096 text_t truncated_value = fresh_metatext;
1097 if (truncated_value.size() > truncate_length)
1098 {
1099 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1100 }
1101 fresh_metatext = truncated_value;
1102 }
1103 // New "xmlsafe" special formatting option
1104 if (meta.metacommand & mXMLSafe)
1105 {
1106 // Make it XML-safe
1107 text_t text_xml_safe = "";
1108 text_t::const_iterator text_iterator = fresh_metatext.begin();
1109 while (text_iterator != fresh_metatext.end())
1110 {
1111 if (*text_iterator == '&') text_xml_safe += "&amp;";
1112 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1113 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1114 else text_xml_safe.push_back(*text_iterator);
1115 text_iterator++;
1116 }
1117 fresh_metatext = text_xml_safe;
1118 }
1119 // New "htmlsafe" special formatting option
1120 if (meta.metacommand & mHTMLSafe)
1121 {
1122 // Make it HTML-safe
1123 text_t text_html_safe = "";
1124 text_t::const_iterator text_iterator = fresh_metatext.begin();
1125 while (text_iterator != fresh_metatext.end())
1126 {
1127 if (*text_iterator == '&') text_html_safe += "&amp;";
1128 else if (*text_iterator == '<') text_html_safe += "&lt;";
1129 else if (*text_iterator == '>') text_html_safe += "&gt;";
1130 else if (*text_iterator == '"') text_html_safe += "&quot;";
1131 else text_html_safe.push_back(*text_iterator);
1132 text_iterator++;
1133 }
1134 fresh_metatext = text_html_safe;
1135 }
1136
1137 if (metadata_spanwrap) {
1138 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
1139 }
1140
1141 formatted_metatext += fresh_metatext;
1142 }
1143
1144 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1145 else return formatted_metatext;
1146}
1147
1148static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1149{
1150
1151 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1152
1153 switch (meta.mqualifier.parent) {
1154 case pNone:
1155 return "Nothing!!";
1156 break;
1157
1158 case pImmediate:
1159 if (parent != NULL) {
1160 text_t parent_oid = get_parent(docinfo.OID);
1161 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1162 }
1163 break;
1164
1165 case pTop:
1166 if (parent != NULL) {
1167 text_t parent_oid = get_parent(docinfo.OID);
1168
1169 while (parent->parent != NULL) {
1170 parent = parent->parent;
1171 parent_oid = get_parent(parent_oid);
1172 }
1173 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1174 }
1175 break;
1176
1177 case pAll:
1178 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1179 if (parent != NULL) {
1180 text_t parent_oid = get_parent(docinfo.OID);
1181
1182 text_tarray tmparray;
1183 while (parent != NULL) {
1184 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1185 parent = parent->parent;
1186 parent_oid = get_parent(parent_oid);
1187
1188 }
1189 // now join them up - use teh parent separator
1190 bool first = true;
1191 text_t tmp;
1192 text_tarray::reverse_iterator here = tmparray.rbegin();
1193 text_tarray::reverse_iterator end = tmparray.rend();
1194 while (here != end) {
1195 if (!first) tmp += meta.parentoptions;
1196 tmp += *here;
1197 first = false;
1198 ++here;
1199 }
1200 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1201 else return tmp;
1202 }
1203 }
1204 return "";
1205
1206}
1207
1208static text_t get_child_meta (const text_t& collection,
1209 recptproto* collectproto,
1210 ResultDocInfo_t &docinfo, displayclass &disp,
1211 const metadata_t &meta, text_tmap &options,
1212 ostream& logout, int siblings_values)
1213{
1214 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1215
1216 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1217 const text_t& child_metaname = meta.metaname;
1218 const text_t& child_field = meta.childoptions;
1219 text_tset child_metadata;
1220 child_metadata.insert(child_metaname);
1221
1222 FilterResponse_t child_response;
1223 if (meta.mqualifier.child == cNum) {
1224 // just one child
1225 //get the information associated with the metadata for child doc
1226 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1227 child_metadata, false, collectproto, child_response,
1228 logout)) return ""; // invalid child number
1229
1230 if (child_response.docInfo.empty()) return false; // no info for the child
1231
1232 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1233 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1234
1235 text_t child_metavalue
1236 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1237 return expand_metadata(child_metavalue,collection,collectproto,
1238 child_docinfo,disp,options,logout);
1239 }
1240
1241
1242 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1243
1244
1245 if (!pre_tree_trav.empty()) {
1246 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1247 FilterResponse_t trav_response;
1248
1249 text_tset trav_metadata;
1250 trav_metadata.insert("contains");
1251
1252 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1253 trav_metadata, false, collectproto, trav_response,
1254 logout)) return ""; // invalid pre_tree_trav
1255
1256 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1257
1258 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1259 // use this for rest of routine
1260 docinfo = trav_docinfo;
1261 }
1262
1263 // we need to get all children
1264 text_t result = "";
1265 text_tarray children;
1266 text_t contains = docinfo.metadata["contains"].values[0];
1267 splitchar (contains.begin(), contains.end(), ';', children);
1268 text_tarray::const_iterator here = children.begin();
1269 text_tarray::const_iterator end = children.end();
1270 bool first = true;
1271 while (here !=end) {
1272 text_t oid = *here;
1273 here++;
1274 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1275
1276 //get the information associated with the metadata for child doc
1277 if (!get_info (oid, collection, "", child_metadata,
1278 false, collectproto, child_response, logout) ||
1279 child_response.docInfo.empty()) {
1280 first = false;
1281 continue;
1282 }
1283
1284
1285 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1286 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1287
1288 text_t child_metavalue
1289 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1290 if (!first) result += child_field;
1291 first = false;
1292 // need to do this here cos otherwise we are in the wrong document
1293 result += expand_metadata(child_metavalue,collection,collectproto,
1294 child_docinfo,disp,options,logout);
1295 }
1296 return result;
1297
1298}
1299
1300static text_t get_meta (const text_t& collection, recptproto* collectproto,
1301 ResultDocInfo_t &docinfo, displayclass &disp,
1302 const metadata_t &meta, text_tmap &options,
1303 ostream& logout) {
1304
1305 // make sure we have the requested metadata
1306 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1307 if (it == docinfo.metadata.end()) return "";
1308
1309 int siblings_values = 0; // default is no siblings, just the first metadata available
1310 if (meta.metacommand & mSibling) {
1311 if (meta.mqualifier.sibling == sAll) {
1312 siblings_values = -1; //all
1313 } else if (meta.mqualifier.sibling == sNum) {
1314 siblings_values = meta.siblingoptions.getint();
1315 }
1316 }
1317 if (meta.metacommand & mParent) {
1318 return get_parent_meta(docinfo,meta,siblings_values);
1319 }
1320
1321 else if (meta.metacommand & mChild) {
1322 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1323 options,logout, siblings_values);
1324 }
1325 else if (meta.metacommand & mSibling) { // only siblings
1326 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1327 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1328 }
1329 else {
1330
1331 // straightforward metadata request (nothing fancy)
1332
1333 text_t classifier_metaname = docinfo.classifier_metadata_type;
1334 int metaname_index
1335 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1336 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1337 }
1338
1339 return "";
1340}
1341
1342static text_t get_or (const text_t& collection, recptproto* collectproto,
1343 ResultDocInfo_t &docinfo, displayclass &disp,
1344 format_t *orptr, text_tmap &options,
1345 ostream& logout) {
1346
1347 while (orptr != NULL) {
1348
1349 if (metadata_spanwrap) {
1350 // need to be a bit more careful about this
1351 // => test for it *without* spanwrap, and if defined, then
1352 // got back and generate it again, this time with spanwrap on
1353
1354 metadata_spanwrap = false;
1355 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1356 options, logout);
1357 metadata_spanwrap = true;
1358 if (!test_tmp.empty()) {
1359
1360 return format_string (collection,collectproto,docinfo, disp, orptr,
1361 options, logout);
1362 }
1363 }
1364 else {
1365 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1366 options, logout);
1367 if (!tmp.empty()) return tmp;
1368 }
1369
1370 orptr = orptr->nextptr;
1371 }
1372 return "";
1373}
1374
// True for the four characters treated as white space by the scanning
// helpers in this file: space, tab, line feed and carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1380
1381static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1382{
1383 int pos = start_pos;
1384 while (pos<outstring.size()) {
1385 if (!char_is_whitespace(outstring[pos])) {
1386 break;
1387 }
1388 ++pos;
1389 }
1390
1391 return pos;
1392}
1393
1394static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1395{
1396 int pos = start_pos;
1397 while (pos>=0) {
1398 if (!char_is_whitespace(outstring[pos])) {
1399 break;
1400 }
1401 --pos;
1402 }
1403
1404 return pos;
1405}
1406
1407static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1408{
1409 int pos = start_pos;
1410 while (pos>=0) {
1411 if (char_is_whitespace(outstring[pos])) {
1412 break;
1413 }
1414 --pos;
1415 }
1416
1417 return pos;
1418}
1419
1420
1421static int rscan_for(const text_t& outstring, const int start_pos,
1422 const char find_c)
1423{
1424 int pos = start_pos;
1425 while (pos>=0) {
1426 char c = outstring[pos];
1427 if (outstring[pos] == find_c) {
1428 break;
1429 }
1430 --pos;
1431 }
1432
1433 return pos;
1434}
1435
1436text_t extract_substr(const text_t& outstring, const int start_pos,
1437 const int end_pos)
1438{
1439 text_t extracted_str;
1440 extracted_str.clear();
1441
1442 for (int pos=start_pos; pos<=end_pos; ++pos) {
1443 extracted_str.push_back(outstring[pos]);
1444 }
1445
1446 return extracted_str;
1447}
1448
1449
1450static text_t expand_potential_metadata(const text_t& collection,
1451 recptproto* collectproto,
1452 ResultDocInfo_t &docinfo,
1453 displayclass &disp,
1454 const text_t& intext,
1455 text_tmap &options,
1456 ostream& logout)
1457{
1458 text_t outtext;
1459
1460 // decide if dealing with metadata or text
1461
1462 text_t::const_iterator beginbracket = intext.begin();
1463 text_t::const_iterator endbracket = (intext.end() - 1);
1464
1465 // Decision is based on a metadata element
1466 if ((*beginbracket == '[') && (*endbracket == ']')) {
1467 // Ignore the surrounding square brackets
1468 text_t meta_text = substr (beginbracket+1, endbracket);
1469
1470 if (meta_text == "Text") {
1471 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1472 }
1473 else {
1474
1475 text_tset metadata;
1476 bool getParents =false;
1477 metadata_t meta;
1478
1479 parse_meta (meta_text, meta, metadata, getParents);
1480 outtext
1481 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1482 }
1483
1484 }
1485 else {
1486 outtext = intext;
1487 }
1488
1489 return outtext;
1490}
1491
1492
1493
1494
1495static bool uses_expression(const text_t& collection, recptproto* collectproto,
1496 ResultDocInfo_t &docinfo,
1497 displayclass &disp,
1498 const text_t& outstring, text_t& lhs_expr,
1499 text_t& op_expr, text_t& rhs_expr,
1500 text_tmap &options,
1501 ostream& logout)
1502{
1503 // Note: the string may not be of the form: str1 op str2, however
1504 // to deterine this we have to process it on the assumption it is,
1505 // and if at any point an 'erroneous' value is encountered, return
1506 // false and let something else have a go at evaluating it
1507
1508 // Starting at the end of the string and working backwards ..
1509
1510 const int outstring_len = outstring.size();
1511
1512 // skip over white space
1513 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1514
1515 if (rhs_end<=0) {
1516 // no meaningful text or (rhs_end==0) no room for operator
1517 return false;
1518 }
1519
1520 // check for ' or " and then scan over token
1521 const char potential_quote = outstring[rhs_end];
1522 int rhs_start=rhs_end;
1523 bool quoted = false;
1524
1525 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1526 --rhs_end;
1527 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1528 quoted = true;
1529 }
1530 else {
1531 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1532 }
1533
1534 if ((rhs_end-rhs_start)<0) {
1535 // no meaningful rhs expression
1536 return false;
1537 }
1538
1539 // form rhs_expr
1540 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1541
1542 // skip over white space
1543 const int to_whitespace = (quoted) ? 2 : 1;
1544
1545 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1546 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1547
1548 if ((op_end<0) && (op_start<0)) {
1549 // no meaningful expression operator
1550 return false;
1551 }
1552
1553 if (op_end-op_start<0) {
1554 // no meaningful expression operator
1555 return false;
1556 }
1557
1558 op_expr = extract_substr(outstring,op_start,op_end);
1559
1560
1561 // check for operator
1562 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1563 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1564
1565 // not a valid operator
1566 return false;
1567 }
1568
1569 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1570 if (lhs_end<0) {
1571 // no meaningful lhs expression
1572 return false;
1573 }
1574
1575 int lhs_start = scan_over_whitespace(outstring,0);
1576
1577 // form lhs_expr from remainder of string
1578 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1579
1580 // Now we know we have a valid expression, look up any
1581 // metadata terms
1582
1583 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1584 disp,rhs_expr,options,logout);
1585 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1586 disp,lhs_expr,options,logout);
1587
1588 return true;
1589}
1590
1591static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1592 const text_t& rhs_expr, ostream& logout)
1593{
1594 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1595 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1596 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1597 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1598 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1599 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1600 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1601 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1602 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1603 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1604 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1605 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1606 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1607 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1608 else {
1609 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1610 }
1611
1612 return false;
1613}
1614
1615
1616static text_t get_if (const text_t& collection, recptproto* collectproto,
1617 ResultDocInfo_t &docinfo, displayclass &disp,
1618 const decision_t &decision,
1619 format_t *ifptr, format_t *elseptr,
1620 text_tmap &options, ostream& logout)
1621{
1622 // If the decision component is a metadata element, then evaluate it
1623 // to see whether we output the "then" or the "else" clause
1624 if (decision.command == dMeta) {
1625
1626 bool store_metadata_spanwrap = metadata_spanwrap;
1627 metadata_spanwrap = 0;
1628
1629 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1630 bool metadata_exists
1631 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1632 logout) != "");
1633
1634 metadata_spanwrap = store_metadata_spanwrap;
1635
1636 if (metadata_exists) {
1637 if (ifptr != NULL)
1638 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1639 options, logout);
1640 }
1641 else {
1642 if (elseptr != NULL)
1643 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1644 options, logout);
1645 }
1646 }
1647
1648 // If the decision component is text, then evaluate it (it is probably a
1649 // macro like _cgiargmode_) to decide what to output.
1650 else if (decision.command == dText) {
1651
1652 text_t outstring;
1653 disp.expandstring (decision.text, outstring);
1654
1655 // Check for if expression in form: str1 op str2
1656 // (such as [x] eq "y")
1657 text_t lhs_expr, op_expr, rhs_expr;
1658 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1659 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1660 if (ifptr != NULL) {
1661 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1662 options, logout);
1663 }
1664 else {
1665 return "";
1666 }
1667 } else {
1668 if (elseptr != NULL) {
1669 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1670 options, logout);
1671 }
1672 else {
1673 return "";
1674 }
1675 }
1676 }
1677
1678
1679 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1680 // a cgi argument macro that has not been set, it evaluates to itself.
1681 // Therefore, were have to say that a piece of text evalautes true if
1682 // it is non-empty and if it is a cgi argument evaulating to itself.
1683
1684 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1685 if (ifptr != NULL)
1686 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1687 options, logout);
1688 } else {
1689 if (elseptr != NULL)
1690 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1691 options, logout);
1692 }
1693 }
1694
1695 return "";
1696}
1697
1698bool includes_metadata(const text_t& text)
1699{
1700 text_t::const_iterator here = text.begin();
1701 text_t::const_iterator end = text.end();
1702 while (here != end) {
1703 if (*here == '[') return true;
1704 ++here;
1705 }
1706
1707 return false;
1708}
1709
1710static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1711 recptproto* collectproto,
1712 ResultDocInfo_t &docinfo,
1713 displayclass &disp, text_tmap &options,
1714 ostream &logout) {
1715
1716 if (includes_metadata(metavalue)) {
1717
1718 // text has embedded metadata in it => expand it
1719 FilterRequest_t request;
1720 FilterResponse_t response;
1721
1722 request.getParents = false;
1723
1724 format_t *expanded_formatlistptr = new format_t();
1725 parse_formatstring (metavalue, expanded_formatlistptr,
1726 request.fields, request.getParents);
1727
1728 // retrieve metadata
1729 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1730 collectproto, response, logout);
1731
1732 if (!response.docInfo.empty()) {
1733
1734 text_t expanded_metavalue
1735 = get_formatted_string(collection, collectproto,
1736 response.docInfo[0], disp, expanded_formatlistptr,
1737 options, logout);
1738
1739 return expanded_metavalue;
1740 }
1741 else {
1742 return metavalue;
1743 }
1744 }
1745 else {
1746
1747 return metavalue;
1748 }
1749}
1750
1751text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1752 displayclass &disp,
1753 text_t meta_name, ostream& logout) {
1754
1755 ColInfoResponse_t collectinfo;
1756 comerror_t err;
1757 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1758 text_t meta_value = "";
1759 text_t lang;
1760 disp.expandstring("_cgiargl_",lang);
1761 if (lang.empty()) {
1762 lang = "en";
1763 }
1764
1765 if (err == noError) {
1766 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1767 }
1768 return meta_value;
1769
1770
1771}
1772text_t format_string (const text_t& collection, recptproto* collectproto,
1773 ResultDocInfo_t &docinfo, displayclass &disp,
1774 format_t *formatlistptr, text_tmap &options,
1775 ostream& logout) {
1776
1777 if (formatlistptr == NULL) return "";
1778
1779 switch (formatlistptr->command) {
1780 case comOID:
1781 return docinfo.OID;
1782 case comTopOID:
1783 {
1784 text_t top_id;
1785 get_top(docinfo.OID, top_id);
1786 return top_id;
1787 }
1788 case comRank:
1789 return text_t(docinfo.ranking);
1790 case comText:
1791 return formatlistptr->text;
1792 case comLink:
1793 return options["link"];
1794 case comEndLink:
1795 {
1796 if (options["link"].empty()) return "";
1797 else return "</a>";
1798 }
1799 case comHref:
1800 return get_href(options["link"]);
1801 case comIcon:
1802 return options["icon"];
1803 case comNum:
1804 return docinfo.result_num;
1805 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1806 return get_related_docs(collection, collectproto, docinfo, logout);
1807
1808 case comSummary:
1809 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1810 case comAssocLink:
1811 {
1812 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1813 if (!link_filename.empty()) {
1814 text_t href= expand_metadata(options["assocfilepath"]+link_filename, collection, collectproto, docinfo, disp, options, logout);
1815 if (formatlistptr->text == "href") {
1816 return href;
1817 }
1818 return "<a href=\""+ href + "\">";
1819 }
1820 return "";
1821 }
1822 case comEndAssocLink:
1823 {
1824 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1825 if (!link_filename.empty()) {
1826 return "</a>";
1827 }
1828 return "";
1829 }
1830 case comMeta:
1831 {
1832 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1833 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1834 }
1835
1836 case comDoc:
1837 return format_text(collection, collectproto, docinfo, disp, options, logout);
1838
1839 case comImage:
1840 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1841 case comTOC:
1842 return options["DocTOC"];
1843 case comDocumentButtonDetach:
1844 return options["DocumentButtonDetach"];
1845 case comDocumentButtonHighlight:
1846 return options["DocumentButtonHighlight"];
1847 case comDocumentButtonExpandContents:
1848 return options["DocumentButtonExpandContents"];
1849 case comDocumentButtonExpandText:
1850 return options["DocumentButtonExpandText"];
1851 case comHighlight:
1852 if (options["highlight"] == "1") return "<b>";
1853 break;
1854 case comEndHighlight:
1855 if (options["highlight"] == "1") return "</b>";
1856 break;
1857 case comMetadataSpanWrap:
1858 metadata_spanwrap=true; return "";
1859 break;
1860 case comEndMetadataSpanWrap:
1861 metadata_spanwrap=false; return "";
1862 break;
1863 case comIf:
1864 return get_if (collection, collectproto, docinfo, disp,
1865 formatlistptr->decision, formatlistptr->ifptr,
1866 formatlistptr->elseptr, options, logout);
1867 case comOr:
1868 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1869 options, logout);
1870 case comDocTermsFreqTotal:
1871 return docinfo.num_terms_matched;
1872 case comCollection:
1873 if (formatlistptr->meta.metaname == g_EmptyText) {
1874 return collection;
1875 }
1876 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1877
1878 }
1879 return "";
1880}
1881
1882text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1883 ResultDocInfo_t &docinfo, displayclass &disp,
1884 format_t *formatlistptr, text_tmap &options,
1885 ostream& logout) {
1886
1887 text_t ft;
1888 while (formatlistptr != NULL)
1889 {
1890 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1891 options, logout);
1892 formatlistptr = formatlistptr->nextptr;
1893 }
1894
1895 return ft;
1896}
1897
1898
1899// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1900text_t format_text (const text_t& collection, recptproto* collectproto,
1901 ResultDocInfo_t &docinfo, displayclass &disp,
1902 text_tmap &options, ostream& logout)
1903{
1904 text_t text;
1905
1906 if(!options["text"].empty()) {
1907 text = options["text"];
1908 }
1909 else {
1910 // get document text here
1911 DocumentRequest_t docrequest;
1912 DocumentResponse_t docresponse;
1913 comerror_t err;
1914 docrequest.OID = docinfo.OID;
1915 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1916 text = docresponse.doc;
1917 }
1918
1919 if (metadata_spanwrap) {
1920 text = spanwrap_metatext(text,docinfo.OID,"Text");
1921 }
1922
1923 return text;
1924}
1925
1926/* FUNCTION NAME: format_summary
1927 * DESC: this is invoked when a [Summary] special metadata is processed.
1928 * RETURNS: a query-biased summary for the document */
1929
1930text_t format_summary (const text_t& collection, recptproto* collectproto,
1931 ResultDocInfo_t &docinfo, displayclass &disp,
1932 text_tmap &options, ostream& logout) {
1933
1934 // GRB: added code here to ensure that the cstr (and other collections)
1935 // uses the document metadata item Summary, rather than compressing
1936 // the text of the document, processed via the methods in
1937 // summarise.cpp
1938
1939 text_t summary;
1940
1941 if (docinfo.metadata.count("Summary") > 0 &&
1942 docinfo.metadata["Summary"].values.size() > 0) {
1943 summary = docinfo.metadata["Summary"].values[0];
1944 }
1945 else {
1946
1947 text_t textToSummarise, query;
1948
1949 if(options["text"].empty()) { // get document text
1950 DocumentRequest_t docrequest;
1951 DocumentResponse_t docresponse;
1952 comerror_t err;
1953 docrequest.OID = docinfo.OID;
1954 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1955 textToSummarise = docresponse.doc;
1956 }
1957 else {
1958 // in practice, this would not happen, because text is only
1959 // loaded with the [Text] command
1960 textToSummarise = options["text"];
1961 }
1962
1963 disp.expandstring("_cgiargq_",query);
1964 summary = summarise(textToSummarise,query,80);
1965 }
1966
1967 if (metadata_spanwrap) {
1968 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1969 }
1970
1971 return summary;
1972}
Note: See TracBrowser for help on using the repository browser.