source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 21752

Last change on this file since 21752 was 21752, checked in by mdewsnip, 14 years ago

Added new "htmlsafe:", "xmlsafe:", and "truncate(X):" (truncate metadata value to X characters) format statement modifiers. By Michael Dewsnip at DL Consulting Ltd.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 57.4 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
// When true, get_formatted_meta_text() passes each metadata value through
// spanwrap_metatext() before appending it to its result.
static bool metadata_spanwrap = false;

// a few function prototypes
// (forward declarations so these file-local functions can be called before
// the point at which they are defined)

static text_t format_string (const text_t& collection, recptproto* collectproto,
			     ResultDocInfo_t &docinfo, displayclass &disp,
			     format_t *formatlistptr, text_tmap &options,
			     ostream& logout);

// parse_action and parse_string call each other, hence this prototype
static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
			  format_t *formatlistptr, text_tset &metadata, bool &getParents);

static text_t format_summary (const text_t& collection, recptproto* collectproto,
			      ResultDocInfo_t &docinfo, displayclass &disp,
			      text_tmap &options, ostream& logout);
static text_t format_text (const text_t& collection, recptproto* collectproto,
			   ResultDocInfo_t &docinfo, displayclass &disp,
			   text_tmap &options, ostream& logout);

static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
			      recptproto* collectproto, ResultDocInfo_t &docinfo,
			      displayclass &disp, text_tmap &options,
			      ostream &logout);
57
58
59void metadata_t::clear() {
60 metaname.clear();
61 metacommand = mNone;
62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
65 pre_tree_traverse.clear();
66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
69}
70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
74 text.clear();
75}
76
77void format_t::clear() {
78 command = comText;
79 decision.clear();
80 text.clear();
81 meta.clear();
82 nextptr = NULL;
83 ifptr = NULL;
84 elseptr = NULL;
85 orptr = NULL;
86}
87
88void formatinfo_t::clear() {
89 DocumentImages = false;
90 DocumentTitles = true;
91 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
92 DocumentContents = true;
93 DocumentArrowsBottom = true;
94 DocumentArrowsTop = false;
95 DocumentSearchResultLinks = false;
96 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
97 // DocumentButtons.push_back ("Expand Text");
98 // DocumentButtons.push_back ("Expand Contents");
99 DocumentButtons.push_back ("Detach");
100 DocumentButtons.push_back ("Highlight");
101 RelatedDocuments = "";
102 DocumentText = "[Text]";
103 formatstrings.erase (formatstrings.begin(), formatstrings.end());
104 DocumentUseHTML = false;
105 AllowExtendedOptions = false;
106}
107
108// simply checks to see if formatstring begins with a <td> tag
109bool is_table_content (const text_t &formatstring) {
110 text_t::const_iterator here = formatstring.begin();
111 text_t::const_iterator end = formatstring.end();
112
113 while (here != end) {
114 if (*here != ' ') {
115 if ((*here == '<') && ((here+3) < end)) {
116 if ((*(here+1) == 't' || *(here+1) == 'T') &&
117 (*(here+2) == 'd' || *(here+2) == 'D') &&
118 (*(here+3) == '>' || *(here+3) == ' '))
119 return true;
120 } else return false;
121 }
122 ++here;
123 }
124 return false;
125}
126
127bool is_table_content (const format_t *formatlistptr) {
128
129 if (formatlistptr == NULL) return false;
130
131 if (formatlistptr->command == comText)
132 return is_table_content (formatlistptr->text);
133
134 return false;
135}
136
137// returns false if key isn't in formatstringmap
138bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
139 text_t &formatstring) {
140
141 formatstring.clear();
142 text_tmap::const_iterator it = formatstringmap.find(key);
143 if (it == formatstringmap.end()) return false;
144 formatstring = (*it).second;
145 return true;
146}
147
148// tries to find "key1key2" then "key1" then "key2"
149bool get_formatstring (const text_t &key1, const text_t &key2,
150 const text_tmap &formatstringmap,
151 text_t &formatstring) {
152
153 formatstring.clear();
154 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
155 if (it != formatstringmap.end()) {
156 formatstring = (*it).second;
157 return true;
158 }
159 it = formatstringmap.find(key1);
160 if (it != formatstringmap.end()) {
161 formatstring = (*it).second;
162 return true;
163 }
164 it = formatstringmap.find(key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 return false;
170}
171
172
173text_t remove_namespace(const text_t &meta_name) {
174 text_t::const_iterator end = meta_name.end();
175 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
176 if (it != end) {
177 return substr(it+1, end);
178 }
179
180 return meta_name;
181
182}
183// returns a date of form _format:date_(year, month, day)
184// input is date of type yyyy-?mm-?dd
185// at least the year must be present in date
186text_t format_date (const text_t &date) {
187
188 if (date.size() < 4) return "";
189
190 text_t::const_iterator datebegin = date.begin();
191
192 text_t year = substr (datebegin, datebegin+4);
193 int chars_seen_so_far = 4;
194 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
195
196 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
197 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
198
199 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
200 int imonth = month.getint();
201 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
202
203 chars_seen_so_far += 2;
204 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
205
206 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
208
209 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210 if (day[0] == '0') day = substr (day.begin()+1, day.end());
211 int iday = day.getint();
212 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
213
214 return "_format:date_("+year+","+month+","+day+")";
215}
216
217// converts an iso639 language code to its English equivalent
218// should we be checking that the macro exists??
219text_t iso639 (const text_t &langcode) {
220 if (langcode.empty()) return "";
221 return "_iso639:iso639"+langcode+"_";
222}
223
224
225text_t get_href (const text_t &link) {
226
227 text_t href;
228
229 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
230 text_t::const_iterator end = link.end();
231 if (here == end) return g_EmptyText;
232
233 ++here;
234 while (here != end) {
235 if (*here == '"') break;
236 href.push_back(*here);
237 ++here;
238 }
239
240 return href;
241}
242
243//this function gets the information associated with the relation
244//metadata for the document associated with 'docinfo'. This relation
245//metadata consists of a line of pairs containing 'collection, document OID'
246//(this is the OID of the document related to the current document, and
247//the collection the related document belongs to). For each of these pairs
248//the title metadata is obtained and then an html link between the title
249//of the related doc and the document's position (the document will be
250//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
251//(where collection is the related documents collection, and OID is the
252//related documents OID). A list of these html links are made for as many
253//related documents as there are. This list is then returned. If there are
254//no related documents available for the current document then the string
255//'.. no related documents .. ' is returned.
256text_t get_related_docs(const text_t& collection, recptproto* collectproto,
257 ResultDocInfo_t &docinfo, ostream& logout){
258
259 text_tset metadata;
260
261 //insert the metadata we wish to collect
262 metadata.insert("dc.Relation");
263 metadata.insert("Title");
264 metadata.insert("Subject"); //for emails, where title data doesn't apply
265
266 FilterResponse_t response;
267 text_t relation = ""; //string for displaying relation metadata
268 text_t relationTitle = ""; //the related documents Title (or subject)
269 text_t relationOID = ""; //the related documents OID
270
271 //get the information associated with the metadata for current doc
272 if (get_info (docinfo.OID, collection, "", metadata,
273 false, collectproto, response, logout)) {
274
275 //if the relation metadata exists, store for displaying
276 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
277 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
278
279 //split relation data into pairs of collectionname,ID number
280 text_tarray relationpairs;
281 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
282
283 text_tarray::const_iterator currDoc = relationpairs.begin();
284 text_tarray::const_iterator lastDoc = relationpairs.end();
285
286 //iterate through the pairs to split and display
287 while(currDoc != lastDoc){
288
289 //split pairs into collectionname and ID
290 text_tarray relationdata;
291 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
292
293 //get first element in the array (collection)
294 text_tarray::const_iterator doc_data = relationdata.begin();
295 text_t document_collection = *doc_data;
296 ++doc_data; //increment to get next item in array (oid)
297 text_t document_OID = *doc_data;
298
299 //create html link to related document
300 relation += "<a href=\"_httpdocument_&c=" + document_collection;
301 relation += "&cl=search&d=" + document_OID;
302
303 //get the information associated with the metadata for related doc
304 if (get_info (document_OID, document_collection, "", metadata,
305 false, collectproto, response, logout)) {
306
307 //if title metadata doesn't exist, collect subject metadata
308 //if that doesn't exist, just call it 'related document'
309 if (!response.docInfo[0].metadata["Title"].values[0].empty())
310 relationTitle = response.docInfo[0].metadata["Title"].values[0];
311 else if (!response.docInfo[0].metadata["Subject"].values.empty())
312 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
313 else relationTitle = "RELATED DOCUMENT";
314
315 }
316
317 //link the related document's title to its page
318 relation += "\">" + relationTitle + "</a>";
319 relation += " (" + document_collection + ")<br>";
320
321 ++currDoc;
322 }
323 }
324
325 }
326
327 if(relation.empty()) //no relation data for documnet
328 relation = ".. no related documents .. ";
329
330 return relation;
331}
332
333
334
335static void get_parent_options (text_t &instring, metadata_t &metaoption) {
336
337 assert (instring.size() > 7);
338 if (instring.size() <= 7) return;
339
340 text_t meta, com, op;
341 bool inbraces = false;
342 bool inquotes = false;
343 bool foundcolon = false;
344 text_t::const_iterator here = instring.begin()+6;
345 text_t::const_iterator end = instring.end();
346 while (here != end) {
347 if (foundcolon) meta.push_back (*here);
348 else if (*here == '(') inbraces = true;
349 else if (*here == ')') inbraces = false;
350 else if (*here == '\'' && !inquotes) inquotes = true;
351 else if (*here == '\'' && inquotes) inquotes = false;
352 else if (*here == ':' && !inbraces) foundcolon = true;
353 else if (inquotes) op.push_back (*here);
354 else com.push_back (*here);
355 ++here;
356 }
357
358 instring = meta;
359 if (com.empty())
360 metaoption.mqualifier.parent = pImmediate;
361 else if (com == "Top")
362 metaoption.mqualifier.parent = pTop;
363 else if (com == "All") {
364 metaoption.mqualifier.parent = pAll;
365 metaoption.parentoptions = op;
366 }
367}
368
369
370static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
371
372 assert (instring.size() > 8);
373 if (instring.size() <= 8) return;
374 text_t meta, com, op;
375 bool inbraces = false;
376 bool inquotes = false;
377 bool foundcolon = false;
378 text_t::const_iterator here = instring.begin()+7;
379 text_t::const_iterator end = instring.end();
380 while (here != end) {
381 if (foundcolon) meta.push_back (*here);
382 else if (*here == '(') inbraces = true;
383 else if (*here == ')') inbraces = false;
384 else if (*here == '\'' && !inquotes) inquotes = true;
385 else if (*here == '\'' && inquotes) inquotes = false;
386 else if (*here == ':' && !inbraces) foundcolon = true;
387 else if (inquotes) op.push_back (*here);
388 else com.push_back (*here);
389 ++here;
390 }
391
392 instring = meta;
393 metaoption.siblingoptions.clear();
394
395 if (com.empty()) {
396 metaoption.mqualifier.sibling = sAll;
397 metaoption.siblingoptions = " ";
398 }
399 else if (com == "first") {
400 metaoption.mqualifier.sibling = sNum;
401 metaoption.siblingoptions = "0";
402 }
403 else if (com == "last") {
404 metaoption.mqualifier.sibling = sNum;
405 metaoption.siblingoptions = "-2"; // == last
406 }
407 else if (com.getint()>0) {
408 metaoption.mqualifier.sibling = sNum;
409 int pos = com.getint()-1;
410 metaoption.siblingoptions +=pos;
411 }
412 else {
413 metaoption.mqualifier.sibling = sAll;
414 metaoption.siblingoptions = op;
415 }
416}
417
418static void get_child_options (text_t &instring, metadata_t &metaoption) {
419
420 assert (instring.size() > 6);
421 if (instring.size() <= 6) return;
422 text_t meta, com, op;
423 bool inbraces = false;
424 bool inquotes = false;
425 bool foundcolon = false;
426 text_t::const_iterator here = instring.begin()+5;
427 text_t::const_iterator end = instring.end();
428 while (here != end) {
429 if (foundcolon) meta.push_back (*here);
430 else if (*here == '(') inbraces = true;
431 else if (*here == ')') inbraces = false;
432 else if (*here == '\'' && !inquotes) inquotes = true;
433 else if (*here == '\'' && inquotes) inquotes = false;
434 else if (*here == ':' && !inbraces) foundcolon = true;
435 else if (inquotes) op.push_back (*here);
436 else com.push_back (*here);
437 ++here;
438 }
439
440 instring = meta;
441 if (com.empty()) {
442 metaoption.mqualifier.child = cAll;
443 metaoption.childoptions = " ";
444 }
445 else if (com == "first") {
446 metaoption.mqualifier.child = cNum;
447 metaoption.childoptions = ".fc";
448 }
449 else if (com == "last") {
450 metaoption.mqualifier.child = cNum;
451 metaoption.childoptions = ".lc";
452 }
453 else if (com.getint()>0) {
454 metaoption.mqualifier.child = cNum;
455 metaoption.childoptions = "."+com;
456 }
457 else {
458 metaoption.mqualifier.child = cAll;
459 metaoption.childoptions = op;
460 }
461}
462
463
464static void get_truncate_options (text_t &instring, metadata_t &metaoption)
465{
466 assert (instring.size() > ((text_t) "truncate").size());
467 if (instring.size() <= ((text_t) "truncate").size()) return;
468 text_t meta, com;
469 bool inbraces = false;
470 bool foundcolon = false;
471 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
472 text_t::const_iterator end = instring.end();
473 while (here != end) {
474 if (foundcolon) meta.push_back (*here);
475 else if (*here == '(') inbraces = true;
476 else if (*here == ')') inbraces = false;
477 else if (*here == ':' && !inbraces) foundcolon = true;
478 else com.push_back (*here);
479 ++here;
480 }
481
482 instring = meta;
483
484 if (!com.empty())
485 {
486 metaoption.siblingoptions = com;
487 }
488 else
489 {
490 // Default is 100 characters if not specified
491 metaoption.siblingoptions = "100";
492 }
493}
494
495
496
497static void parse_meta (text_t &meta, metadata_t &metaoption,
498 text_tset &metadata, bool &getParents) {
499
500 // Look for the various format statement modifiers
501 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
502 // is irrelevant because this is not stored in metaoption.metacommand anyway
503 bool keep_trying = true;
504 while (keep_trying)
505 {
506 keep_trying = false;
507
508 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
509 {
510 metaoption.metacommand |= mCgiSafe;
511 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
512 keep_trying = true;
513 }
514 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
515 {
516 metaoption.metacommand |= mSpecial;
517 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
518 keep_trying = true;
519 }
520
521 // New "truncate" special formatting option
522 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
523 {
524 metaoption.metacommand |= mTruncate;
525 get_truncate_options (meta, metaoption);
526 keep_trying = true;
527 }
528 // New "htmlsafe" special formatting option
529 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
530 {
531 metaoption.metacommand |= mHTMLSafe;
532 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
533 keep_trying = true;
534 }
535 // New "xmlsafe" special formatting option
536 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
537 {
538 metaoption.metacommand |= mXMLSafe;
539 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
540 keep_trying = true;
541 }
542 }
543
544 bool had_parent_or_child = true;
545 bool prev_was_parent = false;
546 bool prev_was_child = false;
547
548 while (had_parent_or_child) {
549 if (meta.size() > 7
550 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
551
552 // clear out sibling and child (cmd and options)
553 metaoption.metacommand &= ~(mChild|mSibling);
554 metaoption.childoptions.clear();
555 metaoption.siblingoptions.clear();
556
557 getParents = true;
558 metaoption.metacommand |= mParent;
559 get_parent_options (meta, metaoption);
560
561 if (prev_was_parent) {
562 metaoption.pre_tree_traverse += ".pr";
563 }
564 else if (prev_was_child) {
565 metaoption.pre_tree_traverse += ".fc";
566 }
567
568 prev_was_parent = true;
569 prev_was_child = false;
570 }
571 else if (meta.size() > 6
572 && (substr (meta.begin(), meta.begin()+5) == "child")) {
573
574 // clear out sibling and parent (cmd and options)
575 metaoption.metacommand &= ~(mParent|mSibling);
576 metaoption.parentoptions.clear();
577 metaoption.siblingoptions.clear();
578
579 metaoption.metacommand |= mChild;
580 get_child_options (meta, metaoption);
581 metadata.insert("contains");
582
583 if (prev_was_parent) {
584 metaoption.pre_tree_traverse += ".pr";
585 }
586 else if (prev_was_child) {
587 metaoption.pre_tree_traverse += ".fc";
588 }
589
590 prev_was_child = true;
591 prev_was_parent = false;
592 }
593 else {
594 prev_was_child = false;
595 prev_was_parent = false;
596 had_parent_or_child = false;
597 }
598 }
599
600 // parent/child can have sibling tacked on end also
601 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
602 metaoption.metacommand |= mSibling;
603 get_sibling_options (meta, metaoption);
604 }
605
606 // check for ex. which may occur in format statements
607 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
608 meta = substr (meta.begin()+3, meta.end());
609 }
610 metadata.insert (meta);
611 metaoption.metaname = meta;
612}
613
614static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
615 if (meta == "collection") {
616 // no qualifiers
617 metaoption.metaname = g_EmptyText;
618 return;
619 }
620 meta = substr (meta.begin()+11, meta.end());
621 metaoption.metaname = meta;
622
623}
624
625static void parse_meta (text_t &meta, format_t *formatlistptr,
626 text_tset &metadata, bool &getParents) {
627
628 if (meta == "link")
629 formatlistptr->command = comLink;
630 else if (meta == "/link")
631 formatlistptr->command = comEndLink;
632
633 else if (meta == "href")
634 formatlistptr->command = comHref;
635
636 else if (meta == "num")
637 formatlistptr->command = comNum;
638
639 else if (meta == "icon")
640 formatlistptr->command = comIcon;
641
642 else if (meta == "Text")
643 formatlistptr->command = comDoc;
644
645 else if (meta == "RelatedDocuments")
646 formatlistptr->command = comRel;
647
648 else if (meta == "highlight")
649 formatlistptr->command = comHighlight;
650
651 else if (meta == "/highlight")
652 formatlistptr->command = comEndHighlight;
653
654 else if (meta == "metadata-spanwrap")
655 formatlistptr->command = comMetadataSpanWrap;
656
657 else if (meta == "/metadata-spanwrap")
658 formatlistptr->command = comEndMetadataSpanWrap;
659
660 else if (meta == "Summary")
661 formatlistptr->command = comSummary;
662
663 else if (meta == "DocImage")
664 formatlistptr->command = comImage;
665
666 else if (meta == "DocTOC")
667 formatlistptr->command = comTOC;
668
669 else if (meta == "DocumentButtonDetach")
670 formatlistptr->command = comDocumentButtonDetach;
671
672 else if (meta == "DocumentButtonHighlight")
673 formatlistptr->command = comDocumentButtonHighlight;
674
675 else if (meta == "DocumentButtonExpandContents")
676 formatlistptr->command = comDocumentButtonExpandContents;
677
678 else if (meta == "DocumentButtonExpandText")
679 formatlistptr->command = comDocumentButtonExpandText;
680
681 else if (meta == "DocOID")
682 formatlistptr->command = comOID;
683 else if (meta == "DocTopOID")
684 formatlistptr->command = comTopOID;
685 else if (meta == "DocRank")
686 formatlistptr->command = comRank;
687 else if (meta == "DocTermsFreqTotal")
688 formatlistptr->command = comDocTermsFreqTotal;
689 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
690 formatlistptr->command = comCollection;
691 parse_coll_meta(meta, formatlistptr->meta);
692 }
693 else {
694 formatlistptr->command = comMeta;
695 parse_meta (meta, formatlistptr->meta, metadata, getParents);
696 }
697}
698
699
700static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
701 text_tset &metadata, bool &getParents) {
702
703 text_t text;
704 text_t::const_iterator here = formatstring.begin();
705 text_t::const_iterator end = formatstring.end();
706
707 while (here != end) {
708
709 if (*here == '\\') {
710 ++here;
711 if (here != end) text.push_back (*here);
712
713 } else if (*here == '{') {
714 if (!text.empty()) {
715 formatlistptr->command = comText;
716 formatlistptr->text = text;
717 formatlistptr->nextptr = new format_t();
718 formatlistptr = formatlistptr->nextptr;
719
720 text.clear();
721 }
722 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
723
724 formatlistptr->nextptr = new format_t();
725 formatlistptr = formatlistptr->nextptr;
726 if (here == end) break;
727 }
728 } else if (*here == '[') {
729 if (!text.empty()) {
730 formatlistptr->command = comText;
731 formatlistptr->text = text;
732 formatlistptr->nextptr = new format_t();
733 formatlistptr = formatlistptr->nextptr;
734
735 text.clear();
736 }
737 text_t meta;
738 ++here;
739 while (*here != ']') {
740 if (here == end) return false;
741 meta.push_back (*here);
742 ++here;
743 }
744 parse_meta (meta, formatlistptr, metadata, getParents);
745 formatlistptr->nextptr = new format_t();
746 formatlistptr = formatlistptr->nextptr;
747
748 } else
749 text.push_back (*here);
750
751 if (here != end) ++here;
752 }
753 if (!text.empty()) {
754 formatlistptr->command = comText;
755 formatlistptr->text = text;
756 formatlistptr->nextptr = new format_t();
757 formatlistptr = formatlistptr->nextptr;
758
759 }
760 return true;
761}
762
763
764static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
765 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
766
767 text_t::const_iterator it = findchar (here, end, '}');
768 if (it == end) return false;
769
770 text_t com = substr (here, it);
771 here = findchar (it, end, '{');
772 if (here == end) return false;
773 else ++here;
774
775 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
776 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
777 else return false;
778
779 int commacount = 0;
780 text_t text;
781 while (here != end) {
782
783 if (*here == '\\') {
784 ++here;
785 if (here != end) text.push_back(*here);
786
787 }
788
789 else if (*here == ',' || *here == '}' || *here == '{') {
790
791 if (formatlistptr->command == comOr) {
792 // the {Or}{this, or this, or this, or this} statement
793 format_t *or_ptr;
794
795 // find the next unused orptr
796 if (formatlistptr->orptr == NULL) {
797 formatlistptr->orptr = new format_t();
798 or_ptr = formatlistptr->orptr;
799 } else {
800 or_ptr = formatlistptr->orptr;
801 while (or_ptr->nextptr != NULL)
802 or_ptr = or_ptr->nextptr;
803 or_ptr->nextptr = new format_t();
804 or_ptr = or_ptr->nextptr;
805 }
806
807 if (!text.empty())
808 {
809 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
810 }
811
812 if (*here == '{')
813 {
814 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
815 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
816 // The latter can always be re-written:
817 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
818
819 if (!text.empty()) // already used up allocated format_t
820 {
821 // => allocate new one for detected action
822 or_ptr->nextptr = new format_t();
823 or_ptr = or_ptr->nextptr;
824 }
825 if (!parse_action(++here, end, or_ptr, metadata, getParents))
826 {
827 return false;
828 }
829 }
830 else
831 {
832 if (*here == '}') break;
833 }
834 text.clear();
835
836 }
837
838 // Parse an {If}{decide,do,else} statement
839 else {
840
841 // Read the decision component.
842 if (commacount == 0) {
843 // Decsion can be a metadata element, or a piece of text.
844 // Originally Stefan's code, updated 25/10/2000 by Gordon.
845
846 text_t::const_iterator beginbracket = text.begin();
847 text_t::const_iterator endbracket = (text.end() - 1);
848
849 // Decision is based on a metadata element
850 if ((*beginbracket == '[') && (*endbracket == ']')) {
851 // Ignore the surrounding square brackets
852 text_t meta = substr (beginbracket+1, endbracket);
853 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
854 ++commacount;
855 text.clear();
856 }
857
858 // Decision is a piece of text (probably a macro like _cgiargmode_).
859 else {
860
861 // hunt for any metadata in string, which might be uses in
862 // to test a condition, e.g. [Format] eq 'PDF'
863 format_t* dummyformat = new format_t();
864 // update which metadata fields needed
865 // (not interested in updatng formatlistptr)
866 parse_string (text, dummyformat, metadata, getParents);
867 delete dummyformat;
868
869 formatlistptr->decision.command = dText;
870 formatlistptr->decision.text = text;
871 ++commacount;
872 text.clear();
873 }
874 }
875
876 // Read the "then" and "else" components of the {If} statement.
877 else {
878 format_t** nextlistptr = NULL;
879 if (commacount == 1) {
880 nextlistptr = &formatlistptr->ifptr;
881 } else if (commacount == 2 ) {
882 nextlistptr = &formatlistptr->elseptr;
883 } else {
884 return false;
885 }
886
887 if (!text.empty()) {
888 if (*nextlistptr == NULL) {
889 *nextlistptr = new format_t();
890 } else {
891
892 // skip to the end of any format_t statements already added
893 while ((*nextlistptr)->nextptr != NULL)
894 {
895 nextlistptr = &(*nextlistptr)->nextptr;
896 }
897
898 (*nextlistptr)->nextptr = new format_t();
899 nextlistptr = &(*nextlistptr)->nextptr;
900 }
901
902 if (!parse_string (text, *nextlistptr, metadata, getParents))
903 {
904 return false;
905 }
906 text.clear();
907 }
908
909 if (*here == '{')
910 {
911 if (*nextlistptr == NULL) {
912 *nextlistptr = new format_t();
913 } else {
914 // skip to the end of any format_t statements already added
915 while ((*nextlistptr)->nextptr != NULL)
916 {
917 nextlistptr = &(*nextlistptr)->nextptr;
918 }
919
920 (*nextlistptr)->nextptr = new format_t();
921 nextlistptr = &(*nextlistptr)->nextptr;
922 }
923
924 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
925 {
926 return false;
927 }
928 }
929 else
930 {
931 if (*here == '}') break;
932 ++commacount;
933 }
934 }
935 }
936
937 } else text.push_back(*here);
938
939 if (here != end) ++here;
940 }
941
942 return true;
943}
944
945
946static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
947 const text_t metaname, int metapos=-1)
948{
949
950 text_t tag_type = (metaname == "Text") ? "div" : "span";
951 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
952
953 text_t wrapped_metatext = "<" + tag_type + " ";
954 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
955
956 wrapped_metatext += "docoid=\"" + OID + "\" ";
957 wrapped_metatext += "metaname=\"" + metaname + "\"";
958
959 if (metapos>=0) {
960 text_t metapos_str = metapos;
961 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
962 }
963
964 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
965
966 return wrapped_metatext;
967}
968
969
970
971bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
972 text_tset &metadata, bool &getParents) {
973
974 formatlistptr->clear();
975 getParents = false;
976
977 return (parse_string (formatstring, formatlistptr, metadata, getParents));
978}
979
980// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
981// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
982
983static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
984{
985 text_t no_ns_metaname = remove_namespace(meta.metaname);
986 text_t formatted_metatext;
987 bool first = true;
988
989 const int start_i=0;
990 const int end_i = metainfo.values.size()-1;
991
992 if (position == -1) { // all
993 for (int i=start_i; i<=end_i; ++i) {
994 if (!first) formatted_metatext += meta.siblingoptions;
995
996 text_t fresh_metatext;
997
998 if (meta.metacommand & mSpecial) {
999 // special formatting
1000 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1001 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1002 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1003 }
1004 else fresh_metatext = metainfo.values[i];
1005
1006 // New "truncate" special formatting option
1007 if (meta.metacommand & mTruncate)
1008 {
1009 int truncate_length = meta.siblingoptions.getint();
1010 text_t truncated_value = fresh_metatext;
1011 if (truncated_value.size() > truncate_length)
1012 {
1013 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1014 }
1015 fresh_metatext = truncated_value;
1016 }
1017 // New "xmlsafe" special formatting option
1018 if (meta.metacommand & mXMLSafe)
1019 {
1020 // Make it XML-safe
1021 text_t text_xml_safe = "";
1022 text_t::const_iterator text_iterator = fresh_metatext.begin();
1023 while (text_iterator != fresh_metatext.end())
1024 {
1025 if (*text_iterator == '&') text_xml_safe += "&amp;";
1026 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1027 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1028 else text_xml_safe.push_back(*text_iterator);
1029 text_iterator++;
1030 }
1031 fresh_metatext = text_xml_safe;
1032 }
1033 // New "htmlsafe" special formatting option
1034 if (meta.metacommand & mHTMLSafe)
1035 {
1036 // Make it HTML-safe
1037 text_t text_html_safe = "";
1038 text_t::const_iterator text_iterator = fresh_metatext.begin();
1039 while (text_iterator != fresh_metatext.end())
1040 {
1041 if (*text_iterator == '&') text_html_safe += "&amp;";
1042 else if (*text_iterator == '<') text_html_safe += "&lt;";
1043 else if (*text_iterator == '>') text_html_safe += "&gt;";
1044 else if (*text_iterator == '"') text_html_safe += "&quot;";
1045 else text_html_safe.push_back(*text_iterator);
1046 text_iterator++;
1047 }
1048 fresh_metatext = text_html_safe;
1049 }
1050
1051 if (metadata_spanwrap) {
1052 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
1053 }
1054 formatted_metatext += fresh_metatext;
1055
1056 first = false;
1057
1058 }
1059 } else {
1060 if (position == -2) { // end
1061 position = end_i;
1062 } else if (position < start_i || position > end_i) {
1063 return "";
1064 }
1065
1066 text_t fresh_metatext;
1067 if (meta.metacommand & mSpecial) {
1068
1069 // special formatting
1070 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1071 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1072 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1073 }
1074 else fresh_metatext = metainfo.values[position];
1075
1076 // New "truncate" special formatting option
1077 if (meta.metacommand & mTruncate)
1078 {
1079 int truncate_length = meta.siblingoptions.getint();
1080 text_t truncated_value = fresh_metatext;
1081 if (truncated_value.size() > truncate_length)
1082 {
1083 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1084 }
1085 fresh_metatext = truncated_value;
1086 }
1087 // New "xmlsafe" special formatting option
1088 if (meta.metacommand & mXMLSafe)
1089 {
1090 // Make it XML-safe
1091 text_t text_xml_safe = "";
1092 text_t::const_iterator text_iterator = fresh_metatext.begin();
1093 while (text_iterator != fresh_metatext.end())
1094 {
1095 if (*text_iterator == '&') text_xml_safe += "&amp;";
1096 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1097 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1098 else text_xml_safe.push_back(*text_iterator);
1099 text_iterator++;
1100 }
1101 fresh_metatext = text_xml_safe;
1102 }
1103 // New "htmlsafe" special formatting option
1104 if (meta.metacommand & mHTMLSafe)
1105 {
1106 // Make it HTML-safe
1107 text_t text_html_safe = "";
1108 text_t::const_iterator text_iterator = fresh_metatext.begin();
1109 while (text_iterator != fresh_metatext.end())
1110 {
1111 if (*text_iterator == '&') text_html_safe += "&amp;";
1112 else if (*text_iterator == '<') text_html_safe += "&lt;";
1113 else if (*text_iterator == '>') text_html_safe += "&gt;";
1114 else if (*text_iterator == '"') text_html_safe += "&quot;";
1115 else text_html_safe.push_back(*text_iterator);
1116 text_iterator++;
1117 }
1118 fresh_metatext = text_html_safe;
1119 }
1120
1121 if (metadata_spanwrap) {
1122 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
1123 }
1124
1125 formatted_metatext += fresh_metatext;
1126 }
1127
1128 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1129 else return formatted_metatext;
1130}
1131
1132static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1133{
1134
1135 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1136
1137 switch (meta.mqualifier.parent) {
1138 case pNone:
1139 return "Nothing!!";
1140 break;
1141
1142 case pImmediate:
1143 if (parent != NULL) {
1144 text_t parent_oid = get_parent(docinfo.OID);
1145 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1146 }
1147 break;
1148
1149 case pTop:
1150 if (parent != NULL) {
1151 text_t parent_oid = get_parent(docinfo.OID);
1152
1153 while (parent->parent != NULL) {
1154 parent = parent->parent;
1155 parent_oid = get_parent(parent_oid);
1156 }
1157 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1158 }
1159 break;
1160
1161 case pAll:
1162 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1163 if (parent != NULL) {
1164 text_t parent_oid = get_parent(docinfo.OID);
1165
1166 text_tarray tmparray;
1167 while (parent != NULL) {
1168 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1169 parent = parent->parent;
1170 parent_oid = get_parent(parent_oid);
1171
1172 }
1173 // now join them up - use teh parent separator
1174 bool first = true;
1175 text_t tmp;
1176 text_tarray::reverse_iterator here = tmparray.rbegin();
1177 text_tarray::reverse_iterator end = tmparray.rend();
1178 while (here != end) {
1179 if (!first) tmp += meta.parentoptions;
1180 tmp += *here;
1181 first = false;
1182 ++here;
1183 }
1184 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1185 else return tmp;
1186 }
1187 }
1188 return "";
1189
1190}
1191
1192static text_t get_child_meta (const text_t& collection,
1193 recptproto* collectproto,
1194 ResultDocInfo_t &docinfo, displayclass &disp,
1195 const metadata_t &meta, text_tmap &options,
1196 ostream& logout, int siblings_values)
1197{
1198 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1199
1200 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1201 const text_t& child_metaname = meta.metaname;
1202 const text_t& child_field = meta.childoptions;
1203 text_tset child_metadata;
1204 child_metadata.insert(child_metaname);
1205
1206 FilterResponse_t child_response;
1207 if (meta.mqualifier.child == cNum) {
1208 // just one child
1209 //get the information associated with the metadata for child doc
1210 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1211 child_metadata, false, collectproto, child_response,
1212 logout)) return ""; // invalid child number
1213
1214 if (child_response.docInfo.empty()) return false; // no info for the child
1215
1216 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1217 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1218
1219 text_t child_metavalue
1220 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1221 return expand_metadata(child_metavalue,collection,collectproto,
1222 child_docinfo,disp,options,logout);
1223 }
1224
1225
1226 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1227
1228
1229 if (!pre_tree_trav.empty()) {
1230 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1231 FilterResponse_t trav_response;
1232
1233 text_tset trav_metadata;
1234 trav_metadata.insert("contains");
1235
1236 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1237 trav_metadata, false, collectproto, trav_response,
1238 logout)) return ""; // invalid pre_tree_trav
1239
1240 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1241
1242 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1243 // use this for rest of routine
1244 docinfo = trav_docinfo;
1245 }
1246
1247 // we need to get all children
1248 text_t result = "";
1249 text_tarray children;
1250 text_t contains = docinfo.metadata["contains"].values[0];
1251 splitchar (contains.begin(), contains.end(), ';', children);
1252 text_tarray::const_iterator here = children.begin();
1253 text_tarray::const_iterator end = children.end();
1254 bool first = true;
1255 while (here !=end) {
1256 text_t oid = *here;
1257 here++;
1258 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1259
1260 //get the information associated with the metadata for child doc
1261 if (!get_info (oid, collection, "", child_metadata,
1262 false, collectproto, child_response, logout) ||
1263 child_response.docInfo.empty()) {
1264 first = false;
1265 continue;
1266 }
1267
1268
1269 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1270 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1271
1272 text_t child_metavalue
1273 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1274 if (!first) result += child_field;
1275 first = false;
1276 // need to do this here cos otherwise we are in the wrong document
1277 result += expand_metadata(child_metavalue,collection,collectproto,
1278 child_docinfo,disp,options,logout);
1279 }
1280 return result;
1281
1282}
1283
1284static text_t get_meta (const text_t& collection, recptproto* collectproto,
1285 ResultDocInfo_t &docinfo, displayclass &disp,
1286 const metadata_t &meta, text_tmap &options,
1287 ostream& logout) {
1288
1289 // make sure we have the requested metadata
1290 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1291 if (it == docinfo.metadata.end()) return "";
1292
1293 int siblings_values = 0; // default is no siblings, just the first metadata available
1294 if (meta.metacommand & mSibling) {
1295 if (meta.mqualifier.sibling == sAll) {
1296 siblings_values = -1; //all
1297 } else if (meta.mqualifier.sibling == sNum) {
1298 siblings_values = meta.siblingoptions.getint();
1299 }
1300 }
1301 if (meta.metacommand & mParent) {
1302 return get_parent_meta(docinfo,meta,siblings_values);
1303 }
1304
1305 else if (meta.metacommand & mChild) {
1306 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1307 options,logout, siblings_values);
1308 }
1309 else if (meta.metacommand & mSibling) { // only siblings
1310 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1311 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1312 }
1313 else {
1314
1315 // straightforward metadata request (nothing fancy)
1316
1317 text_t classifier_metaname = docinfo.classifier_metadata_type;
1318 int metaname_index
1319 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1320 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1321 }
1322
1323 return "";
1324}
1325
1326static text_t get_or (const text_t& collection, recptproto* collectproto,
1327 ResultDocInfo_t &docinfo, displayclass &disp,
1328 format_t *orptr, text_tmap &options,
1329 ostream& logout) {
1330
1331 while (orptr != NULL) {
1332
1333 if (metadata_spanwrap) {
1334 // need to be a bit more careful about this
1335 // => test for it *without* spanwrap, and if defined, then
1336 // got back and generate it again, this time with spanwrap on
1337
1338 metadata_spanwrap = false;
1339 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1340 options, logout);
1341 metadata_spanwrap = true;
1342 if (!test_tmp.empty()) {
1343
1344 return format_string (collection,collectproto,docinfo, disp, orptr,
1345 options, logout);
1346 }
1347 }
1348 else {
1349 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1350 options, logout);
1351 if (!tmp.empty()) return tmp;
1352 }
1353
1354 orptr = orptr->nextptr;
1355 }
1356 return "";
1357}
1358
// True for the four characters treated as white space by the expression
// scanner: space, tab, newline and carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1364
1365static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1366{
1367 int pos = start_pos;
1368 while (pos<outstring.size()) {
1369 if (!char_is_whitespace(outstring[pos])) {
1370 break;
1371 }
1372 ++pos;
1373 }
1374
1375 return pos;
1376}
1377
1378static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1379{
1380 int pos = start_pos;
1381 while (pos>=0) {
1382 if (!char_is_whitespace(outstring[pos])) {
1383 break;
1384 }
1385 --pos;
1386 }
1387
1388 return pos;
1389}
1390
1391static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1392{
1393 int pos = start_pos;
1394 while (pos>=0) {
1395 if (char_is_whitespace(outstring[pos])) {
1396 break;
1397 }
1398 --pos;
1399 }
1400
1401 return pos;
1402}
1403
1404
1405static int rscan_for(const text_t& outstring, const int start_pos,
1406 const char find_c)
1407{
1408 int pos = start_pos;
1409 while (pos>=0) {
1410 char c = outstring[pos];
1411 if (outstring[pos] == find_c) {
1412 break;
1413 }
1414 --pos;
1415 }
1416
1417 return pos;
1418}
1419
1420text_t extract_substr(const text_t& outstring, const int start_pos,
1421 const int end_pos)
1422{
1423 text_t extracted_str;
1424 extracted_str.clear();
1425
1426 for (int pos=start_pos; pos<=end_pos; ++pos) {
1427 extracted_str.push_back(outstring[pos]);
1428 }
1429
1430 return extracted_str;
1431}
1432
1433
1434static text_t expand_potential_metadata(const text_t& collection,
1435 recptproto* collectproto,
1436 ResultDocInfo_t &docinfo,
1437 displayclass &disp,
1438 const text_t& intext,
1439 text_tmap &options,
1440 ostream& logout)
1441{
1442 text_t outtext;
1443
1444 // decide if dealing with metadata or text
1445
1446 text_t::const_iterator beginbracket = intext.begin();
1447 text_t::const_iterator endbracket = (intext.end() - 1);
1448
1449 // Decision is based on a metadata element
1450 if ((*beginbracket == '[') && (*endbracket == ']')) {
1451 // Ignore the surrounding square brackets
1452 text_t meta_text = substr (beginbracket+1, endbracket);
1453
1454 if (meta_text == "Text") {
1455 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1456 }
1457 else {
1458
1459 text_tset metadata;
1460 bool getParents =false;
1461 metadata_t meta;
1462
1463 parse_meta (meta_text, meta, metadata, getParents);
1464 outtext
1465 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1466 }
1467
1468 }
1469 else {
1470 outtext = intext;
1471 }
1472
1473 return outtext;
1474}
1475
1476
1477
1478
1479static bool uses_expression(const text_t& collection, recptproto* collectproto,
1480 ResultDocInfo_t &docinfo,
1481 displayclass &disp,
1482 const text_t& outstring, text_t& lhs_expr,
1483 text_t& op_expr, text_t& rhs_expr,
1484 text_tmap &options,
1485 ostream& logout)
1486{
1487 // Note: the string may not be of the form: str1 op str2, however
1488 // to deterine this we have to process it on the assumption it is,
1489 // and if at any point an 'erroneous' value is encountered, return
1490 // false and let something else have a go at evaluating it
1491
1492 // Starting at the end of the string and working backwards ..
1493
1494 const int outstring_len = outstring.size();
1495
1496 // skip over white space
1497 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1498
1499 if (rhs_end<=0) {
1500 // no meaningful text or (rhs_end==0) no room for operator
1501 return false;
1502 }
1503
1504 // check for ' or " and then scan over token
1505 const char potential_quote = outstring[rhs_end];
1506 int rhs_start=rhs_end;
1507 bool quoted = false;
1508
1509 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1510 --rhs_end;
1511 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1512 quoted = true;
1513 }
1514 else {
1515 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1516 }
1517
1518 if ((rhs_end-rhs_start)<0) {
1519 // no meaningful rhs expression
1520 return false;
1521 }
1522
1523 // form rhs_expr
1524 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1525
1526 // skip over white space
1527 const int to_whitespace = (quoted) ? 2 : 1;
1528
1529 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1530 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1531
1532 if ((op_end<0) && (op_start<0)) {
1533 // no meaningful expression operator
1534 return false;
1535 }
1536
1537 if (op_end-op_start<0) {
1538 // no meaningful expression operator
1539 return false;
1540 }
1541
1542 op_expr = extract_substr(outstring,op_start,op_end);
1543
1544
1545 // check for operator
1546 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1547 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1548
1549 // not a valid operator
1550 return false;
1551 }
1552
1553 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1554 if (lhs_end<0) {
1555 // no meaningful lhs expression
1556 return false;
1557 }
1558
1559 int lhs_start = scan_over_whitespace(outstring,0);
1560
1561 // form lhs_expr from remainder of string
1562 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1563
1564 // Now we know we have a valid expression, look up any
1565 // metadata terms
1566
1567 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1568 disp,rhs_expr,options,logout);
1569 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1570 disp,lhs_expr,options,logout);
1571
1572 return true;
1573}
1574
1575static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1576 const text_t& rhs_expr, ostream& logout)
1577{
1578 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1579 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1580 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1581 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1582 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1583 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1584 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1585 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1586 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1587 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1588 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1589 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1590 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1591 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1592 else {
1593 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1594 }
1595
1596 return false;
1597}
1598
1599
1600static text_t get_if (const text_t& collection, recptproto* collectproto,
1601 ResultDocInfo_t &docinfo, displayclass &disp,
1602 const decision_t &decision,
1603 format_t *ifptr, format_t *elseptr,
1604 text_tmap &options, ostream& logout)
1605{
1606 // If the decision component is a metadata element, then evaluate it
1607 // to see whether we output the "then" or the "else" clause
1608 if (decision.command == dMeta) {
1609
1610 bool store_metadata_spanwrap = metadata_spanwrap;
1611 metadata_spanwrap = 0;
1612
1613 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1614 bool metadata_exists
1615 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1616 logout) != "");
1617
1618 metadata_spanwrap = store_metadata_spanwrap;
1619
1620 if (metadata_exists) {
1621 if (ifptr != NULL)
1622 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1623 options, logout);
1624 }
1625 else {
1626 if (elseptr != NULL)
1627 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1628 options, logout);
1629 }
1630 }
1631
1632 // If the decision component is text, then evaluate it (it is probably a
1633 // macro like _cgiargmode_) to decide what to output.
1634 else if (decision.command == dText) {
1635
1636 text_t outstring;
1637 disp.expandstring (decision.text, outstring);
1638
1639 // Check for if expression in form: str1 op str2
1640 // (such as [x] eq "y")
1641 text_t lhs_expr, op_expr, rhs_expr;
1642 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1643 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1644 if (ifptr != NULL) {
1645 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1646 options, logout);
1647 }
1648 else {
1649 return "";
1650 }
1651 } else {
1652 if (elseptr != NULL) {
1653 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1654 options, logout);
1655 }
1656 else {
1657 return "";
1658 }
1659 }
1660 }
1661
1662
1663 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1664 // a cgi argument macro that has not been set, it evaluates to itself.
1665 // Therefore, were have to say that a piece of text evalautes true if
1666 // it is non-empty and if it is a cgi argument evaulating to itself.
1667
1668 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1669 if (ifptr != NULL)
1670 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1671 options, logout);
1672 } else {
1673 if (elseptr != NULL)
1674 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1675 options, logout);
1676 }
1677 }
1678
1679 return "";
1680}
1681
1682bool includes_metadata(const text_t& text)
1683{
1684 text_t::const_iterator here = text.begin();
1685 text_t::const_iterator end = text.end();
1686 while (here != end) {
1687 if (*here == '[') return true;
1688 ++here;
1689 }
1690
1691 return false;
1692}
1693
1694static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1695 recptproto* collectproto,
1696 ResultDocInfo_t &docinfo,
1697 displayclass &disp, text_tmap &options,
1698 ostream &logout) {
1699
1700 if (includes_metadata(metavalue)) {
1701
1702 // text has embedded metadata in it => expand it
1703 FilterRequest_t request;
1704 FilterResponse_t response;
1705
1706 request.getParents = false;
1707
1708 format_t *expanded_formatlistptr = new format_t();
1709 parse_formatstring (metavalue, expanded_formatlistptr,
1710 request.fields, request.getParents);
1711
1712 // retrieve metadata
1713 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1714 collectproto, response, logout);
1715
1716 if (!response.docInfo.empty()) {
1717
1718 text_t expanded_metavalue
1719 = get_formatted_string(collection, collectproto,
1720 response.docInfo[0], disp, expanded_formatlistptr,
1721 options, logout);
1722
1723 return expanded_metavalue;
1724 }
1725 else {
1726 return metavalue;
1727 }
1728 }
1729 else {
1730
1731 return metavalue;
1732 }
1733}
1734
1735text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1736 displayclass &disp,
1737 text_t meta_name, ostream& logout) {
1738
1739 ColInfoResponse_t collectinfo;
1740 comerror_t err;
1741 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1742 text_t meta_value = "";
1743 text_t lang;
1744 disp.expandstring("_cgiargl_",lang);
1745 if (lang.empty()) {
1746 lang = "en";
1747 }
1748
1749 if (err == noError) {
1750 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1751 }
1752 return meta_value;
1753
1754
1755}
1756text_t format_string (const text_t& collection, recptproto* collectproto,
1757 ResultDocInfo_t &docinfo, displayclass &disp,
1758 format_t *formatlistptr, text_tmap &options,
1759 ostream& logout) {
1760
1761 if (formatlistptr == NULL) return "";
1762
1763 switch (formatlistptr->command) {
1764 case comOID:
1765 return docinfo.OID;
1766 case comTopOID:
1767 {
1768 text_t top_id;
1769 get_top(docinfo.OID, top_id);
1770 return top_id;
1771 }
1772 case comRank:
1773 return text_t(docinfo.ranking);
1774 case comText:
1775 return formatlistptr->text;
1776 case comLink:
1777 return options["link"];
1778 case comEndLink:
1779 if (options["link"].empty()) return "";
1780 else return "</a>";
1781 case comHref:
1782 return get_href(options["link"]);
1783 case comIcon:
1784 return options["icon"];
1785 case comNum:
1786 return docinfo.result_num;
1787 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1788 return get_related_docs(collection, collectproto, docinfo, logout);
1789
1790 case comSummary:
1791 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1792
1793 case comMeta:
1794 {
1795 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1796 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1797 }
1798
1799 case comDoc:
1800 return format_text(collection, collectproto, docinfo, disp, options, logout);
1801
1802 case comImage:
1803 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1804 case comTOC:
1805 return options["DocTOC"];
1806 case comDocumentButtonDetach:
1807 return options["DocumentButtonDetach"];
1808 case comDocumentButtonHighlight:
1809 return options["DocumentButtonHighlight"];
1810 case comDocumentButtonExpandContents:
1811 return options["DocumentButtonExpandContents"];
1812 case comDocumentButtonExpandText:
1813 return options["DocumentButtonExpandText"];
1814 case comHighlight:
1815 if (options["highlight"] == "1") return "<b>";
1816 break;
1817 case comEndHighlight:
1818 if (options["highlight"] == "1") return "</b>";
1819 break;
1820 case comMetadataSpanWrap:
1821 metadata_spanwrap=true; return "";
1822 break;
1823 case comEndMetadataSpanWrap:
1824 metadata_spanwrap=false; return "";
1825 break;
1826 case comIf:
1827 return get_if (collection, collectproto, docinfo, disp,
1828 formatlistptr->decision, formatlistptr->ifptr,
1829 formatlistptr->elseptr, options, logout);
1830 case comOr:
1831 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1832 options, logout);
1833 case comDocTermsFreqTotal:
1834 return docinfo.num_terms_matched;
1835 case comCollection:
1836 if (formatlistptr->meta.metaname == g_EmptyText) {
1837 return collection;
1838 }
1839 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1840
1841 }
1842 return "";
1843}
1844
1845text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1846 ResultDocInfo_t &docinfo, displayclass &disp,
1847 format_t *formatlistptr, text_tmap &options,
1848 ostream& logout) {
1849
1850 text_t ft;
1851 while (formatlistptr != NULL)
1852 {
1853 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1854 options, logout);
1855 formatlistptr = formatlistptr->nextptr;
1856 }
1857
1858 return ft;
1859}
1860
1861
1862// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1863text_t format_text (const text_t& collection, recptproto* collectproto,
1864 ResultDocInfo_t &docinfo, displayclass &disp,
1865 text_tmap &options, ostream& logout)
1866{
1867 text_t text;
1868
1869 if(!options["text"].empty()) {
1870 text = options["text"];
1871 }
1872 else {
1873 // get document text here
1874 DocumentRequest_t docrequest;
1875 DocumentResponse_t docresponse;
1876 comerror_t err;
1877 docrequest.OID = docinfo.OID;
1878 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1879 text = docresponse.doc;
1880 }
1881
1882 if (metadata_spanwrap) {
1883 text = spanwrap_metatext(text,docinfo.OID,"Text");
1884 }
1885
1886 return text;
1887}
1888
1889/* FUNCTION NAME: format_summary
1890 * DESC: this is invoked when a [Summary] special metadata is processed.
1891 * RETURNS: a query-biased summary for the document */
1892
1893text_t format_summary (const text_t& collection, recptproto* collectproto,
1894 ResultDocInfo_t &docinfo, displayclass &disp,
1895 text_tmap &options, ostream& logout) {
1896
1897 // GRB: added code here to ensure that the cstr (and other collections)
1898 // uses the document metadata item Summary, rather than compressing
1899 // the text of the document, processed via the methods in
1900 // summarise.cpp
1901
1902 text_t summary;
1903
1904 if (docinfo.metadata.count("Summary") > 0 &&
1905 docinfo.metadata["Summary"].values.size() > 0) {
1906 summary = docinfo.metadata["Summary"].values[0];
1907 }
1908 else {
1909
1910 text_t textToSummarise, query;
1911
1912 if(options["text"].empty()) { // get document text
1913 DocumentRequest_t docrequest;
1914 DocumentResponse_t docresponse;
1915 comerror_t err;
1916 docrequest.OID = docinfo.OID;
1917 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1918 textToSummarise = docresponse.doc;
1919 }
1920 else {
1921 // in practice, this would not happen, because text is only
1922 // loaded with the [Text] command
1923 textToSummarise = options["text"];
1924 }
1925
1926 disp.expandstring("_cgiargq_",query);
1927 summary = summarise(textToSummarise,query,80);
1928 }
1929
1930 if (metadata_spanwrap) {
1931 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1932 }
1933
1934 return summary;
1935}
Note: See TracBrowser for help on using the repository browser.