source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 22437

Last change on this file since 22437 was 22437, checked in by kjdon, 14 years ago

Need to check for (and strip) the "ex." prefix before testing whether meta is srclink etc.; otherwise "ex.srclink" won't match.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.9 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "recptprototools.h"
29#include "OIDtools.h"
30#include "summarise.h"
31
32#include <assert.h>
33
34static bool metadata_spanwrap = false;
35
36// a few function prototypes
37
38static text_t format_string (const text_t& collection, recptproto* collectproto,
39 ResultDocInfo_t &docinfo, displayclass &disp,
40 format_t *formatlistptr, text_tmap &options,
41 ostream& logout);
42
43static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
44 format_t *formatlistptr, text_tset &metadata, bool &getParents);
45
46static text_t format_summary (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
49static text_t format_text (const text_t& collection, recptproto* collectproto,
50 ResultDocInfo_t &docinfo, displayclass &disp,
51 text_tmap &options, ostream& logout);
52
53static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
54 recptproto* collectproto, ResultDocInfo_t &docinfo,
55 displayclass &disp, text_tmap &options,
56 ostream &logout);
57
58
59void metadata_t::clear() {
60 metaname.clear();
61 metacommand = mNone;
62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
65 pre_tree_traverse.clear();
66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
69}
70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
74 text.clear();
75}
76
77format_t::~format_t()
78{
79 if (nextptr != NULL) delete nextptr;
80 if (ifptr != NULL) delete ifptr;
81 if (elseptr != NULL) delete elseptr;
82 if (orptr != NULL) delete orptr;
83}
84
85void format_t::clear() {
86 command = comText;
87 decision.clear();
88 text.clear();
89 meta.clear();
90 nextptr = NULL;
91 ifptr = NULL;
92 elseptr = NULL;
93 orptr = NULL;
94}
95
96void formatinfo_t::clear() {
97 DocumentImages = false;
98 DocumentTitles = true;
99 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
100 DocumentContents = true;
101 DocumentArrowsBottom = true;
102 DocumentArrowsTop = false;
103 DocumentSearchResultLinks = false;
104 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
105 // DocumentButtons.push_back ("Expand Text");
106 // DocumentButtons.push_back ("Expand Contents");
107 DocumentButtons.push_back ("Detach");
108 DocumentButtons.push_back ("Highlight");
109 RelatedDocuments = "";
110 DocumentText = "[Text]";
111 formatstrings.erase (formatstrings.begin(), formatstrings.end());
112 DocumentUseHTML = false;
113 AllowExtendedOptions = false;
114}
115
116// simply checks to see if formatstring begins with a <td> tag
117bool is_table_content (const text_t &formatstring) {
118 text_t::const_iterator here = formatstring.begin();
119 text_t::const_iterator end = formatstring.end();
120
121 while (here != end) {
122 if (*here != ' ') {
123 if ((*here == '<') && ((here+3) < end)) {
124 if ((*(here+1) == 't' || *(here+1) == 'T') &&
125 (*(here+2) == 'd' || *(here+2) == 'D') &&
126 (*(here+3) == '>' || *(here+3) == ' '))
127 return true;
128 } else return false;
129 }
130 ++here;
131 }
132 return false;
133}
134
135bool is_table_content (const format_t *formatlistptr) {
136
137 if (formatlistptr == NULL) return false;
138
139 if (formatlistptr->command == comText)
140 return is_table_content (formatlistptr->text);
141
142 return false;
143}
144
145// returns false if key isn't in formatstringmap
146bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
147 text_t &formatstring) {
148
149 formatstring.clear();
150 text_tmap::const_iterator it = formatstringmap.find(key);
151 if (it == formatstringmap.end()) return false;
152 formatstring = (*it).second;
153 return true;
154}
155
156// tries to find "key1key2" then "key1" then "key2"
157bool get_formatstring (const text_t &key1, const text_t &key2,
158 const text_tmap &formatstringmap,
159 text_t &formatstring) {
160
161 formatstring.clear();
162 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
163 if (it != formatstringmap.end()) {
164 formatstring = (*it).second;
165 return true;
166 }
167 it = formatstringmap.find(key1);
168 if (it != formatstringmap.end()) {
169 formatstring = (*it).second;
170 return true;
171 }
172 it = formatstringmap.find(key2);
173 if (it != formatstringmap.end()) {
174 formatstring = (*it).second;
175 return true;
176 }
177 return false;
178}
179
180
181text_t remove_namespace(const text_t &meta_name) {
182 text_t::const_iterator end = meta_name.end();
183 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
184 if (it != end) {
185 return substr(it+1, end);
186 }
187
188 return meta_name;
189
190}
191// returns a date of form _format:date_(year, month, day)
192// input is date of type yyyy-?mm-?dd
193// at least the year must be present in date
194text_t format_date (const text_t &date) {
195
196 if (date.size() < 4) return "";
197
198 text_t::const_iterator datebegin = date.begin();
199
200 text_t year = substr (datebegin, datebegin+4);
201 int chars_seen_so_far = 4;
202 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
203
204 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
205 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
206
207 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
208 int imonth = month.getint();
209 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
210
211 chars_seen_so_far += 2;
212 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
213
214 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
215 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
216
217 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
218 if (day[0] == '0') day = substr (day.begin()+1, day.end());
219 int iday = day.getint();
220 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
221
222 return "_format:date_("+year+","+month+","+day+")";
223}
224
225// converts an iso639 language code to its English equivalent
226// should we be checking that the macro exists??
227text_t iso639 (const text_t &langcode) {
228 if (langcode.empty()) return "";
229 return "_iso639:iso639"+langcode+"_";
230}
231
232
233text_t get_href (const text_t &link) {
234
235 text_t href;
236
237 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
238 text_t::const_iterator end = link.end();
239 if (here == end) return g_EmptyText;
240
241 ++here;
242 while (here != end) {
243 if (*here == '"') break;
244 href.push_back(*here);
245 ++here;
246 }
247
248 return href;
249}
250
251//this function gets the information associated with the relation
252//metadata for the document associated with 'docinfo'. This relation
253//metadata consists of a line of pairs containing 'collection, document OID'
254//(this is the OID of the document related to the current document, and
255//the collection the related document belongs to). For each of these pairs
256//the title metadata is obtained and then an html link between the title
257//of the related doc and the document's position (the document will be
258//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
259//(where collection is the related documents collection, and OID is the
260//related documents OID). A list of these html links are made for as many
261//related documents as there are. This list is then returned. If there are
262//no related documents available for the current document then the string
263//'.. no related documents .. ' is returned.
264text_t get_related_docs(const text_t& collection, recptproto* collectproto,
265 ResultDocInfo_t &docinfo, ostream& logout){
266
267 text_tset metadata;
268
269 //insert the metadata we wish to collect
270 metadata.insert("dc.Relation");
271 metadata.insert("Title");
272 metadata.insert("Subject"); //for emails, where title data doesn't apply
273
274 FilterResponse_t response;
275 text_t relation = ""; //string for displaying relation metadata
276 text_t relationTitle = ""; //the related documents Title (or subject)
277 text_t relationOID = ""; //the related documents OID
278
279 //get the information associated with the metadata for current doc
280 if (get_info (docinfo.OID, collection, "", metadata,
281 false, collectproto, response, logout)) {
282
283 //if the relation metadata exists, store for displaying
284 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
285 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
286
287 //split relation data into pairs of collectionname,ID number
288 text_tarray relationpairs;
289 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
290
291 text_tarray::const_iterator currDoc = relationpairs.begin();
292 text_tarray::const_iterator lastDoc = relationpairs.end();
293
294 //iterate through the pairs to split and display
295 while(currDoc != lastDoc){
296
297 //split pairs into collectionname and ID
298 text_tarray relationdata;
299 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
300
301 //get first element in the array (collection)
302 text_tarray::const_iterator doc_data = relationdata.begin();
303 text_t document_collection = *doc_data;
304 ++doc_data; //increment to get next item in array (oid)
305 text_t document_OID = *doc_data;
306
307 //create html link to related document
308 relation += "<a href=\"_httpdocument_&c=" + document_collection;
309 relation += "&cl=search&d=" + document_OID;
310
311 //get the information associated with the metadata for related doc
312 if (get_info (document_OID, document_collection, "", metadata,
313 false, collectproto, response, logout)) {
314
315 //if title metadata doesn't exist, collect subject metadata
316 //if that doesn't exist, just call it 'related document'
317 if (!response.docInfo[0].metadata["Title"].values[0].empty())
318 relationTitle = response.docInfo[0].metadata["Title"].values[0];
319 else if (!response.docInfo[0].metadata["Subject"].values.empty())
320 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
321 else relationTitle = "RELATED DOCUMENT";
322
323 }
324
325 //link the related document's title to its page
326 relation += "\">" + relationTitle + "</a>";
327 relation += " (" + document_collection + ")<br>";
328
329 ++currDoc;
330 }
331 }
332
333 }
334
335 if(relation.empty()) //no relation data for documnet
336 relation = ".. no related documents .. ";
337
338 return relation;
339}
340
341
342
343static void get_parent_options (text_t &instring, metadata_t &metaoption) {
344
345 assert (instring.size() > 7);
346 if (instring.size() <= 7) return;
347
348 text_t meta, com, op;
349 bool inbraces = false;
350 bool inquotes = false;
351 bool foundcolon = false;
352 text_t::const_iterator here = instring.begin()+6;
353 text_t::const_iterator end = instring.end();
354 while (here != end) {
355 if (foundcolon) meta.push_back (*here);
356 else if (*here == '(') inbraces = true;
357 else if (*here == ')') inbraces = false;
358 else if (*here == '\'' && !inquotes) inquotes = true;
359 else if (*here == '\'' && inquotes) inquotes = false;
360 else if (*here == ':' && !inbraces) foundcolon = true;
361 else if (inquotes) op.push_back (*here);
362 else com.push_back (*here);
363 ++here;
364 }
365
366 instring = meta;
367 if (com.empty())
368 metaoption.mqualifier.parent = pImmediate;
369 else if (com == "Top")
370 metaoption.mqualifier.parent = pTop;
371 else if (com == "All") {
372 metaoption.mqualifier.parent = pAll;
373 metaoption.parentoptions = op;
374 }
375}
376
377
378static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
379
380 assert (instring.size() > 8);
381 if (instring.size() <= 8) return;
382 text_t meta, com, op;
383 bool inbraces = false;
384 bool inquotes = false;
385 bool foundcolon = false;
386 text_t::const_iterator here = instring.begin()+7;
387 text_t::const_iterator end = instring.end();
388 while (here != end) {
389 if (foundcolon) meta.push_back (*here);
390 else if (*here == '(') inbraces = true;
391 else if (*here == ')') inbraces = false;
392 else if (*here == '\'' && !inquotes) inquotes = true;
393 else if (*here == '\'' && inquotes) inquotes = false;
394 else if (*here == ':' && !inbraces) foundcolon = true;
395 else if (inquotes) op.push_back (*here);
396 else com.push_back (*here);
397 ++here;
398 }
399
400 instring = meta;
401 metaoption.siblingoptions.clear();
402
403 if (com.empty()) {
404 metaoption.mqualifier.sibling = sAll;
405 metaoption.siblingoptions = " ";
406 }
407 else if (com == "first") {
408 metaoption.mqualifier.sibling = sNum;
409 metaoption.siblingoptions = "0";
410 }
411 else if (com == "last") {
412 metaoption.mqualifier.sibling = sNum;
413 metaoption.siblingoptions = "-2"; // == last
414 }
415 else if (com.getint()>0) {
416 metaoption.mqualifier.sibling = sNum;
417 int pos = com.getint()-1;
418 metaoption.siblingoptions +=pos;
419 }
420 else {
421 metaoption.mqualifier.sibling = sAll;
422 metaoption.siblingoptions = op;
423 }
424}
425
426static void get_child_options (text_t &instring, metadata_t &metaoption) {
427
428 assert (instring.size() > 6);
429 if (instring.size() <= 6) return;
430 text_t meta, com, op;
431 bool inbraces = false;
432 bool inquotes = false;
433 bool foundcolon = false;
434 text_t::const_iterator here = instring.begin()+5;
435 text_t::const_iterator end = instring.end();
436 while (here != end) {
437 if (foundcolon) meta.push_back (*here);
438 else if (*here == '(') inbraces = true;
439 else if (*here == ')') inbraces = false;
440 else if (*here == '\'' && !inquotes) inquotes = true;
441 else if (*here == '\'' && inquotes) inquotes = false;
442 else if (*here == ':' && !inbraces) foundcolon = true;
443 else if (inquotes) op.push_back (*here);
444 else com.push_back (*here);
445 ++here;
446 }
447
448 instring = meta;
449 if (com.empty()) {
450 metaoption.mqualifier.child = cAll;
451 metaoption.childoptions = " ";
452 }
453 else if (com == "first") {
454 metaoption.mqualifier.child = cNum;
455 metaoption.childoptions = ".fc";
456 }
457 else if (com == "last") {
458 metaoption.mqualifier.child = cNum;
459 metaoption.childoptions = ".lc";
460 }
461 else if (com.getint()>0) {
462 metaoption.mqualifier.child = cNum;
463 metaoption.childoptions = "."+com;
464 }
465 else {
466 metaoption.mqualifier.child = cAll;
467 metaoption.childoptions = op;
468 }
469}
470
471
472static void get_truncate_options (text_t &instring, metadata_t &metaoption)
473{
474 assert (instring.size() > ((text_t) "truncate").size());
475 if (instring.size() <= ((text_t) "truncate").size()) return;
476 text_t meta, com;
477 bool inbraces = false;
478 bool foundcolon = false;
479 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
480 text_t::const_iterator end = instring.end();
481 while (here != end) {
482 if (foundcolon) meta.push_back (*here);
483 else if (*here == '(') inbraces = true;
484 else if (*here == ')') inbraces = false;
485 else if (*here == ':' && !inbraces) foundcolon = true;
486 else com.push_back (*here);
487 ++here;
488 }
489
490 instring = meta;
491
492 if (!com.empty())
493 {
494 metaoption.siblingoptions = com;
495 }
496 else
497 {
498 // Default is 100 characters if not specified
499 metaoption.siblingoptions = "100";
500 }
501}
502
503
504
505static void parse_meta (text_t &meta, metadata_t &metaoption,
506 text_tset &metadata, bool &getParents) {
507
508 // Look for the various format statement modifiers
509 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
510 // is irrelevant because this is not stored in metaoption.metacommand anyway
511 bool keep_trying = true;
512 while (keep_trying)
513 {
514 keep_trying = false;
515
516 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
517 {
518 metaoption.metacommand |= mCgiSafe;
519 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
520 keep_trying = true;
521 }
522 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
523 {
524 metaoption.metacommand |= mSpecial;
525 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
526 keep_trying = true;
527 }
528
529 // New "truncate" special formatting option
530 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
531 {
532 metaoption.metacommand |= mTruncate;
533 get_truncate_options (meta, metaoption);
534 keep_trying = true;
535 }
536 // New "htmlsafe" special formatting option
537 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
538 {
539 metaoption.metacommand |= mHTMLSafe;
540 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
541 keep_trying = true;
542 }
543 // New "xmlsafe" special formatting option
544 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
545 {
546 metaoption.metacommand |= mXMLSafe;
547 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
548 keep_trying = true;
549 }
550 }
551
552 bool had_parent_or_child = true;
553 bool prev_was_parent = false;
554 bool prev_was_child = false;
555
556 while (had_parent_or_child) {
557 if (meta.size() > 7
558 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
559
560 // clear out sibling and child (cmd and options)
561 metaoption.metacommand &= ~(mChild|mSibling);
562 metaoption.childoptions.clear();
563 metaoption.siblingoptions.clear();
564
565 getParents = true;
566 metaoption.metacommand |= mParent;
567 get_parent_options (meta, metaoption);
568
569 if (prev_was_parent) {
570 metaoption.pre_tree_traverse += ".pr";
571 }
572 else if (prev_was_child) {
573 metaoption.pre_tree_traverse += ".fc";
574 }
575
576 prev_was_parent = true;
577 prev_was_child = false;
578 }
579 else if (meta.size() > 6
580 && (substr (meta.begin(), meta.begin()+5) == "child")) {
581
582 // clear out sibling and parent (cmd and options)
583 metaoption.metacommand &= ~(mParent|mSibling);
584 metaoption.parentoptions.clear();
585 metaoption.siblingoptions.clear();
586
587 metaoption.metacommand |= mChild;
588 get_child_options (meta, metaoption);
589 metadata.insert("contains");
590
591 if (prev_was_parent) {
592 metaoption.pre_tree_traverse += ".pr";
593 }
594 else if (prev_was_child) {
595 metaoption.pre_tree_traverse += ".fc";
596 }
597
598 prev_was_child = true;
599 prev_was_parent = false;
600 }
601 else {
602 prev_was_child = false;
603 prev_was_parent = false;
604 had_parent_or_child = false;
605 }
606 }
607
608 // parent/child can have sibling tacked on end also
609 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
610 metaoption.metacommand |= mSibling;
611 get_sibling_options (meta, metaoption);
612 }
613
614 // check for ex. which may occur in format statements
615 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
616 meta = substr (meta.begin()+3, meta.end());
617 }
618 metadata.insert (meta);
619 metaoption.metaname = meta;
620}
621
622static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
623 if (meta == "collection") {
624 // no qualifiers
625 metaoption.metaname = g_EmptyText;
626 return;
627 }
628 meta = substr (meta.begin()+11, meta.end());
629 metaoption.metaname = meta;
630
631}
632
633static void parse_meta (text_t &meta, format_t *formatlistptr,
634 text_tset &metadata, bool &getParents) {
635
636 // check for ex. which may occur in format statements
637 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
638 meta = substr (meta.begin()+3, meta.end());
639 }
640 if (meta == "link")
641 formatlistptr->command = comLink;
642 else if (meta == "/link")
643 formatlistptr->command = comEndLink;
644
645 else if (meta == "srclink") {
646 formatlistptr->command = comAssocLink;
647 formatlistptr->meta.metaname = "srclink_file";
648 metadata.insert("srclink_file");
649 }
650 else if (meta == "srchref") {
651 formatlistptr->command = comAssocLink;
652 formatlistptr->text = "href";
653 formatlistptr->meta.metaname = "srclink_file";
654 metadata.insert("srclink_file");
655 }
656 else if (meta == "/srclink") {
657 formatlistptr->command = comEndAssocLink;
658 formatlistptr->meta.metaname = "srclink_file";
659 }
660 // and weblink etc
661 else if (meta == "href")
662 formatlistptr->command = comHref;
663
664 else if (meta == "num")
665 formatlistptr->command = comNum;
666
667 else if (meta == "icon")
668 formatlistptr->command = comIcon;
669
670 else if (meta == "Text")
671 formatlistptr->command = comDoc;
672
673 else if (meta == "RelatedDocuments")
674 formatlistptr->command = comRel;
675
676 else if (meta == "highlight")
677 formatlistptr->command = comHighlight;
678
679 else if (meta == "/highlight")
680 formatlistptr->command = comEndHighlight;
681
682 else if (meta == "metadata-spanwrap")
683 formatlistptr->command = comMetadataSpanWrap;
684
685 else if (meta == "/metadata-spanwrap")
686 formatlistptr->command = comEndMetadataSpanWrap;
687
688 else if (meta == "Summary")
689 formatlistptr->command = comSummary;
690
691 else if (meta == "DocImage")
692 formatlistptr->command = comImage;
693
694 else if (meta == "DocTOC")
695 formatlistptr->command = comTOC;
696
697 else if (meta == "DocumentButtonDetach")
698 formatlistptr->command = comDocumentButtonDetach;
699
700 else if (meta == "DocumentButtonHighlight")
701 formatlistptr->command = comDocumentButtonHighlight;
702
703 else if (meta == "DocumentButtonExpandContents")
704 formatlistptr->command = comDocumentButtonExpandContents;
705
706 else if (meta == "DocumentButtonExpandText")
707 formatlistptr->command = comDocumentButtonExpandText;
708
709 else if (meta == "DocOID")
710 formatlistptr->command = comOID;
711 else if (meta == "DocTopOID")
712 formatlistptr->command = comTopOID;
713 else if (meta == "DocRank")
714 formatlistptr->command = comRank;
715 else if (meta == "DocTermsFreqTotal")
716 formatlistptr->command = comDocTermsFreqTotal;
717 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
718 formatlistptr->command = comCollection;
719 parse_coll_meta(meta, formatlistptr->meta);
720 }
721 else {
722 formatlistptr->command = comMeta;
723 parse_meta (meta, formatlistptr->meta, metadata, getParents);
724 }
725}
726
727
728static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
729 text_tset &metadata, bool &getParents) {
730
731 text_t text;
732 text_t::const_iterator here = formatstring.begin();
733 text_t::const_iterator end = formatstring.end();
734
735 while (here != end) {
736
737 if (*here == '\\') {
738 ++here;
739 if (here != end) text.push_back (*here);
740
741 } else if (*here == '{') {
742 if (!text.empty()) {
743 formatlistptr->command = comText;
744 formatlistptr->text = text;
745 formatlistptr->nextptr = new format_t();
746 formatlistptr = formatlistptr->nextptr;
747
748 text.clear();
749 }
750 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
751
752 formatlistptr->nextptr = new format_t();
753 formatlistptr = formatlistptr->nextptr;
754 if (here == end) break;
755 }
756 } else if (*here == '[') {
757 if (!text.empty()) {
758 formatlistptr->command = comText;
759 formatlistptr->text = text;
760 formatlistptr->nextptr = new format_t();
761 formatlistptr = formatlistptr->nextptr;
762
763 text.clear();
764 }
765 text_t meta;
766 ++here;
767 while (*here != ']') {
768 if (here == end) return false;
769 meta.push_back (*here);
770 ++here;
771 }
772 parse_meta (meta, formatlistptr, metadata, getParents);
773 formatlistptr->nextptr = new format_t();
774 formatlistptr = formatlistptr->nextptr;
775
776 } else
777 text.push_back (*here);
778
779 if (here != end) ++here;
780 }
781 if (!text.empty()) {
782 formatlistptr->command = comText;
783 formatlistptr->text = text;
784 formatlistptr->nextptr = new format_t();
785 formatlistptr = formatlistptr->nextptr;
786
787 }
788 return true;
789}
790
791
792static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
793 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
794
795 text_t::const_iterator it = findchar (here, end, '}');
796 if (it == end) return false;
797
798 text_t com = substr (here, it);
799 here = findchar (it, end, '{');
800 if (here == end) return false;
801 else ++here;
802
803 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
804 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
805 else return false;
806
807 int commacount = 0;
808 text_t text;
809 while (here != end) {
810
811 if (*here == '\\') {
812 ++here;
813 if (here != end) text.push_back(*here);
814
815 }
816
817 else if (*here == ',' || *here == '}' || *here == '{') {
818
819 if (formatlistptr->command == comOr) {
820 // the {Or}{this, or this, or this, or this} statement
821 format_t *or_ptr;
822
823 // find the next unused orptr
824 if (formatlistptr->orptr == NULL) {
825 formatlistptr->orptr = new format_t();
826 or_ptr = formatlistptr->orptr;
827 } else {
828 or_ptr = formatlistptr->orptr;
829 while (or_ptr->nextptr != NULL)
830 or_ptr = or_ptr->nextptr;
831 or_ptr->nextptr = new format_t();
832 or_ptr = or_ptr->nextptr;
833 }
834
835 if (!text.empty())
836 {
837 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
838 }
839
840 if (*here == '{')
841 {
842 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
843 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
844 // The latter can always be re-written:
845 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
846
847 if (!text.empty()) // already used up allocated format_t
848 {
849 // => allocate new one for detected action
850 or_ptr->nextptr = new format_t();
851 or_ptr = or_ptr->nextptr;
852 }
853 if (!parse_action(++here, end, or_ptr, metadata, getParents))
854 {
855 return false;
856 }
857 }
858 else
859 {
860 if (*here == '}') break;
861 }
862 text.clear();
863
864 }
865
866 // Parse an {If}{decide,do,else} statement
867 else {
868
869 // Read the decision component.
870 if (commacount == 0) {
871 // Decsion can be a metadata element, or a piece of text.
872 // Originally Stefan's code, updated 25/10/2000 by Gordon.
873
874 text_t::const_iterator beginbracket = text.begin();
875 text_t::const_iterator endbracket = (text.end() - 1);
876
877 // Decision is based on a metadata element
878 if ((*beginbracket == '[') && (*endbracket == ']')) {
879 // Ignore the surrounding square brackets
880 text_t meta = substr (beginbracket+1, endbracket);
881 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
882 ++commacount;
883 text.clear();
884 }
885
886 // Decision is a piece of text (probably a macro like _cgiargmode_).
887 else {
888
889 // hunt for any metadata in string, which might be uses in
890 // to test a condition, e.g. [Format] eq 'PDF'
891 format_t* dummyformat = new format_t();
892 // update which metadata fields needed
893 // (not interested in updatng formatlistptr)
894 parse_string (text, dummyformat, metadata, getParents);
895 delete dummyformat;
896
897 formatlistptr->decision.command = dText;
898 formatlistptr->decision.text = text;
899 ++commacount;
900 text.clear();
901 }
902 }
903
904 // Read the "then" and "else" components of the {If} statement.
905 else {
906 format_t** nextlistptr = NULL;
907 if (commacount == 1) {
908 nextlistptr = &formatlistptr->ifptr;
909 } else if (commacount == 2 ) {
910 nextlistptr = &formatlistptr->elseptr;
911 } else {
912 return false;
913 }
914
915 if (!text.empty()) {
916 if (*nextlistptr == NULL) {
917 *nextlistptr = new format_t();
918 } else {
919
920 // skip to the end of any format_t statements already added
921 while ((*nextlistptr)->nextptr != NULL)
922 {
923 nextlistptr = &(*nextlistptr)->nextptr;
924 }
925
926 (*nextlistptr)->nextptr = new format_t();
927 nextlistptr = &(*nextlistptr)->nextptr;
928 }
929
930 if (!parse_string (text, *nextlistptr, metadata, getParents))
931 {
932 return false;
933 }
934 text.clear();
935 }
936
937 if (*here == '{')
938 {
939 if (*nextlistptr == NULL) {
940 *nextlistptr = new format_t();
941 } else {
942 // skip to the end of any format_t statements already added
943 while ((*nextlistptr)->nextptr != NULL)
944 {
945 nextlistptr = &(*nextlistptr)->nextptr;
946 }
947
948 (*nextlistptr)->nextptr = new format_t();
949 nextlistptr = &(*nextlistptr)->nextptr;
950 }
951
952 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
953 {
954 return false;
955 }
956 }
957 else
958 {
959 if (*here == '}') break;
960 ++commacount;
961 }
962 }
963 }
964
965 } else text.push_back(*here);
966
967 if (here != end) ++here;
968 }
969
970 return true;
971}
972
973
974static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
975 const text_t metaname, int metapos=-1)
976{
977
978 text_t tag_type = (metaname == "Text") ? "div" : "span";
979 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
980
981 text_t wrapped_metatext = "<" + tag_type + " ";
982 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
983
984 wrapped_metatext += "docoid=\"" + OID + "\" ";
985 wrapped_metatext += "metaname=\"" + metaname + "\"";
986
987 if (metapos>=0) {
988 text_t metapos_str = metapos;
989 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
990 }
991
992 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
993
994 return wrapped_metatext;
995}
996
997
998
999bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1000 text_tset &metadata, bool &getParents) {
1001
1002 formatlistptr->clear();
1003 getParents = false;
1004
1005 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1006}
1007
1008// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1009// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1010
1011static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1012{
1013 text_t no_ns_metaname = remove_namespace(meta.metaname);
1014 text_t formatted_metatext;
1015 bool first = true;
1016
1017 const int start_i=0;
1018 const int end_i = metainfo.values.size()-1;
1019
1020 if (position == -1) { // all
1021 for (int i=start_i; i<=end_i; ++i) {
1022 if (!first) formatted_metatext += meta.siblingoptions;
1023
1024 text_t fresh_metatext;
1025
1026 if (meta.metacommand & mSpecial) {
1027 // special formatting
1028 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1029 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1030 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1031 }
1032 else fresh_metatext = metainfo.values[i];
1033
1034 // New "truncate" special formatting option
1035 if (meta.metacommand & mTruncate)
1036 {
1037 int truncate_length = meta.siblingoptions.getint();
1038 text_t truncated_value = fresh_metatext;
1039 if (truncated_value.size() > truncate_length)
1040 {
1041 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1042 }
1043 fresh_metatext = truncated_value;
1044 }
1045 // New "xmlsafe" special formatting option
1046 if (meta.metacommand & mXMLSafe)
1047 {
1048 // Make it XML-safe
1049 text_t text_xml_safe = "";
1050 text_t::const_iterator text_iterator = fresh_metatext.begin();
1051 while (text_iterator != fresh_metatext.end())
1052 {
1053 if (*text_iterator == '&') text_xml_safe += "&amp;";
1054 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1055 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1056 else text_xml_safe.push_back(*text_iterator);
1057 text_iterator++;
1058 }
1059 fresh_metatext = text_xml_safe;
1060 }
1061 // New "htmlsafe" special formatting option
1062 if (meta.metacommand & mHTMLSafe)
1063 {
1064 // Make it HTML-safe
1065 text_t text_html_safe = "";
1066 text_t::const_iterator text_iterator = fresh_metatext.begin();
1067 while (text_iterator != fresh_metatext.end())
1068 {
1069 if (*text_iterator == '&') text_html_safe += "&amp;";
1070 else if (*text_iterator == '<') text_html_safe += "&lt;";
1071 else if (*text_iterator == '>') text_html_safe += "&gt;";
1072 else if (*text_iterator == '"') text_html_safe += "&quot;";
1073 else text_html_safe.push_back(*text_iterator);
1074 text_iterator++;
1075 }
1076 fresh_metatext = text_html_safe;
1077 }
1078
1079 if (metadata_spanwrap) {
1080 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
1081 }
1082 formatted_metatext += fresh_metatext;
1083
1084 first = false;
1085
1086 }
1087 } else {
1088 if (position == -2) { // end
1089 position = end_i;
1090 } else if (position < start_i || position > end_i) {
1091 return "";
1092 }
1093
1094 text_t fresh_metatext;
1095 if (meta.metacommand & mSpecial) {
1096
1097 // special formatting
1098 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1099 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1100 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1101 }
1102 else fresh_metatext = metainfo.values[position];
1103
1104 // New "truncate" special formatting option
1105 if (meta.metacommand & mTruncate)
1106 {
1107 int truncate_length = meta.siblingoptions.getint();
1108 text_t truncated_value = fresh_metatext;
1109 if (truncated_value.size() > truncate_length)
1110 {
1111 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1112 }
1113 fresh_metatext = truncated_value;
1114 }
1115 // New "xmlsafe" special formatting option
1116 if (meta.metacommand & mXMLSafe)
1117 {
1118 // Make it XML-safe
1119 text_t text_xml_safe = "";
1120 text_t::const_iterator text_iterator = fresh_metatext.begin();
1121 while (text_iterator != fresh_metatext.end())
1122 {
1123 if (*text_iterator == '&') text_xml_safe += "&amp;";
1124 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1125 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1126 else text_xml_safe.push_back(*text_iterator);
1127 text_iterator++;
1128 }
1129 fresh_metatext = text_xml_safe;
1130 }
1131 // New "htmlsafe" special formatting option
1132 if (meta.metacommand & mHTMLSafe)
1133 {
1134 // Make it HTML-safe
1135 text_t text_html_safe = "";
1136 text_t::const_iterator text_iterator = fresh_metatext.begin();
1137 while (text_iterator != fresh_metatext.end())
1138 {
1139 if (*text_iterator == '&') text_html_safe += "&amp;";
1140 else if (*text_iterator == '<') text_html_safe += "&lt;";
1141 else if (*text_iterator == '>') text_html_safe += "&gt;";
1142 else if (*text_iterator == '"') text_html_safe += "&quot;";
1143 else text_html_safe.push_back(*text_iterator);
1144 text_iterator++;
1145 }
1146 fresh_metatext = text_html_safe;
1147 }
1148
1149 if (metadata_spanwrap) {
1150 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
1151 }
1152
1153 formatted_metatext += fresh_metatext;
1154 }
1155
1156 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1157 else return formatted_metatext;
1158}
1159
1160static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1161{
1162
1163 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1164
1165 switch (meta.mqualifier.parent) {
1166 case pNone:
1167 return "Nothing!!";
1168 break;
1169
1170 case pImmediate:
1171 if (parent != NULL) {
1172 text_t parent_oid = get_parent(docinfo.OID);
1173 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1174 }
1175 break;
1176
1177 case pTop:
1178 if (parent != NULL) {
1179 text_t parent_oid = get_parent(docinfo.OID);
1180
1181 while (parent->parent != NULL) {
1182 parent = parent->parent;
1183 parent_oid = get_parent(parent_oid);
1184 }
1185 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1186 }
1187 break;
1188
1189 case pAll:
1190 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1191 if (parent != NULL) {
1192 text_t parent_oid = get_parent(docinfo.OID);
1193
1194 text_tarray tmparray;
1195 while (parent != NULL) {
1196 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1197 parent = parent->parent;
1198 parent_oid = get_parent(parent_oid);
1199
1200 }
1201 // now join them up - use teh parent separator
1202 bool first = true;
1203 text_t tmp;
1204 text_tarray::reverse_iterator here = tmparray.rbegin();
1205 text_tarray::reverse_iterator end = tmparray.rend();
1206 while (here != end) {
1207 if (!first) tmp += meta.parentoptions;
1208 tmp += *here;
1209 first = false;
1210 ++here;
1211 }
1212 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1213 else return tmp;
1214 }
1215 }
1216 return "";
1217
1218}
1219
1220static text_t get_child_meta (const text_t& collection,
1221 recptproto* collectproto,
1222 ResultDocInfo_t &docinfo, displayclass &disp,
1223 const metadata_t &meta, text_tmap &options,
1224 ostream& logout, int siblings_values)
1225{
1226 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1227
1228 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1229 const text_t& child_metaname = meta.metaname;
1230 const text_t& child_field = meta.childoptions;
1231 text_tset child_metadata;
1232 child_metadata.insert(child_metaname);
1233
1234 FilterResponse_t child_response;
1235 if (meta.mqualifier.child == cNum) {
1236 // just one child
1237 //get the information associated with the metadata for child doc
1238 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1239 child_metadata, false, collectproto, child_response,
1240 logout)) return ""; // invalid child number
1241
1242 if (child_response.docInfo.empty()) return false; // no info for the child
1243
1244 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1245 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1246
1247 text_t child_metavalue
1248 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1249 return expand_metadata(child_metavalue,collection,collectproto,
1250 child_docinfo,disp,options,logout);
1251 }
1252
1253
1254 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1255
1256
1257 if (!pre_tree_trav.empty()) {
1258 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1259 FilterResponse_t trav_response;
1260
1261 text_tset trav_metadata;
1262 trav_metadata.insert("contains");
1263
1264 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1265 trav_metadata, false, collectproto, trav_response,
1266 logout)) return ""; // invalid pre_tree_trav
1267
1268 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1269
1270 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1271
1272 // use this for rest of routine
1273 docinfo = trav_docinfo;
1274 }
1275
1276 // we need to get all children
1277 text_t result = "";
1278 text_tarray children;
1279 text_t contains = docinfo.metadata["contains"].values[0];
1280 splitchar (contains.begin(), contains.end(), ';', children);
1281 text_tarray::const_iterator here = children.begin();
1282 text_tarray::const_iterator end = children.end();
1283 bool first = true;
1284 while (here !=end) {
1285 text_t oid = *here;
1286 here++;
1287 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1288
1289 //get the information associated with the metadata for child doc
1290 if (!get_info (oid, collection, "", child_metadata,
1291 false, collectproto, child_response, logout) ||
1292 child_response.docInfo.empty()) {
1293 first = false;
1294 continue;
1295 }
1296
1297
1298 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1299 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1300
1301 text_t child_metavalue
1302 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1303
1304
1305 if (!first) result += child_field;
1306 first = false;
1307 // need to do this here cos otherwise we are in the wrong document
1308 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1309 child_docinfo,disp,options,logout);
1310
1311 result += em;
1312 }
1313 return result;
1314
1315}
1316
1317static text_t get_meta (const text_t& collection, recptproto* collectproto,
1318 ResultDocInfo_t &docinfo, displayclass &disp,
1319 const metadata_t &meta, text_tmap &options,
1320 ostream& logout) {
1321
1322 // make sure we have the requested metadata
1323 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1324 if (it == docinfo.metadata.end()) return "";
1325
1326 int siblings_values = 0; // default is no siblings, just the first metadata available
1327 if (meta.metacommand & mSibling) {
1328 if (meta.mqualifier.sibling == sAll) {
1329 siblings_values = -1; //all
1330 } else if (meta.mqualifier.sibling == sNum) {
1331 siblings_values = meta.siblingoptions.getint();
1332 }
1333 }
1334 if (meta.metacommand & mParent) {
1335 return get_parent_meta(docinfo,meta,siblings_values);
1336 }
1337
1338 else if (meta.metacommand & mChild) {
1339 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1340 options,logout, siblings_values);
1341 }
1342 else if (meta.metacommand & mSibling) { // only siblings
1343 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1344 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1345 }
1346 else {
1347
1348 // straightforward metadata request (nothing fancy)
1349
1350 text_t classifier_metaname = docinfo.classifier_metadata_type;
1351 int metaname_index
1352 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1353 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1354 }
1355
1356 return "";
1357}
1358
1359static text_t get_or (const text_t& collection, recptproto* collectproto,
1360 ResultDocInfo_t &docinfo, displayclass &disp,
1361 format_t *orptr, text_tmap &options,
1362 ostream& logout) {
1363
1364 while (orptr != NULL) {
1365
1366 if (metadata_spanwrap) {
1367 // need to be a bit more careful about this
1368 // => test for it *without* spanwrap, and if defined, then
1369 // got back and generate it again, this time with spanwrap on
1370
1371 metadata_spanwrap = false;
1372 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1373 options, logout);
1374 metadata_spanwrap = true;
1375 if (!test_tmp.empty()) {
1376
1377 return format_string (collection,collectproto,docinfo, disp, orptr,
1378 options, logout);
1379 }
1380 }
1381 else {
1382 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1383 options, logout);
1384 if (!tmp.empty()) return tmp;
1385 }
1386
1387 orptr = orptr->nextptr;
1388 }
1389 return "";
1390}
1391
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1397
1398static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1399{
1400 int pos = start_pos;
1401 while (pos<outstring.size()) {
1402 if (!char_is_whitespace(outstring[pos])) {
1403 break;
1404 }
1405 ++pos;
1406 }
1407
1408 return pos;
1409}
1410
1411static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1412{
1413 int pos = start_pos;
1414 while (pos>=0) {
1415 if (!char_is_whitespace(outstring[pos])) {
1416 break;
1417 }
1418 --pos;
1419 }
1420
1421 return pos;
1422}
1423
1424static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1425{
1426 int pos = start_pos;
1427 while (pos>=0) {
1428 if (char_is_whitespace(outstring[pos])) {
1429 break;
1430 }
1431 --pos;
1432 }
1433
1434 return pos;
1435}
1436
1437
1438static int rscan_for(const text_t& outstring, const int start_pos,
1439 const char find_c)
1440{
1441 int pos = start_pos;
1442 while (pos>=0) {
1443 char c = outstring[pos];
1444 if (outstring[pos] == find_c) {
1445 break;
1446 }
1447 --pos;
1448 }
1449
1450 return pos;
1451}
1452
1453text_t extract_substr(const text_t& outstring, const int start_pos,
1454 const int end_pos)
1455{
1456 text_t extracted_str;
1457 extracted_str.clear();
1458
1459 for (int pos=start_pos; pos<=end_pos; ++pos) {
1460 extracted_str.push_back(outstring[pos]);
1461 }
1462
1463 return extracted_str;
1464}
1465
1466
1467static text_t expand_potential_metadata(const text_t& collection,
1468 recptproto* collectproto,
1469 ResultDocInfo_t &docinfo,
1470 displayclass &disp,
1471 const text_t& intext,
1472 text_tmap &options,
1473 ostream& logout)
1474{
1475 text_t outtext;
1476
1477 // decide if dealing with metadata or text
1478
1479 text_t::const_iterator beginbracket = intext.begin();
1480 text_t::const_iterator endbracket = (intext.end() - 1);
1481
1482 // Decision is based on a metadata element
1483 if ((*beginbracket == '[') && (*endbracket == ']')) {
1484 // Ignore the surrounding square brackets
1485 text_t meta_text = substr (beginbracket+1, endbracket);
1486
1487 if (meta_text == "Text") {
1488 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1489 }
1490 else {
1491
1492 text_tset metadata;
1493 bool getParents =false;
1494 metadata_t meta;
1495
1496 parse_meta (meta_text, meta, metadata, getParents);
1497 outtext
1498 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1499 }
1500
1501 }
1502 else {
1503 outtext = intext;
1504 }
1505
1506 return outtext;
1507}
1508
1509
1510
1511
1512static bool uses_expression(const text_t& collection, recptproto* collectproto,
1513 ResultDocInfo_t &docinfo,
1514 displayclass &disp,
1515 const text_t& outstring, text_t& lhs_expr,
1516 text_t& op_expr, text_t& rhs_expr,
1517 text_tmap &options,
1518 ostream& logout)
1519{
1520 // Note: the string may not be of the form: str1 op str2, however
1521 // to deterine this we have to process it on the assumption it is,
1522 // and if at any point an 'erroneous' value is encountered, return
1523 // false and let something else have a go at evaluating it
1524
1525 // Starting at the end of the string and working backwards ..
1526
1527 const int outstring_len = outstring.size();
1528
1529 // skip over white space
1530 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1531
1532 if (rhs_end<=0) {
1533 // no meaningful text or (rhs_end==0) no room for operator
1534 return false;
1535 }
1536
1537 // check for ' or " and then scan over token
1538 const char potential_quote = outstring[rhs_end];
1539 int rhs_start=rhs_end;
1540 bool quoted = false;
1541
1542 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1543 --rhs_end;
1544 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1545 quoted = true;
1546 }
1547 else {
1548 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1549 }
1550
1551 if ((rhs_end-rhs_start)<0) {
1552 // no meaningful rhs expression
1553 return false;
1554 }
1555
1556 // form rhs_expr
1557 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1558
1559 // skip over white space
1560 const int to_whitespace = (quoted) ? 2 : 1;
1561
1562 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1563 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1564
1565 if ((op_end<0) && (op_start<0)) {
1566 // no meaningful expression operator
1567 return false;
1568 }
1569
1570 if (op_end-op_start<0) {
1571 // no meaningful expression operator
1572 return false;
1573 }
1574
1575 op_expr = extract_substr(outstring,op_start,op_end);
1576
1577
1578 // check for operator
1579 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1580 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1581
1582 // not a valid operator
1583 return false;
1584 }
1585
1586 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1587 if (lhs_end<0) {
1588 // no meaningful lhs expression
1589 return false;
1590 }
1591
1592 int lhs_start = scan_over_whitespace(outstring,0);
1593
1594 // form lhs_expr from remainder of string
1595 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1596
1597 // Now we know we have a valid expression, look up any
1598 // metadata terms
1599
1600 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1601 disp,rhs_expr,options,logout);
1602 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1603 disp,lhs_expr,options,logout);
1604
1605 return true;
1606}
1607
1608static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1609 const text_t& rhs_expr, ostream& logout)
1610{
1611 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1612 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1613 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1614 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1615 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1616 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1617 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1618 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1619 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1620 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1621 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1622 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1623 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1624 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1625 else {
1626 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1627 }
1628
1629 return false;
1630}
1631
1632
1633static text_t get_if (const text_t& collection, recptproto* collectproto,
1634 ResultDocInfo_t &docinfo, displayclass &disp,
1635 const decision_t &decision,
1636 format_t *ifptr, format_t *elseptr,
1637 text_tmap &options, ostream& logout)
1638{
1639 // If the decision component is a metadata element, then evaluate it
1640 // to see whether we output the "then" or the "else" clause
1641 if (decision.command == dMeta) {
1642
1643 bool store_metadata_spanwrap = metadata_spanwrap;
1644 metadata_spanwrap = 0;
1645
1646 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
1647 bool metadata_exists
1648 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1649 logout) != "");
1650
1651 metadata_spanwrap = store_metadata_spanwrap;
1652
1653 if (metadata_exists) {
1654 if (ifptr != NULL)
1655 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1656 options, logout);
1657 }
1658 else {
1659 if (elseptr != NULL)
1660 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1661 options, logout);
1662 }
1663 }
1664
1665 // If the decision component is text, then evaluate it (it is probably a
1666 // macro like _cgiargmode_) to decide what to output.
1667 else if (decision.command == dText) {
1668
1669 text_t outstring;
1670 disp.expandstring (decision.text, outstring);
1671
1672 // Check for if expression in form: str1 op str2
1673 // (such as [x] eq "y")
1674 text_t lhs_expr, op_expr, rhs_expr;
1675 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1676 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1677 if (ifptr != NULL) {
1678 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1679 options, logout);
1680 }
1681 else {
1682 return "";
1683 }
1684 } else {
1685 if (elseptr != NULL) {
1686 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1687 options, logout);
1688 }
1689 else {
1690 return "";
1691 }
1692 }
1693 }
1694
1695
1696 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1697 // a cgi argument macro that has not been set, it evaluates to itself.
1698 // Therefore, were have to say that a piece of text evalautes true if
1699 // it is non-empty and if it is a cgi argument evaulating to itself.
1700
1701 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1702 if (ifptr != NULL)
1703 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1704 options, logout);
1705 } else {
1706 if (elseptr != NULL)
1707 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1708 options, logout);
1709 }
1710 }
1711
1712 return "";
1713}
1714
1715bool includes_metadata(const text_t& text)
1716{
1717 text_t::const_iterator here = text.begin();
1718 text_t::const_iterator end = text.end();
1719 while (here != end) {
1720 if (*here == '[') return true;
1721 ++here;
1722 }
1723
1724 return false;
1725}
1726
1727static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1728 recptproto* collectproto,
1729 ResultDocInfo_t &docinfo,
1730 displayclass &disp, text_tmap &options,
1731 ostream &logout) {
1732
1733 if (includes_metadata(metavalue)) {
1734
1735 // text has embedded metadata in it => expand it
1736 FilterRequest_t request;
1737 FilterResponse_t response;
1738
1739 request.getParents = false;
1740
1741 format_t *expanded_formatlistptr = new format_t();
1742 parse_formatstring (metavalue, expanded_formatlistptr,
1743 request.fields, request.getParents);
1744
1745 // retrieve metadata
1746 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1747 collectproto, response, logout);
1748
1749 if (!response.docInfo.empty()) {
1750
1751 text_t expanded_metavalue
1752 = get_formatted_string(collection, collectproto,
1753 response.docInfo[0], disp, expanded_formatlistptr,
1754 options, logout);
1755
1756 return expanded_metavalue;
1757 }
1758 else {
1759 return metavalue;
1760 }
1761 }
1762 else {
1763
1764 return metavalue;
1765 }
1766}
1767
1768text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1769 displayclass &disp,
1770 text_t meta_name, ostream& logout) {
1771
1772 ColInfoResponse_t collectinfo;
1773 comerror_t err;
1774 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1775 text_t meta_value = "";
1776 text_t lang;
1777 disp.expandstring("_cgiargl_",lang);
1778 if (lang.empty()) {
1779 lang = "en";
1780 }
1781
1782 if (err == noError) {
1783 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1784 }
1785 return meta_value;
1786
1787
1788}
1789text_t format_string (const text_t& collection, recptproto* collectproto,
1790 ResultDocInfo_t &docinfo, displayclass &disp,
1791 format_t *formatlistptr, text_tmap &options,
1792 ostream& logout) {
1793
1794 if (formatlistptr == NULL) return "";
1795
1796 switch (formatlistptr->command) {
1797 case comOID:
1798 return docinfo.OID;
1799 case comTopOID:
1800 {
1801 text_t top_id;
1802 get_top(docinfo.OID, top_id);
1803 return top_id;
1804 }
1805 case comRank:
1806 return text_t(docinfo.ranking);
1807 case comText:
1808 return formatlistptr->text;
1809 case comLink:
1810 return options["link"];
1811 case comEndLink:
1812 {
1813 if (options["link"].empty()) return "";
1814 else return "</a>";
1815 }
1816 case comHref:
1817 return get_href(options["link"]);
1818 case comIcon:
1819 return options["icon"];
1820 case comNum:
1821 return docinfo.result_num;
1822 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1823 return get_related_docs(collection, collectproto, docinfo, logout);
1824
1825 case comSummary:
1826 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1827 case comAssocLink:
1828 {
1829 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1830 if (!link_filename.empty()) {
1831 text_t href= expand_metadata(options["assocfilepath"]+link_filename, collection, collectproto, docinfo, disp, options, logout);
1832 if (formatlistptr->text == "href") {
1833 return href;
1834 }
1835 return "<a href=\""+ href + "\">";
1836 }
1837 return "";
1838 }
1839 case comEndAssocLink:
1840 {
1841 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1842 if (!link_filename.empty()) {
1843 return "</a>";
1844 }
1845 return "";
1846 }
1847 case comMeta:
1848 {
1849 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1850 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1851 }
1852
1853 case comDoc:
1854 return format_text(collection, collectproto, docinfo, disp, options, logout);
1855
1856 case comImage:
1857 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1858 case comTOC:
1859 return options["DocTOC"];
1860 case comDocumentButtonDetach:
1861 return options["DocumentButtonDetach"];
1862 case comDocumentButtonHighlight:
1863 return options["DocumentButtonHighlight"];
1864 case comDocumentButtonExpandContents:
1865 return options["DocumentButtonExpandContents"];
1866 case comDocumentButtonExpandText:
1867 return options["DocumentButtonExpandText"];
1868 case comHighlight:
1869 if (options["highlight"] == "1") return "<b>";
1870 break;
1871 case comEndHighlight:
1872 if (options["highlight"] == "1") return "</b>";
1873 break;
1874 case comMetadataSpanWrap:
1875 metadata_spanwrap=true; return "";
1876 break;
1877 case comEndMetadataSpanWrap:
1878 metadata_spanwrap=false; return "";
1879 break;
1880 case comIf:
1881 return get_if (collection, collectproto, docinfo, disp,
1882 formatlistptr->decision, formatlistptr->ifptr,
1883 formatlistptr->elseptr, options, logout);
1884 case comOr:
1885 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1886 options, logout);
1887 case comDocTermsFreqTotal:
1888 return docinfo.num_terms_matched;
1889 case comCollection:
1890 if (formatlistptr->meta.metaname == g_EmptyText) {
1891 return collection;
1892 }
1893 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1894
1895 }
1896 return "";
1897}
1898
1899text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1900 ResultDocInfo_t &docinfo, displayclass &disp,
1901 format_t *formatlistptr, text_tmap &options,
1902 ostream& logout) {
1903
1904 text_t ft;
1905 while (formatlistptr != NULL)
1906 {
1907 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1908 options, logout);
1909 formatlistptr = formatlistptr->nextptr;
1910 }
1911
1912 return ft;
1913}
1914
1915
// The document text is only preloaded (into options["text"]) in
// DocumentAction, but it may also be wanted elsewhere, e.g. in a query
// result. So, as format_summary does, fetch the text here when it has not
// been preloaded. This is probably quite expensive.
1917text_t format_text (const text_t& collection, recptproto* collectproto,
1918 ResultDocInfo_t &docinfo, displayclass &disp,
1919 text_tmap &options, ostream& logout)
1920{
1921 text_t text;
1922
1923 if(!options["text"].empty()) {
1924 text = options["text"];
1925 }
1926 else {
1927 // get document text here
1928 DocumentRequest_t docrequest;
1929 DocumentResponse_t docresponse;
1930 comerror_t err;
1931 docrequest.OID = docinfo.OID;
1932 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1933 text = docresponse.doc;
1934 }
1935
1936 if (metadata_spanwrap) {
1937 text = spanwrap_metatext(text,docinfo.OID,"Text");
1938 }
1939
1940 return text;
1941}
1942
1943/* FUNCTION NAME: format_summary
1944 * DESC: this is invoked when a [Summary] special metadata is processed.
1945 * RETURNS: a query-biased summary for the document */
1946
1947text_t format_summary (const text_t& collection, recptproto* collectproto,
1948 ResultDocInfo_t &docinfo, displayclass &disp,
1949 text_tmap &options, ostream& logout) {
1950
1951 // GRB: added code here to ensure that the cstr (and other collections)
1952 // uses the document metadata item Summary, rather than compressing
1953 // the text of the document, processed via the methods in
1954 // summarise.cpp
1955
1956 text_t summary;
1957
1958 if (docinfo.metadata.count("Summary") > 0 &&
1959 docinfo.metadata["Summary"].values.size() > 0) {
1960 summary = docinfo.metadata["Summary"].values[0];
1961 }
1962 else {
1963
1964 text_t textToSummarise, query;
1965
1966 if(options["text"].empty()) { // get document text
1967 DocumentRequest_t docrequest;
1968 DocumentResponse_t docresponse;
1969 comerror_t err;
1970 docrequest.OID = docinfo.OID;
1971 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1972 textToSummarise = docresponse.doc;
1973 }
1974 else {
1975 // in practice, this would not happen, because text is only
1976 // loaded with the [Text] command
1977 textToSummarise = options["text"];
1978 }
1979
1980 disp.expandstring("_cgiargq_",query);
1981 summary = summarise(textToSummarise,query,80);
1982 }
1983
1984 if (metadata_spanwrap) {
1985 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1986 }
1987
1988 return summary;
1989}
Note: See TracBrowser for help on using the repository browser.