source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 24306

Last change on this file since 24306 was 24306, checked in by ak19, 10 years ago

Further changes to the handling of the "ex." prefix on embedded metadata names (a name may also carry an additional metadata set as a namespace qualifier). The C++ code now removes the "ex." prefix only if the metadata name contains no other metadata set qualifiers.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 61.6 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
// File-scope state for wrapping metadata values in an HTML element.
// metadata_wrap_type is read by wrap_metatext() as the tag name to emit.
// NOTE(review): metadata_wrap is presumably the on/off switch for that
// wrapping; the code that toggles it is not in this part of the file -- confirm.
35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
// Forward declarations for file-local functions. parse_action needs one here
// because parse_string calls it and it in turn calls parse_string.
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41 ResultDocInfo_t &docinfo, displayclass &disp,
42 format_t *formatlistptr, text_tmap &options,
43 ostream& logout);
44
// Parses one {If}{...} or {Or}{...} action starting just after the opening '{'.
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46 format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49 ResultDocInfo_t &docinfo, displayclass &disp,
50 text_tmap &options, ostream& logout);
51static text_t format_text (const text_t& collection, recptproto* collectproto,
52 ResultDocInfo_t &docinfo, displayclass &disp,
53 text_tmap &options, ostream& logout);
54
55static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
56 recptproto* collectproto, ResultDocInfo_t &docinfo,
57 displayclass &disp, text_tmap &options,
58 ostream &logout);
59
60
61void metadata_t::clear() {
62 metaname.clear();
63 metacommand = mNone;
64 mqualifier.parent = pNone;
65 mqualifier.sibling = sNone;
66 mqualifier.child = cNone;
67 pre_tree_traverse.clear();
68 parentoptions.clear();
69 siblingoptions.clear();
70 childoptions.clear();
71}
72
73void decision_t::clear() {
74 command = dMeta;
75 meta.clear();
76 text.clear();
77}
78
79format_t::~format_t()
80{
81 if (nextptr != NULL) delete nextptr;
82 if (ifptr != NULL) delete ifptr;
83 if (elseptr != NULL) delete elseptr;
84 if (orptr != NULL) delete orptr;
85}
86
87void format_t::clear() {
88 command = comText;
89 decision.clear();
90 text.clear();
91 meta.clear();
92 nextptr = NULL;
93 ifptr = NULL;
94 elseptr = NULL;
95 orptr = NULL;
96}
97
98void formatinfo_t::clear() {
99 DocumentImages = false;
100 DocumentTitles = true;
101 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
102 DocumentContents = true;
103 DocumentArrowsBottom = true;
104 DocumentArrowsTop = false;
105 DocumentSearchResultLinks = false;
106 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
107 // DocumentButtons.push_back ("Expand Text");
108 // DocumentButtons.push_back ("Expand Contents");
109 DocumentButtons.push_back ("Detach");
110 DocumentButtons.push_back ("Highlight");
111 RelatedDocuments = "";
112 DocumentText = "[Text]";
113 formatstrings.erase (formatstrings.begin(), formatstrings.end());
114 DocumentUseHTML = false;
115 AllowExtendedOptions = false;
116}
117
118// simply checks to see if formatstring begins with a <td> tag
119bool is_table_content (const text_t &formatstring) {
120 text_t::const_iterator here = formatstring.begin();
121 text_t::const_iterator end = formatstring.end();
122
123 while (here != end) {
124 if (*here != ' ') {
125 if ((*here == '<') && ((here+3) < end)) {
126 if ((*(here+1) == 't' || *(here+1) == 'T') &&
127 (*(here+2) == 'd' || *(here+2) == 'D') &&
128 (*(here+3) == '>' || *(here+3) == ' '))
129 return true;
130 } else return false;
131 }
132 ++here;
133 }
134 return false;
135}
136
137bool is_table_content (const format_t *formatlistptr) {
138
139 if (formatlistptr == NULL) return false;
140
141 if (formatlistptr->command == comText)
142 return is_table_content (formatlistptr->text);
143
144 return false;
145}
146
147// returns false if key isn't in formatstringmap
148bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
149 text_t &formatstring) {
150
151 formatstring.clear();
152 text_tmap::const_iterator it = formatstringmap.find(key);
153 if (it == formatstringmap.end()) return false;
154 formatstring = (*it).second;
155 return true;
156}
157
158// tries to find "key1key2" then "key1" then "key2"
159bool get_formatstring (const text_t &key1, const text_t &key2,
160 const text_tmap &formatstringmap,
161 text_t &formatstring) {
162
163 formatstring.clear();
164 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 it = formatstringmap.find(key1);
170 if (it != formatstringmap.end()) {
171 formatstring = (*it).second;
172 return true;
173 }
174 it = formatstringmap.find(key2);
175 if (it != formatstringmap.end()) {
176 formatstring = (*it).second;
177 return true;
178 }
179 return false;
180}
181
182
183text_t remove_namespace(const text_t &meta_name) {
184 text_t::const_iterator end = meta_name.end();
185 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
186 if (it != end) {
187 return substr(it+1, end);
188 }
189
190 return meta_name;
191
192}
193// returns a date of form _format:date_(year, month, day)
194// input is date of type yyyy-?mm-?dd
195// at least the year must be present in date
196text_t format_date (const text_t &date) {
197
198 if (date.size() < 4) return "";
199
200 text_t::const_iterator datebegin = date.begin();
201
202 text_t year = substr (datebegin, datebegin+4);
203 int chars_seen_so_far = 4;
204 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
205
206 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
208
209 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210 int imonth = month.getint();
211 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
212
213 chars_seen_so_far += 2;
214 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
215
216 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
217 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
218
219 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
220 if (day[0] == '0') day = substr (day.begin()+1, day.end());
221 int iday = day.getint();
222 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
223
224 return "_format:date_("+year+","+month+","+day+")";
225}
226
227// converts an iso639 language code to its English equivalent
228// should we be checking that the macro exists??
229text_t iso639 (const text_t &langcode) {
230 if (langcode.empty()) return "";
231 return "_iso639:iso639"+langcode+"_";
232}
233
234
235text_t get_href (const text_t &link) {
236
237 text_t href;
238
239 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
240 text_t::const_iterator end = link.end();
241 if (here == end) return g_EmptyText;
242
243 ++here;
244 while (here != end) {
245 if (*here == '"') break;
246 href.push_back(*here);
247 ++here;
248 }
249
250 return href;
251}
252
253//this function gets the information associated with the relation
254//metadata for the document associated with 'docinfo'. This relation
255//metadata consists of a line of pairs containing 'collection, document OID'
256//(this is the OID of the document related to the current document, and
257//the collection the related document belongs to). For each of these pairs
258//the title metadata is obtained and then an html link between the title
259//of the related doc and the document's position (the document will be
260//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
261//(where collection is the related documents collection, and OID is the
262//related documents OID). A list of these html links are made for as many
263//related documents as there are. This list is then returned. If there are
264//no related documents available for the current document then the string
265//'.. no related documents .. ' is returned.
266text_t get_related_docs(const text_t& collection, recptproto* collectproto,
267 ResultDocInfo_t &docinfo, ostream& logout){
268
269 text_tset metadata;
270
271 //insert the metadata we wish to collect
272 metadata.insert("dc.Relation");
273 metadata.insert("Title");
274 metadata.insert("Subject"); //for emails, where title data doesn't apply
275
276 FilterResponse_t response;
277 text_t relation = ""; //string for displaying relation metadata
278 text_t relationTitle = ""; //the related documents Title (or subject)
279 text_t relationOID = ""; //the related documents OID
280
281 //get the information associated with the metadata for current doc
282 if (get_info (docinfo.OID, collection, "", metadata,
283 false, collectproto, response, logout)) {
284
285 //if the relation metadata exists, store for displaying
286 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
287 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
288
289 //split relation data into pairs of collectionname,ID number
290 text_tarray relationpairs;
291 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
292
293 text_tarray::const_iterator currDoc = relationpairs.begin();
294 text_tarray::const_iterator lastDoc = relationpairs.end();
295
296 //iterate through the pairs to split and display
297 while(currDoc != lastDoc){
298
299 //split pairs into collectionname and ID
300 text_tarray relationdata;
301 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
302
303 //get first element in the array (collection)
304 text_tarray::const_iterator doc_data = relationdata.begin();
305 text_t document_collection = *doc_data;
306 ++doc_data; //increment to get next item in array (oid)
307 text_t document_OID = *doc_data;
308
309 //create html link to related document
310 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
311 relation += "&amp;cl=search&amp;d=" + document_OID;
312
313 //get the information associated with the metadata for related doc
314 if (get_info (document_OID, document_collection, "", metadata,
315 false, collectproto, response, logout)) {
316
317 //if title metadata doesn't exist, collect subject metadata
318 //if that doesn't exist, just call it 'related document'
319 if (!response.docInfo[0].metadata["Title"].values[0].empty())
320 relationTitle = response.docInfo[0].metadata["Title"].values[0];
321 else if (!response.docInfo[0].metadata["Subject"].values.empty())
322 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
323 else relationTitle = "RELATED DOCUMENT";
324
325 }
326
327 //link the related document's title to its page
328 relation += "\">" + relationTitle + "</a>";
329 relation += " (" + document_collection + ")<br>";
330
331 ++currDoc;
332 }
333 }
334
335 }
336
337 if(relation.empty()) //no relation data for documnet
338 relation = ".. no related documents .. ";
339
340 return relation;
341}
342
343
344
345static void get_parent_options (text_t &instring, metadata_t &metaoption) {
346
347 assert (instring.size() > 7);
348 if (instring.size() <= 7) return;
349
350 text_t meta, com, op;
351 bool inbraces = false;
352 bool inquotes = false;
353 bool foundcolon = false;
354 text_t::const_iterator here = instring.begin()+6;
355 text_t::const_iterator end = instring.end();
356 while (here != end) {
357 if (foundcolon) meta.push_back (*here);
358 else if (*here == '(') inbraces = true;
359 else if (*here == ')') inbraces = false;
360 else if (*here == '\'' && !inquotes) inquotes = true;
361 else if (*here == '\'' && inquotes) inquotes = false;
362 else if (*here == ':' && !inbraces) foundcolon = true;
363 else if (inquotes) op.push_back (*here);
364 else com.push_back (*here);
365 ++here;
366 }
367
368 instring = meta;
369 if (com.empty())
370 metaoption.mqualifier.parent = pImmediate;
371 else if (com == "Top")
372 metaoption.mqualifier.parent = pTop;
373 else if (com == "All") {
374 metaoption.mqualifier.parent = pAll;
375 metaoption.parentoptions = op;
376 }
377}
378
379
380static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
381
382 assert (instring.size() > 8);
383 if (instring.size() <= 8) return;
384 text_t meta, com, op;
385 bool inbraces = false;
386 bool inquotes = false;
387 bool foundcolon = false;
388 text_t::const_iterator here = instring.begin()+7;
389 text_t::const_iterator end = instring.end();
390 while (here != end) {
391 if (foundcolon) meta.push_back (*here);
392 else if (*here == '(') inbraces = true;
393 else if (*here == ')') inbraces = false;
394 else if (*here == '\'' && !inquotes) inquotes = true;
395 else if (*here == '\'' && inquotes) inquotes = false;
396 else if (*here == ':' && !inbraces) foundcolon = true;
397 else if (inquotes) op.push_back (*here);
398 else com.push_back (*here);
399 ++here;
400 }
401
402 instring = meta;
403 metaoption.siblingoptions.clear();
404
405 if (com.empty()) {
406 metaoption.mqualifier.sibling = sAll;
407 metaoption.siblingoptions = " ";
408 }
409 else if (com == "first") {
410 metaoption.mqualifier.sibling = sNum;
411 metaoption.siblingoptions = "0";
412 }
413 else if (com == "last") {
414 metaoption.mqualifier.sibling = sNum;
415 metaoption.siblingoptions = "-2"; // == last
416 }
417 else if (com.getint()>0) {
418 metaoption.mqualifier.sibling = sNum;
419 int pos = com.getint()-1;
420 metaoption.siblingoptions +=pos;
421 }
422 else {
423 metaoption.mqualifier.sibling = sAll;
424 metaoption.siblingoptions = op;
425 }
426}
427
428static void get_child_options (text_t &instring, metadata_t &metaoption) {
429
430 assert (instring.size() > 6);
431 if (instring.size() <= 6) return;
432 text_t meta, com, op;
433 bool inbraces = false;
434 bool inquotes = false;
435 bool foundcolon = false;
436 text_t::const_iterator here = instring.begin()+5;
437 text_t::const_iterator end = instring.end();
438 while (here != end) {
439 if (foundcolon) meta.push_back (*here);
440 else if (*here == '(') inbraces = true;
441 else if (*here == ')') inbraces = false;
442 else if (*here == '\'' && !inquotes) inquotes = true;
443 else if (*here == '\'' && inquotes) inquotes = false;
444 else if (*here == ':' && !inbraces) foundcolon = true;
445 else if (inquotes) op.push_back (*here);
446 else com.push_back (*here);
447 ++here;
448 }
449
450 instring = meta;
451 if (com.empty()) {
452 metaoption.mqualifier.child = cAll;
453 metaoption.childoptions = " ";
454 }
455 else if (com == "first") {
456 metaoption.mqualifier.child = cNum;
457 metaoption.childoptions = ".fc";
458 }
459 else if (com == "last") {
460 metaoption.mqualifier.child = cNum;
461 metaoption.childoptions = ".lc";
462 }
463 else if (com.getint()>0) {
464 metaoption.mqualifier.child = cNum;
465 metaoption.childoptions = "."+com;
466 }
467 else {
468 metaoption.mqualifier.child = cAll;
469 metaoption.childoptions = op;
470 }
471}
472
473
474static void get_truncate_options (text_t &instring, metadata_t &metaoption)
475{
476 assert (instring.size() > ((text_t) "truncate").size());
477 if (instring.size() <= ((text_t) "truncate").size()) return;
478 text_t meta, com;
479 bool inbraces = false;
480 bool foundcolon = false;
481 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
482 text_t::const_iterator end = instring.end();
483 while (here != end) {
484 if (foundcolon) meta.push_back (*here);
485 else if (*here == '(') inbraces = true;
486 else if (*here == ')') inbraces = false;
487 else if (*here == ':' && !inbraces) foundcolon = true;
488 else com.push_back (*here);
489 ++here;
490 }
491
492 instring = meta;
493
494 if (!com.empty())
495 {
496 metaoption.siblingoptions = com;
497 }
498 else
499 {
500 // Default is 100 characters if not specified
501 metaoption.siblingoptions = "100";
502 }
503}
504
505
506
507static void parse_meta (text_t &meta, metadata_t &metaoption,
508 text_tset &metadata, bool &getParents) {
509
510 // Look for the various format statement modifiers
511 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
512 // is irrelevant because this is not stored in metaoption.metacommand anyway
513 bool keep_trying = true;
514 while (keep_trying)
515 {
516 keep_trying = false;
517
518 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
519 {
520 metaoption.metacommand |= mCgiSafe;
521 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
522 keep_trying = true;
523 }
524 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
525 {
526 metaoption.metacommand |= mSpecial;
527 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
528 keep_trying = true;
529 }
530
531 // New "truncate" special formatting option
532 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
533 {
534 metaoption.metacommand |= mTruncate;
535 get_truncate_options (meta, metaoption);
536 keep_trying = true;
537 }
538 // New "htmlsafe" special formatting option
539 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
540 {
541 metaoption.metacommand |= mHTMLSafe;
542 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
543 keep_trying = true;
544 }
545 // New "xmlsafe" special formatting option
546 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
547 {
548 metaoption.metacommand |= mXMLSafe;
549 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
550 keep_trying = true;
551 }
552 // New "dmsafe" special formatting option
553 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
554 {
555 metaoption.metacommand |= mDMSafe;
556 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
557 keep_trying = true;
558 }
559 }
560
561 bool had_parent_or_child = true;
562 bool prev_was_parent = false;
563 bool prev_was_child = false;
564
565 while (had_parent_or_child) {
566 if (meta.size() > 7
567 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
568
569 // clear out sibling and child (cmd and options)
570 metaoption.metacommand &= ~(mChild|mSibling);
571 metaoption.childoptions.clear();
572 metaoption.siblingoptions.clear();
573
574 getParents = true;
575 metaoption.metacommand |= mParent;
576 get_parent_options (meta, metaoption);
577
578 if (prev_was_parent) {
579 metaoption.pre_tree_traverse += ".pr";
580 }
581 else if (prev_was_child) {
582 metaoption.pre_tree_traverse += ".fc";
583 }
584
585 prev_was_parent = true;
586 prev_was_child = false;
587 }
588 else if (meta.size() > 6
589 && (substr (meta.begin(), meta.begin()+5) == "child")) {
590
591 // clear out sibling and parent (cmd and options)
592 metaoption.metacommand &= ~(mParent|mSibling);
593 metaoption.parentoptions.clear();
594 metaoption.siblingoptions.clear();
595
596 metaoption.metacommand |= mChild;
597 get_child_options (meta, metaoption);
598 metadata.insert("contains");
599
600 if (prev_was_parent) {
601 metaoption.pre_tree_traverse += ".pr";
602 }
603 else if (prev_was_child) {
604 metaoption.pre_tree_traverse += ".fc";
605 }
606
607 prev_was_child = true;
608 prev_was_parent = false;
609 }
610 else {
611 prev_was_child = false;
612 prev_was_parent = false;
613 had_parent_or_child = false;
614 }
615 }
616
617 // parent/child can have sibling tacked on end also
618 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
619 metaoption.metacommand |= mSibling;
620 get_sibling_options (meta, metaoption);
621 }
622
623 // check for ex. which may occur in format statements
624 // remove "ex." prefix, but only if there are no other metadata set qualifiers
625 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
626 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
627 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
628
629 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
630 meta = substr (meta.begin()+3, meta.end());
631 }
632 metadata.insert (meta);
633 metaoption.metaname = meta;
634}
635
636static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
637 if (meta == "collection") {
638 // no qualifiers
639 metaoption.metaname = g_EmptyText;
640 return;
641 }
642 meta = substr (meta.begin()+11, meta.end());
643 metaoption.metaname = meta;
644
645}
646
647static void parse_meta (text_t &meta, format_t *formatlistptr,
648 text_tset &metadata, bool &getParents) {
649
650 // check for ex. which may occur in format statements
651 // remove "ex." prefix, but only if there are no other metadata set qualifiers
652 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
653 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
654 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
655
656 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
657 meta = substr (meta.begin()+3, meta.end());
658 }
659 if (meta == "link")
660 formatlistptr->command = comLink;
661 else if (meta == "/link")
662 formatlistptr->command = comEndLink;
663
664 // the metaname "srclink_file" is deprecated, use "srclinkFile"
665 else if (meta == "srclink") {
666 formatlistptr->command = comAssocLink;
667 formatlistptr->meta.metaname = "srclinkFile";
668 metadata.insert("srclinkFile");
669 }
670 else if (meta == "srchref") {
671 formatlistptr->command = comAssocLink;
672 formatlistptr->text = "href";
673 formatlistptr->meta.metaname = "srclinkFile";
674 metadata.insert("srclinkFile");
675 }
676 else if (meta == "/srclink") {
677 formatlistptr->command = comEndAssocLink;
678 formatlistptr->meta.metaname = "srclinkFile";
679 }
680 // and weblink etc
681 else if (meta == "href")
682 formatlistptr->command = comHref;
683
684 else if (meta == "num")
685 formatlistptr->command = comNum;
686
687 else if (meta == "icon")
688 formatlistptr->command = comIcon;
689
690 else if (meta == "Text")
691 formatlistptr->command = comDoc;
692
693 else if (meta == "RelatedDocuments")
694 formatlistptr->command = comRel;
695
696 else if (meta == "highlight")
697 formatlistptr->command = comHighlight;
698
699 else if (meta == "/highlight")
700 formatlistptr->command = comEndHighlight;
701
702 else if (meta == "metadata-spanwrap")
703 formatlistptr->command = comMetadataSpanWrap;
704
705 else if (meta == "/metadata-spanwrap")
706 formatlistptr->command = comEndMetadataSpanWrap;
707
708 else if (meta == "metadata-divwrap")
709 formatlistptr->command = comMetadataDivWrap;
710
711 else if (meta == "/metadata-divwrap")
712 formatlistptr->command = comEndMetadataDivWrap;
713
714 else if (meta == "Summary")
715 formatlistptr->command = comSummary;
716
717 else if (meta == "DocImage")
718 formatlistptr->command = comImage;
719
720 else if (meta == "DocTOC")
721 formatlistptr->command = comTOC;
722
723 else if (meta == "DocumentButtonDetach")
724 formatlistptr->command = comDocumentButtonDetach;
725
726 else if (meta == "DocumentButtonHighlight")
727 formatlistptr->command = comDocumentButtonHighlight;
728
729 else if (meta == "DocumentButtonExpandContents")
730 formatlistptr->command = comDocumentButtonExpandContents;
731
732 else if (meta == "DocumentButtonExpandText")
733 formatlistptr->command = comDocumentButtonExpandText;
734
735 else if (meta == "DocOID")
736 formatlistptr->command = comOID;
737 else if (meta == "DocTopOID")
738 formatlistptr->command = comTopOID;
739 else if (meta == "DocRank")
740 formatlistptr->command = comRank;
741 else if (meta == "DocTermsFreqTotal")
742 formatlistptr->command = comDocTermsFreqTotal;
743 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
744 formatlistptr->command = comCollection;
745 parse_coll_meta(meta, formatlistptr->meta);
746 }
747 else {
748 formatlistptr->command = comMeta;
749 parse_meta (meta, formatlistptr->meta, metadata, getParents);
750 }
751}
752
753
754static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
755 text_tset &metadata, bool &getParents) {
756
757 text_t text;
758 text_t::const_iterator here = formatstring.begin();
759 text_t::const_iterator end = formatstring.end();
760
761 while (here != end) {
762
763 if (*here == '\\') {
764 ++here;
765 if (here != end) text.push_back (*here);
766
767 } else if (*here == '{') {
768 if (!text.empty()) {
769 formatlistptr->command = comText;
770 formatlistptr->text = text;
771 formatlistptr->nextptr = new format_t();
772 formatlistptr = formatlistptr->nextptr;
773
774 text.clear();
775 }
776 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
777
778 formatlistptr->nextptr = new format_t();
779 formatlistptr = formatlistptr->nextptr;
780 if (here == end) break;
781 }
782 } else if (*here == '[') {
783 if (!text.empty()) {
784 formatlistptr->command = comText;
785 formatlistptr->text = text;
786 formatlistptr->nextptr = new format_t();
787 formatlistptr = formatlistptr->nextptr;
788
789 text.clear();
790 }
791 text_t meta;
792 ++here;
793 while (*here != ']') {
794 if (here == end) return false;
795 meta.push_back (*here);
796 ++here;
797 }
798 parse_meta (meta, formatlistptr, metadata, getParents);
799 formatlistptr->nextptr = new format_t();
800 formatlistptr = formatlistptr->nextptr;
801
802 } else
803 text.push_back (*here);
804
805 if (here != end) ++here;
806 }
807 if (!text.empty()) {
808 formatlistptr->command = comText;
809 formatlistptr->text = text;
810 formatlistptr->nextptr = new format_t();
811 formatlistptr = formatlistptr->nextptr;
812
813 }
814 return true;
815}
816
817
818static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
819 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
820
821 text_t::const_iterator it = findchar (here, end, '}');
822 if (it == end) return false;
823
824 text_t com = substr (here, it);
825 here = findchar (it, end, '{');
826 if (here == end) return false;
827 else ++here;
828
829 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
830 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
831 else return false;
832
833 int commacount = 0;
834 text_t text;
835 while (here != end) {
836
837 if (*here == '\\') {
838 ++here;
839 if (here != end) text.push_back(*here);
840
841 }
842
843 else if (*here == ',' || *here == '}' || *here == '{') {
844
845 if (formatlistptr->command == comOr) {
846 // the {Or}{this, or this, or this, or this} statement
847 format_t *or_ptr;
848
849 // find the next unused orptr
850 if (formatlistptr->orptr == NULL) {
851 formatlistptr->orptr = new format_t();
852 or_ptr = formatlistptr->orptr;
853 } else {
854 or_ptr = formatlistptr->orptr;
855 while (or_ptr->nextptr != NULL)
856 or_ptr = or_ptr->nextptr;
857 or_ptr->nextptr = new format_t();
858 or_ptr = or_ptr->nextptr;
859 }
860
861 if (!text.empty())
862 {
863 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
864 }
865
866 if (*here == '{')
867 {
868 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
869 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
870 // The latter can always be re-written:
871 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
872
873 if (!text.empty()) // already used up allocated format_t
874 {
875 // => allocate new one for detected action
876 or_ptr->nextptr = new format_t();
877 or_ptr = or_ptr->nextptr;
878 }
879 if (!parse_action(++here, end, or_ptr, metadata, getParents))
880 {
881 return false;
882 }
883 }
884 else
885 {
886 if (*here == '}') break;
887 }
888 text.clear();
889
890 }
891
892 // Parse an {If}{decide,do,else} statement
893 else {
894
895 // Read the decision component.
896 if (commacount == 0) {
897 // Decsion can be a metadata element, or a piece of text.
898 // Originally Stefan's code, updated 25/10/2000 by Gordon.
899
900 text_t::const_iterator beginbracket = text.begin();
901 text_t::const_iterator endbracket = (text.end() - 1);
902
903 // Decision is based on a metadata element
904 if ((*beginbracket == '[') && (*endbracket == ']')) {
905 // Ignore the surrounding square brackets
906 text_t meta = substr (beginbracket+1, endbracket);
907 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
908 ++commacount;
909 text.clear();
910 }
911
912 // Decision is a piece of text (probably a macro like _cgiargmode_).
913 else {
914
915 // hunt for any metadata in string, which might be uses in
916 // to test a condition, e.g. [Format] eq 'PDF'
917 format_t* dummyformat = new format_t();
918 // update which metadata fields needed
919 // (not interested in updatng formatlistptr)
920 parse_string (text, dummyformat, metadata, getParents);
921 delete dummyformat;
922
923 formatlistptr->decision.command = dText;
924 formatlistptr->decision.text = text;
925 ++commacount;
926 text.clear();
927 }
928 }
929
930 // Read the "then" and "else" components of the {If} statement.
931 else {
932 format_t** nextlistptr = NULL;
933 if (commacount == 1) {
934 nextlistptr = &formatlistptr->ifptr;
935 } else if (commacount == 2 ) {
936 nextlistptr = &formatlistptr->elseptr;
937 } else {
938 return false;
939 }
940
941 if (!text.empty()) {
942 if (*nextlistptr == NULL) {
943 *nextlistptr = new format_t();
944 } else {
945
946 // skip to the end of any format_t statements already added
947 while ((*nextlistptr)->nextptr != NULL)
948 {
949 nextlistptr = &(*nextlistptr)->nextptr;
950 }
951
952 (*nextlistptr)->nextptr = new format_t();
953 nextlistptr = &(*nextlistptr)->nextptr;
954 }
955
956 if (!parse_string (text, *nextlistptr, metadata, getParents))
957 {
958 return false;
959 }
960 text.clear();
961 }
962
963 if (*here == '{')
964 {
965 if (*nextlistptr == NULL) {
966 *nextlistptr = new format_t();
967 } else {
968 // skip to the end of any format_t statements already added
969 while ((*nextlistptr)->nextptr != NULL)
970 {
971 nextlistptr = &(*nextlistptr)->nextptr;
972 }
973
974 (*nextlistptr)->nextptr = new format_t();
975 nextlistptr = &(*nextlistptr)->nextptr;
976 }
977
978 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
979 {
980 return false;
981 }
982 }
983 else
984 {
985 if (*here == '}') break;
986 ++commacount;
987 }
988 }
989 }
990
991 } else text.push_back(*here);
992
993 if (here != end) ++here;
994 }
995
996 return true;
997}
998
999
1000static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1001 const text_t metaname, int metapos=-1)
1002{
1003
1004 text_t tag_type = metadata_wrap_type;
1005 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1006
1007 text_t wrapped_metatext = "<" + tag_type + " ";
1008 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1009
1010 wrapped_metatext += "docoid=\"" + OID + "\" ";
1011 wrapped_metatext += "metaname=\"" + metaname + "\"";
1012
1013 if (metapos>=0) {
1014 text_t metapos_str = metapos;
1015 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1016 }
1017
1018 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1019
1020 return wrapped_metatext;
1021}
1022
1023
1024
1025bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1026 text_tset &metadata, bool &getParents) {
1027
1028 formatlistptr->clear();
1029 getParents = false;
1030
1031 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1032}
1033
1034// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1035// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1036
1037static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1038{
1039 text_t no_ns_metaname = remove_namespace(meta.metaname);
1040 text_t formatted_metatext;
1041 bool first = true;
1042
1043 const int start_i=0;
1044 const int end_i = metainfo.values.size()-1;
1045
1046 if (position == -1) { // all
1047 for (int i=start_i; i<=end_i; ++i) {
1048 if (!first) formatted_metatext += meta.siblingoptions;
1049
1050 text_t fresh_metatext;
1051
1052 if (meta.metacommand & mSpecial) {
1053 // special formatting
1054 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1055 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1056 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1057 }
1058 else fresh_metatext = metainfo.values[i];
1059
1060 // New "truncate" special formatting option
1061 if (meta.metacommand & mTruncate)
1062 {
1063 int truncate_length = meta.siblingoptions.getint();
1064 text_t truncated_value = fresh_metatext;
1065 if (truncated_value.size() > truncate_length)
1066 {
1067 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1068 }
1069 fresh_metatext = truncated_value;
1070 }
1071 // New "xmlsafe" special formatting option
1072 if (meta.metacommand & mXMLSafe)
1073 {
1074 // Make it XML-safe
1075 text_t text_xml_safe = "";
1076 text_t::const_iterator text_iterator = fresh_metatext.begin();
1077 while (text_iterator != fresh_metatext.end())
1078 {
1079 if (*text_iterator == '&') text_xml_safe += "&amp;";
1080 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1081 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1082 else text_xml_safe.push_back(*text_iterator);
1083 text_iterator++;
1084 }
1085 fresh_metatext = text_xml_safe;
1086 }
1087 // New "htmlsafe" special formatting option
1088 if (meta.metacommand & mHTMLSafe)
1089 {
1090 // Make it HTML-safe
1091 text_t text_html_safe = "";
1092 text_t::const_iterator text_iterator = fresh_metatext.begin();
1093 while (text_iterator != fresh_metatext.end())
1094 {
1095 if (*text_iterator == '&') text_html_safe += "&amp;";
1096 else if (*text_iterator == '<') text_html_safe += "&lt;";
1097 else if (*text_iterator == '>') text_html_safe += "&gt;";
1098 else if (*text_iterator == '"') text_html_safe += "&quot;";
1099 else text_html_safe.push_back(*text_iterator);
1100 text_iterator++;
1101 }
1102 fresh_metatext = text_html_safe;
1103 }
1104 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1105 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1106 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1107 {
1108 // Make it macro-safe
1109 text_t text_dm_safe = dm_safe(fresh_metatext);
1110 fresh_metatext = text_dm_safe;
1111 }
1112
1113 if (metadata_wrap) {
1114 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1115 }
1116 formatted_metatext += fresh_metatext;
1117
1118 first = false;
1119
1120 }
1121 } else {
1122 if (position == -2) { // end
1123 position = end_i;
1124 } else if (position < start_i || position > end_i) {
1125 return "";
1126 }
1127
1128 text_t fresh_metatext;
1129 if (meta.metacommand & mSpecial) {
1130
1131 // special formatting
1132 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1133 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1134 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1135 }
1136 else fresh_metatext = metainfo.values[position];
1137
1138 // New "truncate" special formatting option
1139 if (meta.metacommand & mTruncate)
1140 {
1141 int truncate_length = meta.siblingoptions.getint();
1142 text_t truncated_value = fresh_metatext;
1143 if (truncated_value.size() > truncate_length)
1144 {
1145 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1146 }
1147 fresh_metatext = truncated_value;
1148 }
1149 // New "xmlsafe" special formatting option
1150 if (meta.metacommand & mXMLSafe)
1151 {
1152 // Make it XML-safe
1153 text_t text_xml_safe = "";
1154 text_t::const_iterator text_iterator = fresh_metatext.begin();
1155 while (text_iterator != fresh_metatext.end())
1156 {
1157 if (*text_iterator == '&') text_xml_safe += "&amp;";
1158 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1159 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1160 else text_xml_safe.push_back(*text_iterator);
1161 text_iterator++;
1162 }
1163 fresh_metatext = text_xml_safe;
1164 }
1165 // New "htmlsafe" special formatting option
1166 if (meta.metacommand & mHTMLSafe)
1167 {
1168 // Make it HTML-safe
1169 text_t text_html_safe = "";
1170 text_t::const_iterator text_iterator = fresh_metatext.begin();
1171 while (text_iterator != fresh_metatext.end())
1172 {
1173 if (*text_iterator == '&') text_html_safe += "&amp;";
1174 else if (*text_iterator == '<') text_html_safe += "&lt;";
1175 else if (*text_iterator == '>') text_html_safe += "&gt;";
1176 else if (*text_iterator == '"') text_html_safe += "&quot;";
1177 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1178 else if (*text_iterator == ',') text_html_safe += "&#44;";
1179 else text_html_safe.push_back(*text_iterator);
1180 text_iterator++;
1181 }
1182 fresh_metatext = text_html_safe;
1183 }
1184 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1185 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1186 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1187 {
1188 // Make it macro-safe
1189 text_t text_dm_safe = dm_safe(fresh_metatext);
1190 fresh_metatext = text_dm_safe;
1191 }
1192
1193 if (metadata_wrap) {
1194 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1195 }
1196
1197 formatted_metatext += fresh_metatext;
1198 }
1199
1200 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1201 else return formatted_metatext;
1202}
1203
1204static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1205{
1206
1207 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1208
1209 switch (meta.mqualifier.parent) {
1210 case pNone:
1211 return "Nothing!!";
1212 break;
1213
1214 case pImmediate:
1215 if (parent != NULL) {
1216 text_t parent_oid = get_parent(docinfo.OID);
1217 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1218 }
1219 break;
1220
1221 case pTop:
1222 if (parent != NULL) {
1223 text_t parent_oid = get_parent(docinfo.OID);
1224
1225 while (parent->parent != NULL) {
1226 parent = parent->parent;
1227 parent_oid = get_parent(parent_oid);
1228 }
1229 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1230 }
1231 break;
1232
1233 case pAll:
1234 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1235 if (parent != NULL) {
1236 text_t parent_oid = get_parent(docinfo.OID);
1237
1238 text_tarray tmparray;
1239 while (parent != NULL) {
1240 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1241 parent = parent->parent;
1242 parent_oid = get_parent(parent_oid);
1243
1244 }
1245 // now join them up - use teh parent separator
1246 bool first = true;
1247 text_t tmp;
1248 text_tarray::reverse_iterator here = tmparray.rbegin();
1249 text_tarray::reverse_iterator end = tmparray.rend();
1250 while (here != end) {
1251 if (!first) tmp += meta.parentoptions;
1252 tmp += *here;
1253 first = false;
1254 ++here;
1255 }
1256 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1257 else return tmp;
1258 }
1259 }
1260 return "";
1261
1262}
1263
1264static text_t get_child_meta (const text_t& collection,
1265 recptproto* collectproto,
1266 ResultDocInfo_t &docinfo, displayclass &disp,
1267 const metadata_t &meta, text_tmap &options,
1268 ostream& logout, int siblings_values)
1269{
1270 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1271
1272 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1273 const text_t& child_metaname = meta.metaname;
1274 const text_t& child_field = meta.childoptions;
1275 text_tset child_metadata;
1276 child_metadata.insert(child_metaname);
1277
1278 FilterResponse_t child_response;
1279 if (meta.mqualifier.child == cNum) {
1280 // just one child
1281 //get the information associated with the metadata for child doc
1282 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1283 child_metadata, false, collectproto, child_response,
1284 logout)) return ""; // invalid child number
1285
1286 if (child_response.docInfo.empty()) return false; // no info for the child
1287
1288 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1289 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1290
1291 text_t child_metavalue
1292 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1293 return expand_metadata(child_metavalue,collection,collectproto,
1294 child_docinfo,disp,options,logout);
1295 }
1296
1297
1298 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1299
1300
1301 if (!pre_tree_trav.empty()) {
1302 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1303 FilterResponse_t trav_response;
1304
1305 text_tset trav_metadata;
1306 trav_metadata.insert("contains");
1307
1308 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1309 trav_metadata, false, collectproto, trav_response,
1310 logout)) return ""; // invalid pre_tree_trav
1311
1312 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1313
1314 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1315
1316 // use this for rest of routine
1317 docinfo = trav_docinfo;
1318 }
1319
1320 // we need to get all children
1321 text_t result = "";
1322 text_tarray children;
1323 text_t contains = docinfo.metadata["contains"].values[0];
1324 splitchar (contains.begin(), contains.end(), ';', children);
1325 text_tarray::const_iterator here = children.begin();
1326 text_tarray::const_iterator end = children.end();
1327 bool first = true;
1328 while (here !=end) {
1329 text_t oid = *here;
1330 here++;
1331 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1332
1333 //get the information associated with the metadata for child doc
1334 if (!get_info (oid, collection, "", child_metadata,
1335 false, collectproto, child_response, logout) ||
1336 child_response.docInfo.empty()) {
1337 first = false;
1338 continue;
1339 }
1340
1341
1342 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1343 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1344
1345 text_t child_metavalue
1346 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1347
1348
1349 if (!first) result += child_field;
1350 first = false;
1351 // need to do this here cos otherwise we are in the wrong document
1352 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1353 child_docinfo,disp,options,logout);
1354
1355 result += em;
1356 }
1357 return result;
1358
1359}
1360
1361static text_t get_meta (const text_t& collection, recptproto* collectproto,
1362 ResultDocInfo_t &docinfo, displayclass &disp,
1363 const metadata_t &meta, text_tmap &options,
1364 ostream& logout) {
1365
1366 // make sure we have the requested metadata
1367 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1368 if (it == docinfo.metadata.end()) return "";
1369
1370 int siblings_values = 0; // default is no siblings, just the first metadata available
1371 if (meta.metacommand & mSibling) {
1372 if (meta.mqualifier.sibling == sAll) {
1373 siblings_values = -1; //all
1374 } else if (meta.mqualifier.sibling == sNum) {
1375 siblings_values = meta.siblingoptions.getint();
1376 }
1377 }
1378 if (meta.metacommand & mParent) {
1379 return get_parent_meta(docinfo,meta,siblings_values);
1380 }
1381
1382 else if (meta.metacommand & mChild) {
1383 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1384 options,logout, siblings_values);
1385 }
1386 else if (meta.metacommand & mSibling) { // only siblings
1387 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1388 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1389 }
1390 else {
1391
1392 // straightforward metadata request (nothing fancy)
1393
1394 text_t classifier_metaname = docinfo.classifier_metadata_type;
1395 int metaname_index
1396 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1397 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1398 }
1399
1400 return "";
1401}
1402
1403static text_t get_or (const text_t& collection, recptproto* collectproto,
1404 ResultDocInfo_t &docinfo, displayclass &disp,
1405 format_t *orptr, text_tmap &options,
1406 ostream& logout) {
1407
1408 while (orptr != NULL) {
1409
1410 if (metadata_wrap) {
1411 // need to be a bit more careful about this
1412 // => test for it *without* spanwrap or divwrap, and if defined, then
1413 // got back and generate it again, this time with spanwrap/divwrap on
1414
1415 metadata_wrap = false;
1416 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1417 options, logout);
1418 metadata_wrap = true;
1419 if (!test_tmp.empty()) {
1420
1421 return format_string (collection,collectproto,docinfo, disp, orptr,
1422 options, logout);
1423 }
1424 }
1425 else {
1426 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1427 options, logout);
1428 if (!tmp.empty()) return tmp;
1429 }
1430
1431 orptr = orptr->nextptr;
1432 }
1433 return "";
1434}
1435
// Returns true when c is a space, a tab, a newline or a carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1441
1442static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1443{
1444 int pos = start_pos;
1445 while (pos<outstring.size()) {
1446 if (!char_is_whitespace(outstring[pos])) {
1447 break;
1448 }
1449 ++pos;
1450 }
1451
1452 return pos;
1453}
1454
1455static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1456{
1457 int pos = start_pos;
1458 while (pos>=0) {
1459 if (!char_is_whitespace(outstring[pos])) {
1460 break;
1461 }
1462 --pos;
1463 }
1464
1465 return pos;
1466}
1467
1468static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1469{
1470 int pos = start_pos;
1471 while (pos>=0) {
1472 if (char_is_whitespace(outstring[pos])) {
1473 break;
1474 }
1475 --pos;
1476 }
1477
1478 return pos;
1479}
1480
1481
1482static int rscan_for(const text_t& outstring, const int start_pos,
1483 const char find_c)
1484{
1485 int pos = start_pos;
1486 while (pos>=0) {
1487 char c = outstring[pos];
1488 if (outstring[pos] == find_c) {
1489 break;
1490 }
1491 --pos;
1492 }
1493
1494 return pos;
1495}
1496
1497text_t extract_substr(const text_t& outstring, const int start_pos,
1498 const int end_pos)
1499{
1500 text_t extracted_str;
1501 extracted_str.clear();
1502
1503 for (int pos=start_pos; pos<=end_pos; ++pos) {
1504 extracted_str.push_back(outstring[pos]);
1505 }
1506
1507 return extracted_str;
1508}
1509
1510
1511static text_t expand_potential_metadata(const text_t& collection,
1512 recptproto* collectproto,
1513 ResultDocInfo_t &docinfo,
1514 displayclass &disp,
1515 const text_t& intext,
1516 text_tmap &options,
1517 ostream& logout)
1518{
1519 text_t outtext;
1520
1521 // decide if dealing with metadata or text
1522
1523 text_t::const_iterator beginbracket = intext.begin();
1524 text_t::const_iterator endbracket = (intext.end() - 1);
1525
1526 // Decision is based on a metadata element
1527 if ((*beginbracket == '[') && (*endbracket == ']')) {
1528 // Ignore the surrounding square brackets
1529 text_t meta_text = substr (beginbracket+1, endbracket);
1530
1531 if (meta_text == "Text") {
1532 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1533 }
1534 else {
1535
1536 text_tset metadata;
1537 bool getParents =false;
1538 metadata_t meta;
1539
1540 parse_meta (meta_text, meta, metadata, getParents);
1541 outtext
1542 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1543 }
1544
1545 }
1546 else {
1547 outtext = intext;
1548 }
1549
1550 return outtext;
1551}
1552
1553
1554
1555
1556static bool uses_expression(const text_t& collection, recptproto* collectproto,
1557 ResultDocInfo_t &docinfo,
1558 displayclass &disp,
1559 const text_t& outstring, text_t& lhs_expr,
1560 text_t& op_expr, text_t& rhs_expr,
1561 text_tmap &options,
1562 ostream& logout)
1563{
1564 // Note: the string may not be of the form: str1 op str2, however
1565 // to deterine this we have to process it on the assumption it is,
1566 // and if at any point an 'erroneous' value is encountered, return
1567 // false and let something else have a go at evaluating it
1568
1569 // Starting at the end of the string and working backwards ..
1570
1571 const int outstring_len = outstring.size();
1572
1573 // skip over white space
1574 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1575
1576 if (rhs_end<=0) {
1577 // no meaningful text or (rhs_end==0) no room for operator
1578 return false;
1579 }
1580
1581 // check for ' or " and then scan over token
1582 const char potential_quote = outstring[rhs_end];
1583 int rhs_start=rhs_end;
1584 bool quoted = false;
1585
1586 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1587 --rhs_end;
1588 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1589 quoted = true;
1590 }
1591 else {
1592 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1593 }
1594
1595 if ((rhs_end-rhs_start)<0) {
1596 // no meaningful rhs expression
1597 return false;
1598 }
1599
1600 // form rhs_expr
1601 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1602
1603 // skip over white space
1604 const int to_whitespace = (quoted) ? 2 : 1;
1605
1606 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1607 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1608
1609 if ((op_end<0) && (op_start<0)) {
1610 // no meaningful expression operator
1611 return false;
1612 }
1613
1614 if (op_end-op_start<0) {
1615 // no meaningful expression operator
1616 return false;
1617 }
1618
1619 op_expr = extract_substr(outstring,op_start,op_end);
1620
1621
1622 // check for operator
1623 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1624 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1625
1626 // not a valid operator
1627 return false;
1628 }
1629
1630 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1631 if (lhs_end<0) {
1632 // no meaningful lhs expression
1633 return false;
1634 }
1635
1636 int lhs_start = scan_over_whitespace(outstring,0);
1637
1638 // form lhs_expr from remainder of string
1639 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1640
1641 // Now we know we have a valid expression, look up any
1642 // metadata terms
1643
1644 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1645 disp,rhs_expr,options,logout);
1646 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1647 disp,lhs_expr,options,logout);
1648
1649 return true;
1650}
1651
1652static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1653 const text_t& rhs_expr, ostream& logout)
1654{
1655 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1656 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1657 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1658 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1659 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1660 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1661 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1662 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1663 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1664 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1665 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1666 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1667 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1668 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1669 else {
1670 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1671 }
1672
1673 return false;
1674}
1675
1676
1677static text_t get_if (const text_t& collection, recptproto* collectproto,
1678 ResultDocInfo_t &docinfo, displayclass &disp,
1679 const decision_t &decision,
1680 format_t *ifptr, format_t *elseptr,
1681 text_tmap &options, ostream& logout)
1682{
1683 // If the decision component is a metadata element, then evaluate it
1684 // to see whether we output the "then" or the "else" clause
1685 if (decision.command == dMeta) {
1686
1687 bool store_metadata_wrap = metadata_wrap;
1688 metadata_wrap = 0;
1689
1690 // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1691 bool metadata_exists
1692 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1693 logout) != "");
1694
1695 metadata_wrap = store_metadata_wrap;
1696
1697 if (metadata_exists) {
1698 if (ifptr != NULL)
1699 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1700 options, logout);
1701 }
1702 else {
1703 if (elseptr != NULL)
1704 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1705 options, logout);
1706 }
1707 }
1708
1709 // If the decision component is text, then evaluate it (it is probably a
1710 // macro like _cgiargmode_) to decide what to output.
1711 else if (decision.command == dText) {
1712
1713 text_t outstring;
1714 disp.expandstring (decision.text, outstring);
1715
1716 // Check for if expression in form: str1 op str2
1717 // (such as [x] eq "y")
1718 text_t lhs_expr, op_expr, rhs_expr;
1719 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1720 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1721 if (ifptr != NULL) {
1722 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1723 options, logout);
1724 }
1725 else {
1726 return "";
1727 }
1728 } else {
1729 if (elseptr != NULL) {
1730 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1731 options, logout);
1732 }
1733 else {
1734 return "";
1735 }
1736 }
1737 }
1738
1739
1740 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1741 // a cgi argument macro that has not been set, it evaluates to itself.
1742 // Therefore, were have to say that a piece of text evalautes true if
1743 // it is non-empty and if it is a cgi argument evaulating to itself.
1744
1745 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1746 if (ifptr != NULL)
1747 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1748 options, logout);
1749 } else {
1750 if (elseptr != NULL)
1751 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1752 options, logout);
1753 }
1754 }
1755
1756 return "";
1757}
1758
1759bool includes_metadata(const text_t& text)
1760{
1761 text_t::const_iterator here = text.begin();
1762 text_t::const_iterator end = text.end();
1763
1764 char startbracket = '[';
1765 char endbracket = ']';
1766
1767 char bracket = startbracket;
1768 while (here != end) {
1769 if (*here == bracket) {
1770 if(bracket == startbracket) {
1771 // seen a [, next look for a ] to confirm it's metadata
1772 bracket = endbracket;
1773 } else if(bracket == endbracket) {
1774 // found [ ... ] in text, so we think it includes metadata
1775 return true;
1776 }
1777 }
1778 ++here;
1779 }
1780
1781 return false;
1782}
1783
1784static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1785 recptproto* collectproto,
1786 ResultDocInfo_t &docinfo,
1787 displayclass &disp, text_tmap &options,
1788 ostream &logout) {
1789
1790 if (includes_metadata(metavalue)) {
1791
1792 // text has embedded metadata in it => expand it
1793 FilterRequest_t request;
1794 FilterResponse_t response;
1795
1796 request.getParents = false;
1797
1798 format_t *expanded_formatlistptr = new format_t();
1799 parse_formatstring (metavalue, expanded_formatlistptr,
1800 request.fields, request.getParents);
1801
1802 // retrieve metadata
1803 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1804 collectproto, response, logout);
1805
1806 if (!response.docInfo.empty()) {
1807
1808 text_t expanded_metavalue
1809 = get_formatted_string(collection, collectproto,
1810 response.docInfo[0], disp, expanded_formatlistptr,
1811 options, logout);
1812
1813 return expanded_metavalue;
1814 }
1815 else {
1816 return metavalue;
1817 }
1818 }
1819 else {
1820
1821 return metavalue;
1822 }
1823}
1824
1825text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1826 displayclass &disp,
1827 text_t meta_name, ostream& logout) {
1828
1829 ColInfoResponse_t collectinfo;
1830 comerror_t err;
1831 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1832 text_t meta_value = "";
1833 text_t lang;
1834 disp.expandstring("_cgiargl_",lang);
1835 if (lang.empty()) {
1836 lang = "en";
1837 }
1838
1839 if (err == noError) {
1840 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1841 }
1842 return meta_value;
1843
1844
1845}
1846text_t format_string (const text_t& collection, recptproto* collectproto,
1847 ResultDocInfo_t &docinfo, displayclass &disp,
1848 format_t *formatlistptr, text_tmap &options,
1849 ostream& logout) {
1850
1851 if (formatlistptr == NULL) return "";
1852
1853 switch (formatlistptr->command) {
1854 case comOID:
1855 return docinfo.OID;
1856 case comTopOID:
1857 {
1858 text_t top_id;
1859 get_top(docinfo.OID, top_id);
1860 return top_id;
1861 }
1862 case comRank:
1863 return text_t(docinfo.ranking);
1864 case comText:
1865 return formatlistptr->text;
1866 case comLink:
1867 return options["link"];
1868 case comEndLink:
1869 {
1870 if (options["link"].empty()) return "";
1871 else return "</a>";
1872 }
1873 case comHref:
1874 return get_href(options["link"]);
1875 case comIcon:
1876 return options["icon"];
1877 case comNum:
1878 return docinfo.result_num;
1879 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1880 return get_related_docs(collection, collectproto, docinfo, logout);
1881
1882 case comSummary:
1883 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1884 case comAssocLink:
1885 {
1886 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1887 if (!link_filename.empty()) {
1888 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1889 if (formatlistptr->text == "href") {
1890 return href;
1891 }
1892 return "<a href=\""+ href + "\">";
1893 }
1894 return "";
1895 }
1896 case comEndAssocLink:
1897 {
1898 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1899 if (!link_filename.empty()) {
1900 return "</a>";
1901 }
1902 return "";
1903 }
1904 case comMeta:
1905 {
1906 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1907 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1908 }
1909
1910 case comDoc:
1911 return format_text(collection, collectproto, docinfo, disp, options, logout);
1912
1913 case comImage:
1914 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1915 case comTOC:
1916 return options["DocTOC"];
1917 case comDocumentButtonDetach:
1918 return options["DocumentButtonDetach"];
1919 case comDocumentButtonHighlight:
1920 return options["DocumentButtonHighlight"];
1921 case comDocumentButtonExpandContents:
1922 return options["DocumentButtonExpandContents"];
1923 case comDocumentButtonExpandText:
1924 return options["DocumentButtonExpandText"];
1925 case comHighlight:
1926 if (options["highlight"] == "1") return "<b>";
1927 break;
1928 case comEndHighlight:
1929 if (options["highlight"] == "1") return "</b>";
1930 break;
1931 case comMetadataSpanWrap:
1932 metadata_wrap=true; metadata_wrap_type="span"; return "";
1933 break;
1934 case comEndMetadataSpanWrap:
1935 metadata_wrap=false; metadata_wrap_type=""; return "";
1936 break;
1937 case comMetadataDivWrap:
1938 metadata_wrap=true; metadata_wrap_type="div"; return "";
1939 break;
1940 case comEndMetadataDivWrap:
1941 metadata_wrap=false; metadata_wrap_type=""; return "";
1942 break;
1943 case comIf:
1944 return get_if (collection, collectproto, docinfo, disp,
1945 formatlistptr->decision, formatlistptr->ifptr,
1946 formatlistptr->elseptr, options, logout);
1947 case comOr:
1948 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1949 options, logout);
1950 case comDocTermsFreqTotal:
1951 return docinfo.num_terms_matched;
1952 case comCollection:
1953 if (formatlistptr->meta.metaname == g_EmptyText) {
1954 return collection;
1955 }
1956 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1957
1958 }
1959 return "";
1960}
1961
1962text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1963 ResultDocInfo_t &docinfo, displayclass &disp,
1964 format_t *formatlistptr, text_tmap &options,
1965 ostream& logout) {
1966
1967 text_t ft;
1968 while (formatlistptr != NULL)
1969 {
1970 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1971 options, logout);
1972 formatlistptr = formatlistptr->nextptr;
1973 }
1974
1975 return ft;
1976}
1977
1978
1979// we have only preloaded the text in DocumentAction. But you may want
1980// to get the text in query, so copy what we have done with
1981// format_summary and get the text here. Probably is quite expensive?
1982text_t format_text (const text_t& collection, recptproto* collectproto,
1983 ResultDocInfo_t &docinfo, displayclass &disp,
1984 text_tmap &options, ostream& logout)
1985{
1986 text_t text;
1987
1988 if (!options["text"].empty()) {
1989 text = options["text"];
1990 }
1991 else {
1992 // get document text here
1993 DocumentRequest_t docrequest;
1994 DocumentResponse_t docresponse;
1995 comerror_t err;
1996 docrequest.OID = docinfo.OID;
1997 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1998 text = docresponse.doc;
1999 }
2000
2001 if (metadata_wrap) {
2002 text = wrap_metatext(text,docinfo.OID,"Text");
2003 }
2004
2005 return text;
2006}
2007
2008/* FUNCTION NAME: format_summary
2009 * DESC: this is invoked when a [Summary] special metadata is processed.
2010 * RETURNS: a query-biased summary for the document */
2011
2012text_t format_summary (const text_t& collection, recptproto* collectproto,
2013 ResultDocInfo_t &docinfo, displayclass &disp,
2014 text_tmap &options, ostream& logout) {
2015
2016 // GRB: added code here to ensure that the cstr (and other collections)
2017 // uses the document metadata item Summary, rather than compressing
2018 // the text of the document, processed via the methods in
2019 // summarise.cpp
2020
2021 text_t summary;
2022
2023 if (docinfo.metadata.count("Summary") > 0 &&
2024 docinfo.metadata["Summary"].values.size() > 0) {
2025 summary = docinfo.metadata["Summary"].values[0];
2026 }
2027 else {
2028
2029 text_t textToSummarise, query;
2030
2031 if(options["text"].empty()) { // get document text
2032 DocumentRequest_t docrequest;
2033 DocumentResponse_t docresponse;
2034 comerror_t err;
2035 docrequest.OID = docinfo.OID;
2036 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2037 textToSummarise = docresponse.doc;
2038 }
2039 else {
2040 // in practice, this would not happen, because text is only
2041 // loaded with the [Text] command
2042 textToSummarise = options["text"];
2043 }
2044
2045 disp.expandstring("_cgiargq_",query);
2046 summary = summarise(textToSummarise,query,80);
2047 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2048 }
2049
2050 summary.replace("'","&#039;");
2051 summary.replace("\n","&#013;");
2052
2053 if (metadata_wrap) {
2054 summary = wrap_metatext(summary,docinfo.OID,"Summary");
2055 }
2056
2057 return summary;
2058}
Note: See TracBrowser for help on using the repository browser.