source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 24306

Last change on this file since 24306 was 24306, checked in by ak19, 9 years ago

More changes relating to the "ex." prefix on embedded metadata (whose name may carry an additional metadata set as a namespace qualifier). The C++ code now removes the "ex." prefix only if there are no other metadata-set qualifiers in the metadata name.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 61.6 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41 ResultDocInfo_t &docinfo, displayclass &disp,
42 format_t *formatlistptr, text_tmap &options,
43 ostream& logout);
44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46 format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49 ResultDocInfo_t &docinfo, displayclass &disp,
50 text_tmap &options, ostream& logout);
51static text_t format_text (const text_t& collection, recptproto* collectproto,
52 ResultDocInfo_t &docinfo, displayclass &disp,
53 text_tmap &options, ostream& logout);
54
55static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
56 recptproto* collectproto, ResultDocInfo_t &docinfo,
57 displayclass &disp, text_tmap &options,
58 ostream &logout);
59
60
61void metadata_t::clear() {
62 metaname.clear();
63 metacommand = mNone;
64 mqualifier.parent = pNone;
65 mqualifier.sibling = sNone;
66 mqualifier.child = cNone;
67 pre_tree_traverse.clear();
68 parentoptions.clear();
69 siblingoptions.clear();
70 childoptions.clear();
71}
72
73void decision_t::clear() {
74 command = dMeta;
75 meta.clear();
76 text.clear();
77}
78
79format_t::~format_t()
80{
81 if (nextptr != NULL) delete nextptr;
82 if (ifptr != NULL) delete ifptr;
83 if (elseptr != NULL) delete elseptr;
84 if (orptr != NULL) delete orptr;
85}
86
87void format_t::clear() {
88 command = comText;
89 decision.clear();
90 text.clear();
91 meta.clear();
92 nextptr = NULL;
93 ifptr = NULL;
94 elseptr = NULL;
95 orptr = NULL;
96}
97
98void formatinfo_t::clear() {
99 DocumentImages = false;
100 DocumentTitles = true;
101 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
102 DocumentContents = true;
103 DocumentArrowsBottom = true;
104 DocumentArrowsTop = false;
105 DocumentSearchResultLinks = false;
106 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
107 // DocumentButtons.push_back ("Expand Text");
108 // DocumentButtons.push_back ("Expand Contents");
109 DocumentButtons.push_back ("Detach");
110 DocumentButtons.push_back ("Highlight");
111 RelatedDocuments = "";
112 DocumentText = "[Text]";
113 formatstrings.erase (formatstrings.begin(), formatstrings.end());
114 DocumentUseHTML = false;
115 AllowExtendedOptions = false;
116}
117
118// simply checks to see if formatstring begins with a <td> tag
119bool is_table_content (const text_t &formatstring) {
120 text_t::const_iterator here = formatstring.begin();
121 text_t::const_iterator end = formatstring.end();
122
123 while (here != end) {
124 if (*here != ' ') {
125 if ((*here == '<') && ((here+3) < end)) {
126 if ((*(here+1) == 't' || *(here+1) == 'T') &&
127 (*(here+2) == 'd' || *(here+2) == 'D') &&
128 (*(here+3) == '>' || *(here+3) == ' '))
129 return true;
130 } else return false;
131 }
132 ++here;
133 }
134 return false;
135}
136
137bool is_table_content (const format_t *formatlistptr) {
138
139 if (formatlistptr == NULL) return false;
140
141 if (formatlistptr->command == comText)
142 return is_table_content (formatlistptr->text);
143
144 return false;
145}
146
147// returns false if key isn't in formatstringmap
148bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
149 text_t &formatstring) {
150
151 formatstring.clear();
152 text_tmap::const_iterator it = formatstringmap.find(key);
153 if (it == formatstringmap.end()) return false;
154 formatstring = (*it).second;
155 return true;
156}
157
158// tries to find "key1key2" then "key1" then "key2"
159bool get_formatstring (const text_t &key1, const text_t &key2,
160 const text_tmap &formatstringmap,
161 text_t &formatstring) {
162
163 formatstring.clear();
164 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 it = formatstringmap.find(key1);
170 if (it != formatstringmap.end()) {
171 formatstring = (*it).second;
172 return true;
173 }
174 it = formatstringmap.find(key2);
175 if (it != formatstringmap.end()) {
176 formatstring = (*it).second;
177 return true;
178 }
179 return false;
180}
181
182
183text_t remove_namespace(const text_t &meta_name) {
184 text_t::const_iterator end = meta_name.end();
185 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
186 if (it != end) {
187 return substr(it+1, end);
188 }
189
190 return meta_name;
191
192}
193// returns a date of form _format:date_(year, month, day)
194// input is date of type yyyy-?mm-?dd
195// at least the year must be present in date
196text_t format_date (const text_t &date) {
197
198 if (date.size() < 4) return "";
199
200 text_t::const_iterator datebegin = date.begin();
201
202 text_t year = substr (datebegin, datebegin+4);
203 int chars_seen_so_far = 4;
204 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
205
206 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
208
209 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
210 int imonth = month.getint();
211 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
212
213 chars_seen_so_far += 2;
214 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
215
216 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
217 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
218
219 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
220 if (day[0] == '0') day = substr (day.begin()+1, day.end());
221 int iday = day.getint();
222 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
223
224 return "_format:date_("+year+","+month+","+day+")";
225}
226
227// converts an iso639 language code to its English equivalent
228// should we be checking that the macro exists??
229text_t iso639 (const text_t &langcode) {
230 if (langcode.empty()) return "";
231 return "_iso639:iso639"+langcode+"_";
232}
233
234
235text_t get_href (const text_t &link) {
236
237 text_t href;
238
239 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
240 text_t::const_iterator end = link.end();
241 if (here == end) return g_EmptyText;
242
243 ++here;
244 while (here != end) {
245 if (*here == '"') break;
246 href.push_back(*here);
247 ++here;
248 }
249
250 return href;
251}
252
253//this function gets the information associated with the relation
254//metadata for the document associated with 'docinfo'. This relation
255//metadata consists of a line of pairs containing 'collection, document OID'
256//(this is the OID of the document related to the current document, and
257//the collection the related document belongs to). For each of these pairs
258//the title metadata is obtained and then an html link between the title
259//of the related doc and the document's position (the document will be
260//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
261//(where collection is the related documents collection, and OID is the
262//related documents OID). A list of these html links are made for as many
263//related documents as there are. This list is then returned. If there are
264//no related documents available for the current document then the string
265//'.. no related documents .. ' is returned.
266text_t get_related_docs(const text_t& collection, recptproto* collectproto,
267 ResultDocInfo_t &docinfo, ostream& logout){
268
269 text_tset metadata;
270
271 //insert the metadata we wish to collect
272 metadata.insert("dc.Relation");
273 metadata.insert("Title");
274 metadata.insert("Subject"); //for emails, where title data doesn't apply
275
276 FilterResponse_t response;
277 text_t relation = ""; //string for displaying relation metadata
278 text_t relationTitle = ""; //the related documents Title (or subject)
279 text_t relationOID = ""; //the related documents OID
280
281 //get the information associated with the metadata for current doc
282 if (get_info (docinfo.OID, collection, "", metadata,
283 false, collectproto, response, logout)) {
284
285 //if the relation metadata exists, store for displaying
286 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
287 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
288
289 //split relation data into pairs of collectionname,ID number
290 text_tarray relationpairs;
291 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
292
293 text_tarray::const_iterator currDoc = relationpairs.begin();
294 text_tarray::const_iterator lastDoc = relationpairs.end();
295
296 //iterate through the pairs to split and display
297 while(currDoc != lastDoc){
298
299 //split pairs into collectionname and ID
300 text_tarray relationdata;
301 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
302
303 //get first element in the array (collection)
304 text_tarray::const_iterator doc_data = relationdata.begin();
305 text_t document_collection = *doc_data;
306 ++doc_data; //increment to get next item in array (oid)
307 text_t document_OID = *doc_data;
308
309 //create html link to related document
310 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
311 relation += "&amp;cl=search&amp;d=" + document_OID;
312
313 //get the information associated with the metadata for related doc
314 if (get_info (document_OID, document_collection, "", metadata,
315 false, collectproto, response, logout)) {
316
317 //if title metadata doesn't exist, collect subject metadata
318 //if that doesn't exist, just call it 'related document'
319 if (!response.docInfo[0].metadata["Title"].values[0].empty())
320 relationTitle = response.docInfo[0].metadata["Title"].values[0];
321 else if (!response.docInfo[0].metadata["Subject"].values.empty())
322 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
323 else relationTitle = "RELATED DOCUMENT";
324
325 }
326
327 //link the related document's title to its page
328 relation += "\">" + relationTitle + "</a>";
329 relation += " (" + document_collection + ")<br>";
330
331 ++currDoc;
332 }
333 }
334
335 }
336
337 if(relation.empty()) //no relation data for documnet
338 relation = ".. no related documents .. ";
339
340 return relation;
341}
342
343
344
345static void get_parent_options (text_t &instring, metadata_t &metaoption) {
346
347 assert (instring.size() > 7);
348 if (instring.size() <= 7) return;
349
350 text_t meta, com, op;
351 bool inbraces = false;
352 bool inquotes = false;
353 bool foundcolon = false;
354 text_t::const_iterator here = instring.begin()+6;
355 text_t::const_iterator end = instring.end();
356 while (here != end) {
357 if (foundcolon) meta.push_back (*here);
358 else if (*here == '(') inbraces = true;
359 else if (*here == ')') inbraces = false;
360 else if (*here == '\'' && !inquotes) inquotes = true;
361 else if (*here == '\'' && inquotes) inquotes = false;
362 else if (*here == ':' && !inbraces) foundcolon = true;
363 else if (inquotes) op.push_back (*here);
364 else com.push_back (*here);
365 ++here;
366 }
367
368 instring = meta;
369 if (com.empty())
370 metaoption.mqualifier.parent = pImmediate;
371 else if (com == "Top")
372 metaoption.mqualifier.parent = pTop;
373 else if (com == "All") {
374 metaoption.mqualifier.parent = pAll;
375 metaoption.parentoptions = op;
376 }
377}
378
379
380static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
381
382 assert (instring.size() > 8);
383 if (instring.size() <= 8) return;
384 text_t meta, com, op;
385 bool inbraces = false;
386 bool inquotes = false;
387 bool foundcolon = false;
388 text_t::const_iterator here = instring.begin()+7;
389 text_t::const_iterator end = instring.end();
390 while (here != end) {
391 if (foundcolon) meta.push_back (*here);
392 else if (*here == '(') inbraces = true;
393 else if (*here == ')') inbraces = false;
394 else if (*here == '\'' && !inquotes) inquotes = true;
395 else if (*here == '\'' && inquotes) inquotes = false;
396 else if (*here == ':' && !inbraces) foundcolon = true;
397 else if (inquotes) op.push_back (*here);
398 else com.push_back (*here);
399 ++here;
400 }
401
402 instring = meta;
403 metaoption.siblingoptions.clear();
404
405 if (com.empty()) {
406 metaoption.mqualifier.sibling = sAll;
407 metaoption.siblingoptions = " ";
408 }
409 else if (com == "first") {
410 metaoption.mqualifier.sibling = sNum;
411 metaoption.siblingoptions = "0";
412 }
413 else if (com == "last") {
414 metaoption.mqualifier.sibling = sNum;
415 metaoption.siblingoptions = "-2"; // == last
416 }
417 else if (com.getint()>0) {
418 metaoption.mqualifier.sibling = sNum;
419 int pos = com.getint()-1;
420 metaoption.siblingoptions +=pos;
421 }
422 else {
423 metaoption.mqualifier.sibling = sAll;
424 metaoption.siblingoptions = op;
425 }
426}
427
428static void get_child_options (text_t &instring, metadata_t &metaoption) {
429
430 assert (instring.size() > 6);
431 if (instring.size() <= 6) return;
432 text_t meta, com, op;
433 bool inbraces = false;
434 bool inquotes = false;
435 bool foundcolon = false;
436 text_t::const_iterator here = instring.begin()+5;
437 text_t::const_iterator end = instring.end();
438 while (here != end) {
439 if (foundcolon) meta.push_back (*here);
440 else if (*here == '(') inbraces = true;
441 else if (*here == ')') inbraces = false;
442 else if (*here == '\'' && !inquotes) inquotes = true;
443 else if (*here == '\'' && inquotes) inquotes = false;
444 else if (*here == ':' && !inbraces) foundcolon = true;
445 else if (inquotes) op.push_back (*here);
446 else com.push_back (*here);
447 ++here;
448 }
449
450 instring = meta;
451 if (com.empty()) {
452 metaoption.mqualifier.child = cAll;
453 metaoption.childoptions = " ";
454 }
455 else if (com == "first") {
456 metaoption.mqualifier.child = cNum;
457 metaoption.childoptions = ".fc";
458 }
459 else if (com == "last") {
460 metaoption.mqualifier.child = cNum;
461 metaoption.childoptions = ".lc";
462 }
463 else if (com.getint()>0) {
464 metaoption.mqualifier.child = cNum;
465 metaoption.childoptions = "."+com;
466 }
467 else {
468 metaoption.mqualifier.child = cAll;
469 metaoption.childoptions = op;
470 }
471}
472
473
474static void get_truncate_options (text_t &instring, metadata_t &metaoption)
475{
476 assert (instring.size() > ((text_t) "truncate").size());
477 if (instring.size() <= ((text_t) "truncate").size()) return;
478 text_t meta, com;
479 bool inbraces = false;
480 bool foundcolon = false;
481 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
482 text_t::const_iterator end = instring.end();
483 while (here != end) {
484 if (foundcolon) meta.push_back (*here);
485 else if (*here == '(') inbraces = true;
486 else if (*here == ')') inbraces = false;
487 else if (*here == ':' && !inbraces) foundcolon = true;
488 else com.push_back (*here);
489 ++here;
490 }
491
492 instring = meta;
493
494 if (!com.empty())
495 {
496 metaoption.siblingoptions = com;
497 }
498 else
499 {
500 // Default is 100 characters if not specified
501 metaoption.siblingoptions = "100";
502 }
503}
504
505
506
507static void parse_meta (text_t &meta, metadata_t &metaoption,
508 text_tset &metadata, bool &getParents) {
509
510 // Look for the various format statement modifiers
511 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
512 // is irrelevant because this is not stored in metaoption.metacommand anyway
513 bool keep_trying = true;
514 while (keep_trying)
515 {
516 keep_trying = false;
517
518 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
519 {
520 metaoption.metacommand |= mCgiSafe;
521 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
522 keep_trying = true;
523 }
524 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
525 {
526 metaoption.metacommand |= mSpecial;
527 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
528 keep_trying = true;
529 }
530
531 // New "truncate" special formatting option
532 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
533 {
534 metaoption.metacommand |= mTruncate;
535 get_truncate_options (meta, metaoption);
536 keep_trying = true;
537 }
538 // New "htmlsafe" special formatting option
539 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
540 {
541 metaoption.metacommand |= mHTMLSafe;
542 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
543 keep_trying = true;
544 }
545 // New "xmlsafe" special formatting option
546 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
547 {
548 metaoption.metacommand |= mXMLSafe;
549 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
550 keep_trying = true;
551 }
552 // New "dmsafe" special formatting option
553 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
554 {
555 metaoption.metacommand |= mDMSafe;
556 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
557 keep_trying = true;
558 }
559 }
560
561 bool had_parent_or_child = true;
562 bool prev_was_parent = false;
563 bool prev_was_child = false;
564
565 while (had_parent_or_child) {
566 if (meta.size() > 7
567 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
568
569 // clear out sibling and child (cmd and options)
570 metaoption.metacommand &= ~(mChild|mSibling);
571 metaoption.childoptions.clear();
572 metaoption.siblingoptions.clear();
573
574 getParents = true;
575 metaoption.metacommand |= mParent;
576 get_parent_options (meta, metaoption);
577
578 if (prev_was_parent) {
579 metaoption.pre_tree_traverse += ".pr";
580 }
581 else if (prev_was_child) {
582 metaoption.pre_tree_traverse += ".fc";
583 }
584
585 prev_was_parent = true;
586 prev_was_child = false;
587 }
588 else if (meta.size() > 6
589 && (substr (meta.begin(), meta.begin()+5) == "child")) {
590
591 // clear out sibling and parent (cmd and options)
592 metaoption.metacommand &= ~(mParent|mSibling);
593 metaoption.parentoptions.clear();
594 metaoption.siblingoptions.clear();
595
596 metaoption.metacommand |= mChild;
597 get_child_options (meta, metaoption);
598 metadata.insert("contains");
599
600 if (prev_was_parent) {
601 metaoption.pre_tree_traverse += ".pr";
602 }
603 else if (prev_was_child) {
604 metaoption.pre_tree_traverse += ".fc";
605 }
606
607 prev_was_child = true;
608 prev_was_parent = false;
609 }
610 else {
611 prev_was_child = false;
612 prev_was_parent = false;
613 had_parent_or_child = false;
614 }
615 }
616
617 // parent/child can have sibling tacked on end also
618 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
619 metaoption.metacommand |= mSibling;
620 get_sibling_options (meta, metaoption);
621 }
622
623 // check for ex. which may occur in format statements
624 // remove "ex." prefix, but only if there are no other metadata set qualifiers
625 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
626 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
627 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
628
629 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
630 meta = substr (meta.begin()+3, meta.end());
631 }
632 metadata.insert (meta);
633 metaoption.metaname = meta;
634}
635
636static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
637 if (meta == "collection") {
638 // no qualifiers
639 metaoption.metaname = g_EmptyText;
640 return;
641 }
642 meta = substr (meta.begin()+11, meta.end());
643 metaoption.metaname = meta;
644
645}
646
647static void parse_meta (text_t &meta, format_t *formatlistptr,
648 text_tset &metadata, bool &getParents) {
649
650 // check for ex. which may occur in format statements
651 // remove "ex." prefix, but only if there are no other metadata set qualifiers
652 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
653 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
654 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
655
656 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
657 meta = substr (meta.begin()+3, meta.end());
658 }
659 if (meta == "link")
660 formatlistptr->command = comLink;
661 else if (meta == "/link")
662 formatlistptr->command = comEndLink;
663
664 // the metaname "srclink_file" is deprecated, use "srclinkFile"
665 else if (meta == "srclink") {
666 formatlistptr->command = comAssocLink;
667 formatlistptr->meta.metaname = "srclinkFile";
668 metadata.insert("srclinkFile");
669 }
670 else if (meta == "srchref") {
671 formatlistptr->command = comAssocLink;
672 formatlistptr->text = "href";
673 formatlistptr->meta.metaname = "srclinkFile";
674 metadata.insert("srclinkFile");
675 }
676 else if (meta == "/srclink") {
677 formatlistptr->command = comEndAssocLink;
678 formatlistptr->meta.metaname = "srclinkFile";
679 }
680 // and weblink etc
681 else if (meta == "href")
682 formatlistptr->command = comHref;
683
684 else if (meta == "num")
685 formatlistptr->command = comNum;
686
687 else if (meta == "icon")
688 formatlistptr->command = comIcon;
689
690 else if (meta == "Text")
691 formatlistptr->command = comDoc;
692
693 else if (meta == "RelatedDocuments")
694 formatlistptr->command = comRel;
695
696 else if (meta == "highlight")
697 formatlistptr->command = comHighlight;
698
699 else if (meta == "/highlight")
700 formatlistptr->command = comEndHighlight;
701
702 else if (meta == "metadata-spanwrap")
703 formatlistptr->command = comMetadataSpanWrap;
704
705 else if (meta == "/metadata-spanwrap")
706 formatlistptr->command = comEndMetadataSpanWrap;
707
708 else if (meta == "metadata-divwrap")
709 formatlistptr->command = comMetadataDivWrap;
710
711 else if (meta == "/metadata-divwrap")
712 formatlistptr->command = comEndMetadataDivWrap;
713
714 else if (meta == "Summary")
715 formatlistptr->command = comSummary;
716
717 else if (meta == "DocImage")
718 formatlistptr->command = comImage;
719
720 else if (meta == "DocTOC")
721 formatlistptr->command = comTOC;
722
723 else if (meta == "DocumentButtonDetach")
724 formatlistptr->command = comDocumentButtonDetach;
725
726 else if (meta == "DocumentButtonHighlight")
727 formatlistptr->command = comDocumentButtonHighlight;
728
729 else if (meta == "DocumentButtonExpandContents")
730 formatlistptr->command = comDocumentButtonExpandContents;
731
732 else if (meta == "DocumentButtonExpandText")
733 formatlistptr->command = comDocumentButtonExpandText;
734
735 else if (meta == "DocOID")
736 formatlistptr->command = comOID;
737 else if (meta == "DocTopOID")
738 formatlistptr->command = comTopOID;
739 else if (meta == "DocRank")
740 formatlistptr->command = comRank;
741 else if (meta == "DocTermsFreqTotal")
742 formatlistptr->command = comDocTermsFreqTotal;
743 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
744 formatlistptr->command = comCollection;
745 parse_coll_meta(meta, formatlistptr->meta);
746 }
747 else {
748 formatlistptr->command = comMeta;
749 parse_meta (meta, formatlistptr->meta, metadata, getParents);
750 }
751}
752
753
754static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
755 text_tset &metadata, bool &getParents) {
756
757 text_t text;
758 text_t::const_iterator here = formatstring.begin();
759 text_t::const_iterator end = formatstring.end();
760
761 while (here != end) {
762
763 if (*here == '\\') {
764 ++here;
765 if (here != end) text.push_back (*here);
766
767 } else if (*here == '{') {
768 if (!text.empty()) {
769 formatlistptr->command = comText;
770 formatlistptr->text = text;
771 formatlistptr->nextptr = new format_t();
772 formatlistptr = formatlistptr->nextptr;
773
774 text.clear();
775 }
776 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
777
778 formatlistptr->nextptr = new format_t();
779 formatlistptr = formatlistptr->nextptr;
780 if (here == end) break;
781 }
782 } else if (*here == '[') {
783 if (!text.empty()) {
784 formatlistptr->command = comText;
785 formatlistptr->text = text;
786 formatlistptr->nextptr = new format_t();
787 formatlistptr = formatlistptr->nextptr;
788
789 text.clear();
790 }
791 text_t meta;
792 ++here;
793 while (*here != ']') {
794 if (here == end) return false;
795 meta.push_back (*here);
796 ++here;
797 }
798 parse_meta (meta, formatlistptr, metadata, getParents);
799 formatlistptr->nextptr = new format_t();
800 formatlistptr = formatlistptr->nextptr;
801
802 } else
803 text.push_back (*here);
804
805 if (here != end) ++here;
806 }
807 if (!text.empty()) {
808 formatlistptr->command = comText;
809 formatlistptr->text = text;
810 formatlistptr->nextptr = new format_t();
811 formatlistptr = formatlistptr->nextptr;
812
813 }
814 return true;
815}
816
817
818static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
819 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
820
821 text_t::const_iterator it = findchar (here, end, '}');
822 if (it == end) return false;
823
824 text_t com = substr (here, it);
825 here = findchar (it, end, '{');
826 if (here == end) return false;
827 else ++here;
828
829 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
830 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
831 else return false;
832
833 int commacount = 0;
834 text_t text;
835 while (here != end) {
836
837 if (*here == '\\') {
838 ++here;
839 if (here != end) text.push_back(*here);
840
841 }
842
843 else if (*here == ',' || *here == '}' || *here == '{') {
844
845 if (formatlistptr->command == comOr) {
846 // the {Or}{this, or this, or this, or this} statement
847 format_t *or_ptr;
848
849 // find the next unused orptr
850 if (formatlistptr->orptr == NULL) {
851 formatlistptr->orptr = new format_t();
852 or_ptr = formatlistptr->orptr;
853 } else {
854 or_ptr = formatlistptr->orptr;
855 while (or_ptr->nextptr != NULL)
856 or_ptr = or_ptr->nextptr;
857 or_ptr->nextptr = new format_t();
858 or_ptr = or_ptr->nextptr;
859 }
860
861 if (!text.empty())
862 {
863 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
864 }
865
866 if (*here == '{')
867 {
868 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
869 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
870 // The latter can always be re-written:
871 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
872
873 if (!text.empty()) // already used up allocated format_t
874 {
875 // => allocate new one for detected action
876 or_ptr->nextptr = new format_t();
877 or_ptr = or_ptr->nextptr;
878 }
879 if (!parse_action(++here, end, or_ptr, metadata, getParents))
880 {
881 return false;
882 }
883 }
884 else
885 {
886 if (*here == '}') break;
887 }
888 text.clear();
889
890 }
891
892 // Parse an {If}{decide,do,else} statement
893 else {
894
895 // Read the decision component.
896 if (commacount == 0) {
897 // Decsion can be a metadata element, or a piece of text.
898 // Originally Stefan's code, updated 25/10/2000 by Gordon.
899
900 text_t::const_iterator beginbracket = text.begin();
901 text_t::const_iterator endbracket = (text.end() - 1);
902
903 // Decision is based on a metadata element
904 if ((*beginbracket == '[') && (*endbracket == ']')) {
905 // Ignore the surrounding square brackets
906 text_t meta = substr (beginbracket+1, endbracket);
907 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
908 ++commacount;
909 text.clear();
910 }
911
912 // Decision is a piece of text (probably a macro like _cgiargmode_).
913 else {
914
915 // hunt for any metadata in string, which might be uses in
916 // to test a condition, e.g. [Format] eq 'PDF'
917 format_t* dummyformat = new format_t();
918 // update which metadata fields needed
919 // (not interested in updatng formatlistptr)
920 parse_string (text, dummyformat, metadata, getParents);
921 delete dummyformat;
922
923 formatlistptr->decision.command = dText;
924 formatlistptr->decision.text = text;
925 ++commacount;
926 text.clear();
927 }
928 }
929
930 // Read the "then" and "else" components of the {If} statement.
931 else {
932 format_t** nextlistptr = NULL;
933 if (commacount == 1) {
934 nextlistptr = &formatlistptr->ifptr;
935 } else if (commacount == 2 ) {
936 nextlistptr = &formatlistptr->elseptr;
937 } else {
938 return false;
939 }
940
941 if (!text.empty()) {
942 if (*nextlistptr == NULL) {
943 *nextlistptr = new format_t();
944 } else {
945
946 // skip to the end of any format_t statements already added
947 while ((*nextlistptr)->nextptr != NULL)
948 {
949 nextlistptr = &(*nextlistptr)->nextptr;
950 }
951
952 (*nextlistptr)->nextptr = new format_t();
953 nextlistptr = &(*nextlistptr)->nextptr;
954 }
955
956 if (!parse_string (text, *nextlistptr, metadata, getParents))
957 {
958 return false;
959 }
960 text.clear();
961 }
962
963 if (*here == '{')
964 {
965 if (*nextlistptr == NULL) {
966 *nextlistptr = new format_t();
967 } else {
968 // skip to the end of any format_t statements already added
969 while ((*nextlistptr)->nextptr != NULL)
970 {
971 nextlistptr = &(*nextlistptr)->nextptr;
972 }
973
974 (*nextlistptr)->nextptr = new format_t();
975 nextlistptr = &(*nextlistptr)->nextptr;
976 }
977
978 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
979 {
980 return false;
981 }
982 }
983 else
984 {
985 if (*here == '}') break;
986 ++commacount;
987 }
988 }
989 }
990
991 } else text.push_back(*here);
992
993 if (here != end) ++here;
994 }
995
996 return true;
997}
998
999
1000static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1001 const text_t metaname, int metapos=-1)
1002{
1003
1004 text_t tag_type = metadata_wrap_type;
1005 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1006
1007 text_t wrapped_metatext = "<" + tag_type + " ";
1008 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1009
1010 wrapped_metatext += "docoid=\"" + OID + "\" ";
1011 wrapped_metatext += "metaname=\"" + metaname + "\"";
1012
1013 if (metapos>=0) {
1014 text_t metapos_str = metapos;
1015 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1016 }
1017
1018 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1019
1020 return wrapped_metatext;
1021}
1022
1023
1024
1025bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1026 text_tset &metadata, bool &getParents) {
1027
1028 formatlistptr->clear();
1029 getParents = false;
1030
1031 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1032}
1033
1034// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1035// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1036
1037static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1038{
1039 text_t no_ns_metaname = remove_namespace(meta.metaname);
1040 text_t formatted_metatext;
1041 bool first = true;
1042
1043 const int start_i=0;
1044 const int end_i = metainfo.values.size()-1;
1045
1046 if (position == -1) { // all
1047 for (int i=start_i; i<=end_i; ++i) {
1048 if (!first) formatted_metatext += meta.siblingoptions;
1049
1050 text_t fresh_metatext;
1051
1052 if (meta.metacommand & mSpecial) {
1053 // special formatting
1054 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1055 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1056 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1057 }
1058 else fresh_metatext = metainfo.values[i];
1059
1060 // New "truncate" special formatting option
1061 if (meta.metacommand & mTruncate)
1062 {
1063 int truncate_length = meta.siblingoptions.getint();
1064 text_t truncated_value = fresh_metatext;
1065 if (truncated_value.size() > truncate_length)
1066 {
1067 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1068 }
1069 fresh_metatext = truncated_value;
1070 }
1071 // New "xmlsafe" special formatting option
1072 if (meta.metacommand & mXMLSafe)
1073 {
1074 // Make it XML-safe
1075 text_t text_xml_safe = "";
1076 text_t::const_iterator text_iterator = fresh_metatext.begin();
1077 while (text_iterator != fresh_metatext.end())
1078 {
1079 if (*text_iterator == '&') text_xml_safe += "&amp;";
1080 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1081 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1082 else text_xml_safe.push_back(*text_iterator);
1083 text_iterator++;
1084 }
1085 fresh_metatext = text_xml_safe;
1086 }
1087 // New "htmlsafe" special formatting option
1088 if (meta.metacommand & mHTMLSafe)
1089 {
1090 // Make it HTML-safe
1091 text_t text_html_safe = "";
1092 text_t::const_iterator text_iterator = fresh_metatext.begin();
1093 while (text_iterator != fresh_metatext.end())
1094 {
1095 if (*text_iterator == '&') text_html_safe += "&amp;";
1096 else if (*text_iterator == '<') text_html_safe += "&lt;";
1097 else if (*text_iterator == '>') text_html_safe += "&gt;";
1098 else if (*text_iterator == '"') text_html_safe += "&quot;";
1099 else text_html_safe.push_back(*text_iterator);
1100 text_iterator++;
1101 }
1102 fresh_metatext = text_html_safe;
1103 }
1104 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1105 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1106 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1107 {
1108 // Make it macro-safe
1109 text_t text_dm_safe = dm_safe(fresh_metatext);
1110 fresh_metatext = text_dm_safe;
1111 }
1112
1113 if (metadata_wrap) {
1114 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1115 }
1116 formatted_metatext += fresh_metatext;
1117
1118 first = false;
1119
1120 }
1121 } else {
1122 if (position == -2) { // end
1123 position = end_i;
1124 } else if (position < start_i || position > end_i) {
1125 return "";
1126 }
1127
1128 text_t fresh_metatext;
1129 if (meta.metacommand & mSpecial) {
1130
1131 // special formatting
1132 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1133 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1134 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1135 }
1136 else fresh_metatext = metainfo.values[position];
1137
1138 // New "truncate" special formatting option
1139 if (meta.metacommand & mTruncate)
1140 {
1141 int truncate_length = meta.siblingoptions.getint();
1142 text_t truncated_value = fresh_metatext;
1143 if (truncated_value.size() > truncate_length)
1144 {
1145 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1146 }
1147 fresh_metatext = truncated_value;
1148 }
1149 // New "xmlsafe" special formatting option
1150 if (meta.metacommand & mXMLSafe)
1151 {
1152 // Make it XML-safe
1153 text_t text_xml_safe = "";
1154 text_t::const_iterator text_iterator = fresh_metatext.begin();
1155 while (text_iterator != fresh_metatext.end())
1156 {
1157 if (*text_iterator == '&') text_xml_safe += "&amp;";
1158 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1159 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1160 else text_xml_safe.push_back(*text_iterator);
1161 text_iterator++;
1162 }
1163 fresh_metatext = text_xml_safe;
1164 }
1165 // New "htmlsafe" special formatting option
1166 if (meta.metacommand & mHTMLSafe)
1167 {
1168 // Make it HTML-safe
1169 text_t text_html_safe = "";
1170 text_t::const_iterator text_iterator = fresh_metatext.begin();
1171 while (text_iterator != fresh_metatext.end())
1172 {
1173 if (*text_iterator == '&') text_html_safe += "&amp;";
1174 else if (*text_iterator == '<') text_html_safe += "&lt;";
1175 else if (*text_iterator == '>') text_html_safe += "&gt;";
1176 else if (*text_iterator == '"') text_html_safe += "&quot;";
1177 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1178 else if (*text_iterator == ',') text_html_safe += "&#44;";
1179 else text_html_safe.push_back(*text_iterator);
1180 text_iterator++;
1181 }
1182 fresh_metatext = text_html_safe;
1183 }
1184 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1185 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1186 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1187 {
1188 // Make it macro-safe
1189 text_t text_dm_safe = dm_safe(fresh_metatext);
1190 fresh_metatext = text_dm_safe;
1191 }
1192
1193 if (metadata_wrap) {
1194 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1195 }
1196
1197 formatted_metatext += fresh_metatext;
1198 }
1199
1200 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1201 else return formatted_metatext;
1202}
1203
1204static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1205{
1206
1207 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1208
1209 switch (meta.mqualifier.parent) {
1210 case pNone:
1211 return "Nothing!!";
1212 break;
1213
1214 case pImmediate:
1215 if (parent != NULL) {
1216 text_t parent_oid = get_parent(docinfo.OID);
1217 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1218 }
1219 break;
1220
1221 case pTop:
1222 if (parent != NULL) {
1223 text_t parent_oid = get_parent(docinfo.OID);
1224
1225 while (parent->parent != NULL) {
1226 parent = parent->parent;
1227 parent_oid = get_parent(parent_oid);
1228 }
1229 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1230 }
1231 break;
1232
1233 case pAll:
1234 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1235 if (parent != NULL) {
1236 text_t parent_oid = get_parent(docinfo.OID);
1237
1238 text_tarray tmparray;
1239 while (parent != NULL) {
1240 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1241 parent = parent->parent;
1242 parent_oid = get_parent(parent_oid);
1243
1244 }
1245 // now join them up - use teh parent separator
1246 bool first = true;
1247 text_t tmp;
1248 text_tarray::reverse_iterator here = tmparray.rbegin();
1249 text_tarray::reverse_iterator end = tmparray.rend();
1250 while (here != end) {
1251 if (!first) tmp += meta.parentoptions;
1252 tmp += *here;
1253 first = false;
1254 ++here;
1255 }
1256 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1257 else return tmp;
1258 }
1259 }
1260 return "";
1261
1262}
1263
1264static text_t get_child_meta (const text_t& collection,
1265 recptproto* collectproto,
1266 ResultDocInfo_t &docinfo, displayclass &disp,
1267 const metadata_t &meta, text_tmap &options,
1268 ostream& logout, int siblings_values)
1269{
1270 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1271
1272 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1273 const text_t& child_metaname = meta.metaname;
1274 const text_t& child_field = meta.childoptions;
1275 text_tset child_metadata;
1276 child_metadata.insert(child_metaname);
1277
1278 FilterResponse_t child_response;
1279 if (meta.mqualifier.child == cNum) {
1280 // just one child
1281 //get the information associated with the metadata for child doc
1282 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1283 child_metadata, false, collectproto, child_response,
1284 logout)) return ""; // invalid child number
1285
1286 if (child_response.docInfo.empty()) return false; // no info for the child
1287
1288 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1289 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1290
1291 text_t child_metavalue
1292 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1293 return expand_metadata(child_metavalue,collection,collectproto,
1294 child_docinfo,disp,options,logout);
1295 }
1296
1297
1298 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1299
1300
1301 if (!pre_tree_trav.empty()) {
1302 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1303 FilterResponse_t trav_response;
1304
1305 text_tset trav_metadata;
1306 trav_metadata.insert("contains");
1307
1308 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1309 trav_metadata, false, collectproto, trav_response,
1310 logout)) return ""; // invalid pre_tree_trav
1311
1312 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1313
1314 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1315
1316 // use this for rest of routine
1317 docinfo = trav_docinfo;
1318 }
1319
1320 // we need to get all children
1321 text_t result = "";
1322 text_tarray children;
1323 text_t contains = docinfo.metadata["contains"].values[0];
1324 splitchar (contains.begin(), contains.end(), ';', children);
1325 text_tarray::const_iterator here = children.begin();
1326 text_tarray::const_iterator end = children.end();
1327 bool first = true;
1328 while (here !=end) {
1329 text_t oid = *here;
1330 here++;
1331 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1332
1333 //get the information associated with the metadata for child doc
1334 if (!get_info (oid, collection, "", child_metadata,
1335 false, collectproto, child_response, logout) ||
1336 child_response.docInfo.empty()) {
1337 first = false;
1338 continue;
1339 }
1340
1341
1342 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1343 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1344
1345 text_t child_metavalue
1346 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1347
1348
1349 if (!first) result += child_field;
1350 first = false;
1351 // need to do this here cos otherwise we are in the wrong document
1352 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1353 child_docinfo,disp,options,logout);
1354
1355 result += em;
1356 }
1357 return result;
1358
1359}
1360
1361static text_t get_meta (const text_t& collection, recptproto* collectproto,
1362 ResultDocInfo_t &docinfo, displayclass &disp,
1363 const metadata_t &meta, text_tmap &options,
1364 ostream& logout) {
1365
1366 // make sure we have the requested metadata
1367 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1368 if (it == docinfo.metadata.end()) return "";
1369
1370 int siblings_values = 0; // default is no siblings, just the first metadata available
1371 if (meta.metacommand & mSibling) {
1372 if (meta.mqualifier.sibling == sAll) {
1373 siblings_values = -1; //all
1374 } else if (meta.mqualifier.sibling == sNum) {
1375 siblings_values = meta.siblingoptions.getint();
1376 }
1377 }
1378 if (meta.metacommand & mParent) {
1379 return get_parent_meta(docinfo,meta,siblings_values);
1380 }
1381
1382 else if (meta.metacommand & mChild) {
1383 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1384 options,logout, siblings_values);
1385 }
1386 else if (meta.metacommand & mSibling) { // only siblings
1387 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1388 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1389 }
1390 else {
1391
1392 // straightforward metadata request (nothing fancy)
1393
1394 text_t classifier_metaname = docinfo.classifier_metadata_type;
1395 int metaname_index
1396 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1397 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1398 }
1399
1400 return "";
1401}
1402
1403static text_t get_or (const text_t& collection, recptproto* collectproto,
1404 ResultDocInfo_t &docinfo, displayclass &disp,
1405 format_t *orptr, text_tmap &options,
1406 ostream& logout) {
1407
1408 while (orptr != NULL) {
1409
1410 if (metadata_wrap) {
1411 // need to be a bit more careful about this
1412 // => test for it *without* spanwrap or divwrap, and if defined, then
1413 // got back and generate it again, this time with spanwrap/divwrap on
1414
1415 metadata_wrap = false;
1416 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1417 options, logout);
1418 metadata_wrap = true;
1419 if (!test_tmp.empty()) {
1420
1421 return format_string (collection,collectproto,docinfo, disp, orptr,
1422 options, logout);
1423 }
1424 }
1425 else {
1426 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1427 options, logout);
1428 if (!tmp.empty()) return tmp;
1429 }
1430
1431 orptr = orptr->nextptr;
1432 }
1433 return "";
1434}
1435
// True when c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1441
1442static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1443{
1444 int pos = start_pos;
1445 while (pos<outstring.size()) {
1446 if (!char_is_whitespace(outstring[pos])) {
1447 break;
1448 }
1449 ++pos;
1450 }
1451
1452 return pos;
1453}
1454
1455static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1456{
1457 int pos = start_pos;
1458 while (pos>=0) {
1459 if (!char_is_whitespace(outstring[pos])) {
1460 break;
1461 }
1462 --pos;
1463 }
1464
1465 return pos;
1466}
1467
1468static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1469{
1470 int pos = start_pos;
1471 while (pos>=0) {
1472 if (char_is_whitespace(outstring[pos])) {
1473 break;
1474 }
1475 --pos;
1476 }
1477
1478 return pos;
1479}
1480
1481
1482static int rscan_for(const text_t& outstring, const int start_pos,
1483 const char find_c)
1484{
1485 int pos = start_pos;
1486 while (pos>=0) {
1487 char c = outstring[pos];
1488 if (outstring[pos] == find_c) {
1489 break;
1490 }
1491 --pos;
1492 }
1493
1494 return pos;
1495}
1496
1497text_t extract_substr(const text_t& outstring, const int start_pos,
1498 const int end_pos)
1499{
1500 text_t extracted_str;
1501 extracted_str.clear();
1502
1503 for (int pos=start_pos; pos<=end_pos; ++pos) {
1504 extracted_str.push_back(outstring[pos]);
1505 }
1506
1507 return extracted_str;
1508}
1509
1510
1511static text_t expand_potential_metadata(const text_t& collection,
1512 recptproto* collectproto,
1513 ResultDocInfo_t &docinfo,
1514 displayclass &disp,
1515 const text_t& intext,
1516 text_tmap &options,
1517 ostream& logout)
1518{
1519 text_t outtext;
1520
1521 // decide if dealing with metadata or text
1522
1523 text_t::const_iterator beginbracket = intext.begin();
1524 text_t::const_iterator endbracket = (intext.end() - 1);
1525
1526 // Decision is based on a metadata element
1527 if ((*beginbracket == '[') && (*endbracket == ']')) {
1528 // Ignore the surrounding square brackets
1529 text_t meta_text = substr (beginbracket+1, endbracket);
1530
1531 if (meta_text == "Text") {
1532 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1533 }
1534 else {
1535
1536 text_tset metadata;
1537 bool getParents =false;
1538 metadata_t meta;
1539
1540 parse_meta (meta_text, meta, metadata, getParents);
1541 outtext
1542 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1543 }
1544
1545 }
1546 else {
1547 outtext = intext;
1548 }
1549
1550 return outtext;
1551}
1552
1553
1554
1555
1556static bool uses_expression(const text_t& collection, recptproto* collectproto,
1557 ResultDocInfo_t &docinfo,
1558 displayclass &disp,
1559 const text_t& outstring, text_t& lhs_expr,
1560 text_t& op_expr, text_t& rhs_expr,
1561 text_tmap &options,
1562 ostream& logout)
1563{
1564 // Note: the string may not be of the form: str1 op str2, however
1565 // to deterine this we have to process it on the assumption it is,
1566 // and if at any point an 'erroneous' value is encountered, return
1567 // false and let something else have a go at evaluating it
1568
1569 // Starting at the end of the string and working backwards ..
1570
1571 const int outstring_len = outstring.size();
1572
1573 // skip over white space
1574 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1575
1576 if (rhs_end<=0) {
1577 // no meaningful text or (rhs_end==0) no room for operator
1578 return false;
1579 }
1580
1581 // check for ' or " and then scan over token
1582 const char potential_quote = outstring[rhs_end];
1583 int rhs_start=rhs_end;
1584 bool quoted = false;
1585
1586 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1587 --rhs_end;
1588 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1589 quoted = true;
1590 }
1591 else {
1592 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1593 }
1594
1595 if ((rhs_end-rhs_start)<0) {
1596 // no meaningful rhs expression
1597 return false;
1598 }
1599
1600 // form rhs_expr
1601 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1602
1603 // skip over white space
1604 const int to_whitespace = (quoted) ? 2 : 1;
1605
1606 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1607 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1608
1609 if ((op_end<0) && (op_start<0)) {
1610 // no meaningful expression operator
1611 return false;
1612 }
1613
1614 if (op_end-op_start<0) {
1615 // no meaningful expression operator
1616 return false;
1617 }
1618
1619 op_expr = extract_substr(outstring,op_start,op_end);
1620
1621
1622 // check for operator
1623 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1624 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1625
1626 // not a valid operator
1627 return false;
1628 }
1629
1630 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1631 if (lhs_end<0) {
1632 // no meaningful lhs expression
1633 return false;
1634 }
1635
1636 int lhs_start = scan_over_whitespace(outstring,0);
1637
1638 // form lhs_expr from remainder of string
1639 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1640
1641 // Now we know we have a valid expression, look up any
1642 // metadata terms
1643
1644 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1645 disp,rhs_expr,options,logout);
1646 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1647 disp,lhs_expr,options,logout);
1648
1649 return true;
1650}
1651
1652static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1653 const text_t& rhs_expr, ostream& logout)
1654{
1655 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1656 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1657 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1658 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1659 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1660 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1661 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1662 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1663 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1664 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1665 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1666 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1667 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1668 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1669 else {
1670 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1671 }
1672
1673 return false;
1674}
1675
1676
1677static text_t get_if (const text_t& collection, recptproto* collectproto,
1678 ResultDocInfo_t &docinfo, displayclass &disp,
1679 const decision_t &decision,
1680 format_t *ifptr, format_t *elseptr,
1681 text_tmap &options, ostream& logout)
1682{
1683 // If the decision component is a metadata element, then evaluate it
1684 // to see whether we output the "then" or the "else" clause
1685 if (decision.command == dMeta) {
1686
1687 bool store_metadata_wrap = metadata_wrap;
1688 metadata_wrap = 0;
1689
1690 // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1691 bool metadata_exists
1692 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1693 logout) != "");
1694
1695 metadata_wrap = store_metadata_wrap;
1696
1697 if (metadata_exists) {
1698 if (ifptr != NULL)
1699 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1700 options, logout);
1701 }
1702 else {
1703 if (elseptr != NULL)
1704 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1705 options, logout);
1706 }
1707 }
1708
1709 // If the decision component is text, then evaluate it (it is probably a
1710 // macro like _cgiargmode_) to decide what to output.
1711 else if (decision.command == dText) {
1712
1713 text_t outstring;
1714 disp.expandstring (decision.text, outstring);
1715
1716 // Check for if expression in form: str1 op str2
1717 // (such as [x] eq "y")
1718 text_t lhs_expr, op_expr, rhs_expr;
1719 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1720 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1721 if (ifptr != NULL) {
1722 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1723 options, logout);
1724 }
1725 else {
1726 return "";
1727 }
1728 } else {
1729 if (elseptr != NULL) {
1730 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1731 options, logout);
1732 }
1733 else {
1734 return "";
1735 }
1736 }
1737 }
1738
1739
1740 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1741 // a cgi argument macro that has not been set, it evaluates to itself.
1742 // Therefore, were have to say that a piece of text evalautes true if
1743 // it is non-empty and if it is a cgi argument evaulating to itself.
1744
1745 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1746 if (ifptr != NULL)
1747 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1748 options, logout);
1749 } else {
1750 if (elseptr != NULL)
1751 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1752 options, logout);
1753 }
1754 }
1755
1756 return "";
1757}
1758
1759bool includes_metadata(const text_t& text)
1760{
1761 text_t::const_iterator here = text.begin();
1762 text_t::const_iterator end = text.end();
1763
1764 char startbracket = '[';
1765 char endbracket = ']';
1766
1767 char bracket = startbracket;
1768 while (here != end) {
1769 if (*here == bracket) {
1770 if(bracket == startbracket) {
1771 // seen a [, next look for a ] to confirm it's metadata
1772 bracket = endbracket;
1773 } else if(bracket == endbracket) {
1774 // found [ ... ] in text, so we think it includes metadata
1775 return true;
1776 }
1777 }
1778 ++here;
1779 }
1780
1781 return false;
1782}
1783
1784static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1785 recptproto* collectproto,
1786 ResultDocInfo_t &docinfo,
1787 displayclass &disp, text_tmap &options,
1788 ostream &logout) {
1789
1790 if (includes_metadata(metavalue)) {
1791
1792 // text has embedded metadata in it => expand it
1793 FilterRequest_t request;
1794 FilterResponse_t response;
1795
1796 request.getParents = false;
1797
1798 format_t *expanded_formatlistptr = new format_t();
1799 parse_formatstring (metavalue, expanded_formatlistptr,
1800 request.fields, request.getParents);
1801
1802 // retrieve metadata
1803 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1804 collectproto, response, logout);
1805
1806 if (!response.docInfo.empty()) {
1807
1808 text_t expanded_metavalue
1809 = get_formatted_string(collection, collectproto,
1810 response.docInfo[0], disp, expanded_formatlistptr,
1811 options, logout);
1812
1813 return expanded_metavalue;
1814 }
1815 else {
1816 return metavalue;
1817 }
1818 }
1819 else {
1820
1821 return metavalue;
1822 }
1823}
1824
1825text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1826 displayclass &disp,
1827 text_t meta_name, ostream& logout) {
1828
1829 ColInfoResponse_t collectinfo;
1830 comerror_t err;
1831 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1832 text_t meta_value = "";
1833 text_t lang;
1834 disp.expandstring("_cgiargl_",lang);
1835 if (lang.empty()) {
1836 lang = "en";
1837 }
1838
1839 if (err == noError) {
1840 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1841 }
1842 return meta_value;
1843
1844
1845}
1846text_t format_string (const text_t& collection, recptproto* collectproto,
1847 ResultDocInfo_t &docinfo, displayclass &disp,
1848 format_t *formatlistptr, text_tmap &options,
1849 ostream& logout) {
1850
1851 if (formatlistptr == NULL) return "";
1852
1853 switch (formatlistptr->command) {
1854 case comOID:
1855 return docinfo.OID;
1856 case comTopOID:
1857 {
1858 text_t top_id;
1859 get_top(docinfo.OID, top_id);
1860 return top_id;
1861 }
1862 case comRank:
1863 return text_t(docinfo.ranking);
1864 case comText:
1865 return formatlistptr->text;
1866 case comLink:
1867 return options["link"];
1868 case comEndLink:
1869 {
1870 if (options["link"].empty()) return "";
1871 else return "</a>";
1872 }
1873 case comHref:
1874 return get_href(options["link"]);
1875 case comIcon:
1876 return options["icon"];
1877 case comNum:
1878 return docinfo.result_num;
1879 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1880 return get_related_docs(collection, collectproto, docinfo, logout);
1881
1882 case comSummary:
1883 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1884 case comAssocLink:
1885 {
1886 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1887 if (!link_filename.empty()) {
1888 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1889 if (formatlistptr->text == "href") {
1890 return href;
1891 }
1892 return "<a href=\""+ href + "\">";
1893 }
1894 return "";
1895 }
1896 case comEndAssocLink:
1897 {
1898 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1899 if (!link_filename.empty()) {
1900 return "</a>";
1901 }
1902 return "";
1903 }
1904 case comMeta:
1905 {
1906 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1907 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1908 }
1909
1910 case comDoc:
1911 return format_text(collection, collectproto, docinfo, disp, options, logout);
1912
1913 case comImage:
1914 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1915 case comTOC:
1916 return options["DocTOC"];
1917 case comDocumentButtonDetach:
1918 return options["DocumentButtonDetach"];
1919 case comDocumentButtonHighlight:
1920 return options["DocumentButtonHighlight"];
1921 case comDocumentButtonExpandContents:
1922 return options["DocumentButtonExpandContents"];
1923 case comDocumentButtonExpandText:
1924 return options["DocumentButtonExpandText"];
1925 case comHighlight:
1926 if (options["highlight"] == "1") return "<b>";
1927 break;
1928 case comEndHighlight:
1929 if (options["highlight"] == "1") return "</b>";
1930 break;
1931 case comMetadataSpanWrap:
1932 metadata_wrap=true; metadata_wrap_type="span"; return "";
1933 break;
1934 case comEndMetadataSpanWrap:
1935 metadata_wrap=false; metadata_wrap_type=""; return "";
1936 break;
1937 case comMetadataDivWrap:
1938 metadata_wrap=true; metadata_wrap_type="div"; return "";
1939 break;
1940 case comEndMetadataDivWrap:
1941 metadata_wrap=false; metadata_wrap_type=""; return "";
1942 break;
1943 case comIf:
1944 return get_if (collection, collectproto, docinfo, disp,
1945 formatlistptr->decision, formatlistptr->ifptr,
1946 formatlistptr->elseptr, options, logout);
1947 case comOr:
1948 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1949 options, logout);
1950 case comDocTermsFreqTotal:
1951 return docinfo.num_terms_matched;
1952 case comCollection:
1953 if (formatlistptr->meta.metaname == g_EmptyText) {
1954 return collection;
1955 }
1956 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1957
1958 }
1959 return "";
1960}
1961
1962text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1963 ResultDocInfo_t &docinfo, displayclass &disp,
1964 format_t *formatlistptr, text_tmap &options,
1965 ostream& logout) {
1966
1967 text_t ft;
1968 while (formatlistptr != NULL)
1969 {
1970 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1971 options, logout);
1972 formatlistptr = formatlistptr->nextptr;
1973 }
1974
1975 return ft;
1976}
1977
1978
1979// we have only preloaded the text in DocumentAction. But you may want
1980// to get the text in query, so copy what we have done with
1981// format_summary and get the text here. Probably is quite expensive?
1982text_t format_text (const text_t& collection, recptproto* collectproto,
1983 ResultDocInfo_t &docinfo, displayclass &disp,
1984 text_tmap &options, ostream& logout)
1985{
1986 text_t text;
1987
1988 if (!options["text"].empty()) {
1989 text = options["text"];
1990 }
1991 else {
1992 // get document text here
1993 DocumentRequest_t docrequest;
1994 DocumentResponse_t docresponse;
1995 comerror_t err;
1996 docrequest.OID = docinfo.OID;
1997 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1998 text = docresponse.doc;
1999 }
2000
2001 if (metadata_wrap) {
2002 text = wrap_metatext(text,docinfo.OID,"Text");
2003 }
2004
2005 return text;
2006}
2007
2008/* FUNCTION NAME: format_summary
2009 * DESC: this is invoked when a [Summary] special metadata is processed.
2010 * RETURNS: a query-biased summary for the document */
2011
2012text_t format_summary (const text_t& collection, recptproto* collectproto,
2013 ResultDocInfo_t &docinfo, displayclass &disp,
2014 text_tmap &options, ostream& logout) {
2015
2016 // GRB: added code here to ensure that the cstr (and other collections)
2017 // uses the document metadata item Summary, rather than compressing
2018 // the text of the document, processed via the methods in
2019 // summarise.cpp
2020
2021 text_t summary;
2022
2023 if (docinfo.metadata.count("Summary") > 0 &&
2024 docinfo.metadata["Summary"].values.size() > 0) {
2025 summary = docinfo.metadata["Summary"].values[0];
2026 }
2027 else {
2028
2029 text_t textToSummarise, query;
2030
2031 if(options["text"].empty()) { // get document text
2032 DocumentRequest_t docrequest;
2033 DocumentResponse_t docresponse;
2034 comerror_t err;
2035 docrequest.OID = docinfo.OID;
2036 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2037 textToSummarise = docresponse.doc;
2038 }
2039 else {
2040 // in practice, this would not happen, because text is only
2041 // loaded with the [Text] command
2042 textToSummarise = options["text"];
2043 }
2044
2045 disp.expandstring("_cgiargq_",query);
2046 summary = summarise(textToSummarise,query,80);
2047 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2048 }
2049
2050 summary.replace("'","&#039;");
2051 summary.replace("\n","&#013;");
2052
2053 if (metadata_wrap) {
2054 summary = wrap_metatext(summary,docinfo.OID,"Summary");
2055 }
2056
2057 return summary;
2058}
Note: See TracBrowser for help on using the repository browser.