source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 21808

Last change on this file since 21808 was 21808, checked in by mdewsnip, 14 years ago

Added format_t destructor, to fix one memory leak.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.7 KB
RevLine 
[347]1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[347]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[347]24 *********************************************************************/
25
26#include "formattools.h"
[462]27#include "cgiutils.h"
[15418]28#include "recptprototools.h"
[1443]29#include "OIDtools.h"
[2967]30#include "summarise.h"
[1443]31
[1257]32#include <assert.h>
[347]33
[19302]34static bool metadata_spanwrap = false;
[19298]35
[354]36// a few function prototypes
[5788]37
[1443]38static text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]39 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]40 format_t *formatlistptr, text_tmap &options,
41 ostream& logout);
[354]42
43static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]44 format_t *formatlistptr, text_tset &metadata, bool &getParents);
[354]45
[5788]46static text_t format_summary (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
[9852]49static text_t format_text (const text_t& collection, recptproto* collectproto,
50 ResultDocInfo_t &docinfo, displayclass &disp,
51 text_tmap &options, ostream& logout);
[2967]52
[9401]53static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
54 recptproto* collectproto, ResultDocInfo_t &docinfo,
55 displayclass &disp, text_tmap &options,
56 ostream &logout);
[2967]57
[9401]58
[347]59void metadata_t::clear() {
[410]60 metaname.clear();
[462]61 metacommand = mNone;
[9401]62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
[19046]65 pre_tree_traverse.clear();
[10415]66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
[749]69}
[347]70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
[1610]74 text.clear();
[749]75}
[347]76
[21808]77format_t::~format_t()
78{
79 if (nextptr != NULL) delete nextptr;
80 if (ifptr != NULL) delete ifptr;
81 if (elseptr != NULL) delete elseptr;
82 if (orptr != NULL) delete orptr;
83}
84
[347]85void format_t::clear() {
86 command = comText;
87 decision.clear();
88 text.clear();
89 meta.clear();
90 nextptr = NULL;
91 ifptr = NULL;
92 elseptr = NULL;
93 orptr = NULL;
[749]94}
[347]95
[442]96void formatinfo_t::clear() {
[1079]97 DocumentImages = false;
98 DocumentTitles = true;
99 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
100 DocumentContents = true;
[1941]101 DocumentArrowsBottom = true;
[5788]102 DocumentArrowsTop = false;
[13365]103 DocumentSearchResultLinks = false;
[442]104 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
[1496]105 // DocumentButtons.push_back ("Expand Text");
106 // DocumentButtons.push_back ("Expand Contents");
[442]107 DocumentButtons.push_back ("Detach");
108 DocumentButtons.push_back ("Highlight");
[1941]109 RelatedDocuments = "";
[11146]110 DocumentText = "[Text]";
[649]111 formatstrings.erase (formatstrings.begin(), formatstrings.end());
[868]112 DocumentUseHTML = false;
[5788]113 AllowExtendedOptions = false;
[442]114}
115
[749]116// simply checks to see if formatstring begins with a <td> tag
117bool is_table_content (const text_t &formatstring) {
118 text_t::const_iterator here = formatstring.begin();
119 text_t::const_iterator end = formatstring.end();
120
121 while (here != end) {
122 if (*here != ' ') {
[1257]123 if ((*here == '<') && ((here+3) < end)) {
[749]124 if ((*(here+1) == 't' || *(here+1) == 'T') &&
125 (*(here+2) == 'd' || *(here+2) == 'D') &&
126 (*(here+3) == '>' || *(here+3) == ' '))
127 return true;
128 } else return false;
129 }
[9620]130 ++here;
[749]131 }
132 return false;
133}
134
135bool is_table_content (const format_t *formatlistptr) {
136
137 if (formatlistptr == NULL) return false;
138
139 if (formatlistptr->command == comText)
140 return is_table_content (formatlistptr->text);
141
142 return false;
143}
144
[649]145// returns false if key isn't in formatstringmap
146bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
147 text_t &formatstring) {
[442]148
[649]149 formatstring.clear();
150 text_tmap::const_iterator it = formatstringmap.find(key);
151 if (it == formatstringmap.end()) return false;
152 formatstring = (*it).second;
153 return true;
154}
[749]155
[649]156// tries to find "key1key2" then "key1" then "key2"
157bool get_formatstring (const text_t &key1, const text_t &key2,
158 const text_tmap &formatstringmap,
159 text_t &formatstring) {
160
161 formatstring.clear();
162 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
163 if (it != formatstringmap.end()) {
164 formatstring = (*it).second;
165 return true;
166 }
167 it = formatstringmap.find(key1);
168 if (it != formatstringmap.end()) {
169 formatstring = (*it).second;
170 return true;
171 }
172 it = formatstringmap.find(key2);
173 if (it != formatstringmap.end()) {
174 formatstring = (*it).second;
175 return true;
176 }
177 return false;
178}
179
180
[6645]181text_t remove_namespace(const text_t &meta_name) {
182 text_t::const_iterator end = meta_name.end();
183 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
184 if (it != end) {
185 return substr(it+1, end);
186 }
[12567]187
[6645]188 return meta_name;
189
190}
[12567]191// returns a date of form _format:date_(year, month, day)
192// input is date of type yyyy-?mm-?dd
[410]193// at least the year must be present in date
[422]194text_t format_date (const text_t &date) {
[347]195
[410]196 if (date.size() < 4) return "";
[347]197
[410]198 text_t::const_iterator datebegin = date.begin();
[354]199
[410]200 text_t year = substr (datebegin, datebegin+4);
[12567]201 int chars_seen_so_far = 4;
[20756]202 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
[410]203
[12567]204 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
205 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
206
207 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]208 int imonth = month.getint();
[12567]209 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
210
211 chars_seen_so_far += 2;
[20756]212 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
213
[12567]214 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
215 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
[410]216
[12567]217 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]218 if (day[0] == '0') day = substr (day.begin()+1, day.end());
219 int iday = day.getint();
[12567]220 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
[410]221
[12567]222 return "_format:date_("+year+","+month+","+day+")";
[410]223}
224
[2001]225// converts an iso639 language code to its English equivalent
[12567]226// should we be checking that the macro exists??
[2001]227text_t iso639 (const text_t &langcode) {
[12567]228 if (langcode.empty()) return "";
229 return "_iso639:iso639"+langcode+"_";
[2001]230}
231
[12567]232
[2706]233text_t get_href (const text_t &link) {
234
235 text_t href;
236
237 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
238 text_t::const_iterator end = link.end();
[13117]239 if (here == end) return g_EmptyText;
[2706]240
[9620]241 ++here;
[2706]242 while (here != end) {
243 if (*here == '"') break;
244 href.push_back(*here);
[9620]245 ++here;
[2706]246 }
247
248 return href;
249}
250
[1941]251//this function gets the information associated with the relation
252//metadata for the document associated with 'docinfo'. This relation
253//metadata consists of a line of pairs containing 'collection, document OID'
254//(this is the OID of the document related to the current document, and
255//the collection the related document belongs to). For each of these pairs
256//the title metadata is obtained and then an html link between the title
257//of the related doc and the document's position (the document will be
258//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
259//(where collection is the related documents collection, and OID is the
260//related documents OID). A list of these html links are made for as many
261//related documents as there are. This list is then returned. If there are
262//no related documents available for the current document then the string
263//'.. no related documents .. ' is returned.
264text_t get_related_docs(const text_t& collection, recptproto* collectproto,
[9948]265 ResultDocInfo_t &docinfo, ostream& logout){
[1941]266
267 text_tset metadata;
268
269 //insert the metadata we wish to collect
[11324]270 metadata.insert("dc.Relation");
[1941]271 metadata.insert("Title");
272 metadata.insert("Subject"); //for emails, where title data doesn't apply
273
274 FilterResponse_t response;
275 text_t relation = ""; //string for displaying relation metadata
276 text_t relationTitle = ""; //the related documents Title (or subject)
[1963]277 text_t relationOID = ""; //the related documents OID
[1941]278
279 //get the information associated with the metadata for current doc
[7432]280 if (get_info (docinfo.OID, collection, "", metadata,
[1941]281 false, collectproto, response, logout)) {
282
283 //if the relation metadata exists, store for displaying
[11324]284 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
285 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
[1941]286
287 //split relation data into pairs of collectionname,ID number
288 text_tarray relationpairs;
289 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
290
291 text_tarray::const_iterator currDoc = relationpairs.begin();
292 text_tarray::const_iterator lastDoc = relationpairs.end();
293
294 //iterate through the pairs to split and display
295 while(currDoc != lastDoc){
296
297 //split pairs into collectionname and ID
298 text_tarray relationdata;
299 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
300
301 //get first element in the array (collection)
302 text_tarray::const_iterator doc_data = relationdata.begin();
303 text_t document_collection = *doc_data;
[9620]304 ++doc_data; //increment to get next item in array (oid)
[1941]305 text_t document_OID = *doc_data;
306
307 //create html link to related document
308 relation += "<a href=\"_httpdocument_&c=" + document_collection;
309 relation += "&cl=search&d=" + document_OID;
310
311 //get the information associated with the metadata for related doc
[7432]312 if (get_info (document_OID, document_collection, "", metadata,
[1941]313 false, collectproto, response, logout)) {
314
315 //if title metadata doesn't exist, collect subject metadata
316 //if that doesn't exist, just call it 'related document'
317 if (!response.docInfo[0].metadata["Title"].values[0].empty())
318 relationTitle = response.docInfo[0].metadata["Title"].values[0];
319 else if (!response.docInfo[0].metadata["Subject"].values.empty())
320 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
321 else relationTitle = "RELATED DOCUMENT";
322
323 }
324
325 //link the related document's title to its page
326 relation += "\">" + relationTitle + "</a>";
327 relation += " (" + document_collection + ")<br>";
328
[9620]329 ++currDoc;
[1941]330 }
331 }
332
333 }
334
335 if(relation.empty()) //no relation data for documnet
336 relation = ".. no related documents .. ";
337
338 return relation;
339}
340
341
342
[354]343static void get_parent_options (text_t &instring, metadata_t &metaoption) {
344
[1257]345 assert (instring.size() > 7);
346 if (instring.size() <= 7) return;
347
[354]348 text_t meta, com, op;
349 bool inbraces = false;
350 bool inquotes = false;
351 bool foundcolon = false;
352 text_t::const_iterator here = instring.begin()+6;
353 text_t::const_iterator end = instring.end();
354 while (here != end) {
[10415]355 if (foundcolon) meta.push_back (*here);
356 else if (*here == '(') inbraces = true;
[354]357 else if (*here == ')') inbraces = false;
358 else if (*here == '\'' && !inquotes) inquotes = true;
359 else if (*here == '\'' && inquotes) inquotes = false;
360 else if (*here == ':' && !inbraces) foundcolon = true;
361 else if (inquotes) op.push_back (*here);
362 else com.push_back (*here);
[9620]363 ++here;
[354]364 }
[9401]365
[354]366 instring = meta;
367 if (com.empty())
[9401]368 metaoption.mqualifier.parent = pImmediate;
[354]369 else if (com == "Top")
[9401]370 metaoption.mqualifier.parent = pTop;
[649]371 else if (com == "All") {
[9401]372 metaoption.mqualifier.parent = pAll;
[10415]373 metaoption.parentoptions = op;
[354]374 }
375}
376
[5787]377
378static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
379
380 assert (instring.size() > 8);
381 if (instring.size() <= 8) return;
382 text_t meta, com, op;
383 bool inbraces = false;
384 bool inquotes = false;
385 bool foundcolon = false;
386 text_t::const_iterator here = instring.begin()+7;
387 text_t::const_iterator end = instring.end();
388 while (here != end) {
[10415]389 if (foundcolon) meta.push_back (*here);
390 else if (*here == '(') inbraces = true;
[5787]391 else if (*here == ')') inbraces = false;
392 else if (*here == '\'' && !inquotes) inquotes = true;
393 else if (*here == '\'' && inquotes) inquotes = false;
[10415]394 else if (*here == ':' && !inbraces) foundcolon = true;
[5787]395 else if (inquotes) op.push_back (*here);
396 else com.push_back (*here);
[9620]397 ++here;
[5787]398 }
399
400 instring = meta;
[10415]401 metaoption.siblingoptions.clear();
[5787]402
403 if (com.empty()) {
[9401]404 metaoption.mqualifier.sibling = sAll;
[10415]405 metaoption.siblingoptions = " ";
[5787]406 }
[10415]407 else if (com == "first") {
408 metaoption.mqualifier.sibling = sNum;
409 metaoption.siblingoptions = "0";
410 }
411 else if (com == "last") {
412 metaoption.mqualifier.sibling = sNum;
413 metaoption.siblingoptions = "-2"; // == last
414 }
415 else if (com.getint()>0) {
416 metaoption.mqualifier.sibling = sNum;
417 int pos = com.getint()-1;
418 metaoption.siblingoptions +=pos;
419 }
[5787]420 else {
[9401]421 metaoption.mqualifier.sibling = sAll;
[10415]422 metaoption.siblingoptions = op;
[5787]423 }
424}
425
[9401]426static void get_child_options (text_t &instring, metadata_t &metaoption) {
[5788]427
[9401]428 assert (instring.size() > 6);
429 if (instring.size() <= 6) return;
430 text_t meta, com, op;
431 bool inbraces = false;
432 bool inquotes = false;
433 bool foundcolon = false;
434 text_t::const_iterator here = instring.begin()+5;
435 text_t::const_iterator end = instring.end();
436 while (here != end) {
[10415]437 if (foundcolon) meta.push_back (*here);
438 else if (*here == '(') inbraces = true;
[9401]439 else if (*here == ')') inbraces = false;
440 else if (*here == '\'' && !inquotes) inquotes = true;
441 else if (*here == '\'' && inquotes) inquotes = false;
442 else if (*here == ':' && !inbraces) foundcolon = true;
443 else if (inquotes) op.push_back (*here);
444 else com.push_back (*here);
[9620]445 ++here;
[9401]446 }
447
448 instring = meta;
449 if (com.empty()) {
450 metaoption.mqualifier.child = cAll;
[10415]451 metaoption.childoptions = " ";
[9401]452 }
453 else if (com == "first") {
454 metaoption.mqualifier.child = cNum;
[10415]455 metaoption.childoptions = ".fc";
[9401]456 }
457 else if (com == "last") {
458 metaoption.mqualifier.child = cNum;
[10415]459 metaoption.childoptions = ".lc";
[9401]460 }
461 else if (com.getint()>0) {
462 metaoption.mqualifier.child = cNum;
[10415]463 metaoption.childoptions = "."+com;
[9401]464 }
465 else {
466 metaoption.mqualifier.child = cAll;
[10415]467 metaoption.childoptions = op;
[9401]468 }
469}
470
471
[21752]472static void get_truncate_options (text_t &instring, metadata_t &metaoption)
473{
474 assert (instring.size() > ((text_t) "truncate").size());
475 if (instring.size() <= ((text_t) "truncate").size()) return;
476 text_t meta, com;
477 bool inbraces = false;
478 bool foundcolon = false;
479 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
480 text_t::const_iterator end = instring.end();
481 while (here != end) {
482 if (foundcolon) meta.push_back (*here);
483 else if (*here == '(') inbraces = true;
484 else if (*here == ')') inbraces = false;
485 else if (*here == ':' && !inbraces) foundcolon = true;
486 else com.push_back (*here);
487 ++here;
488 }
[9401]489
[21752]490 instring = meta;
491
492 if (!com.empty())
493 {
494 metaoption.siblingoptions = com;
495 }
496 else
497 {
498 // Default is 100 characters if not specified
499 metaoption.siblingoptions = "100";
500 }
501}
502
503
504
[649]505static void parse_meta (text_t &meta, metadata_t &metaoption,
506 text_tset &metadata, bool &getParents) {
[354]507
[21752]508 // Look for the various format statement modifiers
509 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
510 // is irrelevant because this is not stored in metaoption.metacommand anyway
511 bool keep_trying = true;
512 while (keep_trying)
513 {
514 keep_trying = false;
515
516 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
517 {
518 metaoption.metacommand |= mCgiSafe;
519 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
520 keep_trying = true;
521 }
522 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
523 {
524 metaoption.metacommand |= mSpecial;
525 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
526 keep_trying = true;
527 }
528
529 // New "truncate" special formatting option
530 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
531 {
532 metaoption.metacommand |= mTruncate;
533 get_truncate_options (meta, metaoption);
534 keep_trying = true;
535 }
536 // New "htmlsafe" special formatting option
537 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
538 {
539 metaoption.metacommand |= mHTMLSafe;
540 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
541 keep_trying = true;
542 }
543 // New "xmlsafe" special formatting option
544 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
545 {
546 metaoption.metacommand |= mXMLSafe;
547 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
548 keep_trying = true;
549 }
[649]550 }
551
[19046]552 bool had_parent_or_child = true;
553 bool prev_was_parent = false;
554 bool prev_was_child = false;
555
556 while (had_parent_or_child) {
557 if (meta.size() > 7
558 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
559
560 // clear out sibling and child (cmd and options)
561 metaoption.metacommand &= ~(mChild|mSibling);
562 metaoption.childoptions.clear();
563 metaoption.siblingoptions.clear();
564
565 getParents = true;
566 metaoption.metacommand |= mParent;
567 get_parent_options (meta, metaoption);
568
569 if (prev_was_parent) {
570 metaoption.pre_tree_traverse += ".pr";
571 }
572 else if (prev_was_child) {
573 metaoption.pre_tree_traverse += ".fc";
574 }
575
576 prev_was_parent = true;
577 prev_was_child = false;
578 }
579 else if (meta.size() > 6
580 && (substr (meta.begin(), meta.begin()+5) == "child")) {
581
582 // clear out sibling and parent (cmd and options)
583 metaoption.metacommand &= ~(mParent|mSibling);
584 metaoption.parentoptions.clear();
585 metaoption.siblingoptions.clear();
586
587 metaoption.metacommand |= mChild;
588 get_child_options (meta, metaoption);
589 metadata.insert("contains");
590
591 if (prev_was_parent) {
592 metaoption.pre_tree_traverse += ".pr";
593 }
594 else if (prev_was_child) {
595 metaoption.pre_tree_traverse += ".fc";
596 }
597
598 prev_was_child = true;
599 prev_was_parent = false;
600 }
601 else {
602 prev_was_child = false;
603 prev_was_parent = false;
604 had_parent_or_child = false;
605 }
606 }
607
608 // parent/child can have sibling tacked on end also
[10415]609 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
610 metaoption.metacommand |= mSibling;
611 get_sibling_options (meta, metaoption);
612 }
613
[7599]614 // check for ex. which may occur in format statements
615 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
616 meta = substr (meta.begin()+3, meta.end());
617 }
[649]618 metadata.insert (meta);
619 metaoption.metaname = meta;
[354]620}
621
[9948]622static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
623 if (meta == "collection") {
624 // no qualifiers
625 metaoption.metaname = g_EmptyText;
626 return;
627 }
628 meta = substr (meta.begin()+11, meta.end());
629 metaoption.metaname = meta;
630
631}
632
[649]633static void parse_meta (text_t &meta, format_t *formatlistptr,
634 text_tset &metadata, bool &getParents) {
[354]635
636 if (meta == "link")
637 formatlistptr->command = comLink;
638 else if (meta == "/link")
639 formatlistptr->command = comEndLink;
640
[21758]641 else if (meta == "srclink") {
642 formatlistptr->command = comAssocLink;
643 formatlistptr->meta.metaname = "srclink_file";
644 metadata.insert("srclink_file");
645 }
646 else if (meta == "srchref") {
647 formatlistptr->command = comAssocLink;
648 formatlistptr->text = "href";
649 formatlistptr->meta.metaname = "srclink_file";
650 metadata.insert("srclink_file");
651 }
652 else if (meta == "/srclink") {
653 formatlistptr->command = comEndAssocLink;
654 formatlistptr->meta.metaname = "srclink_file";
655 }
656 // and weblink etc
[2706]657 else if (meta == "href")
658 formatlistptr->command = comHref;
659
[354]660 else if (meta == "num")
661 formatlistptr->command = comNum;
662
[407]663 else if (meta == "icon")
664 formatlistptr->command = comIcon;
665
[442]666 else if (meta == "Text")
667 formatlistptr->command = comDoc;
[1941]668
669 else if (meta == "RelatedDocuments")
670 formatlistptr->command = comRel;
[442]671
[670]672 else if (meta == "highlight")
673 formatlistptr->command = comHighlight;
674
675 else if (meta == "/highlight")
676 formatlistptr->command = comEndHighlight;
677
[19302]678 else if (meta == "metadata-spanwrap")
679 formatlistptr->command = comMetadataSpanWrap;
680
681 else if (meta == "/metadata-spanwrap")
682 formatlistptr->command = comEndMetadataSpanWrap;
683
[2967]684 else if (meta == "Summary")
685 formatlistptr->command = comSummary;
686
[5788]687 else if (meta == "DocImage")
688 formatlistptr->command = comImage;
689
690 else if (meta == "DocTOC")
691 formatlistptr->command = comTOC;
692
693 else if (meta == "DocumentButtonDetach")
694 formatlistptr->command = comDocumentButtonDetach;
695
696 else if (meta == "DocumentButtonHighlight")
697 formatlistptr->command = comDocumentButtonHighlight;
698
699 else if (meta == "DocumentButtonExpandContents")
700 formatlistptr->command = comDocumentButtonExpandContents;
701
702 else if (meta == "DocumentButtonExpandText")
703 formatlistptr->command = comDocumentButtonExpandText;
[6020]704
705 else if (meta == "DocOID")
706 formatlistptr->command = comOID;
[13118]707 else if (meta == "DocTopOID")
708 formatlistptr->command = comTopOID;
[6710]709 else if (meta == "DocRank")
710 formatlistptr->command = comRank;
[16915]711 else if (meta == "DocTermsFreqTotal")
712 formatlistptr->command = comDocTermsFreqTotal;
[9948]713 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
714 formatlistptr->command = comCollection;
715 parse_coll_meta(meta, formatlistptr->meta);
716 }
[354]717 else {
718 formatlistptr->command = comMeta;
[649]719 parse_meta (meta, formatlistptr->meta, metadata, getParents);
[354]720 }
721}
722
[9948]723
[354]724static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
[649]725 text_tset &metadata, bool &getParents) {
[354]726
[347]727 text_t text;
728 text_t::const_iterator here = formatstring.begin();
729 text_t::const_iterator end = formatstring.end();
730
731 while (here != end) {
732
[1257]733 if (*here == '\\') {
[9620]734 ++here;
[1257]735 if (here != end) text.push_back (*here);
[347]736
[1257]737 } else if (*here == '{') {
[347]738 if (!text.empty()) {
739 formatlistptr->command = comText;
740 formatlistptr->text = text;
741 formatlistptr->nextptr = new format_t();
742 formatlistptr = formatlistptr->nextptr;
743
744 text.clear();
745 }
[649]746 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
[1443]747
[347]748 formatlistptr->nextptr = new format_t();
749 formatlistptr = formatlistptr->nextptr;
750 if (here == end) break;
751 }
752 } else if (*here == '[') {
753 if (!text.empty()) {
754 formatlistptr->command = comText;
755 formatlistptr->text = text;
756 formatlistptr->nextptr = new format_t();
757 formatlistptr = formatlistptr->nextptr;
758
759 text.clear();
760 }
761 text_t meta;
[9620]762 ++here;
[347]763 while (*here != ']') {
764 if (here == end) return false;
765 meta.push_back (*here);
[9620]766 ++here;
[347]767 }
[649]768 parse_meta (meta, formatlistptr, metadata, getParents);
769 formatlistptr->nextptr = new format_t();
770 formatlistptr = formatlistptr->nextptr;
[347]771
772 } else
773 text.push_back (*here);
774
[9620]775 if (here != end) ++here;
[347]776 }
777 if (!text.empty()) {
778 formatlistptr->command = comText;
779 formatlistptr->text = text;
780 formatlistptr->nextptr = new format_t();
781 formatlistptr = formatlistptr->nextptr;
782
783 }
784 return true;
785}
786
787
[354]788static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]789 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
[354]790
[347]791 text_t::const_iterator it = findchar (here, end, '}');
792 if (it == end) return false;
793
794 text_t com = substr (here, it);
795 here = findchar (it, end, '{');
796 if (here == end) return false;
[9620]797 else ++here;
[347]798
[7266]799 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
800 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
[347]801 else return false;
802
803 int commacount = 0;
804 text_t text;
805 while (here != end) {
[636]806
807 if (*here == '\\') {
[9745]808 ++here;
[636]809 if (here != end) text.push_back(*here);
810
[1443]811 }
812
813 else if (*here == ',' || *here == '}' || *here == '{') {
[347]814
815 if (formatlistptr->command == comOr) {
816 // the {Or}{this, or this, or this, or this} statement
817 format_t *or_ptr;
818
819 // find the next unused orptr
820 if (formatlistptr->orptr == NULL) {
821 formatlistptr->orptr = new format_t();
822 or_ptr = formatlistptr->orptr;
823 } else {
824 or_ptr = formatlistptr->orptr;
825 while (or_ptr->nextptr != NULL)
826 or_ptr = or_ptr->nextptr;
827 or_ptr->nextptr = new format_t();
828 or_ptr = or_ptr->nextptr;
829 }
830
[1443]831 if (!text.empty())
832 {
833 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
834 }
[347]835
[1443]836 if (*here == '{')
837 {
838 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
839 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
840 // The latter can always be re-written:
841 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
842
843 if (!text.empty()) // already used up allocated format_t
844 {
845 // => allocate new one for detected action
846 or_ptr->nextptr = new format_t();
847 or_ptr = or_ptr->nextptr;
848 }
849 if (!parse_action(++here, end, or_ptr, metadata, getParents))
850 {
851 return false;
852 }
853 }
854 else
855 {
856 if (*here == '}') break;
857 }
[347]858 text.clear();
859
[1610]860 }
861
862 // Parse an {If}{decide,do,else} statement
863 else {
864
865 // Read the decision component.
[347]866 if (commacount == 0) {
[1610]867 // Decsion can be a metadata element, or a piece of text.
868 // Originally Stefan's code, updated 25/10/2000 by Gordon.
[1443]869
[347]870 text_t::const_iterator beginbracket = text.begin();
871 text_t::const_iterator endbracket = (text.end() - 1);
[1610]872
873 // Decision is based on a metadata element
[347]874 if ((*beginbracket == '[') && (*endbracket == ']')) {
[1610]875 // Ignore the surrounding square brackets
[347]876 text_t meta = substr (beginbracket+1, endbracket);
[649]877 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
[9620]878 ++commacount;
[347]879 text.clear();
880 }
[1610]881
882 // Decision is a piece of text (probably a macro like _cgiargmode_).
883 else {
[7389]884
885 // hunt for any metadata in string, which might be uses in
886 // to test a condition, e.g. [Format] eq 'PDF'
887 format_t* dummyformat = new format_t();
888 // update which metadata fields needed
889 // (not interested in updatng formatlistptr)
890 parse_string (text, dummyformat, metadata, getParents);
891 delete dummyformat;
892
[1610]893 formatlistptr->decision.command = dText;
894 formatlistptr->decision.text = text;
[9620]895 ++commacount;
[1610]896 text.clear();
897 }
898 }
899
900 // Read the "then" and "else" components of the {If} statement.
901 else {
[1443]902 format_t** nextlistptr = NULL;
903 if (commacount == 1) {
[1610]904 nextlistptr = &formatlistptr->ifptr;
[1443]905 } else if (commacount == 2 ) {
906 nextlistptr = &formatlistptr->elseptr;
907 } else {
908 return false;
909 }
910
911 if (!text.empty()) {
912 if (*nextlistptr == NULL) {
913 *nextlistptr = new format_t();
914 } else {
915
916 // skip to the end of any format_t statements already added
917 while ((*nextlistptr)->nextptr != NULL)
918 {
919 nextlistptr = &(*nextlistptr)->nextptr;
920 }
921
922 (*nextlistptr)->nextptr = new format_t();
923 nextlistptr = &(*nextlistptr)->nextptr;
924 }
925
926 if (!parse_string (text, *nextlistptr, metadata, getParents))
927 {
928 return false;
929 }
930 text.clear();
931 }
[347]932
[1443]933 if (*here == '{')
934 {
935 if (*nextlistptr == NULL) {
936 *nextlistptr = new format_t();
937 } else {
[7474]938 // skip to the end of any format_t statements already added
939 while ((*nextlistptr)->nextptr != NULL)
940 {
941 nextlistptr = &(*nextlistptr)->nextptr;
942 }
943
[1443]944 (*nextlistptr)->nextptr = new format_t();
945 nextlistptr = &(*nextlistptr)->nextptr;
946 }
947
948 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
949 {
950 return false;
951 }
952 }
953 else
954 {
955 if (*here == '}') break;
[9620]956 ++commacount;
[1443]957 }
[347]958 }
959 }
[636]960
961 } else text.push_back(*here);
[347]962
[9620]963 if (here != end) ++here;
[347]964 }
965
966 return true;
967}
968
[354]969
[19312]970static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
971 const text_t metaname, int metapos=-1)
972{
973
974 text_t tag_type = (metaname == "Text") ? "div" : "span";
975 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
976
977 text_t wrapped_metatext = "<" + tag_type + " ";
978 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
979
980 wrapped_metatext += "docoid=\"" + OID + "\" ";
981 wrapped_metatext += "metaname=\"" + metaname + "\"";
982
983 if (metapos>=0) {
984 text_t metapos_str = metapos;
985 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
986 }
987
988 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
989
990 return wrapped_metatext;
991}
992
993
994
[347]995bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
[649]996 text_tset &metadata, bool &getParents) {
[347]997
998 formatlistptr->clear();
999 getParents = false;
1000
[649]1001 return (parse_string (formatstring, formatlistptr, metadata, getParents));
[347]1002}
1003
[10415]1004// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1005// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
[19312]1006
1007static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
[9401]1008{
1009 text_t no_ns_metaname = remove_namespace(meta.metaname);
[19298]1010 text_t formatted_metatext;
[9401]1011 bool first = true;
[649]1012
[9401]1013 const int start_i=0;
1014 const int end_i = metainfo.values.size()-1;
[10415]1015
1016 if (position == -1) { // all
1017 for (int i=start_i; i<=end_i; ++i) {
[19298]1018 if (!first) formatted_metatext += meta.siblingoptions;
1019
1020 text_t fresh_metatext;
1021
[12567]1022 if (meta.metacommand & mSpecial) {
1023 // special formatting
[19298]1024 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1025 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1026 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
[12567]1027 }
[19298]1028 else fresh_metatext = metainfo.values[i];
1029
[21752]1030 // New "truncate" special formatting option
1031 if (meta.metacommand & mTruncate)
1032 {
1033 int truncate_length = meta.siblingoptions.getint();
1034 text_t truncated_value = fresh_metatext;
1035 if (truncated_value.size() > truncate_length)
1036 {
1037 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1038 }
1039 fresh_metatext = truncated_value;
1040 }
1041 // New "xmlsafe" special formatting option
1042 if (meta.metacommand & mXMLSafe)
1043 {
1044 // Make it XML-safe
1045 text_t text_xml_safe = "";
1046 text_t::const_iterator text_iterator = fresh_metatext.begin();
1047 while (text_iterator != fresh_metatext.end())
1048 {
1049 if (*text_iterator == '&') text_xml_safe += "&amp;";
1050 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1051 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1052 else text_xml_safe.push_back(*text_iterator);
1053 text_iterator++;
1054 }
1055 fresh_metatext = text_xml_safe;
1056 }
1057 // New "htmlsafe" special formatting option
1058 if (meta.metacommand & mHTMLSafe)
1059 {
1060 // Make it HTML-safe
1061 text_t text_html_safe = "";
1062 text_t::const_iterator text_iterator = fresh_metatext.begin();
1063 while (text_iterator != fresh_metatext.end())
1064 {
1065 if (*text_iterator == '&') text_html_safe += "&amp;";
1066 else if (*text_iterator == '<') text_html_safe += "&lt;";
1067 else if (*text_iterator == '>') text_html_safe += "&gt;";
1068 else if (*text_iterator == '"') text_html_safe += "&quot;";
1069 else text_html_safe.push_back(*text_iterator);
1070 text_iterator++;
1071 }
1072 fresh_metatext = text_html_safe;
1073 }
1074
[19302]1075 if (metadata_spanwrap) {
[19312]1076 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
[19298]1077 }
1078 formatted_metatext += fresh_metatext;
1079
[10415]1080 first = false;
[9401]1081
[10415]1082 }
1083 } else {
1084 if (position == -2) { // end
1085 position = end_i;
1086 } else if (position < start_i || position > end_i) {
1087 return "";
1088 }
[19298]1089
1090 text_t fresh_metatext;
[12567]1091 if (meta.metacommand & mSpecial) {
[19298]1092
[12567]1093 // special formatting
[19298]1094 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1095 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1096 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
[12567]1097 }
[19298]1098 else fresh_metatext = metainfo.values[position];
1099
[21752]1100 // New "truncate" special formatting option
1101 if (meta.metacommand & mTruncate)
1102 {
1103 int truncate_length = meta.siblingoptions.getint();
1104 text_t truncated_value = fresh_metatext;
1105 if (truncated_value.size() > truncate_length)
1106 {
1107 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1108 }
1109 fresh_metatext = truncated_value;
1110 }
1111 // New "xmlsafe" special formatting option
1112 if (meta.metacommand & mXMLSafe)
1113 {
1114 // Make it XML-safe
1115 text_t text_xml_safe = "";
1116 text_t::const_iterator text_iterator = fresh_metatext.begin();
1117 while (text_iterator != fresh_metatext.end())
1118 {
1119 if (*text_iterator == '&') text_xml_safe += "&amp;";
1120 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1121 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1122 else text_xml_safe.push_back(*text_iterator);
1123 text_iterator++;
1124 }
1125 fresh_metatext = text_xml_safe;
1126 }
1127 // New "htmlsafe" special formatting option
1128 if (meta.metacommand & mHTMLSafe)
1129 {
1130 // Make it HTML-safe
1131 text_t text_html_safe = "";
1132 text_t::const_iterator text_iterator = fresh_metatext.begin();
1133 while (text_iterator != fresh_metatext.end())
1134 {
1135 if (*text_iterator == '&') text_html_safe += "&amp;";
1136 else if (*text_iterator == '<') text_html_safe += "&lt;";
1137 else if (*text_iterator == '>') text_html_safe += "&gt;";
1138 else if (*text_iterator == '"') text_html_safe += "&quot;";
1139 else text_html_safe.push_back(*text_iterator);
1140 text_iterator++;
1141 }
1142 fresh_metatext = text_html_safe;
1143 }
1144
[19302]1145 if (metadata_spanwrap) {
[19312]1146 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
[19298]1147 }
1148
1149 formatted_metatext += fresh_metatext;
[9401]1150 }
[19298]1151
1152 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1153 else return formatted_metatext;
[9401]1154}
[347]1155
[10415]1156static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
[9401]1157{
1158
[649]1159 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
[19312]1160
[9401]1161 switch (meta.mqualifier.parent) {
[347]1162 case pNone:
[9401]1163 return "Nothing!!";
1164 break;
[5787]1165
[347]1166 case pImmediate:
[649]1167 if (parent != NULL) {
[19312]1168 text_t parent_oid = get_parent(docinfo.OID);
1169 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]1170 }
[347]1171 break;
1172
1173 case pTop:
[649]1174 if (parent != NULL) {
[19312]1175 text_t parent_oid = get_parent(docinfo.OID);
1176
1177 while (parent->parent != NULL) {
1178 parent = parent->parent;
1179 parent_oid = get_parent(parent_oid);
1180 }
1181 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]1182 }
[347]1183 break;
1184
1185 case pAll:
[649]1186 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1187 if (parent != NULL) {
[19312]1188 text_t parent_oid = get_parent(docinfo.OID);
1189
[649]1190 text_tarray tmparray;
1191 while (parent != NULL) {
[19312]1192 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
[649]1193 parent = parent->parent;
[19312]1194 parent_oid = get_parent(parent_oid);
1195
[649]1196 }
[10415]1197 // now join them up - use teh parent separator
[649]1198 bool first = true;
1199 text_t tmp;
1200 text_tarray::reverse_iterator here = tmparray.rbegin();
1201 text_tarray::reverse_iterator end = tmparray.rend();
[359]1202 while (here != end) {
[10415]1203 if (!first) tmp += meta.parentoptions;
1204 tmp += *here;
[359]1205 first = false;
[9620]1206 ++here;
[359]1207 }
[13457]1208 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
[649]1209 else return tmp;
[347]1210 }
1211 }
1212 return "";
[9401]1213
[347]1214}
1215
[9948]1216static text_t get_child_meta (const text_t& collection,
1217 recptproto* collectproto,
[9401]1218 ResultDocInfo_t &docinfo, displayclass &disp,
1219 const metadata_t &meta, text_tmap &options,
[10415]1220 ostream& logout, int siblings_values)
[9401]1221{
[10415]1222 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1223
[19046]1224 const text_t& pre_tree_trav = meta.pre_tree_traverse;
[10415]1225 const text_t& child_metaname = meta.metaname;
1226 const text_t& child_field = meta.childoptions;
1227 text_tset child_metadata;
1228 child_metadata.insert(child_metaname);
[9401]1229
[10415]1230 FilterResponse_t child_response;
1231 if (meta.mqualifier.child == cNum) {
1232 // just one child
1233 //get the information associated with the metadata for child doc
[19046]1234 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1235 child_metadata, false, collectproto, child_response,
1236 logout)) return ""; // invalid child number
[9401]1237
[10415]1238 if (child_response.docInfo.empty()) return false; // no info for the child
1239
1240 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1241 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1242
[19312]1243 text_t child_metavalue
1244 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[10415]1245 return expand_metadata(child_metavalue,collection,collectproto,
1246 child_docinfo,disp,options,logout);
1247 }
1248
[9401]1249
[10415]1250 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
[19046]1251
1252
1253 if (!pre_tree_trav.empty()) {
1254 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1255 FilterResponse_t trav_response;
1256
1257 text_tset trav_metadata;
1258 trav_metadata.insert("contains");
1259
1260 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1261 trav_metadata, false, collectproto, trav_response,
1262 logout)) return ""; // invalid pre_tree_trav
1263
1264 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
[10415]1265
[19046]1266 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1267 // use this for rest of routine
1268 docinfo = trav_docinfo;
1269 }
1270
[10415]1271 // we need to get all children
1272 text_t result = "";
1273 text_tarray children;
1274 text_t contains = docinfo.metadata["contains"].values[0];
1275 splitchar (contains.begin(), contains.end(), ';', children);
1276 text_tarray::const_iterator here = children.begin();
1277 text_tarray::const_iterator end = children.end();
1278 bool first = true;
1279 while (here !=end) {
1280 text_t oid = *here;
1281 here++;
1282 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1283
[9401]1284 //get the information associated with the metadata for child doc
[10415]1285 if (!get_info (oid, collection, "", child_metadata,
1286 false, collectproto, child_response, logout) ||
1287 child_response.docInfo.empty()) {
1288 first = false;
1289 continue;
1290 }
1291
1292
1293 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1294 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1295
[19312]1296 text_t child_metavalue
1297 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[10415]1298 if (!first) result += child_field;
1299 first = false;
1300 // need to do this here cos otherwise we are in the wrong document
1301 result += expand_metadata(child_metavalue,collection,collectproto,
[9401]1302 child_docinfo,disp,options,logout);
1303 }
[10415]1304 return result;
1305
[9401]1306}
1307
1308static text_t get_meta (const text_t& collection, recptproto* collectproto,
1309 ResultDocInfo_t &docinfo, displayclass &disp,
1310 const metadata_t &meta, text_tmap &options,
1311 ostream& logout) {
1312
1313 // make sure we have the requested metadata
1314 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1315 if (it == docinfo.metadata.end()) return "";
1316
[10415]1317 int siblings_values = 0; // default is no siblings, just the first metadata available
1318 if (meta.metacommand & mSibling) {
1319 if (meta.mqualifier.sibling == sAll) {
1320 siblings_values = -1; //all
1321 } else if (meta.mqualifier.sibling == sNum) {
1322 siblings_values = meta.siblingoptions.getint();
1323 }
1324 }
[9401]1325 if (meta.metacommand & mParent) {
[10415]1326 return get_parent_meta(docinfo,meta,siblings_values);
[9401]1327 }
[10415]1328
[9401]1329 else if (meta.metacommand & mChild) {
1330 return get_child_meta(collection,collectproto,docinfo,disp,meta,
[10415]1331 options,logout, siblings_values);
[9401]1332 }
[10415]1333 else if (meta.metacommand & mSibling) { // only siblings
[9401]1334 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
[19312]1335 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
[9401]1336 }
1337 else {
1338
1339 // straightforward metadata request (nothing fancy)
1340
1341 text_t classifier_metaname = docinfo.classifier_metadata_type;
1342 int metaname_index
1343 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
[19312]1344 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
[9401]1345 }
[10415]1346
[9401]1347 return "";
1348}
1349
[1443]1350static text_t get_or (const text_t& collection, recptproto* collectproto,
[1610]1351 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1352 format_t *orptr, text_tmap &options,
[1443]1353 ostream& logout) {
[347]1354
[354]1355 while (orptr != NULL) {
[347]1356
[19302]1357 if (metadata_spanwrap) {
1358 // need to be a bit more careful about this
1359 // => test for it *without* spanwrap, and if defined, then
1360 // got back and generate it again, this time with spanwrap on
[354]1361
[19302]1362 metadata_spanwrap = false;
1363 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1364 options, logout);
1365 metadata_spanwrap = true;
1366 if (!test_tmp.empty()) {
1367
1368 return format_string (collection,collectproto,docinfo, disp, orptr,
1369 options, logout);
1370 }
1371 }
1372 else {
1373 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1374 options, logout);
1375 if (!tmp.empty()) return tmp;
1376 }
1377
[354]1378 orptr = orptr->nextptr;
[347]1379 }
[354]1380 return "";
[347]1381}
1382
// Returns true if c is a space, tab, newline or carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1388
1389static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1390{
1391 int pos = start_pos;
1392 while (pos<outstring.size()) {
1393 if (!char_is_whitespace(outstring[pos])) {
1394 break;
1395 }
[9620]1396 ++pos;
[7389]1397 }
1398
1399 return pos;
1400}
1401
1402static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1403{
1404 int pos = start_pos;
1405 while (pos>=0) {
1406 if (!char_is_whitespace(outstring[pos])) {
1407 break;
1408 }
[9620]1409 --pos;
[7389]1410 }
1411
1412 return pos;
1413}
1414
1415static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1416{
1417 int pos = start_pos;
1418 while (pos>=0) {
1419 if (char_is_whitespace(outstring[pos])) {
1420 break;
1421 }
[9620]1422 --pos;
[7389]1423 }
1424
1425 return pos;
1426}
1427
1428
1429static int rscan_for(const text_t& outstring, const int start_pos,
1430 const char find_c)
1431{
1432 int pos = start_pos;
1433 while (pos>=0) {
1434 char c = outstring[pos];
1435 if (outstring[pos] == find_c) {
1436 break;
1437 }
[9620]1438 --pos;
[7389]1439 }
1440
1441 return pos;
1442}
1443
1444text_t extract_substr(const text_t& outstring, const int start_pos,
1445 const int end_pos)
1446{
1447 text_t extracted_str;
1448 extracted_str.clear();
1449
[9620]1450 for (int pos=start_pos; pos<=end_pos; ++pos) {
[7389]1451 extracted_str.push_back(outstring[pos]);
1452 }
1453
1454 return extracted_str;
1455}
1456
1457
[9401]1458static text_t expand_potential_metadata(const text_t& collection,
1459 recptproto* collectproto,
1460 ResultDocInfo_t &docinfo,
1461 displayclass &disp,
1462 const text_t& intext,
1463 text_tmap &options,
1464 ostream& logout)
[7389]1465{
1466 text_t outtext;
1467
1468 // decide if dealing with metadata or text
1469
1470 text_t::const_iterator beginbracket = intext.begin();
1471 text_t::const_iterator endbracket = (intext.end() - 1);
1472
1473 // Decision is based on a metadata element
1474 if ((*beginbracket == '[') && (*endbracket == ']')) {
1475 // Ignore the surrounding square brackets
1476 text_t meta_text = substr (beginbracket+1, endbracket);
1477
[10614]1478 if (meta_text == "Text") {
1479 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
[19311]1480 }
1481 else {
[7389]1482
[10614]1483 text_tset metadata;
1484 bool getParents =false;
1485 metadata_t meta;
1486
1487 parse_meta (meta_text, meta, metadata, getParents);
1488 outtext
1489 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1490 }
1491
[7389]1492 }
1493 else {
1494 outtext = intext;
1495 }
1496
1497 return outtext;
1498}
1499
1500
1501
1502
[9401]1503static bool uses_expression(const text_t& collection, recptproto* collectproto,
1504 ResultDocInfo_t &docinfo,
1505 displayclass &disp,
[7389]1506 const text_t& outstring, text_t& lhs_expr,
[9401]1507 text_t& op_expr, text_t& rhs_expr,
1508 text_tmap &options,
1509 ostream& logout)
[7389]1510{
1511 // Note: the string may not be of the form: str1 op str2, however
1512 // to deterine this we have to process it on the assumption it is,
1513 // and if at any point an 'erroneous' value is encountered, return
1514 // false and let something else have a go at evaluating it
1515
1516 // Starting at the end of the string and working backwards ..
1517
1518 const int outstring_len = outstring.size();
1519
1520 // skip over white space
1521 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1522
1523 if (rhs_end<=0) {
1524 // no meaningful text or (rhs_end==0) no room for operator
1525 return false;
1526 }
1527
1528 // check for ' or " and then scan over token
1529 const char potential_quote = outstring[rhs_end];
1530 int rhs_start=rhs_end;
1531 bool quoted = false;
1532
1533 if ((potential_quote == '\'') || (potential_quote == '\"')) {
[9620]1534 --rhs_end;
[7389]1535 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1536 quoted = true;
1537 }
1538 else {
1539 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1540 }
1541
[7617]1542 if ((rhs_end-rhs_start)<0) {
[7389]1543 // no meaningful rhs expression
1544 return false;
1545 }
1546
1547 // form rhs_expr
1548 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1549
1550 // skip over white space
1551 const int to_whitespace = (quoted) ? 2 : 1;
1552
1553 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1554 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1555
[19058]1556 if ((op_end<0) && (op_start<0)) {
1557 // no meaningful expression operator
1558 return false;
1559 }
[7389]1560
[7617]1561 if (op_end-op_start<0) {
[7389]1562 // no meaningful expression operator
1563 return false;
1564 }
1565
1566 op_expr = extract_substr(outstring,op_start,op_end);
1567
1568
1569 // check for operator
[10142]1570 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1571 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
[10145]1572
[7389]1573 // not a valid operator
1574 return false;
1575 }
1576
1577 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
[7617]1578 if (lhs_end<0) {
[7389]1579 // no meaningful lhs expression
1580 return false;
1581 }
1582
1583 int lhs_start = scan_over_whitespace(outstring,0);
1584
1585 // form lhs_expr from remainder of string
1586 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1587
1588 // Now we know we have a valid expression, look up any
1589 // metadata terms
1590
[9401]1591 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1592 disp,rhs_expr,options,logout);
1593 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1594 disp,lhs_expr,options,logout);
[7389]1595
1596 return true;
1597}
1598
1599static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1600 const text_t& rhs_expr, ostream& logout)
1601{
[10142]1602 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1603 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1604 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1605 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1606 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1607 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1608 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1609 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1610 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1611 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1612 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1613 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1614 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1615 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
[7389]1616 else {
1617 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1618 }
[10142]1619
[7389]1620 return false;
1621}
1622
1623
[1443]1624static text_t get_if (const text_t& collection, recptproto* collectproto,
[1610]1625 ResultDocInfo_t &docinfo, displayclass &disp,
1626 const decision_t &decision,
[5788]1627 format_t *ifptr, format_t *elseptr,
1628 text_tmap &options, ostream& logout)
[1443]1629{
[1610]1630 // If the decision component is a metadata element, then evaluate it
1631 // to see whether we output the "then" or the "else" clause
[354]1632 if (decision.command == dMeta) {
[19298]1633
[19302]1634 bool store_metadata_spanwrap = metadata_spanwrap;
1635 metadata_spanwrap = 0;
[19298]1636
[19302]1637 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
[19298]1638 bool metadata_exists
1639 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1640 logout) != "");
1641
[19302]1642 metadata_spanwrap = store_metadata_spanwrap;
[19298]1643
1644 if (metadata_exists) {
[354]1645 if (ifptr != NULL)
[9948]1646 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
[5788]1647 options, logout);
[354]1648 }
1649 else {
1650 if (elseptr != NULL)
[9948]1651 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
[5788]1652 options, logout);
[354]1653 }
1654 }
[1610]1655
1656 // If the decision component is text, then evaluate it (it is probably a
1657 // macro like _cgiargmode_) to decide what to output.
1658 else if (decision.command == dText) {
1659
1660 text_t outstring;
1661 disp.expandstring (decision.text, outstring);
1662
[7389]1663 // Check for if expression in form: str1 op str2
1664 // (such as [x] eq "y")
1665 text_t lhs_expr, op_expr, rhs_expr;
[9401]1666 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
[7389]1667 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1668 if (ifptr != NULL) {
1669 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1670 options, logout);
1671 }
1672 else {
1673 return "";
1674 }
1675 } else {
1676 if (elseptr != NULL) {
1677 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1678 options, logout);
1679 }
1680 else {
1681 return "";
1682 }
1683 }
1684 }
1685
1686
[1610]1687 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1688 // a cgi argument macro that has not been set, it evaluates to itself.
1689 // Therefore, were have to say that a piece of text evalautes true if
1690 // it is non-empty and if it is a cgi argument evaulating to itself.
[7389]1691
[1610]1692 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1693 if (ifptr != NULL)
1694 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
[5788]1695 options, logout);
[1610]1696 } else {
1697 if (elseptr != NULL)
1698 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
[5788]1699 options, logout);
[1610]1700 }
1701 }
1702
[354]1703 return "";
1704}
1705
[1443]1706bool includes_metadata(const text_t& text)
1707{
1708 text_t::const_iterator here = text.begin();
1709 text_t::const_iterator end = text.end();
1710 while (here != end) {
1711 if (*here == '[') return true;
[9620]1712 ++here;
[1443]1713 }
1714
1715 return false;
1716}
1717
[5788]1718static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
[9948]1719 recptproto* collectproto,
1720 ResultDocInfo_t &docinfo,
[5788]1721 displayclass &disp, text_tmap &options,
1722 ostream &logout) {
1723
[10415]1724 if (includes_metadata(metavalue)) {
1725
1726 // text has embedded metadata in it => expand it
1727 FilterRequest_t request;
1728 FilterResponse_t response;
1729
1730 request.getParents = false;
1731
1732 format_t *expanded_formatlistptr = new format_t();
1733 parse_formatstring (metavalue, expanded_formatlistptr,
1734 request.fields, request.getParents);
1735
1736 // retrieve metadata
1737 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1738 collectproto, response, logout);
1739
1740 if (!response.docInfo.empty()) {
1741
1742 text_t expanded_metavalue
1743 = get_formatted_string(collection, collectproto,
1744 response.docInfo[0], disp, expanded_formatlistptr,
1745 options, logout);
1746
1747 return expanded_metavalue;
1748 }
1749 else {
1750 return metavalue;
1751 }
1752 }
1753 else {
1754
1755 return metavalue;
1756 }
[5788]1757}
[1941]1758
[9948]1759text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1760 displayclass &disp,
1761 text_t meta_name, ostream& logout) {
1762
1763 ColInfoResponse_t collectinfo;
1764 comerror_t err;
1765 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1766 text_t meta_value = "";
1767 text_t lang;
1768 disp.expandstring("_cgiargl_",lang);
1769 if (lang.empty()) {
1770 lang = "en";
1771 }
1772
1773 if (err == noError) {
1774 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1775 }
1776 return meta_value;
1777
1778
1779}
[1443]1780text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]1781 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1782 format_t *formatlistptr, text_tmap &options,
[1443]1783 ostream& logout) {
[354]1784
[347]1785 if (formatlistptr == NULL) return "";
1786
1787 switch (formatlistptr->command) {
[6020]1788 case comOID:
1789 return docinfo.OID;
[13118]1790 case comTopOID:
1791 {
1792 text_t top_id;
1793 get_top(docinfo.OID, top_id);
1794 return top_id;
1795 }
[6710]1796 case comRank:
1797 return text_t(docinfo.ranking);
[5788]1798 case comText:
1799 return formatlistptr->text;
1800 case comLink:
1801 return options["link"];
1802 case comEndLink:
[21758]1803 {
1804 if (options["link"].empty()) return "";
[5788]1805 else return "</a>";
[21758]1806 }
[5788]1807 case comHref:
1808 return get_href(options["link"]);
1809 case comIcon:
1810 return options["icon"];
1811 case comNum:
1812 return docinfo.result_num;
1813 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1814 return get_related_docs(collection, collectproto, docinfo, logout);
[19311]1815
[5788]1816 case comSummary:
[19312]1817 return format_summary(collection, collectproto, docinfo, disp, options, logout);
[21758]1818 case comAssocLink:
1819 {
1820 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1821 if (!link_filename.empty()) {
1822 text_t href= expand_metadata(options["assocfilepath"]+link_filename, collection, collectproto, docinfo, disp, options, logout);
1823 if (formatlistptr->text == "href") {
1824 return href;
1825 }
1826 return "<a href=\""+ href + "\">";
1827 }
1828 return "";
1829 }
1830 case comEndAssocLink:
1831 {
1832 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1833 if (!link_filename.empty()) {
1834 return "</a>";
1835 }
1836 return "";
1837 }
[5788]1838 case comMeta:
[1443]1839 {
[9948]1840 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
[5788]1841 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
[1443]1842 }
[19311]1843
[5788]1844 case comDoc:
[19311]1845 return format_text(collection, collectproto, docinfo, disp, options, logout);
1846
[5788]1847 case comImage:
1848 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1849 case comTOC:
1850 return options["DocTOC"];
1851 case comDocumentButtonDetach:
1852 return options["DocumentButtonDetach"];
1853 case comDocumentButtonHighlight:
1854 return options["DocumentButtonHighlight"];
1855 case comDocumentButtonExpandContents:
1856 return options["DocumentButtonExpandContents"];
1857 case comDocumentButtonExpandText:
1858 return options["DocumentButtonExpandText"];
1859 case comHighlight:
1860 if (options["highlight"] == "1") return "<b>";
1861 break;
1862 case comEndHighlight:
1863 if (options["highlight"] == "1") return "</b>";
1864 break;
[19302]1865 case comMetadataSpanWrap:
1866 metadata_spanwrap=true; return "";
1867 break;
1868 case comEndMetadataSpanWrap:
1869 metadata_spanwrap=false; return "";
1870 break;
[5788]1871 case comIf:
1872 return get_if (collection, collectproto, docinfo, disp,
1873 formatlistptr->decision, formatlistptr->ifptr,
1874 formatlistptr->elseptr, options, logout);
1875 case comOr:
1876 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1877 options, logout);
[16915]1878 case comDocTermsFreqTotal:
1879 return docinfo.num_terms_matched;
[9948]1880 case comCollection:
1881 if (formatlistptr->meta.metaname == g_EmptyText) {
1882 return collection;
1883 }
1884 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1885
[347]1886 }
1887 return "";
1888}
1889
[1443]1890text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
[1610]1891 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1892 format_t *formatlistptr, text_tmap &options,
[1443]1893 ostream& logout) {
[407]1894
[5788]1895 text_t ft;
1896 while (formatlistptr != NULL)
1897 {
1898 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1899 options, logout);
1900 formatlistptr = formatlistptr->nextptr;
1901 }
1902
1903 return ft;
[347]1904}
1905
1906
[9852]1907// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1908text_t format_text (const text_t& collection, recptproto* collectproto,
[9948]1909 ResultDocInfo_t &docinfo, displayclass &disp,
[19311]1910 text_tmap &options, ostream& logout)
1911{
1912 text_t text;
1913
[9852]1914 if(!options["text"].empty()) {
[19311]1915 text = options["text"];
[9852]1916 }
[19311]1917 else {
1918 // get document text here
1919 DocumentRequest_t docrequest;
1920 DocumentResponse_t docresponse;
1921 comerror_t err;
1922 docrequest.OID = docinfo.OID;
1923 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1924 text = docresponse.doc;
1925 }
[9852]1926
[19311]1927 if (metadata_spanwrap) {
[19312]1928 text = spanwrap_metatext(text,docinfo.OID,"Text");
[19311]1929 }
1930
1931 return text;
[9852]1932}
1933
[2967]1934/* FUNCTION NAME: format_summary
1935 * DESC: this is invoked when a [Summary] special metadata is processed.
1936 * RETURNS: a query-biased summary for the document */
1937
1938text_t format_summary (const text_t& collection, recptproto* collectproto,
[5788]1939 ResultDocInfo_t &docinfo, displayclass &disp,
1940 text_tmap &options, ostream& logout) {
[3673]1941
1942 // GRB: added code here to ensure that the cstr (and other collections)
1943 // uses the document metadata item Summary, rather than compressing
1944 // the text of the document, processed via the methods in
1945 // summarise.cpp
[19312]1946
1947 text_t summary;
1948
[3673]1949 if (docinfo.metadata.count("Summary") > 0 &&
1950 docinfo.metadata["Summary"].values.size() > 0) {
[19312]1951 summary = docinfo.metadata["Summary"].values[0];
[3673]1952 }
[19312]1953 else {
1954
1955 text_t textToSummarise, query;
[3673]1956
[19312]1957 if(options["text"].empty()) { // get document text
1958 DocumentRequest_t docrequest;
1959 DocumentResponse_t docresponse;
1960 comerror_t err;
1961 docrequest.OID = docinfo.OID;
1962 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1963 textToSummarise = docresponse.doc;
1964 }
1965 else {
1966 // in practice, this would not happen, because text is only
1967 // loaded with the [Text] command
[19311]1968 textToSummarise = options["text"];
[19312]1969 }
1970
1971 disp.expandstring("_cgiargq_",query);
1972 summary = summarise(textToSummarise,query,80);
[19311]1973 }
1974
[19312]1975 if (metadata_spanwrap) {
1976 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1977 }
1978
1979 return summary;
[2967]1980}
Note: See TracBrowser for help on using the repository browser.