source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 23707

Last change on this file since 23707 was 23707, checked in by ak19, 13 years ago

Fix to server-crashing bug where, when loading the titles classifier, a Title's contents were mistaken for metadata since it contained an open square bracket. The code now is a bit more resilient as it checks for the presence of an open square bracket and close square bracket (in that order) before assuming it is format-related Metadata.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 60.8 KB
RevLine 
[347]1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[347]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[347]24 *********************************************************************/
25
26#include "formattools.h"
[462]27#include "cgiutils.h"
[22650]28#include "gsdltools.h"
[15418]29#include "recptprototools.h"
[1443]30#include "OIDtools.h"
[2967]31#include "summarise.h"
[1443]32
[1257]33#include <assert.h>
[347]34
[23515]35static bool metadata_wrap = false;
36static text_t metadata_wrap_type = "";
[19298]37
[354]38// a few function prototypes
[5788]39
[1443]40static text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]41 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]42 format_t *formatlistptr, text_tmap &options,
43 ostream& logout);
[354]44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]46 format_t *formatlistptr, text_tset &metadata, bool &getParents);
[354]47
[5788]48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49 ResultDocInfo_t &docinfo, displayclass &disp,
50 text_tmap &options, ostream& logout);
[9852]51static text_t format_text (const text_t& collection, recptproto* collectproto,
52 ResultDocInfo_t &docinfo, displayclass &disp,
53 text_tmap &options, ostream& logout);
[2967]54
[9401]55static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
56 recptproto* collectproto, ResultDocInfo_t &docinfo,
57 displayclass &disp, text_tmap &options,
58 ostream &logout);
[2967]59
[9401]60
[347]61void metadata_t::clear() {
[410]62 metaname.clear();
[462]63 metacommand = mNone;
[9401]64 mqualifier.parent = pNone;
65 mqualifier.sibling = sNone;
66 mqualifier.child = cNone;
[19046]67 pre_tree_traverse.clear();
[10415]68 parentoptions.clear();
69 siblingoptions.clear();
70 childoptions.clear();
[749]71}
[347]72
73void decision_t::clear() {
74 command = dMeta;
75 meta.clear();
[1610]76 text.clear();
[749]77}
[347]78
[21808]79format_t::~format_t()
80{
81 if (nextptr != NULL) delete nextptr;
82 if (ifptr != NULL) delete ifptr;
83 if (elseptr != NULL) delete elseptr;
84 if (orptr != NULL) delete orptr;
85}
86
[347]87void format_t::clear() {
88 command = comText;
89 decision.clear();
90 text.clear();
91 meta.clear();
92 nextptr = NULL;
93 ifptr = NULL;
94 elseptr = NULL;
95 orptr = NULL;
[749]96}
[347]97
[442]98void formatinfo_t::clear() {
[1079]99 DocumentImages = false;
100 DocumentTitles = true;
101 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
102 DocumentContents = true;
[1941]103 DocumentArrowsBottom = true;
[5788]104 DocumentArrowsTop = false;
[13365]105 DocumentSearchResultLinks = false;
[442]106 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
[1496]107 // DocumentButtons.push_back ("Expand Text");
108 // DocumentButtons.push_back ("Expand Contents");
[442]109 DocumentButtons.push_back ("Detach");
110 DocumentButtons.push_back ("Highlight");
[1941]111 RelatedDocuments = "";
[11146]112 DocumentText = "[Text]";
[649]113 formatstrings.erase (formatstrings.begin(), formatstrings.end());
[868]114 DocumentUseHTML = false;
[5788]115 AllowExtendedOptions = false;
[442]116}
117
[749]118// simply checks to see if formatstring begins with a <td> tag
119bool is_table_content (const text_t &formatstring) {
120 text_t::const_iterator here = formatstring.begin();
121 text_t::const_iterator end = formatstring.end();
122
123 while (here != end) {
124 if (*here != ' ') {
[1257]125 if ((*here == '<') && ((here+3) < end)) {
[749]126 if ((*(here+1) == 't' || *(here+1) == 'T') &&
127 (*(here+2) == 'd' || *(here+2) == 'D') &&
128 (*(here+3) == '>' || *(here+3) == ' '))
129 return true;
130 } else return false;
131 }
[9620]132 ++here;
[749]133 }
134 return false;
135}
136
137bool is_table_content (const format_t *formatlistptr) {
138
139 if (formatlistptr == NULL) return false;
140
141 if (formatlistptr->command == comText)
142 return is_table_content (formatlistptr->text);
143
144 return false;
145}
146
[649]147// returns false if key isn't in formatstringmap
148bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
149 text_t &formatstring) {
[442]150
[649]151 formatstring.clear();
152 text_tmap::const_iterator it = formatstringmap.find(key);
153 if (it == formatstringmap.end()) return false;
154 formatstring = (*it).second;
155 return true;
156}
[749]157
[649]158// tries to find "key1key2" then "key1" then "key2"
159bool get_formatstring (const text_t &key1, const text_t &key2,
160 const text_tmap &formatstringmap,
161 text_t &formatstring) {
162
163 formatstring.clear();
164 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 it = formatstringmap.find(key1);
170 if (it != formatstringmap.end()) {
171 formatstring = (*it).second;
172 return true;
173 }
174 it = formatstringmap.find(key2);
175 if (it != formatstringmap.end()) {
176 formatstring = (*it).second;
177 return true;
178 }
179 return false;
180}
181
182
[6645]183text_t remove_namespace(const text_t &meta_name) {
184 text_t::const_iterator end = meta_name.end();
185 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
186 if (it != end) {
187 return substr(it+1, end);
188 }
[12567]189
[6645]190 return meta_name;
191
192}
[12567]193// returns a date of form _format:date_(year, month, day)
194// input is date of type yyyy-?mm-?dd
[410]195// at least the year must be present in date
[422]196text_t format_date (const text_t &date) {
[347]197
[410]198 if (date.size() < 4) return "";
[347]199
[410]200 text_t::const_iterator datebegin = date.begin();
[354]201
[410]202 text_t year = substr (datebegin, datebegin+4);
[12567]203 int chars_seen_so_far = 4;
[20756]204 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
[410]205
[12567]206 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
207 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
208
209 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]210 int imonth = month.getint();
[12567]211 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
212
213 chars_seen_so_far += 2;
[20756]214 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
215
[12567]216 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
217 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
[410]218
[12567]219 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]220 if (day[0] == '0') day = substr (day.begin()+1, day.end());
221 int iday = day.getint();
[12567]222 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
[410]223
[12567]224 return "_format:date_("+year+","+month+","+day+")";
[410]225}
226
[2001]227// converts an iso639 language code to its English equivalent
[12567]228// should we be checking that the macro exists??
[2001]229text_t iso639 (const text_t &langcode) {
[12567]230 if (langcode.empty()) return "";
231 return "_iso639:iso639"+langcode+"_";
[2001]232}
233
[12567]234
[2706]235text_t get_href (const text_t &link) {
236
237 text_t href;
238
239 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
240 text_t::const_iterator end = link.end();
[13117]241 if (here == end) return g_EmptyText;
[2706]242
[9620]243 ++here;
[2706]244 while (here != end) {
245 if (*here == '"') break;
246 href.push_back(*here);
[9620]247 ++here;
[2706]248 }
249
250 return href;
251}
252
[1941]253//this function gets the information associated with the relation
254//metadata for the document associated with 'docinfo'. This relation
255//metadata consists of a line of pairs containing 'collection, document OID'
256//(this is the OID of the document related to the current document, and
257//the collection the related document belongs to). For each of these pairs
258//the title metadata is obtained and then an html link between the title
259//of the related doc and the document's position (the document will be
260//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
261//(where collection is the related documents collection, and OID is the
262//related documents OID). A list of these html links are made for as many
263//related documents as there are. This list is then returned. If there are
264//no related documents available for the current document then the string
265//'.. no related documents .. ' is returned.
266text_t get_related_docs(const text_t& collection, recptproto* collectproto,
[9948]267 ResultDocInfo_t &docinfo, ostream& logout){
[1941]268
269 text_tset metadata;
270
271 //insert the metadata we wish to collect
[11324]272 metadata.insert("dc.Relation");
[1941]273 metadata.insert("Title");
274 metadata.insert("Subject"); //for emails, where title data doesn't apply
275
276 FilterResponse_t response;
277 text_t relation = ""; //string for displaying relation metadata
278 text_t relationTitle = ""; //the related documents Title (or subject)
[1963]279 text_t relationOID = ""; //the related documents OID
[1941]280
281 //get the information associated with the metadata for current doc
[7432]282 if (get_info (docinfo.OID, collection, "", metadata,
[1941]283 false, collectproto, response, logout)) {
284
285 //if the relation metadata exists, store for displaying
[11324]286 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
287 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
[1941]288
289 //split relation data into pairs of collectionname,ID number
290 text_tarray relationpairs;
291 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
292
293 text_tarray::const_iterator currDoc = relationpairs.begin();
294 text_tarray::const_iterator lastDoc = relationpairs.end();
295
296 //iterate through the pairs to split and display
297 while(currDoc != lastDoc){
298
299 //split pairs into collectionname and ID
300 text_tarray relationdata;
301 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
302
303 //get first element in the array (collection)
304 text_tarray::const_iterator doc_data = relationdata.begin();
305 text_t document_collection = *doc_data;
[9620]306 ++doc_data; //increment to get next item in array (oid)
[1941]307 text_t document_OID = *doc_data;
308
309 //create html link to related document
[22810]310 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
311 relation += "&amp;cl=search&amp;d=" + document_OID;
[1941]312
313 //get the information associated with the metadata for related doc
[7432]314 if (get_info (document_OID, document_collection, "", metadata,
[1941]315 false, collectproto, response, logout)) {
316
317 //if title metadata doesn't exist, collect subject metadata
318 //if that doesn't exist, just call it 'related document'
319 if (!response.docInfo[0].metadata["Title"].values[0].empty())
320 relationTitle = response.docInfo[0].metadata["Title"].values[0];
321 else if (!response.docInfo[0].metadata["Subject"].values.empty())
322 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
323 else relationTitle = "RELATED DOCUMENT";
324
325 }
326
327 //link the related document's title to its page
328 relation += "\">" + relationTitle + "</a>";
329 relation += " (" + document_collection + ")<br>";
330
[9620]331 ++currDoc;
[1941]332 }
333 }
334
335 }
336
337 if(relation.empty()) //no relation data for documnet
338 relation = ".. no related documents .. ";
339
340 return relation;
341}
342
343
344
[354]345static void get_parent_options (text_t &instring, metadata_t &metaoption) {
346
[1257]347 assert (instring.size() > 7);
348 if (instring.size() <= 7) return;
349
[354]350 text_t meta, com, op;
351 bool inbraces = false;
352 bool inquotes = false;
353 bool foundcolon = false;
354 text_t::const_iterator here = instring.begin()+6;
355 text_t::const_iterator end = instring.end();
356 while (here != end) {
[10415]357 if (foundcolon) meta.push_back (*here);
358 else if (*here == '(') inbraces = true;
[354]359 else if (*here == ')') inbraces = false;
360 else if (*here == '\'' && !inquotes) inquotes = true;
361 else if (*here == '\'' && inquotes) inquotes = false;
362 else if (*here == ':' && !inbraces) foundcolon = true;
363 else if (inquotes) op.push_back (*here);
364 else com.push_back (*here);
[9620]365 ++here;
[354]366 }
[9401]367
[354]368 instring = meta;
369 if (com.empty())
[9401]370 metaoption.mqualifier.parent = pImmediate;
[354]371 else if (com == "Top")
[9401]372 metaoption.mqualifier.parent = pTop;
[649]373 else if (com == "All") {
[9401]374 metaoption.mqualifier.parent = pAll;
[10415]375 metaoption.parentoptions = op;
[354]376 }
377}
378
[5787]379
380static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
381
382 assert (instring.size() > 8);
383 if (instring.size() <= 8) return;
384 text_t meta, com, op;
385 bool inbraces = false;
386 bool inquotes = false;
387 bool foundcolon = false;
388 text_t::const_iterator here = instring.begin()+7;
389 text_t::const_iterator end = instring.end();
390 while (here != end) {
[10415]391 if (foundcolon) meta.push_back (*here);
392 else if (*here == '(') inbraces = true;
[5787]393 else if (*here == ')') inbraces = false;
394 else if (*here == '\'' && !inquotes) inquotes = true;
395 else if (*here == '\'' && inquotes) inquotes = false;
[10415]396 else if (*here == ':' && !inbraces) foundcolon = true;
[5787]397 else if (inquotes) op.push_back (*here);
398 else com.push_back (*here);
[9620]399 ++here;
[5787]400 }
401
402 instring = meta;
[10415]403 metaoption.siblingoptions.clear();
[5787]404
405 if (com.empty()) {
[9401]406 metaoption.mqualifier.sibling = sAll;
[10415]407 metaoption.siblingoptions = " ";
[5787]408 }
[10415]409 else if (com == "first") {
410 metaoption.mqualifier.sibling = sNum;
411 metaoption.siblingoptions = "0";
412 }
413 else if (com == "last") {
414 metaoption.mqualifier.sibling = sNum;
415 metaoption.siblingoptions = "-2"; // == last
416 }
417 else if (com.getint()>0) {
418 metaoption.mqualifier.sibling = sNum;
419 int pos = com.getint()-1;
420 metaoption.siblingoptions +=pos;
421 }
[5787]422 else {
[9401]423 metaoption.mqualifier.sibling = sAll;
[10415]424 metaoption.siblingoptions = op;
[5787]425 }
426}
427
[9401]428static void get_child_options (text_t &instring, metadata_t &metaoption) {
[5788]429
[9401]430 assert (instring.size() > 6);
431 if (instring.size() <= 6) return;
432 text_t meta, com, op;
433 bool inbraces = false;
434 bool inquotes = false;
435 bool foundcolon = false;
436 text_t::const_iterator here = instring.begin()+5;
437 text_t::const_iterator end = instring.end();
438 while (here != end) {
[10415]439 if (foundcolon) meta.push_back (*here);
440 else if (*here == '(') inbraces = true;
[9401]441 else if (*here == ')') inbraces = false;
442 else if (*here == '\'' && !inquotes) inquotes = true;
443 else if (*here == '\'' && inquotes) inquotes = false;
444 else if (*here == ':' && !inbraces) foundcolon = true;
445 else if (inquotes) op.push_back (*here);
446 else com.push_back (*here);
[9620]447 ++here;
[9401]448 }
449
450 instring = meta;
451 if (com.empty()) {
452 metaoption.mqualifier.child = cAll;
[10415]453 metaoption.childoptions = " ";
[9401]454 }
455 else if (com == "first") {
456 metaoption.mqualifier.child = cNum;
[10415]457 metaoption.childoptions = ".fc";
[9401]458 }
459 else if (com == "last") {
460 metaoption.mqualifier.child = cNum;
[10415]461 metaoption.childoptions = ".lc";
[9401]462 }
463 else if (com.getint()>0) {
464 metaoption.mqualifier.child = cNum;
[10415]465 metaoption.childoptions = "."+com;
[9401]466 }
467 else {
468 metaoption.mqualifier.child = cAll;
[10415]469 metaoption.childoptions = op;
[9401]470 }
471}
472
473
[21752]474static void get_truncate_options (text_t &instring, metadata_t &metaoption)
475{
476 assert (instring.size() > ((text_t) "truncate").size());
477 if (instring.size() <= ((text_t) "truncate").size()) return;
478 text_t meta, com;
479 bool inbraces = false;
480 bool foundcolon = false;
481 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
482 text_t::const_iterator end = instring.end();
483 while (here != end) {
484 if (foundcolon) meta.push_back (*here);
485 else if (*here == '(') inbraces = true;
486 else if (*here == ')') inbraces = false;
487 else if (*here == ':' && !inbraces) foundcolon = true;
488 else com.push_back (*here);
489 ++here;
490 }
[9401]491
[21752]492 instring = meta;
493
494 if (!com.empty())
495 {
496 metaoption.siblingoptions = com;
497 }
498 else
499 {
500 // Default is 100 characters if not specified
501 metaoption.siblingoptions = "100";
502 }
503}
504
505
506
[649]507static void parse_meta (text_t &meta, metadata_t &metaoption,
508 text_tset &metadata, bool &getParents) {
[354]509
[21752]510 // Look for the various format statement modifiers
511 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
512 // is irrelevant because this is not stored in metaoption.metacommand anyway
513 bool keep_trying = true;
514 while (keep_trying)
515 {
516 keep_trying = false;
517
518 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
519 {
520 metaoption.metacommand |= mCgiSafe;
521 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
522 keep_trying = true;
523 }
524 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
525 {
526 metaoption.metacommand |= mSpecial;
527 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
528 keep_trying = true;
529 }
530
531 // New "truncate" special formatting option
532 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
533 {
534 metaoption.metacommand |= mTruncate;
535 get_truncate_options (meta, metaoption);
536 keep_trying = true;
537 }
538 // New "htmlsafe" special formatting option
539 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
540 {
541 metaoption.metacommand |= mHTMLSafe;
542 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
543 keep_trying = true;
544 }
545 // New "xmlsafe" special formatting option
546 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
547 {
548 metaoption.metacommand |= mXMLSafe;
549 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
550 keep_trying = true;
551 }
[22650]552 // New "dmsafe" special formatting option
553 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
554 {
555 metaoption.metacommand |= mDMSafe;
556 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
557 keep_trying = true;
558 }
[649]559 }
560
[19046]561 bool had_parent_or_child = true;
562 bool prev_was_parent = false;
563 bool prev_was_child = false;
564
565 while (had_parent_or_child) {
566 if (meta.size() > 7
567 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
568
569 // clear out sibling and child (cmd and options)
570 metaoption.metacommand &= ~(mChild|mSibling);
571 metaoption.childoptions.clear();
572 metaoption.siblingoptions.clear();
573
574 getParents = true;
575 metaoption.metacommand |= mParent;
576 get_parent_options (meta, metaoption);
577
578 if (prev_was_parent) {
579 metaoption.pre_tree_traverse += ".pr";
580 }
581 else if (prev_was_child) {
582 metaoption.pre_tree_traverse += ".fc";
583 }
584
585 prev_was_parent = true;
586 prev_was_child = false;
587 }
588 else if (meta.size() > 6
589 && (substr (meta.begin(), meta.begin()+5) == "child")) {
590
591 // clear out sibling and parent (cmd and options)
592 metaoption.metacommand &= ~(mParent|mSibling);
593 metaoption.parentoptions.clear();
594 metaoption.siblingoptions.clear();
595
596 metaoption.metacommand |= mChild;
597 get_child_options (meta, metaoption);
598 metadata.insert("contains");
599
600 if (prev_was_parent) {
601 metaoption.pre_tree_traverse += ".pr";
602 }
603 else if (prev_was_child) {
604 metaoption.pre_tree_traverse += ".fc";
605 }
606
607 prev_was_child = true;
608 prev_was_parent = false;
609 }
610 else {
611 prev_was_child = false;
612 prev_was_parent = false;
613 had_parent_or_child = false;
614 }
615 }
616
617 // parent/child can have sibling tacked on end also
[10415]618 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
619 metaoption.metacommand |= mSibling;
620 get_sibling_options (meta, metaoption);
621 }
622
[7599]623 // check for ex. which may occur in format statements
624 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
625 meta = substr (meta.begin()+3, meta.end());
626 }
[649]627 metadata.insert (meta);
628 metaoption.metaname = meta;
[354]629}
630
[9948]631static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
632 if (meta == "collection") {
633 // no qualifiers
634 metaoption.metaname = g_EmptyText;
635 return;
636 }
637 meta = substr (meta.begin()+11, meta.end());
638 metaoption.metaname = meta;
639
640}
641
[649]642static void parse_meta (text_t &meta, format_t *formatlistptr,
643 text_tset &metadata, bool &getParents) {
[354]644
[22437]645 // check for ex. which may occur in format statements
646 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
647 meta = substr (meta.begin()+3, meta.end());
648 }
[354]649 if (meta == "link")
650 formatlistptr->command = comLink;
651 else if (meta == "/link")
652 formatlistptr->command = comEndLink;
653
[21758]654 else if (meta == "srclink") {
655 formatlistptr->command = comAssocLink;
656 formatlistptr->meta.metaname = "srclink_file";
657 metadata.insert("srclink_file");
658 }
659 else if (meta == "srchref") {
660 formatlistptr->command = comAssocLink;
661 formatlistptr->text = "href";
662 formatlistptr->meta.metaname = "srclink_file";
663 metadata.insert("srclink_file");
664 }
665 else if (meta == "/srclink") {
666 formatlistptr->command = comEndAssocLink;
667 formatlistptr->meta.metaname = "srclink_file";
668 }
669 // and weblink etc
[2706]670 else if (meta == "href")
671 formatlistptr->command = comHref;
672
[354]673 else if (meta == "num")
674 formatlistptr->command = comNum;
675
[407]676 else if (meta == "icon")
677 formatlistptr->command = comIcon;
678
[442]679 else if (meta == "Text")
680 formatlistptr->command = comDoc;
[1941]681
682 else if (meta == "RelatedDocuments")
683 formatlistptr->command = comRel;
[442]684
[670]685 else if (meta == "highlight")
686 formatlistptr->command = comHighlight;
687
688 else if (meta == "/highlight")
689 formatlistptr->command = comEndHighlight;
690
[19302]691 else if (meta == "metadata-spanwrap")
692 formatlistptr->command = comMetadataSpanWrap;
693
694 else if (meta == "/metadata-spanwrap")
695 formatlistptr->command = comEndMetadataSpanWrap;
696
[23515]697 else if (meta == "metadata-divwrap")
698 formatlistptr->command = comMetadataDivWrap;
699
700 else if (meta == "/metadata-divwrap")
701 formatlistptr->command = comEndMetadataDivWrap;
702
[2967]703 else if (meta == "Summary")
704 formatlistptr->command = comSummary;
705
[5788]706 else if (meta == "DocImage")
707 formatlistptr->command = comImage;
708
709 else if (meta == "DocTOC")
710 formatlistptr->command = comTOC;
711
712 else if (meta == "DocumentButtonDetach")
713 formatlistptr->command = comDocumentButtonDetach;
714
715 else if (meta == "DocumentButtonHighlight")
716 formatlistptr->command = comDocumentButtonHighlight;
717
718 else if (meta == "DocumentButtonExpandContents")
719 formatlistptr->command = comDocumentButtonExpandContents;
720
721 else if (meta == "DocumentButtonExpandText")
722 formatlistptr->command = comDocumentButtonExpandText;
[6020]723
724 else if (meta == "DocOID")
725 formatlistptr->command = comOID;
[13118]726 else if (meta == "DocTopOID")
727 formatlistptr->command = comTopOID;
[6710]728 else if (meta == "DocRank")
729 formatlistptr->command = comRank;
[16915]730 else if (meta == "DocTermsFreqTotal")
731 formatlistptr->command = comDocTermsFreqTotal;
[9948]732 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
733 formatlistptr->command = comCollection;
734 parse_coll_meta(meta, formatlistptr->meta);
735 }
[354]736 else {
737 formatlistptr->command = comMeta;
[649]738 parse_meta (meta, formatlistptr->meta, metadata, getParents);
[354]739 }
740}
741
[9948]742
[354]743static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
[649]744 text_tset &metadata, bool &getParents) {
[354]745
[347]746 text_t text;
747 text_t::const_iterator here = formatstring.begin();
748 text_t::const_iterator end = formatstring.end();
749
750 while (here != end) {
751
[1257]752 if (*here == '\\') {
[9620]753 ++here;
[1257]754 if (here != end) text.push_back (*here);
[347]755
[1257]756 } else if (*here == '{') {
[347]757 if (!text.empty()) {
758 formatlistptr->command = comText;
759 formatlistptr->text = text;
760 formatlistptr->nextptr = new format_t();
761 formatlistptr = formatlistptr->nextptr;
762
763 text.clear();
764 }
[649]765 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
[1443]766
[347]767 formatlistptr->nextptr = new format_t();
768 formatlistptr = formatlistptr->nextptr;
769 if (here == end) break;
770 }
771 } else if (*here == '[') {
772 if (!text.empty()) {
773 formatlistptr->command = comText;
774 formatlistptr->text = text;
775 formatlistptr->nextptr = new format_t();
776 formatlistptr = formatlistptr->nextptr;
777
778 text.clear();
779 }
780 text_t meta;
[9620]781 ++here;
[347]782 while (*here != ']') {
783 if (here == end) return false;
784 meta.push_back (*here);
[9620]785 ++here;
[347]786 }
[649]787 parse_meta (meta, formatlistptr, metadata, getParents);
788 formatlistptr->nextptr = new format_t();
789 formatlistptr = formatlistptr->nextptr;
[347]790
791 } else
792 text.push_back (*here);
793
[9620]794 if (here != end) ++here;
[347]795 }
796 if (!text.empty()) {
797 formatlistptr->command = comText;
798 formatlistptr->text = text;
799 formatlistptr->nextptr = new format_t();
800 formatlistptr = formatlistptr->nextptr;
801
802 }
803 return true;
804}
805
806
[354]807static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]808 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
[354]809
[347]810 text_t::const_iterator it = findchar (here, end, '}');
811 if (it == end) return false;
812
813 text_t com = substr (here, it);
814 here = findchar (it, end, '{');
815 if (here == end) return false;
[9620]816 else ++here;
[347]817
[7266]818 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
819 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
[347]820 else return false;
821
822 int commacount = 0;
823 text_t text;
824 while (here != end) {
[636]825
826 if (*here == '\\') {
[9745]827 ++here;
[636]828 if (here != end) text.push_back(*here);
829
[1443]830 }
831
832 else if (*here == ',' || *here == '}' || *here == '{') {
[347]833
834 if (formatlistptr->command == comOr) {
835 // the {Or}{this, or this, or this, or this} statement
836 format_t *or_ptr;
837
838 // find the next unused orptr
839 if (formatlistptr->orptr == NULL) {
840 formatlistptr->orptr = new format_t();
841 or_ptr = formatlistptr->orptr;
842 } else {
843 or_ptr = formatlistptr->orptr;
844 while (or_ptr->nextptr != NULL)
845 or_ptr = or_ptr->nextptr;
846 or_ptr->nextptr = new format_t();
847 or_ptr = or_ptr->nextptr;
848 }
849
[1443]850 if (!text.empty())
851 {
852 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
853 }
[347]854
[1443]855 if (*here == '{')
856 {
857 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
858 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
859 // The latter can always be re-written:
860 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
861
862 if (!text.empty()) // already used up allocated format_t
863 {
864 // => allocate new one for detected action
865 or_ptr->nextptr = new format_t();
866 or_ptr = or_ptr->nextptr;
867 }
868 if (!parse_action(++here, end, or_ptr, metadata, getParents))
869 {
870 return false;
871 }
872 }
873 else
874 {
875 if (*here == '}') break;
876 }
[347]877 text.clear();
878
[1610]879 }
880
881 // Parse an {If}{decide,do,else} statement
882 else {
883
884 // Read the decision component.
[347]885 if (commacount == 0) {
[1610]886 // Decsion can be a metadata element, or a piece of text.
887 // Originally Stefan's code, updated 25/10/2000 by Gordon.
[1443]888
[347]889 text_t::const_iterator beginbracket = text.begin();
890 text_t::const_iterator endbracket = (text.end() - 1);
[1610]891
892 // Decision is based on a metadata element
[347]893 if ((*beginbracket == '[') && (*endbracket == ']')) {
[1610]894 // Ignore the surrounding square brackets
[347]895 text_t meta = substr (beginbracket+1, endbracket);
[649]896 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
[9620]897 ++commacount;
[347]898 text.clear();
899 }
[1610]900
901 // Decision is a piece of text (probably a macro like _cgiargmode_).
902 else {
[7389]903
904 // hunt for any metadata in string, which might be uses in
905 // to test a condition, e.g. [Format] eq 'PDF'
906 format_t* dummyformat = new format_t();
907 // update which metadata fields needed
908 // (not interested in updatng formatlistptr)
909 parse_string (text, dummyformat, metadata, getParents);
910 delete dummyformat;
911
[1610]912 formatlistptr->decision.command = dText;
913 formatlistptr->decision.text = text;
[9620]914 ++commacount;
[1610]915 text.clear();
916 }
917 }
918
919 // Read the "then" and "else" components of the {If} statement.
920 else {
[1443]921 format_t** nextlistptr = NULL;
922 if (commacount == 1) {
[1610]923 nextlistptr = &formatlistptr->ifptr;
[1443]924 } else if (commacount == 2 ) {
925 nextlistptr = &formatlistptr->elseptr;
926 } else {
927 return false;
928 }
929
930 if (!text.empty()) {
931 if (*nextlistptr == NULL) {
932 *nextlistptr = new format_t();
933 } else {
934
935 // skip to the end of any format_t statements already added
936 while ((*nextlistptr)->nextptr != NULL)
937 {
938 nextlistptr = &(*nextlistptr)->nextptr;
939 }
940
941 (*nextlistptr)->nextptr = new format_t();
942 nextlistptr = &(*nextlistptr)->nextptr;
943 }
944
945 if (!parse_string (text, *nextlistptr, metadata, getParents))
946 {
947 return false;
948 }
949 text.clear();
950 }
[347]951
[1443]952 if (*here == '{')
953 {
954 if (*nextlistptr == NULL) {
955 *nextlistptr = new format_t();
956 } else {
[7474]957 // skip to the end of any format_t statements already added
958 while ((*nextlistptr)->nextptr != NULL)
959 {
960 nextlistptr = &(*nextlistptr)->nextptr;
961 }
962
[1443]963 (*nextlistptr)->nextptr = new format_t();
964 nextlistptr = &(*nextlistptr)->nextptr;
965 }
966
967 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
968 {
969 return false;
970 }
971 }
972 else
973 {
974 if (*here == '}') break;
[9620]975 ++commacount;
[1443]976 }
[347]977 }
978 }
[636]979
980 } else text.push_back(*here);
[347]981
[9620]982 if (here != end) ++here;
[347]983 }
984
985 return true;
986}
987
[354]988
[23515]989static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
[19312]990 const text_t metaname, int metapos=-1)
991{
992
[23515]993 text_t tag_type = metadata_wrap_type;
[19312]994 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
995
996 text_t wrapped_metatext = "<" + tag_type + " ";
997 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
998
999 wrapped_metatext += "docoid=\"" + OID + "\" ";
1000 wrapped_metatext += "metaname=\"" + metaname + "\"";
1001
1002 if (metapos>=0) {
1003 text_t metapos_str = metapos;
1004 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1005 }
1006
1007 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1008
1009 return wrapped_metatext;
1010}
1011
1012
1013
[347]1014bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
[649]1015 text_tset &metadata, bool &getParents) {
[347]1016
1017 formatlistptr->clear();
1018 getParents = false;
1019
[649]1020 return (parse_string (formatstring, formatlistptr, metadata, getParents));
[347]1021}
1022
[10415]1023// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1024// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
[19312]1025
1026static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
[9401]1027{
1028 text_t no_ns_metaname = remove_namespace(meta.metaname);
[19298]1029 text_t formatted_metatext;
[9401]1030 bool first = true;
[649]1031
[9401]1032 const int start_i=0;
1033 const int end_i = metainfo.values.size()-1;
[10415]1034
1035 if (position == -1) { // all
1036 for (int i=start_i; i<=end_i; ++i) {
[19298]1037 if (!first) formatted_metatext += meta.siblingoptions;
1038
1039 text_t fresh_metatext;
1040
[12567]1041 if (meta.metacommand & mSpecial) {
1042 // special formatting
[19298]1043 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1044 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1045 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
[12567]1046 }
[19298]1047 else fresh_metatext = metainfo.values[i];
1048
[21752]1049 // New "truncate" special formatting option
1050 if (meta.metacommand & mTruncate)
1051 {
1052 int truncate_length = meta.siblingoptions.getint();
1053 text_t truncated_value = fresh_metatext;
1054 if (truncated_value.size() > truncate_length)
1055 {
1056 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1057 }
1058 fresh_metatext = truncated_value;
1059 }
1060 // New "xmlsafe" special formatting option
1061 if (meta.metacommand & mXMLSafe)
1062 {
1063 // Make it XML-safe
1064 text_t text_xml_safe = "";
1065 text_t::const_iterator text_iterator = fresh_metatext.begin();
1066 while (text_iterator != fresh_metatext.end())
1067 {
1068 if (*text_iterator == '&') text_xml_safe += "&amp;";
1069 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1070 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1071 else text_xml_safe.push_back(*text_iterator);
1072 text_iterator++;
1073 }
1074 fresh_metatext = text_xml_safe;
1075 }
1076 // New "htmlsafe" special formatting option
1077 if (meta.metacommand & mHTMLSafe)
1078 {
1079 // Make it HTML-safe
1080 text_t text_html_safe = "";
1081 text_t::const_iterator text_iterator = fresh_metatext.begin();
1082 while (text_iterator != fresh_metatext.end())
1083 {
1084 if (*text_iterator == '&') text_html_safe += "&amp;";
1085 else if (*text_iterator == '<') text_html_safe += "&lt;";
1086 else if (*text_iterator == '>') text_html_safe += "&gt;";
1087 else if (*text_iterator == '"') text_html_safe += "&quot;";
1088 else text_html_safe.push_back(*text_iterator);
1089 text_iterator++;
1090 }
1091 fresh_metatext = text_html_safe;
1092 }
[22665]1093 // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1094 if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
[22650]1095 {
1096 // Make it macro-safe
1097 text_t text_dm_safe = dm_safe(fresh_metatext);
1098 fresh_metatext = text_dm_safe;
1099 }
[21752]1100
[23515]1101 if (metadata_wrap) {
1102 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
[19298]1103 }
1104 formatted_metatext += fresh_metatext;
1105
[10415]1106 first = false;
[9401]1107
[10415]1108 }
1109 } else {
1110 if (position == -2) { // end
1111 position = end_i;
1112 } else if (position < start_i || position > end_i) {
1113 return "";
1114 }
[19298]1115
1116 text_t fresh_metatext;
[12567]1117 if (meta.metacommand & mSpecial) {
[19298]1118
[12567]1119 // special formatting
[19298]1120 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1121 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1122 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
[12567]1123 }
[19298]1124 else fresh_metatext = metainfo.values[position];
1125
[21752]1126 // New "truncate" special formatting option
1127 if (meta.metacommand & mTruncate)
1128 {
1129 int truncate_length = meta.siblingoptions.getint();
1130 text_t truncated_value = fresh_metatext;
1131 if (truncated_value.size() > truncate_length)
1132 {
1133 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1134 }
1135 fresh_metatext = truncated_value;
1136 }
1137 // New "xmlsafe" special formatting option
1138 if (meta.metacommand & mXMLSafe)
1139 {
1140 // Make it XML-safe
1141 text_t text_xml_safe = "";
1142 text_t::const_iterator text_iterator = fresh_metatext.begin();
1143 while (text_iterator != fresh_metatext.end())
1144 {
1145 if (*text_iterator == '&') text_xml_safe += "&amp;";
1146 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1147 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1148 else text_xml_safe.push_back(*text_iterator);
1149 text_iterator++;
1150 }
1151 fresh_metatext = text_xml_safe;
1152 }
1153 // New "htmlsafe" special formatting option
1154 if (meta.metacommand & mHTMLSafe)
1155 {
1156 // Make it HTML-safe
1157 text_t text_html_safe = "";
1158 text_t::const_iterator text_iterator = fresh_metatext.begin();
1159 while (text_iterator != fresh_metatext.end())
1160 {
1161 if (*text_iterator == '&') text_html_safe += "&amp;";
1162 else if (*text_iterator == '<') text_html_safe += "&lt;";
1163 else if (*text_iterator == '>') text_html_safe += "&gt;";
1164 else if (*text_iterator == '"') text_html_safe += "&quot;";
[23305]1165 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1166 else if (*text_iterator == ',') text_html_safe += "&#44;";
[21752]1167 else text_html_safe.push_back(*text_iterator);
1168 text_iterator++;
1169 }
1170 fresh_metatext = text_html_safe;
1171 }
[22665]1172 // New "dmsafe" special formatting option (always apply to "srclink_file" metadata)
1173 if (meta.metacommand & mDMSafe || meta.metaname == "srclink_file")
[22650]1174 {
1175 // Make it macro-safe
1176 text_t text_dm_safe = dm_safe(fresh_metatext);
1177 fresh_metatext = text_dm_safe;
1178 }
[21752]1179
[23515]1180 if (metadata_wrap) {
1181 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
[19298]1182 }
1183
1184 formatted_metatext += fresh_metatext;
[9401]1185 }
[19298]1186
1187 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1188 else return formatted_metatext;
[9401]1189}
[347]1190
[10415]1191static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
[9401]1192{
1193
[649]1194 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
[19312]1195
[9401]1196 switch (meta.mqualifier.parent) {
[347]1197 case pNone:
[9401]1198 return "Nothing!!";
1199 break;
[5787]1200
[347]1201 case pImmediate:
[649]1202 if (parent != NULL) {
[19312]1203 text_t parent_oid = get_parent(docinfo.OID);
1204 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]1205 }
[347]1206 break;
1207
1208 case pTop:
[649]1209 if (parent != NULL) {
[19312]1210 text_t parent_oid = get_parent(docinfo.OID);
1211
1212 while (parent->parent != NULL) {
1213 parent = parent->parent;
1214 parent_oid = get_parent(parent_oid);
1215 }
1216 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]1217 }
[347]1218 break;
1219
1220 case pAll:
[649]1221 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1222 if (parent != NULL) {
[19312]1223 text_t parent_oid = get_parent(docinfo.OID);
1224
[649]1225 text_tarray tmparray;
1226 while (parent != NULL) {
[19312]1227 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
[649]1228 parent = parent->parent;
[19312]1229 parent_oid = get_parent(parent_oid);
1230
[649]1231 }
[10415]1232 // now join them up - use teh parent separator
[649]1233 bool first = true;
1234 text_t tmp;
1235 text_tarray::reverse_iterator here = tmparray.rbegin();
1236 text_tarray::reverse_iterator end = tmparray.rend();
[359]1237 while (here != end) {
[10415]1238 if (!first) tmp += meta.parentoptions;
1239 tmp += *here;
[359]1240 first = false;
[9620]1241 ++here;
[359]1242 }
[13457]1243 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
[649]1244 else return tmp;
[347]1245 }
1246 }
1247 return "";
[9401]1248
[347]1249}
1250
[9948]1251static text_t get_child_meta (const text_t& collection,
1252 recptproto* collectproto,
[9401]1253 ResultDocInfo_t &docinfo, displayclass &disp,
1254 const metadata_t &meta, text_tmap &options,
[10415]1255 ostream& logout, int siblings_values)
[9401]1256{
[10415]1257 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1258
[19046]1259 const text_t& pre_tree_trav = meta.pre_tree_traverse;
[10415]1260 const text_t& child_metaname = meta.metaname;
1261 const text_t& child_field = meta.childoptions;
1262 text_tset child_metadata;
1263 child_metadata.insert(child_metaname);
[9401]1264
[10415]1265 FilterResponse_t child_response;
1266 if (meta.mqualifier.child == cNum) {
1267 // just one child
1268 //get the information associated with the metadata for child doc
[19046]1269 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1270 child_metadata, false, collectproto, child_response,
1271 logout)) return ""; // invalid child number
[9401]1272
[10415]1273 if (child_response.docInfo.empty()) return false; // no info for the child
1274
1275 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1276 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1277
[19312]1278 text_t child_metavalue
1279 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[10415]1280 return expand_metadata(child_metavalue,collection,collectproto,
1281 child_docinfo,disp,options,logout);
1282 }
1283
[9401]1284
[10415]1285 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
[19046]1286
1287
1288 if (!pre_tree_trav.empty()) {
1289 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1290 FilterResponse_t trav_response;
1291
1292 text_tset trav_metadata;
1293 trav_metadata.insert("contains");
1294
1295 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1296 trav_metadata, false, collectproto, trav_response,
1297 logout)) return ""; // invalid pre_tree_trav
1298
1299 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
[10415]1300
[19046]1301 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
[22046]1302
[19046]1303 // use this for rest of routine
1304 docinfo = trav_docinfo;
1305 }
1306
[10415]1307 // we need to get all children
1308 text_t result = "";
1309 text_tarray children;
1310 text_t contains = docinfo.metadata["contains"].values[0];
1311 splitchar (contains.begin(), contains.end(), ';', children);
1312 text_tarray::const_iterator here = children.begin();
1313 text_tarray::const_iterator end = children.end();
1314 bool first = true;
1315 while (here !=end) {
1316 text_t oid = *here;
1317 here++;
1318 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
[22046]1319
[9401]1320 //get the information associated with the metadata for child doc
[10415]1321 if (!get_info (oid, collection, "", child_metadata,
1322 false, collectproto, child_response, logout) ||
1323 child_response.docInfo.empty()) {
1324 first = false;
1325 continue;
1326 }
1327
1328
1329 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1330 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1331
[19312]1332 text_t child_metavalue
1333 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[22046]1334
1335
[10415]1336 if (!first) result += child_field;
1337 first = false;
1338 // need to do this here cos otherwise we are in the wrong document
[22046]1339 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1340 child_docinfo,disp,options,logout);
1341
1342 result += em;
[9401]1343 }
[10415]1344 return result;
1345
[9401]1346}
1347
1348static text_t get_meta (const text_t& collection, recptproto* collectproto,
1349 ResultDocInfo_t &docinfo, displayclass &disp,
1350 const metadata_t &meta, text_tmap &options,
1351 ostream& logout) {
1352
1353 // make sure we have the requested metadata
1354 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1355 if (it == docinfo.metadata.end()) return "";
1356
[10415]1357 int siblings_values = 0; // default is no siblings, just the first metadata available
1358 if (meta.metacommand & mSibling) {
1359 if (meta.mqualifier.sibling == sAll) {
1360 siblings_values = -1; //all
1361 } else if (meta.mqualifier.sibling == sNum) {
1362 siblings_values = meta.siblingoptions.getint();
1363 }
1364 }
[9401]1365 if (meta.metacommand & mParent) {
[10415]1366 return get_parent_meta(docinfo,meta,siblings_values);
[9401]1367 }
[10415]1368
[9401]1369 else if (meta.metacommand & mChild) {
1370 return get_child_meta(collection,collectproto,docinfo,disp,meta,
[10415]1371 options,logout, siblings_values);
[9401]1372 }
[10415]1373 else if (meta.metacommand & mSibling) { // only siblings
[9401]1374 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
[19312]1375 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
[9401]1376 }
1377 else {
1378
1379 // straightforward metadata request (nothing fancy)
1380
1381 text_t classifier_metaname = docinfo.classifier_metadata_type;
1382 int metaname_index
1383 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
[19312]1384 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
[9401]1385 }
[10415]1386
[9401]1387 return "";
1388}
1389
[1443]1390static text_t get_or (const text_t& collection, recptproto* collectproto,
[1610]1391 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1392 format_t *orptr, text_tmap &options,
[1443]1393 ostream& logout) {
[347]1394
[354]1395 while (orptr != NULL) {
[347]1396
[23515]1397 if (metadata_wrap) {
[19302]1398 // need to be a bit more careful about this
[23515]1399 // => test for it *without* spanwrap or divwrap, and if defined, then
1400 // got back and generate it again, this time with spanwrap/divwrap on
[354]1401
[23515]1402 metadata_wrap = false;
[19302]1403 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1404 options, logout);
[23515]1405 metadata_wrap = true;
[19302]1406 if (!test_tmp.empty()) {
1407
1408 return format_string (collection,collectproto,docinfo, disp, orptr,
1409 options, logout);
1410 }
1411 }
1412 else {
1413 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1414 options, logout);
1415 if (!tmp.empty()) return tmp;
1416 }
1417
[354]1418 orptr = orptr->nextptr;
[347]1419 }
[354]1420 return "";
[347]1421}
1422
// True for the four characters treated as white space when scanning
// {If} expressions: space, tab, newline and carriage return.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1428
1429static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1430{
1431 int pos = start_pos;
1432 while (pos<outstring.size()) {
1433 if (!char_is_whitespace(outstring[pos])) {
1434 break;
1435 }
[9620]1436 ++pos;
[7389]1437 }
1438
1439 return pos;
1440}
1441
1442static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1443{
1444 int pos = start_pos;
1445 while (pos>=0) {
1446 if (!char_is_whitespace(outstring[pos])) {
1447 break;
1448 }
[9620]1449 --pos;
[7389]1450 }
1451
1452 return pos;
1453}
1454
1455static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1456{
1457 int pos = start_pos;
1458 while (pos>=0) {
1459 if (char_is_whitespace(outstring[pos])) {
1460 break;
1461 }
[9620]1462 --pos;
[7389]1463 }
1464
1465 return pos;
1466}
1467
1468
1469static int rscan_for(const text_t& outstring, const int start_pos,
1470 const char find_c)
1471{
1472 int pos = start_pos;
1473 while (pos>=0) {
1474 char c = outstring[pos];
1475 if (outstring[pos] == find_c) {
1476 break;
1477 }
[9620]1478 --pos;
[7389]1479 }
1480
1481 return pos;
1482}
1483
1484text_t extract_substr(const text_t& outstring, const int start_pos,
1485 const int end_pos)
1486{
1487 text_t extracted_str;
1488 extracted_str.clear();
1489
[9620]1490 for (int pos=start_pos; pos<=end_pos; ++pos) {
[7389]1491 extracted_str.push_back(outstring[pos]);
1492 }
1493
1494 return extracted_str;
1495}
1496
1497
[9401]1498static text_t expand_potential_metadata(const text_t& collection,
1499 recptproto* collectproto,
1500 ResultDocInfo_t &docinfo,
1501 displayclass &disp,
1502 const text_t& intext,
1503 text_tmap &options,
1504 ostream& logout)
[7389]1505{
1506 text_t outtext;
1507
1508 // decide if dealing with metadata or text
1509
1510 text_t::const_iterator beginbracket = intext.begin();
1511 text_t::const_iterator endbracket = (intext.end() - 1);
1512
1513 // Decision is based on a metadata element
1514 if ((*beginbracket == '[') && (*endbracket == ']')) {
1515 // Ignore the surrounding square brackets
1516 text_t meta_text = substr (beginbracket+1, endbracket);
1517
[10614]1518 if (meta_text == "Text") {
1519 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
[19311]1520 }
1521 else {
[7389]1522
[10614]1523 text_tset metadata;
1524 bool getParents =false;
1525 metadata_t meta;
1526
1527 parse_meta (meta_text, meta, metadata, getParents);
1528 outtext
1529 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1530 }
1531
[7389]1532 }
1533 else {
1534 outtext = intext;
1535 }
1536
1537 return outtext;
1538}
1539
1540
1541
1542
[9401]1543static bool uses_expression(const text_t& collection, recptproto* collectproto,
1544 ResultDocInfo_t &docinfo,
1545 displayclass &disp,
[7389]1546 const text_t& outstring, text_t& lhs_expr,
[9401]1547 text_t& op_expr, text_t& rhs_expr,
1548 text_tmap &options,
1549 ostream& logout)
[7389]1550{
1551 // Note: the string may not be of the form: str1 op str2, however
1552 // to deterine this we have to process it on the assumption it is,
1553 // and if at any point an 'erroneous' value is encountered, return
1554 // false and let something else have a go at evaluating it
1555
1556 // Starting at the end of the string and working backwards ..
1557
1558 const int outstring_len = outstring.size();
1559
1560 // skip over white space
1561 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1562
1563 if (rhs_end<=0) {
1564 // no meaningful text or (rhs_end==0) no room for operator
1565 return false;
1566 }
1567
1568 // check for ' or " and then scan over token
1569 const char potential_quote = outstring[rhs_end];
1570 int rhs_start=rhs_end;
1571 bool quoted = false;
1572
1573 if ((potential_quote == '\'') || (potential_quote == '\"')) {
[9620]1574 --rhs_end;
[7389]1575 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1576 quoted = true;
1577 }
1578 else {
1579 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1580 }
1581
[7617]1582 if ((rhs_end-rhs_start)<0) {
[7389]1583 // no meaningful rhs expression
1584 return false;
1585 }
1586
1587 // form rhs_expr
1588 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1589
1590 // skip over white space
1591 const int to_whitespace = (quoted) ? 2 : 1;
1592
1593 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1594 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1595
[19058]1596 if ((op_end<0) && (op_start<0)) {
1597 // no meaningful expression operator
1598 return false;
1599 }
[7389]1600
[7617]1601 if (op_end-op_start<0) {
[7389]1602 // no meaningful expression operator
1603 return false;
1604 }
1605
1606 op_expr = extract_substr(outstring,op_start,op_end);
1607
1608
1609 // check for operator
[10142]1610 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1611 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
[10145]1612
[7389]1613 // not a valid operator
1614 return false;
1615 }
1616
1617 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
[7617]1618 if (lhs_end<0) {
[7389]1619 // no meaningful lhs expression
1620 return false;
1621 }
1622
1623 int lhs_start = scan_over_whitespace(outstring,0);
1624
1625 // form lhs_expr from remainder of string
1626 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1627
1628 // Now we know we have a valid expression, look up any
1629 // metadata terms
1630
[9401]1631 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1632 disp,rhs_expr,options,logout);
1633 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1634 disp,lhs_expr,options,logout);
[7389]1635
1636 return true;
1637}
1638
1639static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1640 const text_t& rhs_expr, ostream& logout)
1641{
[10142]1642 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1643 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1644 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1645 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1646 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1647 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1648 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1649 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1650 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1651 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1652 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1653 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1654 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1655 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
[7389]1656 else {
1657 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1658 }
[10142]1659
[7389]1660 return false;
1661}
1662
1663
[1443]1664static text_t get_if (const text_t& collection, recptproto* collectproto,
[1610]1665 ResultDocInfo_t &docinfo, displayclass &disp,
1666 const decision_t &decision,
[5788]1667 format_t *ifptr, format_t *elseptr,
1668 text_tmap &options, ostream& logout)
[1443]1669{
[1610]1670 // If the decision component is a metadata element, then evaluate it
1671 // to see whether we output the "then" or the "else" clause
[354]1672 if (decision.command == dMeta) {
[19298]1673
[23515]1674 bool store_metadata_wrap = metadata_wrap;
1675 metadata_wrap = 0;
[19298]1676
[23515]1677 // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
[19298]1678 bool metadata_exists
1679 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1680 logout) != "");
1681
[23515]1682 metadata_wrap = store_metadata_wrap;
[19298]1683
1684 if (metadata_exists) {
[354]1685 if (ifptr != NULL)
[9948]1686 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
[5788]1687 options, logout);
[354]1688 }
1689 else {
1690 if (elseptr != NULL)
[9948]1691 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
[5788]1692 options, logout);
[354]1693 }
1694 }
[1610]1695
1696 // If the decision component is text, then evaluate it (it is probably a
1697 // macro like _cgiargmode_) to decide what to output.
1698 else if (decision.command == dText) {
1699
1700 text_t outstring;
1701 disp.expandstring (decision.text, outstring);
1702
[7389]1703 // Check for if expression in form: str1 op str2
1704 // (such as [x] eq "y")
1705 text_t lhs_expr, op_expr, rhs_expr;
[9401]1706 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
[7389]1707 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1708 if (ifptr != NULL) {
1709 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1710 options, logout);
1711 }
1712 else {
1713 return "";
1714 }
1715 } else {
1716 if (elseptr != NULL) {
1717 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1718 options, logout);
1719 }
1720 else {
1721 return "";
1722 }
1723 }
1724 }
1725
1726
[1610]1727 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1728 // a cgi argument macro that has not been set, it evaluates to itself.
1729 // Therefore, were have to say that a piece of text evalautes true if
1730 // it is non-empty and if it is a cgi argument evaulating to itself.
[7389]1731
[1610]1732 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1733 if (ifptr != NULL)
1734 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
[5788]1735 options, logout);
[1610]1736 } else {
1737 if (elseptr != NULL)
1738 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
[5788]1739 options, logout);
[1610]1740 }
1741 }
1742
[354]1743 return "";
1744}
1745
[1443]1746bool includes_metadata(const text_t& text)
1747{
1748 text_t::const_iterator here = text.begin();
1749 text_t::const_iterator end = text.end();
[23707]1750
1751 char startbracket = '[';
1752 char endbracket = ']';
1753
1754 char bracket = startbracket;
[1443]1755 while (here != end) {
[23707]1756 if (*here == bracket) {
1757 if(bracket == startbracket) {
1758 // seen a [, next look for a ] to confirm it's metadata
1759 bracket = endbracket;
1760 } else if(bracket == endbracket) {
1761 // found [ ... ] in text, so we think it includes metadata
1762 return true;
1763 }
1764 }
[9620]1765 ++here;
[1443]1766 }
1767
1768 return false;
1769}
1770
[5788]1771static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
[9948]1772 recptproto* collectproto,
1773 ResultDocInfo_t &docinfo,
[5788]1774 displayclass &disp, text_tmap &options,
1775 ostream &logout) {
1776
[10415]1777 if (includes_metadata(metavalue)) {
1778
1779 // text has embedded metadata in it => expand it
1780 FilterRequest_t request;
1781 FilterResponse_t response;
1782
1783 request.getParents = false;
1784
1785 format_t *expanded_formatlistptr = new format_t();
1786 parse_formatstring (metavalue, expanded_formatlistptr,
1787 request.fields, request.getParents);
1788
1789 // retrieve metadata
1790 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1791 collectproto, response, logout);
1792
1793 if (!response.docInfo.empty()) {
1794
1795 text_t expanded_metavalue
1796 = get_formatted_string(collection, collectproto,
1797 response.docInfo[0], disp, expanded_formatlistptr,
1798 options, logout);
1799
1800 return expanded_metavalue;
1801 }
1802 else {
1803 return metavalue;
1804 }
1805 }
1806 else {
1807
1808 return metavalue;
1809 }
[5788]1810}
[1941]1811
[9948]1812text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1813 displayclass &disp,
1814 text_t meta_name, ostream& logout) {
1815
1816 ColInfoResponse_t collectinfo;
1817 comerror_t err;
1818 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1819 text_t meta_value = "";
1820 text_t lang;
1821 disp.expandstring("_cgiargl_",lang);
1822 if (lang.empty()) {
1823 lang = "en";
1824 }
1825
1826 if (err == noError) {
1827 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1828 }
1829 return meta_value;
1830
1831
1832}
[1443]1833text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]1834 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1835 format_t *formatlistptr, text_tmap &options,
[1443]1836 ostream& logout) {
[354]1837
[347]1838 if (formatlistptr == NULL) return "";
1839
1840 switch (formatlistptr->command) {
[6020]1841 case comOID:
1842 return docinfo.OID;
[13118]1843 case comTopOID:
1844 {
1845 text_t top_id;
1846 get_top(docinfo.OID, top_id);
1847 return top_id;
1848 }
[6710]1849 case comRank:
1850 return text_t(docinfo.ranking);
[5788]1851 case comText:
1852 return formatlistptr->text;
1853 case comLink:
1854 return options["link"];
1855 case comEndLink:
[21758]1856 {
1857 if (options["link"].empty()) return "";
[5788]1858 else return "</a>";
[21758]1859 }
[5788]1860 case comHref:
1861 return get_href(options["link"]);
1862 case comIcon:
1863 return options["icon"];
1864 case comNum:
1865 return docinfo.result_num;
1866 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1867 return get_related_docs(collection, collectproto, docinfo, logout);
[19311]1868
[5788]1869 case comSummary:
[19312]1870 return format_summary(collection, collectproto, docinfo, disp, options, logout);
[21758]1871 case comAssocLink:
1872 {
1873 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1874 if (!link_filename.empty()) {
[22670]1875 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
[21758]1876 if (formatlistptr->text == "href") {
1877 return href;
1878 }
1879 return "<a href=\""+ href + "\">";
1880 }
1881 return "";
1882 }
1883 case comEndAssocLink:
1884 {
1885 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1886 if (!link_filename.empty()) {
1887 return "</a>";
1888 }
1889 return "";
1890 }
[5788]1891 case comMeta:
[1443]1892 {
[9948]1893 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
[5788]1894 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
[1443]1895 }
[19311]1896
[5788]1897 case comDoc:
[19311]1898 return format_text(collection, collectproto, docinfo, disp, options, logout);
1899
[5788]1900 case comImage:
1901 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1902 case comTOC:
1903 return options["DocTOC"];
1904 case comDocumentButtonDetach:
1905 return options["DocumentButtonDetach"];
1906 case comDocumentButtonHighlight:
1907 return options["DocumentButtonHighlight"];
1908 case comDocumentButtonExpandContents:
1909 return options["DocumentButtonExpandContents"];
1910 case comDocumentButtonExpandText:
1911 return options["DocumentButtonExpandText"];
1912 case comHighlight:
1913 if (options["highlight"] == "1") return "<b>";
1914 break;
1915 case comEndHighlight:
1916 if (options["highlight"] == "1") return "</b>";
1917 break;
[19302]1918 case comMetadataSpanWrap:
[23515]1919 metadata_wrap=true; metadata_wrap_type="span"; return "";
[19302]1920 break;
1921 case comEndMetadataSpanWrap:
[23515]1922 metadata_wrap=false; metadata_wrap_type=""; return "";
[19302]1923 break;
[23515]1924 case comMetadataDivWrap:
1925 metadata_wrap=true; metadata_wrap_type="div"; return "";
1926 break;
1927 case comEndMetadataDivWrap:
1928 metadata_wrap=false; metadata_wrap_type=""; return "";
1929 break;
[5788]1930 case comIf:
1931 return get_if (collection, collectproto, docinfo, disp,
1932 formatlistptr->decision, formatlistptr->ifptr,
1933 formatlistptr->elseptr, options, logout);
1934 case comOr:
1935 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1936 options, logout);
[16915]1937 case comDocTermsFreqTotal:
1938 return docinfo.num_terms_matched;
[9948]1939 case comCollection:
1940 if (formatlistptr->meta.metaname == g_EmptyText) {
1941 return collection;
1942 }
1943 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1944
[347]1945 }
1946 return "";
1947}
1948
[1443]1949text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
[1610]1950 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1951 format_t *formatlistptr, text_tmap &options,
[1443]1952 ostream& logout) {
[407]1953
[5788]1954 text_t ft;
1955 while (formatlistptr != NULL)
1956 {
1957 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1958 options, logout);
1959 formatlistptr = formatlistptr->nextptr;
1960 }
1961
1962 return ft;
[347]1963}
1964
1965
[22924]1966// we have only preloaded the text in DocumentAction. But you may want
1967// to get the text in query, so copy what we have done with
1968// format_summary and get the text here. Probably is quite expensive?
[9852]1969text_t format_text (const text_t& collection, recptproto* collectproto,
[9948]1970 ResultDocInfo_t &docinfo, displayclass &disp,
[19311]1971 text_tmap &options, ostream& logout)
1972{
1973 text_t text;
1974
[22924]1975 if (!options["text"].empty()) {
[19311]1976 text = options["text"];
[9852]1977 }
[19311]1978 else {
1979 // get document text here
1980 DocumentRequest_t docrequest;
1981 DocumentResponse_t docresponse;
1982 comerror_t err;
1983 docrequest.OID = docinfo.OID;
1984 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1985 text = docresponse.doc;
1986 }
[9852]1987
[23515]1988 if (metadata_wrap) {
1989 text = wrap_metatext(text,docinfo.OID,"Text");
[19311]1990 }
1991
1992 return text;
[9852]1993}
1994
[2967]1995/* FUNCTION NAME: format_summary
1996 * DESC: this is invoked when a [Summary] special metadata is processed.
1997 * RETURNS: a query-biased summary for the document */
1998
1999text_t format_summary (const text_t& collection, recptproto* collectproto,
[5788]2000 ResultDocInfo_t &docinfo, displayclass &disp,
2001 text_tmap &options, ostream& logout) {
[3673]2002
2003 // GRB: added code here to ensure that the cstr (and other collections)
2004 // uses the document metadata item Summary, rather than compressing
2005 // the text of the document, processed via the methods in
2006 // summarise.cpp
[19312]2007
2008 text_t summary;
2009
[3673]2010 if (docinfo.metadata.count("Summary") > 0 &&
2011 docinfo.metadata["Summary"].values.size() > 0) {
[19312]2012 summary = docinfo.metadata["Summary"].values[0];
[3673]2013 }
[19312]2014 else {
2015
2016 text_t textToSummarise, query;
[3673]2017
[19312]2018 if(options["text"].empty()) { // get document text
2019 DocumentRequest_t docrequest;
2020 DocumentResponse_t docresponse;
2021 comerror_t err;
2022 docrequest.OID = docinfo.OID;
2023 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2024 textToSummarise = docresponse.doc;
2025 }
2026 else {
2027 // in practice, this would not happen, because text is only
2028 // loaded with the [Text] command
[22924]2029 textToSummarise = options["text"];
[19312]2030 }
2031
2032 disp.expandstring("_cgiargq_",query);
2033 summary = summarise(textToSummarise,query,80);
[22924]2034 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
[19311]2035 }
2036
[22924]2037 summary.replace("'","&#039;");
2038 summary.replace("\n","&#013;");
2039
[23515]2040 if (metadata_wrap) {
2041 summary = wrap_metatext(summary,docinfo.OID,"Summary");
[19312]2042 }
2043
2044 return summary;
[2967]2045}
Note: See TracBrowser for help on using the repository browser.