source: gsdl/trunk/runtime-src/src/recpt/formattools.cpp@ 16915

Last change on this file since 16915 was 16915, checked in by mdewsnip, 16 years ago

Changes made by Richard Managh at DL Consulting Ltd for returning document-level term frequency totals.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 47.0 KB
RevLine 
[347]1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[347]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[347]24 *********************************************************************/
25
26#include "formattools.h"
[462]27#include "cgiutils.h"
[15418]28#include "recptprototools.h"
[1443]29#include "OIDtools.h"
[2967]30#include "summarise.h"
[1443]31
[1257]32#include <assert.h>
[347]33
[354]34// a few function prototypes
[5788]35
[1443]36static text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]37 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]38 format_t *formatlistptr, text_tmap &options,
39 ostream& logout);
[354]40
41static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]42 format_t *formatlistptr, text_tset &metadata, bool &getParents);
[354]43
[5788]44static text_t format_summary (const text_t& collection, recptproto* collectproto,
45 ResultDocInfo_t &docinfo, displayclass &disp,
46 text_tmap &options, ostream& logout);
[9852]47static text_t format_text (const text_t& collection, recptproto* collectproto,
48 ResultDocInfo_t &docinfo, displayclass &disp,
49 text_tmap &options, ostream& logout);
[2967]50
[9401]51static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
52 recptproto* collectproto, ResultDocInfo_t &docinfo,
53 displayclass &disp, text_tmap &options,
54 ostream &logout);
[2967]55
[9401]56
[347]57void metadata_t::clear() {
[410]58 metaname.clear();
[462]59 metacommand = mNone;
[9401]60 mqualifier.parent = pNone;
61 mqualifier.sibling = sNone;
62 mqualifier.child = cNone;
[10415]63 parentoptions.clear();
64 siblingoptions.clear();
65 childoptions.clear();
[749]66}
[347]67
68void decision_t::clear() {
69 command = dMeta;
70 meta.clear();
[1610]71 text.clear();
[749]72}
[347]73
74void format_t::clear() {
75 command = comText;
76 decision.clear();
77 text.clear();
78 meta.clear();
79 nextptr = NULL;
80 ifptr = NULL;
81 elseptr = NULL;
82 orptr = NULL;
[749]83}
[347]84
[442]85void formatinfo_t::clear() {
[1079]86 DocumentImages = false;
87 DocumentTitles = true;
88 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
89 DocumentContents = true;
[1941]90 DocumentArrowsBottom = true;
[5788]91 DocumentArrowsTop = false;
[13365]92 DocumentSearchResultLinks = false;
[442]93 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
[1496]94 // DocumentButtons.push_back ("Expand Text");
95 // DocumentButtons.push_back ("Expand Contents");
[442]96 DocumentButtons.push_back ("Detach");
97 DocumentButtons.push_back ("Highlight");
[1941]98 RelatedDocuments = "";
[11146]99 DocumentText = "[Text]";
[649]100 formatstrings.erase (formatstrings.begin(), formatstrings.end());
[868]101 DocumentUseHTML = false;
[5788]102 AllowExtendedOptions = false;
[442]103}
104
[749]105// simply checks to see if formatstring begins with a <td> tag
106bool is_table_content (const text_t &formatstring) {
107 text_t::const_iterator here = formatstring.begin();
108 text_t::const_iterator end = formatstring.end();
109
110 while (here != end) {
111 if (*here != ' ') {
[1257]112 if ((*here == '<') && ((here+3) < end)) {
[749]113 if ((*(here+1) == 't' || *(here+1) == 'T') &&
114 (*(here+2) == 'd' || *(here+2) == 'D') &&
115 (*(here+3) == '>' || *(here+3) == ' '))
116 return true;
117 } else return false;
118 }
[9620]119 ++here;
[749]120 }
121 return false;
122}
123
124bool is_table_content (const format_t *formatlistptr) {
125
126 if (formatlistptr == NULL) return false;
127
128 if (formatlistptr->command == comText)
129 return is_table_content (formatlistptr->text);
130
131 return false;
132}
133
[649]134// returns false if key isn't in formatstringmap
135bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
136 text_t &formatstring) {
[442]137
[649]138 formatstring.clear();
139 text_tmap::const_iterator it = formatstringmap.find(key);
140 if (it == formatstringmap.end()) return false;
141 formatstring = (*it).second;
142 return true;
143}
[749]144
[649]145// tries to find "key1key2" then "key1" then "key2"
146bool get_formatstring (const text_t &key1, const text_t &key2,
147 const text_tmap &formatstringmap,
148 text_t &formatstring) {
149
150 formatstring.clear();
151 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
152 if (it != formatstringmap.end()) {
153 formatstring = (*it).second;
154 return true;
155 }
156 it = formatstringmap.find(key1);
157 if (it != formatstringmap.end()) {
158 formatstring = (*it).second;
159 return true;
160 }
161 it = formatstringmap.find(key2);
162 if (it != formatstringmap.end()) {
163 formatstring = (*it).second;
164 return true;
165 }
166 return false;
167}
168
169
[6645]170text_t remove_namespace(const text_t &meta_name) {
171 text_t::const_iterator end = meta_name.end();
172 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
173 if (it != end) {
174 return substr(it+1, end);
175 }
[12567]176
[6645]177 return meta_name;
178
179}
[12567]180// returns a date of form _format:date_(year, month, day)
181// input is date of type yyyy-?mm-?dd
[410]182// at least the year must be present in date
[422]183text_t format_date (const text_t &date) {
[347]184
[410]185 if (date.size() < 4) return "";
[347]186
[410]187 text_t::const_iterator datebegin = date.begin();
[354]188
[410]189 text_t year = substr (datebegin, datebegin+4);
[12567]190 int chars_seen_so_far = 4;
[410]191
[12567]192 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
193 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
194
195 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]196 int imonth = month.getint();
[12567]197 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
198
199 chars_seen_so_far += 2;
200 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
201
202 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
[410]203
[12567]204 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]205 if (day[0] == '0') day = substr (day.begin()+1, day.end());
206 int iday = day.getint();
[12567]207 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
[410]208
[12567]209 return "_format:date_("+year+","+month+","+day+")";
[410]210}
211
[2001]212// converts an iso639 language code to its English equivalent
[12567]213// should we be checking that the macro exists??
[2001]214text_t iso639 (const text_t &langcode) {
[12567]215 if (langcode.empty()) return "";
216 return "_iso639:iso639"+langcode+"_";
[2001]217}
218
[12567]219
[2706]220text_t get_href (const text_t &link) {
221
222 text_t href;
223
224 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
225 text_t::const_iterator end = link.end();
[13117]226 if (here == end) return g_EmptyText;
[2706]227
[9620]228 ++here;
[2706]229 while (here != end) {
230 if (*here == '"') break;
231 href.push_back(*here);
[9620]232 ++here;
[2706]233 }
234
235 return href;
236}
237
[1941]238//this function gets the information associated with the relation
239//metadata for the document associated with 'docinfo'. This relation
240//metadata consists of a line of pairs containing 'collection, document OID'
241//(this is the OID of the document related to the current document, and
242//the collection the related document belongs to). For each of these pairs
243//the title metadata is obtained and then an html link between the title
244//of the related doc and the document's position (the document will be
245//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
246//(where collection is the related documents collection, and OID is the
247//related documents OID). A list of these html links are made for as many
248//related documents as there are. This list is then returned. If there are
249//no related documents available for the current document then the string
250//'.. no related documents .. ' is returned.
251text_t get_related_docs(const text_t& collection, recptproto* collectproto,
[9948]252 ResultDocInfo_t &docinfo, ostream& logout){
[1941]253
254 text_tset metadata;
255
256 //insert the metadata we wish to collect
[11324]257 metadata.insert("dc.Relation");
[1941]258 metadata.insert("Title");
259 metadata.insert("Subject"); //for emails, where title data doesn't apply
260
261 FilterResponse_t response;
262 text_t relation = ""; //string for displaying relation metadata
263 text_t relationTitle = ""; //the related documents Title (or subject)
[1963]264 text_t relationOID = ""; //the related documents OID
[1941]265
266 //get the information associated with the metadata for current doc
[7432]267 if (get_info (docinfo.OID, collection, "", metadata,
[1941]268 false, collectproto, response, logout)) {
269
270 //if the relation metadata exists, store for displaying
[11324]271 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
272 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
[1941]273
274 //split relation data into pairs of collectionname,ID number
275 text_tarray relationpairs;
276 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
277
278 text_tarray::const_iterator currDoc = relationpairs.begin();
279 text_tarray::const_iterator lastDoc = relationpairs.end();
280
281 //iterate through the pairs to split and display
282 while(currDoc != lastDoc){
283
284 //split pairs into collectionname and ID
285 text_tarray relationdata;
286 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
287
288 //get first element in the array (collection)
289 text_tarray::const_iterator doc_data = relationdata.begin();
290 text_t document_collection = *doc_data;
[9620]291 ++doc_data; //increment to get next item in array (oid)
[1941]292 text_t document_OID = *doc_data;
293
294 //create html link to related document
295 relation += "<a href=\"_httpdocument_&c=" + document_collection;
296 relation += "&cl=search&d=" + document_OID;
297
298 //get the information associated with the metadata for related doc
[7432]299 if (get_info (document_OID, document_collection, "", metadata,
[1941]300 false, collectproto, response, logout)) {
301
302 //if title metadata doesn't exist, collect subject metadata
303 //if that doesn't exist, just call it 'related document'
304 if (!response.docInfo[0].metadata["Title"].values[0].empty())
305 relationTitle = response.docInfo[0].metadata["Title"].values[0];
306 else if (!response.docInfo[0].metadata["Subject"].values.empty())
307 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
308 else relationTitle = "RELATED DOCUMENT";
309
310 }
311
312 //link the related document's title to its page
313 relation += "\">" + relationTitle + "</a>";
314 relation += " (" + document_collection + ")<br>";
315
[9620]316 ++currDoc;
[1941]317 }
318 }
319
320 }
321
322 if(relation.empty()) //no relation data for documnet
323 relation = ".. no related documents .. ";
324
325 return relation;
326}
327
328
329
[354]330static void get_parent_options (text_t &instring, metadata_t &metaoption) {
331
[1257]332 assert (instring.size() > 7);
333 if (instring.size() <= 7) return;
334
[354]335 text_t meta, com, op;
336 bool inbraces = false;
337 bool inquotes = false;
338 bool foundcolon = false;
339 text_t::const_iterator here = instring.begin()+6;
340 text_t::const_iterator end = instring.end();
341 while (here != end) {
[10415]342 if (foundcolon) meta.push_back (*here);
343 else if (*here == '(') inbraces = true;
[354]344 else if (*here == ')') inbraces = false;
345 else if (*here == '\'' && !inquotes) inquotes = true;
346 else if (*here == '\'' && inquotes) inquotes = false;
347 else if (*here == ':' && !inbraces) foundcolon = true;
348 else if (inquotes) op.push_back (*here);
349 else com.push_back (*here);
[9620]350 ++here;
[354]351 }
[9401]352
[354]353 instring = meta;
354 if (com.empty())
[9401]355 metaoption.mqualifier.parent = pImmediate;
[354]356 else if (com == "Top")
[9401]357 metaoption.mqualifier.parent = pTop;
[649]358 else if (com == "All") {
[9401]359 metaoption.mqualifier.parent = pAll;
[10415]360 metaoption.parentoptions = op;
[354]361 }
362}
363
[5787]364
365static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
366
367 assert (instring.size() > 8);
368 if (instring.size() <= 8) return;
369 text_t meta, com, op;
370 bool inbraces = false;
371 bool inquotes = false;
372 bool foundcolon = false;
373 text_t::const_iterator here = instring.begin()+7;
374 text_t::const_iterator end = instring.end();
375 while (here != end) {
[10415]376 if (foundcolon) meta.push_back (*here);
377 else if (*here == '(') inbraces = true;
[5787]378 else if (*here == ')') inbraces = false;
379 else if (*here == '\'' && !inquotes) inquotes = true;
380 else if (*here == '\'' && inquotes) inquotes = false;
[10415]381 else if (*here == ':' && !inbraces) foundcolon = true;
[5787]382 else if (inquotes) op.push_back (*here);
383 else com.push_back (*here);
[9620]384 ++here;
[5787]385 }
386
387 instring = meta;
[10415]388 metaoption.siblingoptions.clear();
[5787]389
390 if (com.empty()) {
[9401]391 metaoption.mqualifier.sibling = sAll;
[10415]392 metaoption.siblingoptions = " ";
[5787]393 }
[10415]394 else if (com == "first") {
395 metaoption.mqualifier.sibling = sNum;
396 metaoption.siblingoptions = "0";
397 }
398 else if (com == "last") {
399 metaoption.mqualifier.sibling = sNum;
400 metaoption.siblingoptions = "-2"; // == last
401 }
402 else if (com.getint()>0) {
403 metaoption.mqualifier.sibling = sNum;
404 int pos = com.getint()-1;
405 metaoption.siblingoptions +=pos;
406 }
[5787]407 else {
[9401]408 metaoption.mqualifier.sibling = sAll;
[10415]409 metaoption.siblingoptions = op;
[5787]410 }
411}
412
[9401]413static void get_child_options (text_t &instring, metadata_t &metaoption) {
[5788]414
[9401]415 assert (instring.size() > 6);
416 if (instring.size() <= 6) return;
417 text_t meta, com, op;
418 bool inbraces = false;
419 bool inquotes = false;
420 bool foundcolon = false;
421 text_t::const_iterator here = instring.begin()+5;
422 text_t::const_iterator end = instring.end();
423 while (here != end) {
[10415]424 if (foundcolon) meta.push_back (*here);
425 else if (*here == '(') inbraces = true;
[9401]426 else if (*here == ')') inbraces = false;
427 else if (*here == '\'' && !inquotes) inquotes = true;
428 else if (*here == '\'' && inquotes) inquotes = false;
429 else if (*here == ':' && !inbraces) foundcolon = true;
430 else if (inquotes) op.push_back (*here);
431 else com.push_back (*here);
[9620]432 ++here;
[9401]433 }
434
435 instring = meta;
436 if (com.empty()) {
437 metaoption.mqualifier.child = cAll;
[10415]438 metaoption.childoptions = " ";
[9401]439 }
440 else if (com == "first") {
441 metaoption.mqualifier.child = cNum;
[10415]442 metaoption.childoptions = ".fc";
[9401]443 }
444 else if (com == "last") {
445 metaoption.mqualifier.child = cNum;
[10415]446 metaoption.childoptions = ".lc";
[9401]447 }
448 else if (com.getint()>0) {
449 metaoption.mqualifier.child = cNum;
[10415]450 metaoption.childoptions = "."+com;
[9401]451 }
452 else {
453 metaoption.mqualifier.child = cAll;
[10415]454 metaoption.childoptions = op;
[9401]455 }
456}
457
458
459
[649]460static void parse_meta (text_t &meta, metadata_t &metaoption,
461 text_tset &metadata, bool &getParents) {
[354]462
[649]463 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
[5787]464 metaoption.metacommand |= mCgiSafe;
[649]465 meta = substr (meta.begin()+8, meta.end());
466 }
[12567]467 if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {
468 metaoption.metacommand |= mSpecial;
469 meta = substr (meta.begin()+7, meta.end());
470 }
[649]471
[354]472 if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
473 getParents = true;
[9401]474 metaoption.metacommand |= mParent;
[354]475 get_parent_options (meta, metaoption);
[649]476 }
[9401]477 else if (meta.size() > 6 && (substr (meta.begin(), meta.begin()+5) == "child")) {
478 metaoption.metacommand |= mChild;
479 get_child_options (meta, metaoption);
[10415]480 metadata.insert("contains");
[9401]481 }
[10415]482 // parent and child can have sibling also
483 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
484 metaoption.metacommand |= mSibling;
485 get_sibling_options (meta, metaoption);
486 }
487
[7599]488 // check for ex. which may occur in format statements
489 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
490 meta = substr (meta.begin()+3, meta.end());
491 }
[649]492 metadata.insert (meta);
493 metaoption.metaname = meta;
[354]494}
495
[9948]496static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
497 if (meta == "collection") {
498 // no qualifiers
499 metaoption.metaname = g_EmptyText;
500 return;
501 }
502 meta = substr (meta.begin()+11, meta.end());
503 metaoption.metaname = meta;
504
505}
506
[649]507static void parse_meta (text_t &meta, format_t *formatlistptr,
508 text_tset &metadata, bool &getParents) {
[354]509
510 if (meta == "link")
511 formatlistptr->command = comLink;
512 else if (meta == "/link")
513 formatlistptr->command = comEndLink;
514
[2706]515 else if (meta == "href")
516 formatlistptr->command = comHref;
517
[354]518 else if (meta == "num")
519 formatlistptr->command = comNum;
520
[407]521 else if (meta == "icon")
522 formatlistptr->command = comIcon;
523
[442]524 else if (meta == "Text")
525 formatlistptr->command = comDoc;
[1941]526
527 else if (meta == "RelatedDocuments")
528 formatlistptr->command = comRel;
[442]529
[670]530 else if (meta == "highlight")
531 formatlistptr->command = comHighlight;
532
533 else if (meta == "/highlight")
534 formatlistptr->command = comEndHighlight;
535
[2967]536 else if (meta == "Summary")
537 formatlistptr->command = comSummary;
538
[5788]539 else if (meta == "DocImage")
540 formatlistptr->command = comImage;
541
542 else if (meta == "DocTOC")
543 formatlistptr->command = comTOC;
544
545 else if (meta == "DocumentButtonDetach")
546 formatlistptr->command = comDocumentButtonDetach;
547
548 else if (meta == "DocumentButtonHighlight")
549 formatlistptr->command = comDocumentButtonHighlight;
550
551 else if (meta == "DocumentButtonExpandContents")
552 formatlistptr->command = comDocumentButtonExpandContents;
553
554 else if (meta == "DocumentButtonExpandText")
555 formatlistptr->command = comDocumentButtonExpandText;
[6020]556
557 else if (meta == "DocOID")
558 formatlistptr->command = comOID;
[13118]559 else if (meta == "DocTopOID")
560 formatlistptr->command = comTopOID;
[6710]561 else if (meta == "DocRank")
562 formatlistptr->command = comRank;
[16915]563 else if (meta == "DocTermsFreqTotal")
564 formatlistptr->command = comDocTermsFreqTotal;
[9948]565 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
566 formatlistptr->command = comCollection;
567 parse_coll_meta(meta, formatlistptr->meta);
568 }
[354]569 else {
570 formatlistptr->command = comMeta;
[649]571 parse_meta (meta, formatlistptr->meta, metadata, getParents);
[354]572 }
573}
574
[9948]575
[354]576static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
[649]577 text_tset &metadata, bool &getParents) {
[354]578
[347]579 text_t text;
580 text_t::const_iterator here = formatstring.begin();
581 text_t::const_iterator end = formatstring.end();
582
583 while (here != end) {
584
[1257]585 if (*here == '\\') {
[9620]586 ++here;
[1257]587 if (here != end) text.push_back (*here);
[347]588
[1257]589 } else if (*here == '{') {
[347]590 if (!text.empty()) {
591 formatlistptr->command = comText;
592 formatlistptr->text = text;
593 formatlistptr->nextptr = new format_t();
594 formatlistptr = formatlistptr->nextptr;
595
596 text.clear();
597 }
[649]598 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
[1443]599
[347]600 formatlistptr->nextptr = new format_t();
601 formatlistptr = formatlistptr->nextptr;
602 if (here == end) break;
603 }
604 } else if (*here == '[') {
605 if (!text.empty()) {
606 formatlistptr->command = comText;
607 formatlistptr->text = text;
608 formatlistptr->nextptr = new format_t();
609 formatlistptr = formatlistptr->nextptr;
610
611 text.clear();
612 }
613 text_t meta;
[9620]614 ++here;
[347]615 while (*here != ']') {
616 if (here == end) return false;
617 meta.push_back (*here);
[9620]618 ++here;
[347]619 }
[649]620 parse_meta (meta, formatlistptr, metadata, getParents);
621 formatlistptr->nextptr = new format_t();
622 formatlistptr = formatlistptr->nextptr;
[347]623
624 } else
625 text.push_back (*here);
626
[9620]627 if (here != end) ++here;
[347]628 }
629 if (!text.empty()) {
630 formatlistptr->command = comText;
631 formatlistptr->text = text;
632 formatlistptr->nextptr = new format_t();
633 formatlistptr = formatlistptr->nextptr;
634
635 }
636 return true;
637}
638
639
[354]640static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]641 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
[354]642
[347]643 text_t::const_iterator it = findchar (here, end, '}');
644 if (it == end) return false;
645
646 text_t com = substr (here, it);
647 here = findchar (it, end, '{');
648 if (here == end) return false;
[9620]649 else ++here;
[347]650
[7266]651 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
652 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
[347]653 else return false;
654
655 int commacount = 0;
656 text_t text;
657 while (here != end) {
[636]658
659 if (*here == '\\') {
[9745]660 ++here;
[636]661 if (here != end) text.push_back(*here);
662
[1443]663 }
664
665 else if (*here == ',' || *here == '}' || *here == '{') {
[347]666
667 if (formatlistptr->command == comOr) {
668 // the {Or}{this, or this, or this, or this} statement
669 format_t *or_ptr;
670
671 // find the next unused orptr
672 if (formatlistptr->orptr == NULL) {
673 formatlistptr->orptr = new format_t();
674 or_ptr = formatlistptr->orptr;
675 } else {
676 or_ptr = formatlistptr->orptr;
677 while (or_ptr->nextptr != NULL)
678 or_ptr = or_ptr->nextptr;
679 or_ptr->nextptr = new format_t();
680 or_ptr = or_ptr->nextptr;
681 }
682
[1443]683 if (!text.empty())
684 {
685 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
686 }
[347]687
[1443]688 if (*here == '{')
689 {
690 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
691 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
692 // The latter can always be re-written:
693 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
694
695 if (!text.empty()) // already used up allocated format_t
696 {
697 // => allocate new one for detected action
698 or_ptr->nextptr = new format_t();
699 or_ptr = or_ptr->nextptr;
700 }
701 if (!parse_action(++here, end, or_ptr, metadata, getParents))
702 {
703 return false;
704 }
705 }
706 else
707 {
708 if (*here == '}') break;
709 }
[347]710 text.clear();
711
[1610]712 }
713
714 // Parse an {If}{decide,do,else} statement
715 else {
716
717 // Read the decision component.
[347]718 if (commacount == 0) {
[1610]719 // Decsion can be a metadata element, or a piece of text.
720 // Originally Stefan's code, updated 25/10/2000 by Gordon.
[1443]721
[347]722 text_t::const_iterator beginbracket = text.begin();
723 text_t::const_iterator endbracket = (text.end() - 1);
[1610]724
725 // Decision is based on a metadata element
[347]726 if ((*beginbracket == '[') && (*endbracket == ']')) {
[1610]727 // Ignore the surrounding square brackets
[347]728 text_t meta = substr (beginbracket+1, endbracket);
[649]729 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
[9620]730 ++commacount;
[347]731 text.clear();
732 }
[1610]733
734 // Decision is a piece of text (probably a macro like _cgiargmode_).
735 else {
[7389]736
737 // hunt for any metadata in string, which might be uses in
738 // to test a condition, e.g. [Format] eq 'PDF'
739 format_t* dummyformat = new format_t();
740 // update which metadata fields needed
741 // (not interested in updatng formatlistptr)
742 parse_string (text, dummyformat, metadata, getParents);
743 delete dummyformat;
744
[1610]745 formatlistptr->decision.command = dText;
746 formatlistptr->decision.text = text;
[9620]747 ++commacount;
[1610]748 text.clear();
749 }
750 }
751
752 // Read the "then" and "else" components of the {If} statement.
753 else {
[1443]754 format_t** nextlistptr = NULL;
755 if (commacount == 1) {
[1610]756 nextlistptr = &formatlistptr->ifptr;
[1443]757 } else if (commacount == 2 ) {
758 nextlistptr = &formatlistptr->elseptr;
759 } else {
760 return false;
761 }
762
763 if (!text.empty()) {
764 if (*nextlistptr == NULL) {
765 *nextlistptr = new format_t();
766 } else {
767
768 // skip to the end of any format_t statements already added
769 while ((*nextlistptr)->nextptr != NULL)
770 {
771 nextlistptr = &(*nextlistptr)->nextptr;
772 }
773
774 (*nextlistptr)->nextptr = new format_t();
775 nextlistptr = &(*nextlistptr)->nextptr;
776 }
777
778 if (!parse_string (text, *nextlistptr, metadata, getParents))
779 {
780 return false;
781 }
782 text.clear();
783 }
[347]784
[1443]785 if (*here == '{')
786 {
787 if (*nextlistptr == NULL) {
788 *nextlistptr = new format_t();
789 } else {
[7474]790 // skip to the end of any format_t statements already added
791 while ((*nextlistptr)->nextptr != NULL)
792 {
793 nextlistptr = &(*nextlistptr)->nextptr;
794 }
795
[1443]796 (*nextlistptr)->nextptr = new format_t();
797 nextlistptr = &(*nextlistptr)->nextptr;
798 }
799
800 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
801 {
802 return false;
803 }
804 }
805 else
806 {
807 if (*here == '}') break;
[9620]808 ++commacount;
[1443]809 }
[347]810 }
811 }
[636]812
813 } else text.push_back(*here);
[347]814
[9620]815 if (here != end) ++here;
[347]816 }
817
818 return true;
819}
820
[354]821
[347]822bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
[649]823 text_tset &metadata, bool &getParents) {
[347]824
825 formatlistptr->clear();
826 getParents = false;
827
[649]828 return (parse_string (formatstring, formatlistptr, metadata, getParents));
[347]829}
830
[10415]831// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
832// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
833static text_t get_formatted_meta_text(MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
[9401]834{
835 text_t no_ns_metaname = remove_namespace(meta.metaname);
836 text_t tmp;
837 bool first = true;
[649]838
[9401]839 const int start_i=0;
840 const int end_i = metainfo.values.size()-1;
[10415]841
842 if (position == -1) { // all
843 for (int i=start_i; i<=end_i; ++i) {
844 if (!first) tmp += meta.siblingoptions;
[12567]845 if (meta.metacommand & mSpecial) {
846 // special formatting
847 if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[i]);
848 else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[i]);
849 else tmp += "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
850 }
[10415]851 else tmp += metainfo.values[i];
852 first = false;
[9401]853
[10415]854 }
855 } else {
856 if (position == -2) { // end
857 position = end_i;
858 } else if (position < start_i || position > end_i) {
859 return "";
860 }
[12567]861 if (meta.metacommand & mSpecial) {
862 // special formatting
863 if (no_ns_metaname == "Date") tmp += format_date (metainfo.values[position]);
864 else if (no_ns_metaname == "Language") tmp += iso639(metainfo.values[position]);
865 else tmp += "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
866 }
[10415]867 else tmp += metainfo.values[position];
[9401]868 }
[13457]869 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (tmp);
[9401]870 else return tmp;
871}
[347]872
[10415]873static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
[9401]874{
875
[649]876 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
[9401]877 switch (meta.mqualifier.parent) {
[347]878 case pNone:
[9401]879 return "Nothing!!";
880 break;
[5787]881
[347]882 case pImmediate:
[649]883 if (parent != NULL) {
[10415]884 return get_formatted_meta_text(*parent, meta, siblings_values);
[410]885 }
[347]886 break;
887
888 case pTop:
[649]889 if (parent != NULL) {
890 while (parent->parent != NULL) parent = parent->parent;
[10415]891 return get_formatted_meta_text(*parent, meta, siblings_values);
[410]892 }
[347]893 break;
894
895 case pAll:
[649]896 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
897 if (parent != NULL) {
898 text_tarray tmparray;
899 while (parent != NULL) {
[10415]900 tmparray.push_back (get_formatted_meta_text(*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
[649]901 parent = parent->parent;
902 }
[10415]903 // now join them up - use teh parent separator
[649]904 bool first = true;
905 text_t tmp;
906 text_tarray::reverse_iterator here = tmparray.rbegin();
907 text_tarray::reverse_iterator end = tmparray.rend();
[359]908 while (here != end) {
[10415]909 if (!first) tmp += meta.parentoptions;
910 tmp += *here;
[359]911 first = false;
[9620]912 ++here;
[359]913 }
[13457]914 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
[649]915 else return tmp;
[347]916 }
917 }
918 return "";
[9401]919
[347]920}
921
[9948]922static text_t get_child_meta (const text_t& collection,
923 recptproto* collectproto,
[9401]924 ResultDocInfo_t &docinfo, displayclass &disp,
925 const metadata_t &meta, text_tmap &options,
[10415]926 ostream& logout, int siblings_values)
[9401]927{
[10415]928 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
929
930 const text_t& child_metaname = meta.metaname;
931 const text_t& child_field = meta.childoptions;
932 text_tset child_metadata;
933 child_metadata.insert(child_metaname);
[9401]934
[10415]935 FilterResponse_t child_response;
936 if (meta.mqualifier.child == cNum) {
937 // just one child
938 //get the information associated with the metadata for child doc
939 if (!get_info (docinfo.OID+child_field, collection, "", child_metadata,
940 false, collectproto, child_response, logout)) return ""; // invalid child number
[9401]941
[10415]942 if (child_response.docInfo.empty()) return false; // no info for the child
943
944 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
945 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
946
947 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
948 return expand_metadata(child_metavalue,collection,collectproto,
949 child_docinfo,disp,options,logout);
950 }
951
[9401]952
[10415]953 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
954
955 // we need to get all children
956 text_t result = "";
957 text_tarray children;
958 text_t contains = docinfo.metadata["contains"].values[0];
959 splitchar (contains.begin(), contains.end(), ';', children);
960 text_tarray::const_iterator here = children.begin();
961 text_tarray::const_iterator end = children.end();
962 bool first = true;
963 while (here !=end) {
964 text_t oid = *here;
965 here++;
966 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
967
968
[9401]969 //get the information associated with the metadata for child doc
[10415]970 if (!get_info (oid, collection, "", child_metadata,
971 false, collectproto, child_response, logout) ||
972 child_response.docInfo.empty()) {
973 first = false;
974 continue;
975 }
976
977
978 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
979 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
980
981 text_t child_metavalue = get_formatted_meta_text(metaname_rec,meta,siblings_values);
982 if (!first) result += child_field;
983 first = false;
984 // need to do this here cos otherwise we are in the wrong document
985 result += expand_metadata(child_metavalue,collection,collectproto,
[9401]986 child_docinfo,disp,options,logout);
987 }
[10415]988 return result;
989
[9401]990}
991
992static text_t get_meta (const text_t& collection, recptproto* collectproto,
993 ResultDocInfo_t &docinfo, displayclass &disp,
994 const metadata_t &meta, text_tmap &options,
995 ostream& logout) {
996
997 // make sure we have the requested metadata
998 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
999 if (it == docinfo.metadata.end()) return "";
1000
[10415]1001 int siblings_values = 0; // default is no siblings, just the first metadata available
1002 if (meta.metacommand & mSibling) {
1003 if (meta.mqualifier.sibling == sAll) {
1004 siblings_values = -1; //all
1005 } else if (meta.mqualifier.sibling == sNum) {
1006 siblings_values = meta.siblingoptions.getint();
1007 }
1008 }
[9401]1009 if (meta.metacommand & mParent) {
[10415]1010 return get_parent_meta(docinfo,meta,siblings_values);
[9401]1011 }
[10415]1012
[9401]1013 else if (meta.metacommand & mChild) {
1014 return get_child_meta(collection,collectproto,docinfo,disp,meta,
[10415]1015 options,logout, siblings_values);
[9401]1016 }
[10415]1017 else if (meta.metacommand & mSibling) { // only siblings
[9401]1018 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
[10415]1019 return get_formatted_meta_text(docinfo.metadata[meta.metaname],meta, siblings_values);
[9401]1020 }
1021 else {
1022
1023 // straightforward metadata request (nothing fancy)
1024
1025 text_t classifier_metaname = docinfo.classifier_metadata_type;
1026 int metaname_index
1027 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
[10415]1028 return get_formatted_meta_text(docinfo.metadata[meta.metaname], meta, metaname_index);
[9401]1029 }
[10415]1030
[9401]1031 return "";
1032}
1033
[1443]1034static text_t get_or (const text_t& collection, recptproto* collectproto,
[1610]1035 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1036 format_t *orptr, text_tmap &options,
[1443]1037 ostream& logout) {
[347]1038
[354]1039 text_t tmp;
1040 while (orptr != NULL) {
[347]1041
[9948]1042 tmp = format_string (collection,collectproto,docinfo, disp, orptr,
[5788]1043 options, logout);
[354]1044 if (!tmp.empty()) return tmp;
1045
1046 orptr = orptr->nextptr;
[347]1047 }
[354]1048 return "";
[347]1049}
1050
// True for space, tab, newline and carriage return; false for anything else.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1056
1057static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1058{
1059 int pos = start_pos;
1060 while (pos<outstring.size()) {
1061 if (!char_is_whitespace(outstring[pos])) {
1062 break;
1063 }
[9620]1064 ++pos;
[7389]1065 }
1066
1067 return pos;
1068}
1069
1070static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1071{
1072 int pos = start_pos;
1073 while (pos>=0) {
1074 if (!char_is_whitespace(outstring[pos])) {
1075 break;
1076 }
[9620]1077 --pos;
[7389]1078 }
1079
1080 return pos;
1081}
1082
1083static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1084{
1085 int pos = start_pos;
1086 while (pos>=0) {
1087 if (char_is_whitespace(outstring[pos])) {
1088 break;
1089 }
[9620]1090 --pos;
[7389]1091 }
1092
1093 return pos;
1094}
1095
1096
1097static int rscan_for(const text_t& outstring, const int start_pos,
1098 const char find_c)
1099{
1100 int pos = start_pos;
1101 while (pos>=0) {
1102 char c = outstring[pos];
1103 if (outstring[pos] == find_c) {
1104 break;
1105 }
[9620]1106 --pos;
[7389]1107 }
1108
1109 return pos;
1110}
1111
1112text_t extract_substr(const text_t& outstring, const int start_pos,
1113 const int end_pos)
1114{
1115 text_t extracted_str;
1116 extracted_str.clear();
1117
[9620]1118 for (int pos=start_pos; pos<=end_pos; ++pos) {
[7389]1119 extracted_str.push_back(outstring[pos]);
1120 }
1121
1122 return extracted_str;
1123}
1124
1125
[9401]1126static text_t expand_potential_metadata(const text_t& collection,
1127 recptproto* collectproto,
1128 ResultDocInfo_t &docinfo,
1129 displayclass &disp,
1130 const text_t& intext,
1131 text_tmap &options,
1132 ostream& logout)
[7389]1133{
1134 text_t outtext;
1135
1136 // decide if dealing with metadata or text
1137
1138 text_t::const_iterator beginbracket = intext.begin();
1139 text_t::const_iterator endbracket = (intext.end() - 1);
1140
1141 // Decision is based on a metadata element
1142 if ((*beginbracket == '[') && (*endbracket == ']')) {
1143 // Ignore the surrounding square brackets
1144 text_t meta_text = substr (beginbracket+1, endbracket);
1145
[10614]1146 if (meta_text == "Text") {
1147 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1148 } else {
[7389]1149
[10614]1150 text_tset metadata;
1151 bool getParents =false;
1152 metadata_t meta;
1153
1154 parse_meta (meta_text, meta, metadata, getParents);
1155 outtext
1156 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1157 }
1158
[7389]1159 }
1160 else {
1161 outtext = intext;
1162 }
1163
1164 return outtext;
1165}
1166
1167
1168
1169
[9401]1170static bool uses_expression(const text_t& collection, recptproto* collectproto,
1171 ResultDocInfo_t &docinfo,
1172 displayclass &disp,
[7389]1173 const text_t& outstring, text_t& lhs_expr,
[9401]1174 text_t& op_expr, text_t& rhs_expr,
1175 text_tmap &options,
1176 ostream& logout)
[7389]1177{
1178 // Note: the string may not be of the form: str1 op str2, however
1179 // to deterine this we have to process it on the assumption it is,
1180 // and if at any point an 'erroneous' value is encountered, return
1181 // false and let something else have a go at evaluating it
1182
1183 // Starting at the end of the string and working backwards ..
1184
1185 const int outstring_len = outstring.size();
1186
1187 // skip over white space
1188 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1189
1190 if (rhs_end<=0) {
1191 // no meaningful text or (rhs_end==0) no room for operator
1192 return false;
1193 }
1194
1195 // check for ' or " and then scan over token
1196 const char potential_quote = outstring[rhs_end];
1197 int rhs_start=rhs_end;
1198 bool quoted = false;
1199
1200 if ((potential_quote == '\'') || (potential_quote == '\"')) {
[9620]1201 --rhs_end;
[7389]1202 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1203 quoted = true;
1204 }
1205 else {
1206 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1207 }
1208
[7617]1209 if ((rhs_end-rhs_start)<0) {
[7389]1210 // no meaningful rhs expression
1211 return false;
1212 }
1213
1214 // form rhs_expr
1215 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1216
1217 // skip over white space
1218 const int to_whitespace = (quoted) ? 2 : 1;
1219
1220 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1221 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1222
1223
[7617]1224 if (op_end-op_start<0) {
[7389]1225 // no meaningful expression operator
1226 return false;
1227 }
1228
1229 op_expr = extract_substr(outstring,op_start,op_end);
1230
1231
1232 // check for operator
[10142]1233 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1234 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
[10145]1235
[7389]1236 // not a valid operator
1237 return false;
1238 }
1239
1240 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
[7617]1241 if (lhs_end<0) {
[7389]1242 // no meaningful lhs expression
1243 return false;
1244 }
1245
1246 int lhs_start = scan_over_whitespace(outstring,0);
1247
1248 // form lhs_expr from remainder of string
1249 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1250
1251 // Now we know we have a valid expression, look up any
1252 // metadata terms
1253
[9401]1254 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1255 disp,rhs_expr,options,logout);
1256 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1257 disp,lhs_expr,options,logout);
[7389]1258
1259 return true;
1260}
1261
1262static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1263 const text_t& rhs_expr, ostream& logout)
1264{
[10142]1265 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1266 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1267 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1268 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1269 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1270 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1271 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1272 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1273 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1274 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1275 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1276 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1277 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1278 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
[7389]1279 else {
1280 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1281 }
[10142]1282
[7389]1283 return false;
1284}
1285
1286
[1443]1287static text_t get_if (const text_t& collection, recptproto* collectproto,
[1610]1288 ResultDocInfo_t &docinfo, displayclass &disp,
1289 const decision_t &decision,
[5788]1290 format_t *ifptr, format_t *elseptr,
1291 text_tmap &options, ostream& logout)
[1443]1292{
[1610]1293 // If the decision component is a metadata element, then evaluate it
1294 // to see whether we output the "then" or the "else" clause
[354]1295 if (decision.command == dMeta) {
[9401]1296 if (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1297 logout) != "") {
[354]1298 if (ifptr != NULL)
[9948]1299 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
[5788]1300 options, logout);
[354]1301 }
1302 else {
1303 if (elseptr != NULL)
[9948]1304 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
[5788]1305 options, logout);
[354]1306 }
1307 }
[1610]1308
1309 // If the decision component is text, then evaluate it (it is probably a
1310 // macro like _cgiargmode_) to decide what to output.
1311 else if (decision.command == dText) {
1312
1313 text_t outstring;
1314 disp.expandstring (decision.text, outstring);
1315
[7389]1316 // Check for if expression in form: str1 op str2
1317 // (such as [x] eq "y")
1318 text_t lhs_expr, op_expr, rhs_expr;
[9401]1319 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
[7389]1320 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1321 if (ifptr != NULL) {
1322 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1323 options, logout);
1324 }
1325 else {
1326 return "";
1327 }
1328 } else {
1329 if (elseptr != NULL) {
1330 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1331 options, logout);
1332 }
1333 else {
1334 return "";
1335 }
1336 }
1337 }
1338
1339
[1610]1340 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1341 // a cgi argument macro that has not been set, it evaluates to itself.
1342 // Therefore, were have to say that a piece of text evalautes true if
1343 // it is non-empty and if it is a cgi argument evaulating to itself.
[7389]1344
[1610]1345 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1346 if (ifptr != NULL)
1347 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
[5788]1348 options, logout);
[1610]1349 } else {
1350 if (elseptr != NULL)
1351 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
[5788]1352 options, logout);
[1610]1353 }
1354 }
1355
[354]1356 return "";
1357}
1358
[1443]1359bool includes_metadata(const text_t& text)
1360{
1361 text_t::const_iterator here = text.begin();
1362 text_t::const_iterator end = text.end();
1363 while (here != end) {
1364 if (*here == '[') return true;
[9620]1365 ++here;
[1443]1366 }
1367
1368 return false;
1369}
1370
[5788]1371static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
[9948]1372 recptproto* collectproto,
1373 ResultDocInfo_t &docinfo,
[5788]1374 displayclass &disp, text_tmap &options,
1375 ostream &logout) {
1376
[10415]1377 if (includes_metadata(metavalue)) {
1378
1379 // text has embedded metadata in it => expand it
1380 FilterRequest_t request;
1381 FilterResponse_t response;
1382
1383 request.getParents = false;
1384
1385 format_t *expanded_formatlistptr = new format_t();
1386 parse_formatstring (metavalue, expanded_formatlistptr,
1387 request.fields, request.getParents);
1388
1389 // retrieve metadata
1390 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1391 collectproto, response, logout);
1392
1393 if (!response.docInfo.empty()) {
1394
1395 text_t expanded_metavalue
1396 = get_formatted_string(collection, collectproto,
1397 response.docInfo[0], disp, expanded_formatlistptr,
1398 options, logout);
1399
1400 return expanded_metavalue;
1401 }
1402 else {
1403 return metavalue;
1404 }
1405 }
1406 else {
1407
1408 return metavalue;
1409 }
[5788]1410}
[1941]1411
[9948]1412text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1413 displayclass &disp,
1414 text_t meta_name, ostream& logout) {
1415
1416 ColInfoResponse_t collectinfo;
1417 comerror_t err;
1418 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1419 text_t meta_value = "";
1420 text_t lang;
1421 disp.expandstring("_cgiargl_",lang);
1422 if (lang.empty()) {
1423 lang = "en";
1424 }
1425
1426 if (err == noError) {
1427 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1428 }
1429 return meta_value;
1430
1431
1432}
[1443]1433text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]1434 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1435 format_t *formatlistptr, text_tmap &options,
[1443]1436 ostream& logout) {
[354]1437
[347]1438 if (formatlistptr == NULL) return "";
1439
1440 switch (formatlistptr->command) {
[6020]1441 case comOID:
1442 return docinfo.OID;
[13118]1443 case comTopOID:
1444 {
1445 text_t top_id;
1446 get_top(docinfo.OID, top_id);
1447 return top_id;
1448 }
[6710]1449 case comRank:
1450 return text_t(docinfo.ranking);
[5788]1451 case comText:
1452 return formatlistptr->text;
1453 case comLink:
1454 return options["link"];
1455 case comEndLink:
1456 if (options["link"].empty()) return "";
1457 else return "</a>";
1458 case comHref:
1459 return get_href(options["link"]);
1460 case comIcon:
1461 return options["icon"];
1462 case comNum:
1463 return docinfo.result_num;
1464 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1465 return get_related_docs(collection, collectproto, docinfo, logout);
1466 case comSummary:
1467 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1468 case comMeta:
[1443]1469 {
[9948]1470 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
[5788]1471 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
[1443]1472 }
[5788]1473 case comDoc:
[9852]1474 return format_text(collection, collectproto, docinfo, disp, options, logout);
1475 //return options["text"];
[5788]1476 case comImage:
1477 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1478 case comTOC:
1479 return options["DocTOC"];
1480 case comDocumentButtonDetach:
1481 return options["DocumentButtonDetach"];
1482 case comDocumentButtonHighlight:
1483 return options["DocumentButtonHighlight"];
1484 case comDocumentButtonExpandContents:
1485 return options["DocumentButtonExpandContents"];
1486 case comDocumentButtonExpandText:
1487 return options["DocumentButtonExpandText"];
1488 case comHighlight:
1489 if (options["highlight"] == "1") return "<b>";
1490 break;
1491 case comEndHighlight:
1492 if (options["highlight"] == "1") return "</b>";
1493 break;
1494 case comIf:
1495 return get_if (collection, collectproto, docinfo, disp,
1496 formatlistptr->decision, formatlistptr->ifptr,
1497 formatlistptr->elseptr, options, logout);
1498 case comOr:
1499 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1500 options, logout);
[16915]1501 case comDocTermsFreqTotal:
1502 return docinfo.num_terms_matched;
[9948]1503 case comCollection:
1504 if (formatlistptr->meta.metaname == g_EmptyText) {
1505 return collection;
1506 }
1507 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1508
[347]1509 }
1510 return "";
1511}
1512
[1443]1513text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
[1610]1514 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1515 format_t *formatlistptr, text_tmap &options,
[1443]1516 ostream& logout) {
[407]1517
[5788]1518 text_t ft;
1519 while (formatlistptr != NULL)
1520 {
1521 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1522 options, logout);
1523 formatlistptr = formatlistptr->nextptr;
1524 }
1525
1526 return ft;
[347]1527}
1528
1529
[9852]1530// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1531text_t format_text (const text_t& collection, recptproto* collectproto,
[9948]1532 ResultDocInfo_t &docinfo, displayclass &disp,
1533 text_tmap &options, ostream& logout) {
[9852]1534 if(!options["text"].empty()) {
1535 return options["text"];
1536 }
1537 // else get document text here
1538 DocumentRequest_t docrequest;
1539 DocumentResponse_t docresponse;
1540 comerror_t err;
1541 docrequest.OID = docinfo.OID;
1542 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1543 return docresponse.doc;
1544
1545}
1546
[2967]1547/* FUNCTION NAME: format_summary
1548 * DESC: this is invoked when a [Summary] special metadata is processed.
1549 * RETURNS: a query-biased summary for the document */
1550
1551text_t format_summary (const text_t& collection, recptproto* collectproto,
[5788]1552 ResultDocInfo_t &docinfo, displayclass &disp,
1553 text_tmap &options, ostream& logout) {
[3673]1554
1555 // GRB: added code here to ensure that the cstr (and other collections)
1556 // uses the document metadata item Summary, rather than compressing
1557 // the text of the document, processed via the methods in
1558 // summarise.cpp
1559 if (docinfo.metadata.count("Summary") > 0 &&
1560 docinfo.metadata["Summary"].values.size() > 0) {
1561 return docinfo.metadata["Summary"].values[0];
1562 }
1563
[2967]1564 text_t textToSummarise, query;
[5788]1565 if(options["text"].empty()) { // get document text
1566 DocumentRequest_t docrequest;
1567 DocumentResponse_t docresponse;
1568 comerror_t err;
1569 docrequest.OID = docinfo.OID;
1570 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1571 textToSummarise = docresponse.doc;
[2967]1572 } else // in practice, this would not happen, because text is only
1573 // loaded with the [Text] command
[5788]1574 textToSummarise = options["text"];
[2967]1575 disp.expandstring("_cgiargq_",query);
1576 return summarise(textToSummarise,query,80);
1577}
Note: See TracBrowser for help on using the repository browser.