source: gsdl/trunk/runtime-src/src/recpt/formattools.cpp@ 19312

Last change on this file since 19312 was 19312, checked in by davidb, 15 years ago

DocOID is now stored in the span-wrap tag to help with set-metadata call

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 51.9 KB
RevLine 
[347]1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[347]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[347]24 *********************************************************************/
25
26#include "formattools.h"
[462]27#include "cgiutils.h"
[15418]28#include "recptprototools.h"
[1443]29#include "OIDtools.h"
[2967]30#include "summarise.h"
[1443]31
[1257]32#include <assert.h>
[347]33
// File-wide switch: while true, get_formatted_meta_text() passes every
// metadata value it produces through spanwrap_metatext(). Starts out off.
// NOTE(review): presumably toggled by the [metadata-spanwrap] and
// [/metadata-spanwrap] format commands -- confirm where it is written.
[19302]34static bool metadata_spanwrap = false;
[19298]35
[354]36// a few function prototypes
// Forward declarations of file-local helpers. parse_action() is needed
// because it and parse_string() call each other; the definitions of the
// remaining functions are expected further down this file.
[5788]37
[1443]38static text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]39 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]40 format_t *formatlistptr, text_tmap &options,
41 ostream& logout);
[354]42
43static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]44 format_t *formatlistptr, text_tset &metadata, bool &getParents);
[354]45
[5788]46static text_t format_summary (const text_t& collection, recptproto* collectproto,
47 ResultDocInfo_t &docinfo, displayclass &disp,
48 text_tmap &options, ostream& logout);
[9852]49static text_t format_text (const text_t& collection, recptproto* collectproto,
50 ResultDocInfo_t &docinfo, displayclass &disp,
51 text_tmap &options, ostream& logout);
[2967]52
[9401]53static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
54 recptproto* collectproto, ResultDocInfo_t &docinfo,
55 displayclass &disp, text_tmap &options,
56 ostream &logout);
[2967]57
[9401]58
[347]59void metadata_t::clear() {
[410]60 metaname.clear();
[462]61 metacommand = mNone;
[9401]62 mqualifier.parent = pNone;
63 mqualifier.sibling = sNone;
64 mqualifier.child = cNone;
[19046]65 pre_tree_traverse.clear();
[10415]66 parentoptions.clear();
67 siblingoptions.clear();
68 childoptions.clear();
[749]69}
[347]70
71void decision_t::clear() {
72 command = dMeta;
73 meta.clear();
[1610]74 text.clear();
[749]75}
[347]76
77void format_t::clear() {
78 command = comText;
79 decision.clear();
80 text.clear();
81 meta.clear();
82 nextptr = NULL;
83 ifptr = NULL;
84 elseptr = NULL;
85 orptr = NULL;
[749]86}
[347]87
[442]88void formatinfo_t::clear() {
[1079]89 DocumentImages = false;
90 DocumentTitles = true;
91 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
92 DocumentContents = true;
[1941]93 DocumentArrowsBottom = true;
[5788]94 DocumentArrowsTop = false;
[13365]95 DocumentSearchResultLinks = false;
[442]96 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
[1496]97 // DocumentButtons.push_back ("Expand Text");
98 // DocumentButtons.push_back ("Expand Contents");
[442]99 DocumentButtons.push_back ("Detach");
100 DocumentButtons.push_back ("Highlight");
[1941]101 RelatedDocuments = "";
[11146]102 DocumentText = "[Text]";
[649]103 formatstrings.erase (formatstrings.begin(), formatstrings.end());
[868]104 DocumentUseHTML = false;
[5788]105 AllowExtendedOptions = false;
[442]106}
107
[749]108// simply checks to see if formatstring begins with a <td> tag
109bool is_table_content (const text_t &formatstring) {
110 text_t::const_iterator here = formatstring.begin();
111 text_t::const_iterator end = formatstring.end();
112
113 while (here != end) {
114 if (*here != ' ') {
[1257]115 if ((*here == '<') && ((here+3) < end)) {
[749]116 if ((*(here+1) == 't' || *(here+1) == 'T') &&
117 (*(here+2) == 'd' || *(here+2) == 'D') &&
118 (*(here+3) == '>' || *(here+3) == ' '))
119 return true;
120 } else return false;
121 }
[9620]122 ++here;
[749]123 }
124 return false;
125}
126
127bool is_table_content (const format_t *formatlistptr) {
128
129 if (formatlistptr == NULL) return false;
130
131 if (formatlistptr->command == comText)
132 return is_table_content (formatlistptr->text);
133
134 return false;
135}
136
[649]137// returns false if key isn't in formatstringmap
138bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
139 text_t &formatstring) {
[442]140
[649]141 formatstring.clear();
142 text_tmap::const_iterator it = formatstringmap.find(key);
143 if (it == formatstringmap.end()) return false;
144 formatstring = (*it).second;
145 return true;
146}
[749]147
[649]148// tries to find "key1key2" then "key1" then "key2"
149bool get_formatstring (const text_t &key1, const text_t &key2,
150 const text_tmap &formatstringmap,
151 text_t &formatstring) {
152
153 formatstring.clear();
154 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
155 if (it != formatstringmap.end()) {
156 formatstring = (*it).second;
157 return true;
158 }
159 it = formatstringmap.find(key1);
160 if (it != formatstringmap.end()) {
161 formatstring = (*it).second;
162 return true;
163 }
164 it = formatstringmap.find(key2);
165 if (it != formatstringmap.end()) {
166 formatstring = (*it).second;
167 return true;
168 }
169 return false;
170}
171
172
[6645]173text_t remove_namespace(const text_t &meta_name) {
174 text_t::const_iterator end = meta_name.end();
175 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
176 if (it != end) {
177 return substr(it+1, end);
178 }
[12567]179
[6645]180 return meta_name;
181
182}
[12567]183// returns a date of form _format:date_(year, month, day)
184// input is date of type yyyy-?mm-?dd
[410]185// at least the year must be present in date
[422]186text_t format_date (const text_t &date) {
[347]187
[410]188 if (date.size() < 4) return "";
[347]189
[410]190 text_t::const_iterator datebegin = date.begin();
[354]191
[410]192 text_t year = substr (datebegin, datebegin+4);
[12567]193 int chars_seen_so_far = 4;
[410]194
[12567]195 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
196 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
197
198 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]199 int imonth = month.getint();
[12567]200 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
201
202 chars_seen_so_far += 2;
203 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
204
205 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
[410]206
[12567]207 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
[410]208 if (day[0] == '0') day = substr (day.begin()+1, day.end());
209 int iday = day.getint();
[12567]210 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
[410]211
[12567]212 return "_format:date_("+year+","+month+","+day+")";
[410]213}
214
[2001]215// converts an iso639 language code to its English equivalent
[12567]216// should we be checking that the macro exists??
[2001]217text_t iso639 (const text_t &langcode) {
[12567]218 if (langcode.empty()) return "";
219 return "_iso639:iso639"+langcode+"_";
[2001]220}
221
[12567]222
[2706]223text_t get_href (const text_t &link) {
224
225 text_t href;
226
227 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
228 text_t::const_iterator end = link.end();
[13117]229 if (here == end) return g_EmptyText;
[2706]230
[9620]231 ++here;
[2706]232 while (here != end) {
233 if (*here == '"') break;
234 href.push_back(*here);
[9620]235 ++here;
[2706]236 }
237
238 return href;
239}
240
[1941]241//this function gets the information associated with the relation
242//metadata for the document associated with 'docinfo'. This relation
243//metadata consists of a line of pairs containing 'collection, document OID'
244//(this is the OID of the document related to the current document, and
245//the collection the related document belongs to). For each of these pairs
246//the title metadata is obtained and then an html link between the title
247//of the related doc and the document's position (the document will be
248//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
249//(where collection is the related documents collection, and OID is the
250//related documents OID). A list of these html links are made for as many
251//related documents as there are. This list is then returned. If there are
252//no related documents available for the current document then the string
253//'.. no related documents .. ' is returned.
254text_t get_related_docs(const text_t& collection, recptproto* collectproto,
[9948]255 ResultDocInfo_t &docinfo, ostream& logout){
[1941]256
257 text_tset metadata;
258
259 //insert the metadata we wish to collect
[11324]260 metadata.insert("dc.Relation");
[1941]261 metadata.insert("Title");
262 metadata.insert("Subject"); //for emails, where title data doesn't apply
263
264 FilterResponse_t response;
265 text_t relation = ""; //string for displaying relation metadata
266 text_t relationTitle = ""; //the related documents Title (or subject)
[1963]267 text_t relationOID = ""; //the related documents OID
[1941]268
269 //get the information associated with the metadata for current doc
[7432]270 if (get_info (docinfo.OID, collection, "", metadata,
[1941]271 false, collectproto, response, logout)) {
272
273 //if the relation metadata exists, store for displaying
[11324]274 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
275 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
[1941]276
277 //split relation data into pairs of collectionname,ID number
278 text_tarray relationpairs;
279 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
280
281 text_tarray::const_iterator currDoc = relationpairs.begin();
282 text_tarray::const_iterator lastDoc = relationpairs.end();
283
284 //iterate through the pairs to split and display
285 while(currDoc != lastDoc){
286
287 //split pairs into collectionname and ID
288 text_tarray relationdata;
289 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
290
291 //get first element in the array (collection)
292 text_tarray::const_iterator doc_data = relationdata.begin();
293 text_t document_collection = *doc_data;
[9620]294 ++doc_data; //increment to get next item in array (oid)
[1941]295 text_t document_OID = *doc_data;
296
297 //create html link to related document
298 relation += "<a href=\"_httpdocument_&c=" + document_collection;
299 relation += "&cl=search&d=" + document_OID;
300
301 //get the information associated with the metadata for related doc
[7432]302 if (get_info (document_OID, document_collection, "", metadata,
[1941]303 false, collectproto, response, logout)) {
304
305 //if title metadata doesn't exist, collect subject metadata
306 //if that doesn't exist, just call it 'related document'
307 if (!response.docInfo[0].metadata["Title"].values[0].empty())
308 relationTitle = response.docInfo[0].metadata["Title"].values[0];
309 else if (!response.docInfo[0].metadata["Subject"].values.empty())
310 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
311 else relationTitle = "RELATED DOCUMENT";
312
313 }
314
315 //link the related document's title to its page
316 relation += "\">" + relationTitle + "</a>";
317 relation += " (" + document_collection + ")<br>";
318
[9620]319 ++currDoc;
[1941]320 }
321 }
322
323 }
324
325 if(relation.empty()) //no relation data for documnet
326 relation = ".. no related documents .. ";
327
328 return relation;
329}
330
331
332
[354]333static void get_parent_options (text_t &instring, metadata_t &metaoption) {
334
[1257]335 assert (instring.size() > 7);
336 if (instring.size() <= 7) return;
337
[354]338 text_t meta, com, op;
339 bool inbraces = false;
340 bool inquotes = false;
341 bool foundcolon = false;
342 text_t::const_iterator here = instring.begin()+6;
343 text_t::const_iterator end = instring.end();
344 while (here != end) {
[10415]345 if (foundcolon) meta.push_back (*here);
346 else if (*here == '(') inbraces = true;
[354]347 else if (*here == ')') inbraces = false;
348 else if (*here == '\'' && !inquotes) inquotes = true;
349 else if (*here == '\'' && inquotes) inquotes = false;
350 else if (*here == ':' && !inbraces) foundcolon = true;
351 else if (inquotes) op.push_back (*here);
352 else com.push_back (*here);
[9620]353 ++here;
[354]354 }
[9401]355
[354]356 instring = meta;
357 if (com.empty())
[9401]358 metaoption.mqualifier.parent = pImmediate;
[354]359 else if (com == "Top")
[9401]360 metaoption.mqualifier.parent = pTop;
[649]361 else if (com == "All") {
[9401]362 metaoption.mqualifier.parent = pAll;
[10415]363 metaoption.parentoptions = op;
[354]364 }
365}
366
[5787]367
368static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
369
370 assert (instring.size() > 8);
371 if (instring.size() <= 8) return;
372 text_t meta, com, op;
373 bool inbraces = false;
374 bool inquotes = false;
375 bool foundcolon = false;
376 text_t::const_iterator here = instring.begin()+7;
377 text_t::const_iterator end = instring.end();
378 while (here != end) {
[10415]379 if (foundcolon) meta.push_back (*here);
380 else if (*here == '(') inbraces = true;
[5787]381 else if (*here == ')') inbraces = false;
382 else if (*here == '\'' && !inquotes) inquotes = true;
383 else if (*here == '\'' && inquotes) inquotes = false;
[10415]384 else if (*here == ':' && !inbraces) foundcolon = true;
[5787]385 else if (inquotes) op.push_back (*here);
386 else com.push_back (*here);
[9620]387 ++here;
[5787]388 }
389
390 instring = meta;
[10415]391 metaoption.siblingoptions.clear();
[5787]392
393 if (com.empty()) {
[9401]394 metaoption.mqualifier.sibling = sAll;
[10415]395 metaoption.siblingoptions = " ";
[5787]396 }
[10415]397 else if (com == "first") {
398 metaoption.mqualifier.sibling = sNum;
399 metaoption.siblingoptions = "0";
400 }
401 else if (com == "last") {
402 metaoption.mqualifier.sibling = sNum;
403 metaoption.siblingoptions = "-2"; // == last
404 }
405 else if (com.getint()>0) {
406 metaoption.mqualifier.sibling = sNum;
407 int pos = com.getint()-1;
408 metaoption.siblingoptions +=pos;
409 }
[5787]410 else {
[9401]411 metaoption.mqualifier.sibling = sAll;
[10415]412 metaoption.siblingoptions = op;
[5787]413 }
414}
415
[9401]416static void get_child_options (text_t &instring, metadata_t &metaoption) {
[5788]417
[9401]418 assert (instring.size() > 6);
419 if (instring.size() <= 6) return;
420 text_t meta, com, op;
421 bool inbraces = false;
422 bool inquotes = false;
423 bool foundcolon = false;
424 text_t::const_iterator here = instring.begin()+5;
425 text_t::const_iterator end = instring.end();
426 while (here != end) {
[10415]427 if (foundcolon) meta.push_back (*here);
428 else if (*here == '(') inbraces = true;
[9401]429 else if (*here == ')') inbraces = false;
430 else if (*here == '\'' && !inquotes) inquotes = true;
431 else if (*here == '\'' && inquotes) inquotes = false;
432 else if (*here == ':' && !inbraces) foundcolon = true;
433 else if (inquotes) op.push_back (*here);
434 else com.push_back (*here);
[9620]435 ++here;
[9401]436 }
437
438 instring = meta;
439 if (com.empty()) {
440 metaoption.mqualifier.child = cAll;
[10415]441 metaoption.childoptions = " ";
[9401]442 }
443 else if (com == "first") {
444 metaoption.mqualifier.child = cNum;
[10415]445 metaoption.childoptions = ".fc";
[9401]446 }
447 else if (com == "last") {
448 metaoption.mqualifier.child = cNum;
[10415]449 metaoption.childoptions = ".lc";
[9401]450 }
451 else if (com.getint()>0) {
452 metaoption.mqualifier.child = cNum;
[10415]453 metaoption.childoptions = "."+com;
[9401]454 }
455 else {
456 metaoption.mqualifier.child = cAll;
[10415]457 metaoption.childoptions = op;
[9401]458 }
459}
460
461
462
[649]463static void parse_meta (text_t &meta, metadata_t &metaoption,
464 text_tset &metadata, bool &getParents) {
[354]465
[649]466 if (meta.size() > 8 && (substr(meta.begin(), meta.begin()+8) == "cgisafe:")) {
[5787]467 metaoption.metacommand |= mCgiSafe;
[649]468 meta = substr (meta.begin()+8, meta.end());
469 }
[12567]470 if (meta.size() > 7 && (substr(meta.begin(), meta.begin()+7) == "format:")) {
471 metaoption.metacommand |= mSpecial;
472 meta = substr (meta.begin()+7, meta.end());
473 }
[649]474
[19046]475 bool had_parent_or_child = true;
476 bool prev_was_parent = false;
477 bool prev_was_child = false;
478
479 while (had_parent_or_child) {
480 if (meta.size() > 7
481 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
482
483 // clear out sibling and child (cmd and options)
484 metaoption.metacommand &= ~(mChild|mSibling);
485 metaoption.childoptions.clear();
486 metaoption.siblingoptions.clear();
487
488 getParents = true;
489 metaoption.metacommand |= mParent;
490 get_parent_options (meta, metaoption);
491
492 if (prev_was_parent) {
493 metaoption.pre_tree_traverse += ".pr";
494 }
495 else if (prev_was_child) {
496 metaoption.pre_tree_traverse += ".fc";
497 }
498
499 prev_was_parent = true;
500 prev_was_child = false;
501 }
502 else if (meta.size() > 6
503 && (substr (meta.begin(), meta.begin()+5) == "child")) {
504
505 // clear out sibling and parent (cmd and options)
506 metaoption.metacommand &= ~(mParent|mSibling);
507 metaoption.parentoptions.clear();
508 metaoption.siblingoptions.clear();
509
510 metaoption.metacommand |= mChild;
511 get_child_options (meta, metaoption);
512 metadata.insert("contains");
513
514 if (prev_was_parent) {
515 metaoption.pre_tree_traverse += ".pr";
516 }
517 else if (prev_was_child) {
518 metaoption.pre_tree_traverse += ".fc";
519 }
520
521 prev_was_child = true;
522 prev_was_parent = false;
523 }
524 else {
525 prev_was_child = false;
526 prev_was_parent = false;
527 had_parent_or_child = false;
528 }
529 }
530
531 // parent/child can have sibling tacked on end also
[10415]532 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
533 metaoption.metacommand |= mSibling;
534 get_sibling_options (meta, metaoption);
535 }
536
[7599]537 // check for ex. which may occur in format statements
538 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.")) {
539 meta = substr (meta.begin()+3, meta.end());
540 }
[649]541 metadata.insert (meta);
542 metaoption.metaname = meta;
[354]543}
544
[9948]545static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
546 if (meta == "collection") {
547 // no qualifiers
548 metaoption.metaname = g_EmptyText;
549 return;
550 }
551 meta = substr (meta.begin()+11, meta.end());
552 metaoption.metaname = meta;
553
554}
555
[649]556static void parse_meta (text_t &meta, format_t *formatlistptr,
557 text_tset &metadata, bool &getParents) {
[354]558
559 if (meta == "link")
560 formatlistptr->command = comLink;
561 else if (meta == "/link")
562 formatlistptr->command = comEndLink;
563
[2706]564 else if (meta == "href")
565 formatlistptr->command = comHref;
566
[354]567 else if (meta == "num")
568 formatlistptr->command = comNum;
569
[407]570 else if (meta == "icon")
571 formatlistptr->command = comIcon;
572
[442]573 else if (meta == "Text")
574 formatlistptr->command = comDoc;
[1941]575
576 else if (meta == "RelatedDocuments")
577 formatlistptr->command = comRel;
[442]578
[670]579 else if (meta == "highlight")
580 formatlistptr->command = comHighlight;
581
582 else if (meta == "/highlight")
583 formatlistptr->command = comEndHighlight;
584
[19302]585 else if (meta == "metadata-spanwrap")
586 formatlistptr->command = comMetadataSpanWrap;
587
588 else if (meta == "/metadata-spanwrap")
589 formatlistptr->command = comEndMetadataSpanWrap;
590
[2967]591 else if (meta == "Summary")
592 formatlistptr->command = comSummary;
593
[5788]594 else if (meta == "DocImage")
595 formatlistptr->command = comImage;
596
597 else if (meta == "DocTOC")
598 formatlistptr->command = comTOC;
599
600 else if (meta == "DocumentButtonDetach")
601 formatlistptr->command = comDocumentButtonDetach;
602
603 else if (meta == "DocumentButtonHighlight")
604 formatlistptr->command = comDocumentButtonHighlight;
605
606 else if (meta == "DocumentButtonExpandContents")
607 formatlistptr->command = comDocumentButtonExpandContents;
608
609 else if (meta == "DocumentButtonExpandText")
610 formatlistptr->command = comDocumentButtonExpandText;
[6020]611
612 else if (meta == "DocOID")
613 formatlistptr->command = comOID;
[13118]614 else if (meta == "DocTopOID")
615 formatlistptr->command = comTopOID;
[6710]616 else if (meta == "DocRank")
617 formatlistptr->command = comRank;
[16915]618 else if (meta == "DocTermsFreqTotal")
619 formatlistptr->command = comDocTermsFreqTotal;
[9948]620 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
621 formatlistptr->command = comCollection;
622 parse_coll_meta(meta, formatlistptr->meta);
623 }
[354]624 else {
625 formatlistptr->command = comMeta;
[649]626 parse_meta (meta, formatlistptr->meta, metadata, getParents);
[354]627 }
628}
629
[9948]630
[354]631static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
[649]632 text_tset &metadata, bool &getParents) {
[354]633
[347]634 text_t text;
635 text_t::const_iterator here = formatstring.begin();
636 text_t::const_iterator end = formatstring.end();
637
638 while (here != end) {
639
[1257]640 if (*here == '\\') {
[9620]641 ++here;
[1257]642 if (here != end) text.push_back (*here);
[347]643
[1257]644 } else if (*here == '{') {
[347]645 if (!text.empty()) {
646 formatlistptr->command = comText;
647 formatlistptr->text = text;
648 formatlistptr->nextptr = new format_t();
649 formatlistptr = formatlistptr->nextptr;
650
651 text.clear();
652 }
[649]653 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
[1443]654
[347]655 formatlistptr->nextptr = new format_t();
656 formatlistptr = formatlistptr->nextptr;
657 if (here == end) break;
658 }
659 } else if (*here == '[') {
660 if (!text.empty()) {
661 formatlistptr->command = comText;
662 formatlistptr->text = text;
663 formatlistptr->nextptr = new format_t();
664 formatlistptr = formatlistptr->nextptr;
665
666 text.clear();
667 }
668 text_t meta;
[9620]669 ++here;
[347]670 while (*here != ']') {
671 if (here == end) return false;
672 meta.push_back (*here);
[9620]673 ++here;
[347]674 }
[649]675 parse_meta (meta, formatlistptr, metadata, getParents);
676 formatlistptr->nextptr = new format_t();
677 formatlistptr = formatlistptr->nextptr;
[347]678
679 } else
680 text.push_back (*here);
681
[9620]682 if (here != end) ++here;
[347]683 }
684 if (!text.empty()) {
685 formatlistptr->command = comText;
686 formatlistptr->text = text;
687 formatlistptr->nextptr = new format_t();
688 formatlistptr = formatlistptr->nextptr;
689
690 }
691 return true;
692}
693
694
[354]695static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
[649]696 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
[354]697
[347]698 text_t::const_iterator it = findchar (here, end, '}');
699 if (it == end) return false;
700
701 text_t com = substr (here, it);
702 here = findchar (it, end, '{');
703 if (here == end) return false;
[9620]704 else ++here;
[347]705
[7266]706 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
707 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
[347]708 else return false;
709
710 int commacount = 0;
711 text_t text;
712 while (here != end) {
[636]713
714 if (*here == '\\') {
[9745]715 ++here;
[636]716 if (here != end) text.push_back(*here);
717
[1443]718 }
719
720 else if (*here == ',' || *here == '}' || *here == '{') {
[347]721
722 if (formatlistptr->command == comOr) {
723 // the {Or}{this, or this, or this, or this} statement
724 format_t *or_ptr;
725
726 // find the next unused orptr
727 if (formatlistptr->orptr == NULL) {
728 formatlistptr->orptr = new format_t();
729 or_ptr = formatlistptr->orptr;
730 } else {
731 or_ptr = formatlistptr->orptr;
732 while (or_ptr->nextptr != NULL)
733 or_ptr = or_ptr->nextptr;
734 or_ptr->nextptr = new format_t();
735 or_ptr = or_ptr->nextptr;
736 }
737
[1443]738 if (!text.empty())
739 {
740 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
741 }
[347]742
[1443]743 if (*here == '{')
744 {
745 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
746 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
747 // The latter can always be re-written:
748 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
749
750 if (!text.empty()) // already used up allocated format_t
751 {
752 // => allocate new one for detected action
753 or_ptr->nextptr = new format_t();
754 or_ptr = or_ptr->nextptr;
755 }
756 if (!parse_action(++here, end, or_ptr, metadata, getParents))
757 {
758 return false;
759 }
760 }
761 else
762 {
763 if (*here == '}') break;
764 }
[347]765 text.clear();
766
[1610]767 }
768
769 // Parse an {If}{decide,do,else} statement
770 else {
771
772 // Read the decision component.
[347]773 if (commacount == 0) {
[1610]774 // Decsion can be a metadata element, or a piece of text.
775 // Originally Stefan's code, updated 25/10/2000 by Gordon.
[1443]776
[347]777 text_t::const_iterator beginbracket = text.begin();
778 text_t::const_iterator endbracket = (text.end() - 1);
[1610]779
780 // Decision is based on a metadata element
[347]781 if ((*beginbracket == '[') && (*endbracket == ']')) {
[1610]782 // Ignore the surrounding square brackets
[347]783 text_t meta = substr (beginbracket+1, endbracket);
[649]784 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
[9620]785 ++commacount;
[347]786 text.clear();
787 }
[1610]788
789 // Decision is a piece of text (probably a macro like _cgiargmode_).
790 else {
[7389]791
792 // hunt for any metadata in string, which might be uses in
793 // to test a condition, e.g. [Format] eq 'PDF'
794 format_t* dummyformat = new format_t();
795 // update which metadata fields needed
796 // (not interested in updatng formatlistptr)
797 parse_string (text, dummyformat, metadata, getParents);
798 delete dummyformat;
799
[1610]800 formatlistptr->decision.command = dText;
801 formatlistptr->decision.text = text;
[9620]802 ++commacount;
[1610]803 text.clear();
804 }
805 }
806
807 // Read the "then" and "else" components of the {If} statement.
808 else {
[1443]809 format_t** nextlistptr = NULL;
810 if (commacount == 1) {
[1610]811 nextlistptr = &formatlistptr->ifptr;
[1443]812 } else if (commacount == 2 ) {
813 nextlistptr = &formatlistptr->elseptr;
814 } else {
815 return false;
816 }
817
818 if (!text.empty()) {
819 if (*nextlistptr == NULL) {
820 *nextlistptr = new format_t();
821 } else {
822
823 // skip to the end of any format_t statements already added
824 while ((*nextlistptr)->nextptr != NULL)
825 {
826 nextlistptr = &(*nextlistptr)->nextptr;
827 }
828
829 (*nextlistptr)->nextptr = new format_t();
830 nextlistptr = &(*nextlistptr)->nextptr;
831 }
832
833 if (!parse_string (text, *nextlistptr, metadata, getParents))
834 {
835 return false;
836 }
837 text.clear();
838 }
[347]839
[1443]840 if (*here == '{')
841 {
842 if (*nextlistptr == NULL) {
843 *nextlistptr = new format_t();
844 } else {
[7474]845 // skip to the end of any format_t statements already added
846 while ((*nextlistptr)->nextptr != NULL)
847 {
848 nextlistptr = &(*nextlistptr)->nextptr;
849 }
850
[1443]851 (*nextlistptr)->nextptr = new format_t();
852 nextlistptr = &(*nextlistptr)->nextptr;
853 }
854
855 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
856 {
857 return false;
858 }
859 }
860 else
861 {
862 if (*here == '}') break;
[9620]863 ++commacount;
[1443]864 }
[347]865 }
866 }
[636]867
868 } else text.push_back(*here);
[347]869
[9620]870 if (here != end) ++here;
[347]871 }
872
873 return true;
874}
875
[354]876
[19312]877static text_t spanwrap_metatext(const text_t& metatext, const text_t& OID,
878 const text_t metaname, int metapos=-1)
879{
880
881 text_t tag_type = (metaname == "Text") ? "div" : "span";
882 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
883
884 text_t wrapped_metatext = "<" + tag_type + " ";
885 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
886
887 wrapped_metatext += "docoid=\"" + OID + "\" ";
888 wrapped_metatext += "metaname=\"" + metaname + "\"";
889
890 if (metapos>=0) {
891 text_t metapos_str = metapos;
892 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
893 }
894
895 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
896
897 return wrapped_metatext;
898}
899
900
901
[347]902bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
[649]903 text_tset &metadata, bool &getParents) {
[347]904
905 formatlistptr->clear();
906 getParents = false;
907
[649]908 return (parse_string (formatstring, formatlistptr, metadata, getParents));
[347]909}
910
[10415]911// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
912// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
[19312]913
914static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
[9401]915{
916 text_t no_ns_metaname = remove_namespace(meta.metaname);
[19298]917 text_t formatted_metatext;
[9401]918 bool first = true;
[649]919
[9401]920 const int start_i=0;
921 const int end_i = metainfo.values.size()-1;
[10415]922
923 if (position == -1) { // all
924 for (int i=start_i; i<=end_i; ++i) {
[19298]925 if (!first) formatted_metatext += meta.siblingoptions;
926
927 text_t fresh_metatext;
928
[12567]929 if (meta.metacommand & mSpecial) {
930 // special formatting
[19298]931 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
932 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
933 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
[12567]934 }
[19298]935 else fresh_metatext = metainfo.values[i];
936
[19302]937 if (metadata_spanwrap) {
[19312]938 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,i);
[19298]939 }
940 formatted_metatext += fresh_metatext;
941
[10415]942 first = false;
[9401]943
[10415]944 }
945 } else {
946 if (position == -2) { // end
947 position = end_i;
948 } else if (position < start_i || position > end_i) {
949 return "";
950 }
[19298]951
952 text_t fresh_metatext;
[12567]953 if (meta.metacommand & mSpecial) {
[19298]954
[12567]955 // special formatting
[19298]956 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
957 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
958 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
[12567]959 }
[19298]960 else fresh_metatext = metainfo.values[position];
961
[19302]962 if (metadata_spanwrap) {
[19312]963 fresh_metatext = spanwrap_metatext(fresh_metatext,OID,meta.metaname,position);
[19298]964 }
965
966 formatted_metatext += fresh_metatext;
[9401]967 }
[19298]968
969 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
970 else return formatted_metatext;
[9401]971}
[347]972
[10415]973static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
[9401]974{
975
[649]976 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
[19312]977
[9401]978 switch (meta.mqualifier.parent) {
[347]979 case pNone:
[9401]980 return "Nothing!!";
981 break;
[5787]982
[347]983 case pImmediate:
[649]984 if (parent != NULL) {
[19312]985 text_t parent_oid = get_parent(docinfo.OID);
986 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]987 }
[347]988 break;
989
990 case pTop:
[649]991 if (parent != NULL) {
[19312]992 text_t parent_oid = get_parent(docinfo.OID);
993
994 while (parent->parent != NULL) {
995 parent = parent->parent;
996 parent_oid = get_parent(parent_oid);
997 }
998 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
[410]999 }
[347]1000 break;
1001
1002 case pAll:
[649]1003 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1004 if (parent != NULL) {
[19312]1005 text_t parent_oid = get_parent(docinfo.OID);
1006
[649]1007 text_tarray tmparray;
1008 while (parent != NULL) {
[19312]1009 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
[649]1010 parent = parent->parent;
[19312]1011 parent_oid = get_parent(parent_oid);
1012
[649]1013 }
[10415]1014 // now join them up - use teh parent separator
[649]1015 bool first = true;
1016 text_t tmp;
1017 text_tarray::reverse_iterator here = tmparray.rbegin();
1018 text_tarray::reverse_iterator end = tmparray.rend();
[359]1019 while (here != end) {
[10415]1020 if (!first) tmp += meta.parentoptions;
1021 tmp += *here;
[359]1022 first = false;
[9620]1023 ++here;
[359]1024 }
[13457]1025 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
[649]1026 else return tmp;
[347]1027 }
1028 }
1029 return "";
[9401]1030
[347]1031}
1032
[9948]1033static text_t get_child_meta (const text_t& collection,
1034 recptproto* collectproto,
[9401]1035 ResultDocInfo_t &docinfo, displayclass &disp,
1036 const metadata_t &meta, text_tmap &options,
[10415]1037 ostream& logout, int siblings_values)
[9401]1038{
[10415]1039 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1040
[19046]1041 const text_t& pre_tree_trav = meta.pre_tree_traverse;
[10415]1042 const text_t& child_metaname = meta.metaname;
1043 const text_t& child_field = meta.childoptions;
1044 text_tset child_metadata;
1045 child_metadata.insert(child_metaname);
[9401]1046
[10415]1047 FilterResponse_t child_response;
1048 if (meta.mqualifier.child == cNum) {
1049 // just one child
1050 //get the information associated with the metadata for child doc
[19046]1051 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1052 child_metadata, false, collectproto, child_response,
1053 logout)) return ""; // invalid child number
[9401]1054
[10415]1055 if (child_response.docInfo.empty()) return false; // no info for the child
1056
1057 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1058 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1059
[19312]1060 text_t child_metavalue
1061 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[10415]1062 return expand_metadata(child_metavalue,collection,collectproto,
1063 child_docinfo,disp,options,logout);
1064 }
1065
[9401]1066
[10415]1067 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
[19046]1068
1069
1070 if (!pre_tree_trav.empty()) {
1071 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1072 FilterResponse_t trav_response;
1073
1074 text_tset trav_metadata;
1075 trav_metadata.insert("contains");
1076
1077 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1078 trav_metadata, false, collectproto, trav_response,
1079 logout)) return ""; // invalid pre_tree_trav
1080
1081 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
[10415]1082
[19046]1083 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1084 // use this for rest of routine
1085 docinfo = trav_docinfo;
1086 }
1087
[10415]1088 // we need to get all children
1089 text_t result = "";
1090 text_tarray children;
1091 text_t contains = docinfo.metadata["contains"].values[0];
1092 splitchar (contains.begin(), contains.end(), ';', children);
1093 text_tarray::const_iterator here = children.begin();
1094 text_tarray::const_iterator end = children.end();
1095 bool first = true;
1096 while (here !=end) {
1097 text_t oid = *here;
1098 here++;
1099 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1100
[9401]1101 //get the information associated with the metadata for child doc
[10415]1102 if (!get_info (oid, collection, "", child_metadata,
1103 false, collectproto, child_response, logout) ||
1104 child_response.docInfo.empty()) {
1105 first = false;
1106 continue;
1107 }
1108
1109
1110 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1111 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1112
[19312]1113 text_t child_metavalue
1114 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
[10415]1115 if (!first) result += child_field;
1116 first = false;
1117 // need to do this here cos otherwise we are in the wrong document
1118 result += expand_metadata(child_metavalue,collection,collectproto,
[9401]1119 child_docinfo,disp,options,logout);
1120 }
[10415]1121 return result;
1122
[9401]1123}
1124
1125static text_t get_meta (const text_t& collection, recptproto* collectproto,
1126 ResultDocInfo_t &docinfo, displayclass &disp,
1127 const metadata_t &meta, text_tmap &options,
1128 ostream& logout) {
1129
1130 // make sure we have the requested metadata
1131 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1132 if (it == docinfo.metadata.end()) return "";
1133
[10415]1134 int siblings_values = 0; // default is no siblings, just the first metadata available
1135 if (meta.metacommand & mSibling) {
1136 if (meta.mqualifier.sibling == sAll) {
1137 siblings_values = -1; //all
1138 } else if (meta.mqualifier.sibling == sNum) {
1139 siblings_values = meta.siblingoptions.getint();
1140 }
1141 }
[9401]1142 if (meta.metacommand & mParent) {
[10415]1143 return get_parent_meta(docinfo,meta,siblings_values);
[9401]1144 }
[10415]1145
[9401]1146 else if (meta.metacommand & mChild) {
1147 return get_child_meta(collection,collectproto,docinfo,disp,meta,
[10415]1148 options,logout, siblings_values);
[9401]1149 }
[10415]1150 else if (meta.metacommand & mSibling) { // only siblings
[9401]1151 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
[19312]1152 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
[9401]1153 }
1154 else {
1155
1156 // straightforward metadata request (nothing fancy)
1157
1158 text_t classifier_metaname = docinfo.classifier_metadata_type;
1159 int metaname_index
1160 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
[19312]1161 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
[9401]1162 }
[10415]1163
[9401]1164 return "";
1165}
1166
[1443]1167static text_t get_or (const text_t& collection, recptproto* collectproto,
[1610]1168 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1169 format_t *orptr, text_tmap &options,
[1443]1170 ostream& logout) {
[347]1171
[354]1172 while (orptr != NULL) {
[347]1173
[19302]1174 if (metadata_spanwrap) {
1175 // need to be a bit more careful about this
1176 // => test for it *without* spanwrap, and if defined, then
1177 // got back and generate it again, this time with spanwrap on
[354]1178
[19302]1179 metadata_spanwrap = false;
1180 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1181 options, logout);
1182 metadata_spanwrap = true;
1183 if (!test_tmp.empty()) {
1184
1185 return format_string (collection,collectproto,docinfo, disp, orptr,
1186 options, logout);
1187 }
1188 }
1189 else {
1190 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1191 options, logout);
1192 if (!tmp.empty()) return tmp;
1193 }
1194
[354]1195 orptr = orptr->nextptr;
[347]1196 }
[354]1197 return "";
[347]1198}
1199
// True for space, tab, newline and carriage return; false for anything else.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1205
1206static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1207{
1208 int pos = start_pos;
1209 while (pos<outstring.size()) {
1210 if (!char_is_whitespace(outstring[pos])) {
1211 break;
1212 }
[9620]1213 ++pos;
[7389]1214 }
1215
1216 return pos;
1217}
1218
1219static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1220{
1221 int pos = start_pos;
1222 while (pos>=0) {
1223 if (!char_is_whitespace(outstring[pos])) {
1224 break;
1225 }
[9620]1226 --pos;
[7389]1227 }
1228
1229 return pos;
1230}
1231
1232static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1233{
1234 int pos = start_pos;
1235 while (pos>=0) {
1236 if (char_is_whitespace(outstring[pos])) {
1237 break;
1238 }
[9620]1239 --pos;
[7389]1240 }
1241
1242 return pos;
1243}
1244
1245
1246static int rscan_for(const text_t& outstring, const int start_pos,
1247 const char find_c)
1248{
1249 int pos = start_pos;
1250 while (pos>=0) {
1251 char c = outstring[pos];
1252 if (outstring[pos] == find_c) {
1253 break;
1254 }
[9620]1255 --pos;
[7389]1256 }
1257
1258 return pos;
1259}
1260
1261text_t extract_substr(const text_t& outstring, const int start_pos,
1262 const int end_pos)
1263{
1264 text_t extracted_str;
1265 extracted_str.clear();
1266
[9620]1267 for (int pos=start_pos; pos<=end_pos; ++pos) {
[7389]1268 extracted_str.push_back(outstring[pos]);
1269 }
1270
1271 return extracted_str;
1272}
1273
1274
[9401]1275static text_t expand_potential_metadata(const text_t& collection,
1276 recptproto* collectproto,
1277 ResultDocInfo_t &docinfo,
1278 displayclass &disp,
1279 const text_t& intext,
1280 text_tmap &options,
1281 ostream& logout)
[7389]1282{
1283 text_t outtext;
1284
1285 // decide if dealing with metadata or text
1286
1287 text_t::const_iterator beginbracket = intext.begin();
1288 text_t::const_iterator endbracket = (intext.end() - 1);
1289
1290 // Decision is based on a metadata element
1291 if ((*beginbracket == '[') && (*endbracket == ']')) {
1292 // Ignore the surrounding square brackets
1293 text_t meta_text = substr (beginbracket+1, endbracket);
1294
[10614]1295 if (meta_text == "Text") {
1296 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
[19311]1297 }
1298 else {
[7389]1299
[10614]1300 text_tset metadata;
1301 bool getParents =false;
1302 metadata_t meta;
1303
1304 parse_meta (meta_text, meta, metadata, getParents);
1305 outtext
1306 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1307 }
1308
[7389]1309 }
1310 else {
1311 outtext = intext;
1312 }
1313
1314 return outtext;
1315}
1316
1317
1318
1319
[9401]1320static bool uses_expression(const text_t& collection, recptproto* collectproto,
1321 ResultDocInfo_t &docinfo,
1322 displayclass &disp,
[7389]1323 const text_t& outstring, text_t& lhs_expr,
[9401]1324 text_t& op_expr, text_t& rhs_expr,
1325 text_tmap &options,
1326 ostream& logout)
[7389]1327{
1328 // Note: the string may not be of the form: str1 op str2, however
1329 // to deterine this we have to process it on the assumption it is,
1330 // and if at any point an 'erroneous' value is encountered, return
1331 // false and let something else have a go at evaluating it
1332
1333 // Starting at the end of the string and working backwards ..
1334
1335 const int outstring_len = outstring.size();
1336
1337 // skip over white space
1338 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1339
1340 if (rhs_end<=0) {
1341 // no meaningful text or (rhs_end==0) no room for operator
1342 return false;
1343 }
1344
1345 // check for ' or " and then scan over token
1346 const char potential_quote = outstring[rhs_end];
1347 int rhs_start=rhs_end;
1348 bool quoted = false;
1349
1350 if ((potential_quote == '\'') || (potential_quote == '\"')) {
[9620]1351 --rhs_end;
[7389]1352 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1353 quoted = true;
1354 }
1355 else {
1356 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1357 }
1358
[7617]1359 if ((rhs_end-rhs_start)<0) {
[7389]1360 // no meaningful rhs expression
1361 return false;
1362 }
1363
1364 // form rhs_expr
1365 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1366
1367 // skip over white space
1368 const int to_whitespace = (quoted) ? 2 : 1;
1369
1370 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1371 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1372
[19058]1373 if ((op_end<0) && (op_start<0)) {
1374 // no meaningful expression operator
1375 return false;
1376 }
[7389]1377
[7617]1378 if (op_end-op_start<0) {
[7389]1379 // no meaningful expression operator
1380 return false;
1381 }
1382
1383 op_expr = extract_substr(outstring,op_start,op_end);
1384
1385
1386 // check for operator
[10142]1387 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1388 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
[10145]1389
[7389]1390 // not a valid operator
1391 return false;
1392 }
1393
1394 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
[7617]1395 if (lhs_end<0) {
[7389]1396 // no meaningful lhs expression
1397 return false;
1398 }
1399
1400 int lhs_start = scan_over_whitespace(outstring,0);
1401
1402 // form lhs_expr from remainder of string
1403 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1404
1405 // Now we know we have a valid expression, look up any
1406 // metadata terms
1407
[9401]1408 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1409 disp,rhs_expr,options,logout);
1410 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1411 disp,lhs_expr,options,logout);
[7389]1412
1413 return true;
1414}
1415
1416static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1417 const text_t& rhs_expr, ostream& logout)
1418{
[10142]1419 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1420 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1421 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1422 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1423 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1424 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1425 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1426 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1427 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1428 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1429 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1430 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1431 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1432 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
[7389]1433 else {
1434 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1435 }
[10142]1436
[7389]1437 return false;
1438}
1439
1440
[1443]1441static text_t get_if (const text_t& collection, recptproto* collectproto,
[1610]1442 ResultDocInfo_t &docinfo, displayclass &disp,
1443 const decision_t &decision,
[5788]1444 format_t *ifptr, format_t *elseptr,
1445 text_tmap &options, ostream& logout)
[1443]1446{
[1610]1447 // If the decision component is a metadata element, then evaluate it
1448 // to see whether we output the "then" or the "else" clause
[354]1449 if (decision.command == dMeta) {
[19298]1450
[19302]1451 bool store_metadata_spanwrap = metadata_spanwrap;
1452 metadata_spanwrap = 0;
[19298]1453
[19302]1454 // temporarily suspend metadata-spanwrap (if on) so can test if metadata item really exits or not
[19298]1455 bool metadata_exists
1456 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1457 logout) != "");
1458
[19302]1459 metadata_spanwrap = store_metadata_spanwrap;
[19298]1460
1461 if (metadata_exists) {
[354]1462 if (ifptr != NULL)
[9948]1463 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
[5788]1464 options, logout);
[354]1465 }
1466 else {
1467 if (elseptr != NULL)
[9948]1468 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
[5788]1469 options, logout);
[354]1470 }
1471 }
[1610]1472
1473 // If the decision component is text, then evaluate it (it is probably a
1474 // macro like _cgiargmode_) to decide what to output.
1475 else if (decision.command == dText) {
1476
1477 text_t outstring;
1478 disp.expandstring (decision.text, outstring);
1479
[7389]1480 // Check for if expression in form: str1 op str2
1481 // (such as [x] eq "y")
1482 text_t lhs_expr, op_expr, rhs_expr;
[9401]1483 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
[7389]1484 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1485 if (ifptr != NULL) {
1486 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1487 options, logout);
1488 }
1489 else {
1490 return "";
1491 }
1492 } else {
1493 if (elseptr != NULL) {
1494 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1495 options, logout);
1496 }
1497 else {
1498 return "";
1499 }
1500 }
1501 }
1502
1503
[1610]1504 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1505 // a cgi argument macro that has not been set, it evaluates to itself.
1506 // Therefore, were have to say that a piece of text evalautes true if
1507 // it is non-empty and if it is a cgi argument evaulating to itself.
[7389]1508
[1610]1509 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1510 if (ifptr != NULL)
1511 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
[5788]1512 options, logout);
[1610]1513 } else {
1514 if (elseptr != NULL)
1515 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
[5788]1516 options, logout);
[1610]1517 }
1518 }
1519
[354]1520 return "";
1521}
1522
[1443]1523bool includes_metadata(const text_t& text)
1524{
1525 text_t::const_iterator here = text.begin();
1526 text_t::const_iterator end = text.end();
1527 while (here != end) {
1528 if (*here == '[') return true;
[9620]1529 ++here;
[1443]1530 }
1531
1532 return false;
1533}
1534
[5788]1535static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
[9948]1536 recptproto* collectproto,
1537 ResultDocInfo_t &docinfo,
[5788]1538 displayclass &disp, text_tmap &options,
1539 ostream &logout) {
1540
[10415]1541 if (includes_metadata(metavalue)) {
1542
1543 // text has embedded metadata in it => expand it
1544 FilterRequest_t request;
1545 FilterResponse_t response;
1546
1547 request.getParents = false;
1548
1549 format_t *expanded_formatlistptr = new format_t();
1550 parse_formatstring (metavalue, expanded_formatlistptr,
1551 request.fields, request.getParents);
1552
1553 // retrieve metadata
1554 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1555 collectproto, response, logout);
1556
1557 if (!response.docInfo.empty()) {
1558
1559 text_t expanded_metavalue
1560 = get_formatted_string(collection, collectproto,
1561 response.docInfo[0], disp, expanded_formatlistptr,
1562 options, logout);
1563
1564 return expanded_metavalue;
1565 }
1566 else {
1567 return metavalue;
1568 }
1569 }
1570 else {
1571
1572 return metavalue;
1573 }
[5788]1574}
[1941]1575
[9948]1576text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1577 displayclass &disp,
1578 text_t meta_name, ostream& logout) {
1579
1580 ColInfoResponse_t collectinfo;
1581 comerror_t err;
1582 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1583 text_t meta_value = "";
1584 text_t lang;
1585 disp.expandstring("_cgiargl_",lang);
1586 if (lang.empty()) {
1587 lang = "en";
1588 }
1589
1590 if (err == noError) {
1591 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1592 }
1593 return meta_value;
1594
1595
1596}
[1443]1597text_t format_string (const text_t& collection, recptproto* collectproto,
[1610]1598 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1599 format_t *formatlistptr, text_tmap &options,
[1443]1600 ostream& logout) {
[354]1601
[347]1602 if (formatlistptr == NULL) return "";
1603
1604 switch (formatlistptr->command) {
[6020]1605 case comOID:
1606 return docinfo.OID;
[13118]1607 case comTopOID:
1608 {
1609 text_t top_id;
1610 get_top(docinfo.OID, top_id);
1611 return top_id;
1612 }
[6710]1613 case comRank:
1614 return text_t(docinfo.ranking);
[5788]1615 case comText:
1616 return formatlistptr->text;
1617 case comLink:
1618 return options["link"];
1619 case comEndLink:
1620 if (options["link"].empty()) return "";
1621 else return "</a>";
1622 case comHref:
1623 return get_href(options["link"]);
1624 case comIcon:
1625 return options["icon"];
1626 case comNum:
1627 return docinfo.result_num;
1628 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1629 return get_related_docs(collection, collectproto, docinfo, logout);
[19311]1630
[5788]1631 case comSummary:
[19312]1632 return format_summary(collection, collectproto, docinfo, disp, options, logout);
[19311]1633
[5788]1634 case comMeta:
[1443]1635 {
[9948]1636 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
[5788]1637 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
[1443]1638 }
[19311]1639
[5788]1640 case comDoc:
[19311]1641 return format_text(collection, collectproto, docinfo, disp, options, logout);
1642
[5788]1643 case comImage:
1644 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1645 case comTOC:
1646 return options["DocTOC"];
1647 case comDocumentButtonDetach:
1648 return options["DocumentButtonDetach"];
1649 case comDocumentButtonHighlight:
1650 return options["DocumentButtonHighlight"];
1651 case comDocumentButtonExpandContents:
1652 return options["DocumentButtonExpandContents"];
1653 case comDocumentButtonExpandText:
1654 return options["DocumentButtonExpandText"];
1655 case comHighlight:
1656 if (options["highlight"] == "1") return "<b>";
1657 break;
1658 case comEndHighlight:
1659 if (options["highlight"] == "1") return "</b>";
1660 break;
[19302]1661 case comMetadataSpanWrap:
1662 metadata_spanwrap=true; return "";
1663 break;
1664 case comEndMetadataSpanWrap:
1665 metadata_spanwrap=false; return "";
1666 break;
[5788]1667 case comIf:
1668 return get_if (collection, collectproto, docinfo, disp,
1669 formatlistptr->decision, formatlistptr->ifptr,
1670 formatlistptr->elseptr, options, logout);
1671 case comOr:
1672 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1673 options, logout);
[16915]1674 case comDocTermsFreqTotal:
1675 return docinfo.num_terms_matched;
[9948]1676 case comCollection:
1677 if (formatlistptr->meta.metaname == g_EmptyText) {
1678 return collection;
1679 }
1680 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1681
[347]1682 }
1683 return "";
1684}
1685
[1443]1686text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
[1610]1687 ResultDocInfo_t &docinfo, displayclass &disp,
[5788]1688 format_t *formatlistptr, text_tmap &options,
[1443]1689 ostream& logout) {
[407]1690
[5788]1691 text_t ft;
1692 while (formatlistptr != NULL)
1693 {
1694 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1695 options, logout);
1696 formatlistptr = formatlistptr->nextptr;
1697 }
1698
1699 return ft;
[347]1700}
1701
1702
[9852]1703// we have only preloaded the text in DocumentAction. But you may want to get the text in query. so copy what we have done with format_summary and get the text here. probably is quite expensive?
1704text_t format_text (const text_t& collection, recptproto* collectproto,
[9948]1705 ResultDocInfo_t &docinfo, displayclass &disp,
[19311]1706 text_tmap &options, ostream& logout)
1707{
1708 text_t text;
1709
[9852]1710 if(!options["text"].empty()) {
[19311]1711 text = options["text"];
[9852]1712 }
[19311]1713 else {
1714 // get document text here
1715 DocumentRequest_t docrequest;
1716 DocumentResponse_t docresponse;
1717 comerror_t err;
1718 docrequest.OID = docinfo.OID;
1719 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1720 text = docresponse.doc;
1721 }
[9852]1722
[19311]1723 if (metadata_spanwrap) {
[19312]1724 text = spanwrap_metatext(text,docinfo.OID,"Text");
[19311]1725 }
1726
1727 return text;
[9852]1728}
1729
[2967]1730/* FUNCTION NAME: format_summary
1731 * DESC: this is invoked when a [Summary] special metadata is processed.
1732 * RETURNS: a query-biased summary for the document */
1733
1734text_t format_summary (const text_t& collection, recptproto* collectproto,
[5788]1735 ResultDocInfo_t &docinfo, displayclass &disp,
1736 text_tmap &options, ostream& logout) {
[3673]1737
1738 // GRB: added code here to ensure that the cstr (and other collections)
1739 // uses the document metadata item Summary, rather than compressing
1740 // the text of the document, processed via the methods in
1741 // summarise.cpp
[19312]1742
1743 text_t summary;
1744
[3673]1745 if (docinfo.metadata.count("Summary") > 0 &&
1746 docinfo.metadata["Summary"].values.size() > 0) {
[19312]1747 summary = docinfo.metadata["Summary"].values[0];
[3673]1748 }
[19312]1749 else {
1750
1751 text_t textToSummarise, query;
[3673]1752
[19312]1753 if(options["text"].empty()) { // get document text
1754 DocumentRequest_t docrequest;
1755 DocumentResponse_t docresponse;
1756 comerror_t err;
1757 docrequest.OID = docinfo.OID;
1758 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1759 textToSummarise = docresponse.doc;
1760 }
1761 else {
1762 // in practice, this would not happen, because text is only
1763 // loaded with the [Text] command
[19311]1764 textToSummarise = options["text"];
[19312]1765 }
1766
1767 disp.expandstring("_cgiargq_",query);
1768 summary = summarise(textToSummarise,query,80);
[19311]1769 }
1770
[19312]1771 if (metadata_spanwrap) {
1772 summary = spanwrap_metatext(summary,docinfo.OID,"Summary");
1773 }
1774
1775 return summary;
[2967]1776}
Note: See TracBrowser for help on using the repository browser.