source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 30484

Last change on this file since 30484 was 30484, checked in by ak19, 8 years ago

Bugfix by DLConsulting's Andrew Holland submitted via email on 14/04/16 and merged into the code today.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 72.8 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
// File-scope state for wrapping metadata output in an HTML element.
// NOTE(review): metadata_wrap is not referenced in the visible part of this
// file; presumably it is toggled by the metadata-spanwrap / metadata-divwrap
// format commands -- confirm against the rest of the file.
35static bool metadata_wrap = false;
// Tag name that wrap_metatext() uses for the opening and closing wrapper tag.
36static text_t metadata_wrap_type = "";
37
38// a few function prototypes
39
40static text_t format_string (const text_t& collection, recptproto* collectproto,
41 ResultDocInfo_t &docinfo, displayclass &disp,
42 format_t *formatlistptr, text_tmap &options,
43 ostream& logout);
44
45static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
46 format_t *formatlistptr, text_tset &metadata, bool &getParents);
47
48static text_t format_summary (const text_t& collection, recptproto* collectproto,
49 ResultDocInfo_t &docinfo, displayclass &disp,
50 text_tmap &options, ostream& logout);
51
52static text_t format_text (const text_t& collection, recptproto* collectproto,
53 ResultDocInfo_t &docinfo, displayclass &disp,
54 text_tmap &options, ostream& logout);
55
56static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
57 recptproto* collectproto, ResultDocInfo_t &docinfo,
58 displayclass &disp, text_tmap &options,
59 ostream &logout);
60
61static text_t transform_to_GS3_format (format_t *formatlistptr, const text_t& nodeType);
62
63void metadata_t::clear() {
64 metaname.clear();
65 metacommand = mNone;
66 mqualifier.parent = pNone;
67 mqualifier.sibling = sNone;
68 mqualifier.child = cNone;
69 pre_tree_traverse.clear();
70 parentoptions.clear();
71 siblingoptions.clear();
72 childoptions.clear();
73}
74
75void decision_t::clear() {
76 command = dMeta;
77 meta.clear();
78 text.clear();
79}
80
81format_t::~format_t()
82{
83 if (nextptr != NULL) delete nextptr;
84 if (ifptr != NULL) delete ifptr;
85 if (elseptr != NULL) delete elseptr;
86 if (orptr != NULL) delete orptr;
87}
88
89void format_t::clear() {
90 command = comText;
91 decision.clear();
92 text.clear();
93 meta.clear();
94 nextptr = NULL;
95 ifptr = NULL;
96 elseptr = NULL;
97 orptr = NULL;
98}
99
100void formatinfo_t::clear() {
101 DocumentImages = false;
102 DocumentTitles = true;
103 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
104 DocumentContents = true;
105 DocumentArrowsBottom = true;
106 DocumentArrowsTop = false;
107 DocumentSearchResultLinks = false;
108 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
109 // DocumentButtons.push_back ("Expand Text");
110 // DocumentButtons.push_back ("Expand Contents");
111 DocumentButtons.push_back ("Detach");
112 DocumentButtons.push_back ("Highlight");
113 RelatedDocuments = "";
114 DocumentText = "[Text]";
115 formatstrings.erase (formatstrings.begin(), formatstrings.end());
116 DocumentUseHTML = false;
117 AllowExtendedOptions = false;
118}
119
120// simply checks to see if formatstring begins with a <td> tag
121bool is_table_content (const text_t &formatstring) {
122 text_t::const_iterator here = formatstring.begin();
123 text_t::const_iterator end = formatstring.end();
124
125 while (here != end) {
126 if (*here != ' ') {
127 if ((*here == '<') && ((here+3) < end)) {
128 if ((*(here+1) == 't' || *(here+1) == 'T') &&
129 (*(here+2) == 'd' || *(here+2) == 'D') &&
130 (*(here+3) == '>' || *(here+3) == ' '))
131 //|| *(here+3) == '\t' || *(here+3) == '\n'))
132 return true;
133 } else return false;
134 }
135 ++here;
136 }
137 return false;
138}
139
140bool is_table_content (const format_t *formatlistptr) {
141
142 if (formatlistptr == NULL) return false;
143
144 if (formatlistptr->command == comText)
145 return is_table_content (formatlistptr->text);
146
147 return false;
148}
149
150// returns false if key isn't in formatstringmap
151bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
152 text_t &formatstring) {
153
154 formatstring.clear();
155 text_tmap::const_iterator it = formatstringmap.find(key);
156 if (it == formatstringmap.end()) return false;
157 formatstring = (*it).second;
158 return true;
159}
160
161// tries to find "key1key2" then "key1" then "key2"
162bool get_formatstring (const text_t &key1, const text_t &key2,
163 const text_tmap &formatstringmap,
164 text_t &formatstring) {
165
166 formatstring.clear();
167 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
168 if (it != formatstringmap.end()) {
169 formatstring = (*it).second;
170 return true;
171 }
172 it = formatstringmap.find(key1);
173 if (it != formatstringmap.end()) {
174 formatstring = (*it).second;
175 return true;
176 }
177 it = formatstringmap.find(key2);
178 if (it != formatstringmap.end()) {
179 formatstring = (*it).second;
180 return true;
181 }
182 return false;
183}
184
185
186text_t remove_namespace(const text_t &meta_name) {
187 text_t::const_iterator end = meta_name.end();
188 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
189 if (it != end) {
190 return substr(it+1, end);
191 }
192
193 return meta_name;
194
195}
196// returns a date of form _format:date_(year, month, day)
197// input is date of type yyyy-?mm-?dd
198// at least the year must be present in date
199text_t format_date (const text_t &date) {
200
201 if (date.size() < 4) return "";
202
203 text_t::const_iterator datebegin = date.begin();
204
205 text_t year = substr (datebegin, datebegin+4);
206 int chars_seen_so_far = 4;
207 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
208
209 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
210 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
211
212 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
213 int imonth = month.getint();
214 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
215
216 chars_seen_so_far += 2;
217 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
218
219 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
220 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
221
222 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
223 if (day[0] == '0') day = substr (day.begin()+1, day.end());
224 int iday = day.getint();
225 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
226
227 return "_format:date_("+year+","+month+","+day+")";
228}
229
230// converts an iso639 language code to its English equivalent
231// should we be checking that the macro exists??
232text_t iso639 (const text_t &langcode) {
233 if (langcode.empty()) return "";
234 return "_iso639:iso639"+langcode+"_";
235}
236
237
238text_t get_href (const text_t &link) {
239
240 text_t href;
241
242 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
243 text_t::const_iterator end = link.end();
244 if (here == end) return g_EmptyText;
245
246 ++here;
247 while (here != end) {
248 if (*here == '"') break;
249 href.push_back(*here);
250 ++here;
251 }
252
253 return href;
254}
255
256//this function gets the information associated with the relation
257//metadata for the document associated with 'docinfo'. This relation
258//metadata consists of a line of pairs containing 'collection, document OID'
259//(this is the OID of the document related to the current document, and
260//the collection the related document belongs to). For each of these pairs
261//the title metadata is obtained and then an html link between the title
262//of the related doc and the document's position (the document will be
263//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
264//(where collection is the related documents collection, and OID is the
265//related documents OID). A list of these html links are made for as many
266//related documents as there are. This list is then returned. If there are
267//no related documents available for the current document then the string
268//'.. no related documents .. ' is returned.
269text_t get_related_docs(const text_t& collection, recptproto* collectproto,
270 ResultDocInfo_t &docinfo, ostream& logout){
271
272 text_tset metadata;
273
274 //insert the metadata we wish to collect
275 metadata.insert("dc.Relation");
276 metadata.insert("Title");
277 metadata.insert("Subject"); //for emails, where title data doesn't apply
278
279 FilterResponse_t response;
280 text_t relation = ""; //string for displaying relation metadata
281 text_t relationTitle = ""; //the related documents Title (or subject)
282 text_t relationOID = ""; //the related documents OID
283
284 //get the information associated with the metadata for current doc
285 if (get_info (docinfo.OID, collection, "", metadata,
286 false, collectproto, response, logout)) {
287
288 //if the relation metadata exists, store for displaying
289 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
290 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
291
292 //split relation data into pairs of collectionname,ID number
293 text_tarray relationpairs;
294 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
295
296 text_tarray::const_iterator currDoc = relationpairs.begin();
297 text_tarray::const_iterator lastDoc = relationpairs.end();
298
299 //iterate through the pairs to split and display
300 while(currDoc != lastDoc){
301
302 //split pairs into collectionname and ID
303 text_tarray relationdata;
304 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
305
306 //get first element in the array (collection)
307 text_tarray::const_iterator doc_data = relationdata.begin();
308 text_t document_collection = *doc_data;
309 ++doc_data; //increment to get next item in array (oid)
310 text_t document_OID = *doc_data;
311
312 //create html link to related document
313 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
314 relation += "&amp;cl=search&amp;d=" + document_OID;
315
316 //get the information associated with the metadata for related doc
317 if (get_info (document_OID, document_collection, "", metadata,
318 false, collectproto, response, logout)) {
319
320 //if title metadata doesn't exist, collect subject metadata
321 //if that doesn't exist, just call it 'related document'
322 if (!response.docInfo[0].metadata["Title"].values[0].empty())
323 relationTitle = response.docInfo[0].metadata["Title"].values[0];
324 else if (!response.docInfo[0].metadata["Subject"].values.empty())
325 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
326 else relationTitle = "RELATED DOCUMENT";
327
328 }
329
330 //link the related document's title to its page
331 relation += "\">" + relationTitle + "</a>";
332 relation += " (" + document_collection + ")<br>";
333
334 ++currDoc;
335 }
336 }
337
338 }
339
340 if(relation.empty()) //no relation data for documnet
341 relation = ".. no related documents .. ";
342
343 return relation;
344}
345
346
347
348static void get_parent_options (text_t &instring, metadata_t &metaoption) {
349
350 assert (instring.size() > 7);
351 if (instring.size() <= 7) return;
352
353 text_t meta, com, op;
354 bool inbraces = false;
355 bool inquotes = false;
356 bool foundcolon = false;
357 text_t::const_iterator here = instring.begin()+6;
358 text_t::const_iterator end = instring.end();
359 while (here != end) {
360 if (foundcolon) meta.push_back (*here);
361 else if (*here == '(') inbraces = true;
362 else if (*here == ')') inbraces = false;
363 else if (*here == '\'' && !inquotes) inquotes = true;
364 else if (*here == '\'' && inquotes) inquotes = false;
365 else if (*here == ':' && !inbraces) foundcolon = true;
366 else if (inquotes) op.push_back (*here);
367 else com.push_back (*here);
368 ++here;
369 }
370
371 instring = meta;
372 if (com.empty())
373 metaoption.mqualifier.parent = pImmediate;
374 else if (com == "Top")
375 metaoption.mqualifier.parent = pTop;
376 else if (com == "All") {
377 metaoption.mqualifier.parent = pAll;
378 metaoption.parentoptions = op;
379 }
380}
381
382
383static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
384
385 assert (instring.size() > 8);
386 if (instring.size() <= 8) return;
387 text_t meta, com, op;
388 bool inbraces = false;
389 bool inquotes = false;
390 bool foundcolon = false;
391 text_t::const_iterator here = instring.begin()+7;
392 text_t::const_iterator end = instring.end();
393 while (here != end) {
394 if (foundcolon) meta.push_back (*here);
395 else if (*here == '(') inbraces = true;
396 else if (*here == ')') inbraces = false;
397 else if (*here == '\'' && !inquotes) inquotes = true;
398 else if (*here == '\'' && inquotes) inquotes = false;
399 else if (*here == ':' && !inbraces) foundcolon = true;
400 else if (inquotes) op.push_back (*here);
401 else com.push_back (*here);
402 ++here;
403 }
404
405 instring = meta;
406 metaoption.siblingoptions.clear();
407
408 if (com.empty()) {
409 metaoption.mqualifier.sibling = sAll;
410 metaoption.siblingoptions = " ";
411 }
412 else if (com == "first") {
413 metaoption.mqualifier.sibling = sNum;
414 metaoption.siblingoptions = "0";
415 }
416 else if (com == "last") {
417 metaoption.mqualifier.sibling = sNum;
418 metaoption.siblingoptions = "-2"; // == last
419 }
420 else if (com.getint()>0) {
421 metaoption.mqualifier.sibling = sNum;
422 int pos = com.getint()-1;
423 metaoption.siblingoptions +=pos;
424 }
425 else {
426 metaoption.mqualifier.sibling = sAll;
427 metaoption.siblingoptions = op;
428 }
429}
430
431static void get_child_options (text_t &instring, metadata_t &metaoption) {
432
433 assert (instring.size() > 6);
434 if (instring.size() <= 6) return;
435 text_t meta, com, op;
436 bool inbraces = false;
437 bool inquotes = false;
438 bool foundcolon = false;
439 text_t::const_iterator here = instring.begin()+5;
440 text_t::const_iterator end = instring.end();
441 while (here != end) {
442 if (foundcolon) meta.push_back (*here);
443 else if (*here == '(') inbraces = true;
444 else if (*here == ')') inbraces = false;
445 else if (*here == '\'' && !inquotes) inquotes = true;
446 else if (*here == '\'' && inquotes) inquotes = false;
447 else if (*here == ':' && !inbraces) foundcolon = true;
448 else if (inquotes) op.push_back (*here);
449 else com.push_back (*here);
450 ++here;
451 }
452
453 instring = meta;
454 if (com.empty()) {
455 metaoption.mqualifier.child = cAll;
456 metaoption.childoptions = " ";
457 }
458 else if (com == "first") {
459 metaoption.mqualifier.child = cNum;
460 metaoption.childoptions = ".fc";
461 }
462 else if (com == "last") {
463 metaoption.mqualifier.child = cNum;
464 metaoption.childoptions = ".lc";
465 }
466 else if (com.getint()>0) {
467 metaoption.mqualifier.child = cNum;
468 metaoption.childoptions = "."+com;
469 }
470 else {
471 metaoption.mqualifier.child = cAll;
472 metaoption.childoptions = op;
473 }
474}
475
476
477static void get_truncate_options (text_t &instring, metadata_t &metaoption)
478{
479 assert (instring.size() > ((text_t) "truncate").size());
480 if (instring.size() <= ((text_t) "truncate").size()) return;
481 text_t meta, com;
482 bool inbraces = false;
483 bool foundcolon = false;
484 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
485 text_t::const_iterator end = instring.end();
486 while (here != end) {
487 if (foundcolon) meta.push_back (*here);
488 else if (*here == '(') inbraces = true;
489 else if (*here == ')') inbraces = false;
490 else if (*here == ':' && !inbraces) foundcolon = true;
491 else com.push_back (*here);
492 ++here;
493 }
494
495 instring = meta;
496
497 if (!com.empty())
498 {
499 metaoption.siblingoptions = com;
500 }
501 else
502 {
503 // Default is 100 characters if not specified
504 metaoption.siblingoptions = "100";
505 }
506}
507
508
509
510static void parse_meta (text_t &meta, metadata_t &metaoption,
511 text_tset &metadata, bool &getParents) {
512
513 // Look for the various format statement modifiers
514 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
515 // is irrelevant because this is not stored in metaoption.metacommand anyway
516 bool keep_trying = true;
517 while (keep_trying)
518 {
519 keep_trying = false;
520
521 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
522 {
523 metaoption.metacommand |= mCgiSafe;
524 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
525 keep_trying = true;
526 }
527 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
528 {
529 metaoption.metacommand |= mSpecial;
530 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
531 keep_trying = true;
532 }
533
534 // New "truncate" special formatting option
535 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
536 {
537 metaoption.metacommand |= mTruncate;
538 get_truncate_options (meta, metaoption);
539 keep_trying = true;
540 }
541 // New "htmlsafe" special formatting option
542 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
543 {
544 metaoption.metacommand |= mHTMLSafe;
545 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
546 keep_trying = true;
547 }
548 // New "xmlsafe" special formatting option
549 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
550 {
551 metaoption.metacommand |= mXMLSafe;
552 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
553 keep_trying = true;
554 }
555 // New "dmsafe" special formatting option
556 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
557 {
558 metaoption.metacommand |= mDMSafe;
559 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
560 keep_trying = true;
561 }
562 }
563
564 bool had_parent_or_child = true;
565 bool prev_was_parent = false;
566 bool prev_was_child = false;
567
568 while (had_parent_or_child) {
569 if (meta.size() > 7
570 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
571
572 // clear out sibling and child (cmd and options)
573 metaoption.metacommand &= ~(mChild|mSibling);
574 metaoption.childoptions.clear();
575 metaoption.siblingoptions.clear();
576
577 getParents = true;
578 metaoption.metacommand |= mParent;
579 get_parent_options (meta, metaoption);
580
581 if (prev_was_parent) {
582 metaoption.pre_tree_traverse += ".pr";
583 }
584 else if (prev_was_child) {
585 metaoption.pre_tree_traverse += ".fc";
586 }
587
588 prev_was_parent = true;
589 prev_was_child = false;
590 }
591 else if (meta.size() > 6
592 && (substr (meta.begin(), meta.begin()+5) == "child")) {
593
594 // clear out sibling and parent (cmd and options)
595 metaoption.metacommand &= ~(mParent|mSibling);
596 metaoption.parentoptions.clear();
597 metaoption.siblingoptions.clear();
598
599 metaoption.metacommand |= mChild;
600 get_child_options (meta, metaoption);
601 metadata.insert("contains");
602
603 if (prev_was_parent) {
604 metaoption.pre_tree_traverse += ".pr";
605 }
606 else if (prev_was_child) {
607 metaoption.pre_tree_traverse += ".fc";
608 }
609
610 prev_was_child = true;
611 prev_was_parent = false;
612 }
613 else {
614 prev_was_child = false;
615 prev_was_parent = false;
616 had_parent_or_child = false;
617 }
618 }
619
620 // parent/child can have sibling tacked on end also
621 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
622 metaoption.metacommand |= mSibling;
623 get_sibling_options (meta, metaoption);
624 }
625
626 // check for ex. which may occur in format statements
627 // remove "ex." prefix, but only if there are no other metadata set qualifiers
628 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
629 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
630 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
631
632 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
633 meta = substr (meta.begin()+3, meta.end());
634 }
635 metadata.insert (meta);
636 metaoption.metaname = meta;
637}
638
639static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
640 if (meta == "collection") {
641 // no qualifiers
642 metaoption.metaname = g_EmptyText;
643 return;
644 }
645 meta = substr (meta.begin()+11, meta.end());
646 metaoption.metaname = meta;
647
648}
649
650static void parse_meta (text_t &meta, format_t *formatlistptr,
651 text_tset &metadata, bool &getParents) {
652
653 // check for ex. which may occur in format statements
654 // remove "ex." prefix, but only if there are no other metadata set qualifiers
655 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
656 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
657 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
658
659 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
660 meta = substr (meta.begin()+3, meta.end());
661 }
662 if (meta == "link")
663 formatlistptr->command = comLink;
664 else if (meta == "/link")
665 formatlistptr->command = comEndLink;
666
667 // the metaname "srclink_file" is deprecated, use "srclinkFile"
668 else if (meta == "srclink") {
669 formatlistptr->command = comAssocLink;
670 formatlistptr->meta.metaname = "srclinkFile";
671 metadata.insert("srclinkFile");
672 }
673 else if (meta == "srchref") {
674 formatlistptr->command = comAssocLink;
675 formatlistptr->text = "href";
676 formatlistptr->meta.metaname = "srclinkFile";
677 metadata.insert("srclinkFile");
678 }
679 else if (meta == "/srclink") {
680 formatlistptr->command = comEndAssocLink;
681 formatlistptr->meta.metaname = "srclinkFile";
682 }
683 // and weblink etc
684 else if (meta == "href")
685 formatlistptr->command = comHref;
686
687 else if (meta == "num")
688 formatlistptr->command = comNum;
689
690 else if (meta == "icon")
691 formatlistptr->command = comIcon;
692
693 else if (meta == "Text")
694 formatlistptr->command = comDoc;
695
696 else if (meta == "RelatedDocuments")
697 formatlistptr->command = comRel;
698
699 else if (meta == "highlight")
700 formatlistptr->command = comHighlight;
701
702 else if (meta == "/highlight")
703 formatlistptr->command = comEndHighlight;
704
705 else if (meta == "metadata-spanwrap")
706 formatlistptr->command = comMetadataSpanWrap;
707
708 else if (meta == "/metadata-spanwrap")
709 formatlistptr->command = comEndMetadataSpanWrap;
710
711 else if (meta == "metadata-divwrap")
712 formatlistptr->command = comMetadataDivWrap;
713
714 else if (meta == "/metadata-divwrap")
715 formatlistptr->command = comEndMetadataDivWrap;
716
717 else if (meta == "Summary")
718 formatlistptr->command = comSummary;
719
720 else if (meta == "DocImage")
721 formatlistptr->command = comImage;
722
723 else if (meta == "DocTOC")
724 formatlistptr->command = comTOC;
725
726 else if (meta == "DocumentButtonDetach")
727 formatlistptr->command = comDocumentButtonDetach;
728
729 else if (meta == "DocumentButtonHighlight")
730 formatlistptr->command = comDocumentButtonHighlight;
731
732 else if (meta == "DocumentButtonExpandContents")
733 formatlistptr->command = comDocumentButtonExpandContents;
734
735 else if (meta == "DocumentButtonExpandText")
736 formatlistptr->command = comDocumentButtonExpandText;
737
738 else if (meta == "DocOID")
739 formatlistptr->command = comOID;
740 else if (meta == "DocTopOID")
741 formatlistptr->command = comTopOID;
742 else if (meta == "DocRank")
743 formatlistptr->command = comRank;
744 else if (meta == "DocTermsFreqTotal")
745 formatlistptr->command = comDocTermsFreqTotal;
746 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
747 formatlistptr->command = comCollection;
748 parse_coll_meta(meta, formatlistptr->meta);
749 }
750 else {
751 formatlistptr->command = comMeta;
752 parse_meta (meta, formatlistptr->meta, metadata, getParents);
753 }
754}
755
756
757static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
758 text_tset &metadata, bool &getParents) {
759
760 text_t text;
761 text_t::const_iterator here = formatstring.begin();
762 text_t::const_iterator end = formatstring.end();
763
764 while (here != end) {
765
766 if (*here == '\\') {
767 ++here;
768 if (here != end) text.push_back (*here);
769
770 } else if (*here == '{') {
771 if (!text.empty()) {
772 formatlistptr->command = comText;
773 formatlistptr->text = text;
774 formatlistptr->nextptr = new format_t();
775 formatlistptr = formatlistptr->nextptr;
776
777 text.clear();
778 }
779 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
780
781 formatlistptr->nextptr = new format_t();
782 formatlistptr = formatlistptr->nextptr;
783 if (here == end) break;
784 }
785 } else if (*here == '[') {
786 if (!text.empty()) {
787 formatlistptr->command = comText;
788 formatlistptr->text = text;
789 formatlistptr->nextptr = new format_t();
790 formatlistptr = formatlistptr->nextptr;
791
792 text.clear();
793 }
794 text_t meta;
795 ++here;
796 while (*here != ']') {
797 if (here == end) return false;
798 meta.push_back (*here);
799 ++here;
800 }
801 parse_meta (meta, formatlistptr, metadata, getParents);
802 formatlistptr->nextptr = new format_t();
803 formatlistptr = formatlistptr->nextptr;
804
805 } else
806 text.push_back (*here);
807
808 if (here != end) ++here;
809 }
810 if (!text.empty()) {
811 formatlistptr->command = comText;
812 formatlistptr->text = text;
813 formatlistptr->nextptr = new format_t();
814 formatlistptr = formatlistptr->nextptr;
815
816 }
817 return true;
818}
819
820
821static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
822 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
823
824 text_t::const_iterator it = findchar (here, end, '}');
825 if (it == end) return false;
826
827 text_t com = substr (here, it);
828 here = findchar (it, end, '{');
829 if (here == end) return false;
830 else ++here;
831
832 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
833 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
834 else return false;
835
836 int commacount = 0;
837 text_t text;
838 while (here != end) {
839
840 if (*here == '\\') {
841 ++here;
842 if (here != end) text.push_back(*here);
843
844 }
845
846 else if (*here == ',' || *here == '}' || *here == '{') {
847
848 if (formatlistptr->command == comOr) {
849 // the {Or}{this, or this, or this, or this} statement
850 format_t *or_ptr;
851
852 // find the next unused orptr
853 if (formatlistptr->orptr == NULL) {
854 formatlistptr->orptr = new format_t();
855 or_ptr = formatlistptr->orptr;
856 } else {
857 or_ptr = formatlistptr->orptr;
858 while (or_ptr->nextptr != NULL)
859 or_ptr = or_ptr->nextptr;
860 or_ptr->nextptr = new format_t();
861 or_ptr = or_ptr->nextptr;
862 }
863
864 if (!text.empty())
865 {
866 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
867 }
868
869 if (*here == '{')
870 {
871 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
872 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
873 // The latter can always be re-written:
874 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
875
876 if (!text.empty()) // already used up allocated format_t
877 {
878 // => allocate new one for detected action
879 or_ptr->nextptr = new format_t();
880 or_ptr = or_ptr->nextptr;
881 }
882 if (!parse_action(++here, end, or_ptr, metadata, getParents))
883 {
884 return false;
885 }
886 }
887 else
888 {
889 if (*here == '}') break;
890 }
891 text.clear();
892
893 }
894
895 // Parse an {If}{decide,do,else} statement
896 else {
897
898 // Read the decision component.
899 if (commacount == 0) {
900 // Decsion can be a metadata element, or a piece of text.
901 // Originally Stefan's code, updated 25/10/2000 by Gordon.
902
903 text_t::const_iterator beginbracket = text.begin();
904 text_t::const_iterator endbracket = (text.end() - 1);
905
906 // Decision is based on a metadata element
907 if ((*beginbracket == '[') && (*endbracket == ']')) {
908 // Ignore the surrounding square brackets
909 text_t meta = substr (beginbracket+1, endbracket);
910 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
911 ++commacount;
912 text.clear();
913 }
914
915 // Decision is a piece of text (probably a macro like _cgiargmode_).
916 else {
917
918 // hunt for any metadata in string, which might be uses in
919 // to test a condition, e.g. [Format] eq 'PDF'
920 format_t* dummyformat = new format_t();
921 // update which metadata fields needed
922 // (not interested in updatng formatlistptr)
923 parse_string (text, dummyformat, metadata, getParents);
924 delete dummyformat;
925
926 formatlistptr->decision.command = dText;
927 formatlistptr->decision.text = text;
928 ++commacount;
929 text.clear();
930 }
931 }
932
933 // Read the "then" and "else" components of the {If} statement.
934 else {
935 format_t** nextlistptr = NULL;
936 if (commacount == 1) {
937 nextlistptr = &formatlistptr->ifptr;
938 } else if (commacount == 2 ) {
939 nextlistptr = &formatlistptr->elseptr;
940 } else {
941 return false;
942 }
943
944 if (!text.empty()) {
945 if (*nextlistptr == NULL) {
946 *nextlistptr = new format_t();
947 } else {
948
949 // skip to the end of any format_t statements already added
950 while ((*nextlistptr)->nextptr != NULL)
951 {
952 nextlistptr = &(*nextlistptr)->nextptr;
953 }
954
955 (*nextlistptr)->nextptr = new format_t();
956 nextlistptr = &(*nextlistptr)->nextptr;
957 }
958
959 if (!parse_string (text, *nextlistptr, metadata, getParents))
960 {
961 return false;
962 }
963 text.clear();
964 }
965
966 if (*here == '{')
967 {
968 if (*nextlistptr == NULL) {
969 *nextlistptr = new format_t();
970 } else {
971 // skip to the end of any format_t statements already added
972 while ((*nextlistptr)->nextptr != NULL)
973 {
974 nextlistptr = &(*nextlistptr)->nextptr;
975 }
976
977 (*nextlistptr)->nextptr = new format_t();
978 nextlistptr = &(*nextlistptr)->nextptr;
979 }
980
981 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
982 {
983 return false;
984 }
985 }
986 else
987 {
988 if (*here == '}') break;
989 ++commacount;
990 }
991 }
992 }
993
994 } else text.push_back(*here);
995
996 if (here != end) ++here;
997 }
998
999 return true;
1000}
1001
1002
1003static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1004 const text_t metaname, int metapos=-1)
1005{
1006
1007 text_t tag_type = metadata_wrap_type;
1008 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1009
1010 text_t wrapped_metatext = "<" + tag_type + " ";
1011 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1012
1013 wrapped_metatext += "docoid=\"" + OID + "\" ";
1014 wrapped_metatext += "metaname=\"" + metaname + "\"";
1015
1016 if (metapos>=0) {
1017 text_t metapos_str = metapos;
1018 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1019 }
1020
1021 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1022
1023 return wrapped_metatext;
1024}
1025
1026
1027
1028bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1029 text_tset &metadata, bool &getParents) {
1030
1031 formatlistptr->clear();
1032 getParents = false;
1033
1034 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1035}
1036
1037// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1038// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1039
1040static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1041{
1042 text_t no_ns_metaname = remove_namespace(meta.metaname);
1043 text_t formatted_metatext;
1044 bool first = true;
1045
1046 const int start_i=0;
1047 const int end_i = metainfo.values.size()-1;
1048
1049 if (position == -1) { // all
1050 for (int i=start_i; i<=end_i; ++i) {
1051 if (!first) formatted_metatext += meta.siblingoptions;
1052
1053 text_t fresh_metatext;
1054
1055 if (meta.metacommand & mSpecial) {
1056 // special formatting
1057 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1058 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1059 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1060 }
1061 else fresh_metatext = metainfo.values[i];
1062
1063 // New "truncate" special formatting option
1064 if (meta.metacommand & mTruncate)
1065 {
1066 int truncate_length = meta.siblingoptions.getint();
1067 text_t truncated_value = fresh_metatext;
1068 if (truncated_value.size() > truncate_length)
1069 {
1070 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1071 }
1072 fresh_metatext = truncated_value;
1073 }
1074 // New "xmlsafe" special formatting option
1075 if (meta.metacommand & mXMLSafe)
1076 {
1077 // Make it XML-safe
1078 text_t text_xml_safe = "";
1079 text_t::const_iterator text_iterator = fresh_metatext.begin();
1080 while (text_iterator != fresh_metatext.end())
1081 {
1082 if (*text_iterator == '&') text_xml_safe += "&amp;";
1083 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1084 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1085 else text_xml_safe.push_back(*text_iterator);
1086 text_iterator++;
1087 }
1088 fresh_metatext = text_xml_safe;
1089 }
1090 // New "htmlsafe" special formatting option
1091 if (meta.metacommand & mHTMLSafe)
1092 {
1093 // Make it HTML-safe
1094 text_t text_html_safe = "";
1095 text_t::const_iterator text_iterator = fresh_metatext.begin();
1096 while (text_iterator != fresh_metatext.end())
1097 {
1098 if (*text_iterator == '&') text_html_safe += "&amp;";
1099 else if (*text_iterator == '<') text_html_safe += "&lt;";
1100 else if (*text_iterator == '>') text_html_safe += "&gt;";
1101 else if (*text_iterator == '"') text_html_safe += "&quot;";
1102 else text_html_safe.push_back(*text_iterator);
1103 text_iterator++;
1104 }
1105 fresh_metatext = text_html_safe;
1106 }
1107 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1108 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1109 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1110 {
1111 // Make it macro-safe
1112 text_t text_dm_safe = dm_safe(fresh_metatext);
1113 fresh_metatext = text_dm_safe;
1114 }
1115
1116 if (metadata_wrap) {
1117 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1118 }
1119 formatted_metatext += fresh_metatext;
1120
1121 first = false;
1122
1123 }
1124 } else {
1125 if (position == -2) { // end
1126 position = end_i;
1127 } else if (position < start_i || position > end_i) {
1128 return "";
1129 }
1130
1131 text_t fresh_metatext;
1132 if (meta.metacommand & mSpecial) {
1133
1134 // special formatting
1135 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1136 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1137 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1138 }
1139 else fresh_metatext = metainfo.values[position];
1140
1141 // New "truncate" special formatting option
1142 if (meta.metacommand & mTruncate)
1143 {
1144 int truncate_length = meta.siblingoptions.getint();
1145 text_t truncated_value = fresh_metatext;
1146 if (truncated_value.size() > truncate_length)
1147 {
1148 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1149 }
1150 fresh_metatext = truncated_value;
1151 }
1152 // New "xmlsafe" special formatting option
1153 if (meta.metacommand & mXMLSafe)
1154 {
1155 // Make it XML-safe
1156 text_t text_xml_safe = "";
1157 text_t::const_iterator text_iterator = fresh_metatext.begin();
1158 while (text_iterator != fresh_metatext.end())
1159 {
1160 if (*text_iterator == '&') text_xml_safe += "&amp;";
1161 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1162 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1163 else text_xml_safe.push_back(*text_iterator);
1164 text_iterator++;
1165 }
1166 fresh_metatext = text_xml_safe;
1167 }
1168 // New "htmlsafe" special formatting option
1169 if (meta.metacommand & mHTMLSafe)
1170 {
1171 // Make it HTML-safe
1172 text_t text_html_safe = "";
1173 text_t::const_iterator text_iterator = fresh_metatext.begin();
1174 while (text_iterator != fresh_metatext.end())
1175 {
1176 if (*text_iterator == '&') text_html_safe += "&amp;";
1177 else if (*text_iterator == '<') text_html_safe += "&lt;";
1178 else if (*text_iterator == '>') text_html_safe += "&gt;";
1179 else if (*text_iterator == '"') text_html_safe += "&quot;";
1180 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1181 else if (*text_iterator == ',') text_html_safe += "&#44;";
1182 else text_html_safe.push_back(*text_iterator);
1183 text_iterator++;
1184 }
1185 fresh_metatext = text_html_safe;
1186 }
1187 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1188 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1189 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1190 {
1191 // Make it macro-safe
1192 text_t text_dm_safe = dm_safe(fresh_metatext);
1193 fresh_metatext = text_dm_safe;
1194 }
1195
1196 if (metadata_wrap) {
1197 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1198 }
1199
1200 formatted_metatext += fresh_metatext;
1201 }
1202
1203 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1204 else return formatted_metatext;
1205}
1206
1207static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1208{
1209
1210 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1211
1212 switch (meta.mqualifier.parent) {
1213 case pNone:
1214 return "Nothing!!";
1215 break;
1216
1217 case pImmediate:
1218 if (parent != NULL) {
1219 text_t parent_oid = get_parent(docinfo.OID);
1220 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1221 }
1222 break;
1223
1224 case pTop:
1225 if (parent != NULL) {
1226 text_t parent_oid = get_parent(docinfo.OID);
1227
1228 while (parent->parent != NULL) {
1229 parent = parent->parent;
1230 parent_oid = get_parent(parent_oid);
1231 }
1232 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1233 }
1234 break;
1235
1236 case pAll:
1237 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1238 if (parent != NULL) {
1239 text_t parent_oid = get_parent(docinfo.OID);
1240
1241 text_tarray tmparray;
1242 while (parent != NULL) {
1243 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1244 parent = parent->parent;
1245 parent_oid = get_parent(parent_oid);
1246
1247 }
1248 // now join them up - use teh parent separator
1249 bool first = true;
1250 text_t tmp;
1251 text_tarray::reverse_iterator here = tmparray.rbegin();
1252 text_tarray::reverse_iterator end = tmparray.rend();
1253 while (here != end) {
1254 if (!first) tmp += meta.parentoptions;
1255 tmp += *here;
1256 first = false;
1257 ++here;
1258 }
1259 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1260 else return tmp;
1261 }
1262 }
1263 return "";
1264
1265}
1266
1267static text_t get_child_meta (const text_t& collection,
1268 recptproto* collectproto,
1269 ResultDocInfo_t &docinfo, displayclass &disp,
1270 const metadata_t &meta, text_tmap &options,
1271 ostream& logout, int siblings_values)
1272{
1273 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1274
1275 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1276 const text_t& child_metaname = meta.metaname;
1277 const text_t& child_field = meta.childoptions;
1278 text_tset child_metadata;
1279 child_metadata.insert(child_metaname);
1280
1281 FilterResponse_t child_response;
1282 if (meta.mqualifier.child == cNum) {
1283 // just one child
1284 //get the information associated with the metadata for child doc
1285 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1286 child_metadata, false, collectproto, child_response,
1287 logout)) return ""; // invalid child number
1288
1289 if (child_response.docInfo.empty()) return false; // no info for the child
1290
1291 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1292 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1293
1294 text_t child_metavalue
1295 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1296 return expand_metadata(child_metavalue,collection,collectproto,
1297 child_docinfo,disp,options,logout);
1298 }
1299
1300
1301 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1302
1303
1304 if (!pre_tree_trav.empty()) {
1305 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1306 FilterResponse_t trav_response;
1307
1308 text_tset trav_metadata;
1309 trav_metadata.insert("contains");
1310
1311 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1312 trav_metadata, false, collectproto, trav_response,
1313 logout)) return ""; // invalid pre_tree_trav
1314
1315 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1316
1317 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1318
1319 // use this for rest of routine
1320 docinfo = trav_docinfo;
1321 }
1322
1323 // we need to get all children
1324 text_t result = "";
1325 text_tarray children;
1326 text_t contains = docinfo.metadata["contains"].values[0];
1327 splitchar (contains.begin(), contains.end(), ';', children);
1328 text_tarray::const_iterator here = children.begin();
1329 text_tarray::const_iterator end = children.end();
1330 bool first = true;
1331 while (here !=end) {
1332 text_t oid = *here;
1333 here++;
1334 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1335
1336 //get the information associated with the metadata for child doc
1337 if (!get_info (oid, collection, "", child_metadata,
1338 false, collectproto, child_response, logout) ||
1339 child_response.docInfo.empty()) {
1340 first = false;
1341 continue;
1342 }
1343
1344
1345 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1346 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1347
1348 text_t child_metavalue
1349 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1350
1351
1352 if (!first) result += child_field;
1353 first = false;
1354 // need to do this here cos otherwise we are in the wrong document
1355 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1356 child_docinfo,disp,options,logout);
1357
1358 result += em;
1359 }
1360 return result;
1361
1362}
1363
1364static text_t get_meta (const text_t& collection, recptproto* collectproto,
1365 ResultDocInfo_t &docinfo, displayclass &disp,
1366 const metadata_t &meta, text_tmap &options,
1367 ostream& logout) {
1368
1369 // make sure we have the requested metadata
1370 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1371 if (it == docinfo.metadata.end()) return "";
1372
1373 int siblings_values = 0; // default is no siblings, just the first metadata available
1374 if (meta.metacommand & mSibling) {
1375 if (meta.mqualifier.sibling == sAll) {
1376 siblings_values = -1; //all
1377 } else if (meta.mqualifier.sibling == sNum) {
1378 siblings_values = meta.siblingoptions.getint();
1379 }
1380 }
1381 if (meta.metacommand & mParent) {
1382 return get_parent_meta(docinfo,meta,siblings_values);
1383 }
1384
1385 else if (meta.metacommand & mChild) {
1386 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1387 options,logout, siblings_values);
1388 }
1389 else if (meta.metacommand & mSibling) { // only siblings
1390 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1391 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1392 }
1393 else {
1394
1395 // straightforward metadata request (nothing fancy)
1396
1397 text_t classifier_metaname = docinfo.classifier_metadata_type;
1398 int metaname_index
1399 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1400 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1401 }
1402
1403 return "";
1404}
1405
1406static text_t get_or (const text_t& collection, recptproto* collectproto,
1407 ResultDocInfo_t &docinfo, displayclass &disp,
1408 format_t *orptr, text_tmap &options,
1409 ostream& logout) {
1410
1411 while (orptr != NULL) {
1412
1413 if (metadata_wrap) {
1414 // need to be a bit more careful about this
1415 // => test for it *without* spanwrap or divwrap, and if defined, then
1416 // got back and generate it again, this time with spanwrap/divwrap on
1417
1418 metadata_wrap = false;
1419 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1420 options, logout);
1421 metadata_wrap = true;
1422 if (!test_tmp.empty()) {
1423
1424 return format_string (collection,collectproto,docinfo, disp, orptr,
1425 options, logout);
1426 }
1427 }
1428 else {
1429 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1430 options, logout);
1431 if (!tmp.empty()) return tmp;
1432 }
1433
1434 orptr = orptr->nextptr;
1435 }
1436 return "";
1437}
1438
// Returns true when c is a space, tab, line feed or carriage return.
//
// The parameter is deliberately wider than char. The callers in this file
// pass elements of a text_t (outstring[pos]); with a plain char parameter
// a wider element would be narrowed on the way in, so a code unit such as
// 0x0120 could be mistaken for ' ' (0x20).
// NOTE(review): this assumes text_t elements are unsigned short - confirm
// against the text_t declaration.
static bool char_is_whitespace(const unsigned short c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1444
1445static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1446{
1447 int pos = start_pos;
1448 while (pos<outstring.size()) {
1449 if (!char_is_whitespace(outstring[pos])) {
1450 break;
1451 }
1452 ++pos;
1453 }
1454
1455 return pos;
1456}
1457
1458static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1459{
1460 int pos = start_pos;
1461 while (pos>=0) {
1462 if (!char_is_whitespace(outstring[pos])) {
1463 break;
1464 }
1465 --pos;
1466 }
1467
1468 return pos;
1469}
1470
1471static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1472{
1473 int pos = start_pos;
1474 while (pos>=0) {
1475 if (char_is_whitespace(outstring[pos])) {
1476 break;
1477 }
1478 --pos;
1479 }
1480
1481 return pos;
1482}
1483
1484
1485static int rscan_for(const text_t& outstring, const int start_pos,
1486 const char find_c)
1487{
1488 int pos = start_pos;
1489 while (pos>=0) {
1490 char c = outstring[pos];
1491 if (outstring[pos] == find_c) {
1492 break;
1493 }
1494 --pos;
1495 }
1496
1497 return pos;
1498}
1499
1500text_t extract_substr(const text_t& outstring, const int start_pos,
1501 const int end_pos)
1502{
1503 text_t extracted_str;
1504 extracted_str.clear();
1505
1506 for (int pos=start_pos; pos<=end_pos; ++pos) {
1507 extracted_str.push_back(outstring[pos]);
1508 }
1509
1510 return extracted_str;
1511}
1512
1513
1514static text_t expand_potential_metadata(const text_t& collection,
1515 recptproto* collectproto,
1516 ResultDocInfo_t &docinfo,
1517 displayclass &disp,
1518 const text_t& intext,
1519 text_tmap &options,
1520 ostream& logout)
1521{
1522 text_t outtext;
1523
1524 // Check if the string is empty. If it is, do not do anything
1525 if (intext.empty())
1526 {
1527 outtext = intext;
1528 return outtext;
1529 }
1530
1531 // decide if dealing with metadata or text
1532
1533 text_t::const_iterator beginbracket = intext.begin();
1534 text_t::const_iterator endbracket = (intext.end() - 1);
1535
1536 // Decision is based on a metadata element
1537 if ((*beginbracket == '[') && (*endbracket == ']')) {
1538 // Ignore the surrounding square brackets
1539 text_t meta_text = substr (beginbracket+1, endbracket);
1540
1541 if (meta_text == "Text") {
1542 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1543 }
1544 else {
1545
1546 text_tset metadata;
1547 bool getParents =false;
1548 metadata_t meta;
1549
1550 parse_meta (meta_text, meta, metadata, getParents);
1551 outtext
1552 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1553 }
1554
1555 }
1556 else {
1557 outtext = intext;
1558 }
1559
1560 return outtext;
1561}
1562
1563
1564static bool uses_expression(const text_t& collection, recptproto* collectproto,
1565 ResultDocInfo_t &docinfo,
1566 displayclass &disp,
1567 const text_t& outstring, text_t& lhs_expr,
1568 text_t& op_expr, text_t& rhs_expr,
1569 text_tmap &options,
1570 ostream& logout)
1571{
1572 // Note: the string may not be of the form: str1 op str2, however
1573 // to deterine this we have to process it on the assumption it is,
1574 // and if at any point an 'erroneous' value is encountered, return
1575 // false and let something else have a go at evaluating it
1576
1577 // Starting at the end of the string and working backwards ..
1578
1579 const int outstring_len = outstring.size();
1580
1581 // skip over white space
1582 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1583
1584 if (rhs_end<=0) {
1585 // no meaningful text or (rhs_end==0) no room for operator
1586 return false;
1587 }
1588
1589 // check for ' or " and then scan over token
1590 const char potential_quote = outstring[rhs_end];
1591 int rhs_start=rhs_end;
1592 bool quoted = false;
1593
1594 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1595 --rhs_end;
1596 // We have already decremented rhs_end, so we want to look at the current value for rhs_end (allows empty quoted strings)
1597 rhs_start = rscan_for(outstring,rhs_end,potential_quote) +1;
1598 quoted = true;
1599 }
1600 else {
1601 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1602 }
1603
1604 // Allow empty quoted strings
1605 if ((rhs_end-rhs_start)<0 && !quoted) {
1606 // no meaningful rhs expression
1607 return false;
1608 }
1609
1610 // form rhs_expr
1611 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1612
1613 // skip over white space
1614 const int to_whitespace = (quoted) ? 2 : 1;
1615
1616 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1617 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1618
1619 if ((op_end<0) && (op_start<0)) {
1620 // no meaningful expression operator
1621 return false;
1622 }
1623
1624 if (op_end-op_start<0) {
1625 // no meaningful expression operator
1626 return false;
1627 }
1628
1629 op_expr = extract_substr(outstring,op_start,op_end);
1630
1631
1632 // check for operator
1633 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1634 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1635
1636 // not a valid operator
1637 return false;
1638 }
1639
1640 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1641 if (lhs_end<0) {
1642 // no meaningful lhs expression
1643 return false;
1644 }
1645
1646 int lhs_start = scan_over_whitespace(outstring,0);
1647
1648 // form lhs_expr from remainder of string
1649 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1650
1651 // Now we know we have a valid expression, look up any
1652 // metadata terms
1653
1654 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1655 disp,rhs_expr,options,logout);
1656 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1657 disp,lhs_expr,options,logout);
1658
1659 return true;
1660}
1661
1662static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1663 const text_t& rhs_expr, ostream& logout)
1664{
1665 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1666 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1667 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1668 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1669 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1670 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1671 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1672 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1673 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1674 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1675 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1676 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1677 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1678 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1679 else {
1680 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1681 }
1682
1683 return false;
1684}
1685
1686
1687static text_t get_if (const text_t& collection, recptproto* collectproto,
1688 ResultDocInfo_t &docinfo, displayclass &disp,
1689 const decision_t &decision,
1690 format_t *ifptr, format_t *elseptr,
1691 text_tmap &options, ostream& logout)
1692{
1693 // If the decision component is a metadata element, then evaluate it
1694 // to see whether we output the "then" or the "else" clause
1695 if (decision.command == dMeta) {
1696
1697 bool store_metadata_wrap = metadata_wrap;
1698 metadata_wrap = 0;
1699
1700 // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1701 bool metadata_exists
1702 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1703 logout) != "");
1704
1705 metadata_wrap = store_metadata_wrap;
1706
1707 if (metadata_exists) {
1708 if (ifptr != NULL)
1709 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1710 options, logout);
1711 }
1712 else {
1713 if (elseptr != NULL)
1714 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1715 options, logout);
1716 }
1717 }
1718
1719 // If the decision component is text, then evaluate it (it is probably a
1720 // macro like _cgiargmode_) to decide what to output.
1721 else if (decision.command == dText) {
1722
1723 text_t outstring;
1724 disp.expandstring (decision.text, outstring);
1725
1726 // Check for if expression in form: str1 op str2
1727 // (such as [x] eq "y")
1728 text_t lhs_expr, op_expr, rhs_expr;
1729 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1730 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1731 if (ifptr != NULL) {
1732 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1733 options, logout);
1734 }
1735 else {
1736 return "";
1737 }
1738 } else {
1739 if (elseptr != NULL) {
1740 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1741 options, logout);
1742 }
1743 else {
1744 return "";
1745 }
1746 }
1747 }
1748
1749
1750 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1751 // a cgi argument macro that has not been set, it evaluates to itself.
1752 // Therefore, we have to say that a piece of text evaluates true if
1753 // it is non-empty and if it is a cgi argument evaulating to itself.
1754
1755 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1756 if (ifptr != NULL)
1757 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1758 options, logout);
1759 } else {
1760 if (elseptr != NULL)
1761 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1762 options, logout);
1763 }
1764 }
1765
1766 return "";
1767}
1768
1769bool includes_metadata(const text_t& text)
1770{
1771 text_t::const_iterator here = text.begin();
1772 text_t::const_iterator end = text.end();
1773
1774 char startbracket = '[';
1775 char endbracket = ']';
1776
1777 char bracket = startbracket;
1778 while (here != end) {
1779 if (*here == bracket) {
1780 if(bracket == startbracket) {
1781 // seen a [, next look for a ] to confirm it's metadata
1782 bracket = endbracket;
1783 } else if(bracket == endbracket) {
1784 // found [ ... ] in text, so we think it includes metadata
1785 return true;
1786 }
1787 }
1788 ++here;
1789 }
1790
1791 return false;
1792}
1793
1794static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1795 recptproto* collectproto,
1796 ResultDocInfo_t &docinfo,
1797 displayclass &disp, text_tmap &options,
1798 ostream &logout) {
1799
1800 if (includes_metadata(metavalue)) {
1801
1802 // text has embedded metadata in it => expand it
1803 FilterRequest_t request;
1804 FilterResponse_t response;
1805
1806 request.getParents = false;
1807
1808 format_t *expanded_formatlistptr = new format_t();
1809 parse_formatstring (metavalue, expanded_formatlistptr,
1810 request.fields, request.getParents);
1811
1812 // retrieve metadata
1813 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1814 collectproto, response, logout);
1815
1816 if (!response.docInfo.empty()) {
1817
1818 text_t expanded_metavalue
1819 = get_formatted_string(collection, collectproto,
1820 response.docInfo[0], disp, expanded_formatlistptr,
1821 options, logout);
1822
1823 return expanded_metavalue;
1824 }
1825 else {
1826 return metavalue;
1827 }
1828 }
1829 else {
1830
1831 return metavalue;
1832 }
1833}
1834
1835text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1836 displayclass &disp,
1837 text_t meta_name, ostream& logout) {
1838
1839 ColInfoResponse_t collectinfo;
1840 comerror_t err;
1841 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1842 text_t meta_value = "";
1843 text_t lang;
1844 disp.expandstring("_cgiargl_",lang);
1845 if (lang.empty()) {
1846 lang = "en";
1847 }
1848
1849 if (err == noError) {
1850 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1851 }
1852 return meta_value;
1853
1854
1855}
1856text_t format_string (const text_t& collection, recptproto* collectproto,
1857 ResultDocInfo_t &docinfo, displayclass &disp,
1858 format_t *formatlistptr, text_tmap &options,
1859 ostream& logout) {
1860
1861 if (formatlistptr == NULL) return "";
1862
1863 switch (formatlistptr->command) {
1864 case comOID:
1865 return docinfo.OID;
1866 case comTopOID:
1867 {
1868 text_t top_id;
1869 get_top(docinfo.OID, top_id);
1870 return top_id;
1871 }
1872 case comRank:
1873 return text_t(docinfo.ranking);
1874 case comText:
1875 return formatlistptr->text;
1876 case comLink:
1877 return options["link"];
1878 case comEndLink:
1879 {
1880 if (options["link"].empty()) return "";
1881 else return "</a>";
1882 }
1883 case comHref:
1884 return get_href(options["link"]);
1885 case comIcon:
1886 return options["icon"];
1887 case comNum:
1888 return docinfo.result_num;
1889 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1890 return get_related_docs(collection, collectproto, docinfo, logout);
1891
1892 case comSummary:
1893 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1894 case comAssocLink:
1895 {
1896 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1897 if (!link_filename.empty()) {
1898 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1899 if (formatlistptr->text == "href") {
1900 return href;
1901 }
1902 return "<a href=\""+ href + "\">";
1903 }
1904 return "";
1905 }
1906 case comEndAssocLink:
1907 {
1908 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1909 if (!link_filename.empty()) {
1910 return "</a>";
1911 }
1912 return "";
1913 }
1914 case comMeta:
1915 {
1916 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1917 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1918 }
1919
1920 case comDoc:
1921 return format_text(collection, collectproto, docinfo, disp, options, logout);
1922
1923 case comImage:
1924 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1925 case comTOC:
1926 return options["DocTOC"];
1927 case comDocumentButtonDetach:
1928 return options["DocumentButtonDetach"];
1929 case comDocumentButtonHighlight:
1930 return options["DocumentButtonHighlight"];
1931 case comDocumentButtonExpandContents:
1932 return options["DocumentButtonExpandContents"];
1933 case comDocumentButtonExpandText:
1934 return options["DocumentButtonExpandText"];
1935 case comHighlight:
1936 if (options["highlight"] == "1") return "<b>";
1937 break;
1938 case comEndHighlight:
1939 if (options["highlight"] == "1") return "</b>";
1940 break;
1941 case comMetadataSpanWrap:
1942 metadata_wrap=true; metadata_wrap_type="span"; return "";
1943 break;
1944 case comEndMetadataSpanWrap:
1945 metadata_wrap=false; metadata_wrap_type=""; return "";
1946 break;
1947 case comMetadataDivWrap:
1948 metadata_wrap=true; metadata_wrap_type="div"; return "";
1949 break;
1950 case comEndMetadataDivWrap:
1951 metadata_wrap=false; metadata_wrap_type=""; return "";
1952 break;
1953 case comIf:
1954 return get_if (collection, collectproto, docinfo, disp,
1955 formatlistptr->decision, formatlistptr->ifptr,
1956 formatlistptr->elseptr, options, logout);
1957 case comOr:
1958 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1959 options, logout);
1960 case comDocTermsFreqTotal:
1961 return docinfo.num_terms_matched;
1962 case comCollection:
1963 if (formatlistptr->meta.metaname == g_EmptyText) {
1964 return collection;
1965 }
1966 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1967
1968 }
1969 return "";
1970}
1971
1972text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1973 ResultDocInfo_t &docinfo, displayclass &disp,
1974 format_t *formatlistptr, text_tmap &options,
1975 ostream& logout) {
1976
1977 text_t ft;
1978 while (formatlistptr != NULL)
1979 {
1980 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1981 options, logout);
1982 formatlistptr = formatlistptr->nextptr;
1983 }
1984
1985 return ft;
1986}
1987
1988
1989// we have only preloaded the text in DocumentAction. But you may want
1990// to get the text in query, so copy what we have done with
1991// format_summary and get the text here. Probably is quite expensive?
1992text_t format_text (const text_t& collection, recptproto* collectproto,
1993 ResultDocInfo_t &docinfo, displayclass &disp,
1994 text_tmap &options, ostream& logout)
1995{
1996 text_t text;
1997
1998 if (!options["text"].empty()) {
1999 text = options["text"];
2000 }
2001 else {
2002 // get document text here
2003 DocumentRequest_t docrequest;
2004 DocumentResponse_t docresponse;
2005 comerror_t err;
2006 docrequest.OID = docinfo.OID;
2007 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2008 text = docresponse.doc;
2009 }
2010
2011 if (metadata_wrap) {
2012 text = wrap_metatext(text,docinfo.OID,"Text");
2013 }
2014
2015 return text;
2016}
2017
2018/* FUNCTION NAME: format_summary
2019 * DESC: this is invoked when a [Summary] special metadata is processed.
2020 * RETURNS: a query-biased summary for the document */
2021
2022text_t format_summary (const text_t& collection, recptproto* collectproto,
2023 ResultDocInfo_t &docinfo, displayclass &disp,
2024 text_tmap &options, ostream& logout) {
2025
2026 // GRB: added code here to ensure that the cstr (and other collections)
2027 // uses the document metadata item Summary, rather than compressing
2028 // the text of the document, processed via the methods in
2029 // summarise.cpp
2030
2031 text_t summary;
2032
2033 if (docinfo.metadata.count("Summary") > 0 &&
2034 docinfo.metadata["Summary"].values.size() > 0) {
2035 summary = docinfo.metadata["Summary"].values[0];
2036 }
2037 else {
2038
2039 text_t textToSummarise, query;
2040
2041 if(options["text"].empty()) { // get document text
2042 DocumentRequest_t docrequest;
2043 DocumentResponse_t docresponse;
2044 comerror_t err;
2045 docrequest.OID = docinfo.OID;
2046 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2047 textToSummarise = docresponse.doc;
2048 }
2049 else {
2050 // in practice, this would not happen, because text is only
2051 // loaded with the [Text] command
2052 textToSummarise = options["text"];
2053 }
2054
2055 disp.expandstring("_cgiargq_",query);
2056 summary = summarise(textToSummarise,query,80);
2057 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2058 }
2059
2060 summary.replace("'","&#039;");
2061 summary.replace("\n","&#013;");
2062
2063 if (metadata_wrap) {
2064 summary = wrap_metatext(summary,docinfo.OID,"Summary");
2065 }
2066
2067 return summary;
2068}
2069
2070//-------------- GS3 related functions --------------
2071// copy of the other uses_expression function, but without using the extra GS2-runtime-specific parameters
2072static bool uses_expression(const text_t& outstring, text_t& lhs_expr,
2073 text_t& op_expr, text_t& rhs_expr)
2074{
2075 // Note: the string may not be of the form: str1 op str2, however
2076 // to deterine this we have to process it on the assumption it is,
2077 // and if at any point an 'erroneous' value is encountered, return
2078 // false and let something else have a go at evaluating it
2079
2080 // Starting at the end of the string and working backwards ..
2081
2082 const int outstring_len = outstring.size();
2083
2084 // skip over white space
2085 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
2086
2087 if (rhs_end<=0) {
2088 // no meaningful text or (rhs_end==0) no room for operator
2089 return false;
2090 }
2091
2092 // check for ' or " and then scan over token
2093 const char potential_quote = outstring[rhs_end];
2094 int rhs_start=rhs_end;
2095 bool quoted = false;
2096
2097 if ((potential_quote == '\'') || (potential_quote == '\"')) {
2098 --rhs_end;
2099 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
2100 quoted = true;
2101 }
2102 else {
2103 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
2104 }
2105
2106 if ((rhs_end-rhs_start)<0) {
2107 // no meaningful rhs expression
2108 return false;
2109 }
2110
2111 // form rhs_expr
2112 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
2113
2114 // skip over white space
2115 const int to_whitespace = (quoted) ? 2 : 1;
2116
2117 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
2118 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
2119
2120 if ((op_end<0) && (op_start<0)) {
2121 // no meaningful expression operator
2122 return false;
2123 }
2124
2125 if (op_end-op_start<0) {
2126 // no meaningful expression operator
2127 return false;
2128 }
2129
2130 op_expr = extract_substr(outstring,op_start,op_end);
2131
2132
2133 // check for operator
2134 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
2135 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
2136
2137 // not a valid operator
2138 return false;
2139 }
2140
2141 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
2142 if (lhs_end<0) {
2143 // no meaningful lhs expression
2144 return false;
2145 }
2146
2147 int lhs_start = scan_over_whitespace(outstring,0);
2148
2149 // form lhs_expr from remainder of string
2150 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
2151
2152 return true;
2153}
2154
2155// [ex.Title] -> ex.Title
2156static text_t remove_bracket_bookends(const text_t &str) {
2157
2158 if(str[0] == '[' && str[str.size()-1] == ']') {
2159 return substr (str.begin()+1, str.end()-1);
2160 } else {
2161 return str;
2162 }
2163}
2164
2165static text_t get_gs3_if (const decision_t &decision, format_t *ifptr, format_t *elseptr, const text_t& nodeType)
2166{
2167 text_t ifstmt ="<gsf:switch>";
2168
2169
2170 if (decision.command == dMeta) {
2171 ifstmt += "<gsf:metadata name=\"";
2172 ifstmt += remove_bracket_bookends(decision.meta.metaname);
2173 ifstmt += "\"/>";
2174 ifstmt += "<gsf:when test=\"exists\">";
2175 }
2176
2177 else { //if(decision.command == dText)
2178
2179 text_t outstring = decision.text;
2180
2181 // Check for if expression in form: str1 op str2
2182 // (such as [x] eq "y")
2183 text_t lhs_expr, op_expr, rhs_expr;
2184 if (uses_expression(outstring,lhs_expr,op_expr,rhs_expr)) {
2185
2186 text_t if_operator = op_expr;
2187 if (op_expr == "eq" || op_expr == "==") {
2188 if_operator = "equals";
2189 } else if (op_expr == "ne" || op_expr == "!=") {
2190 if_operator = "notEquals";
2191 } else if (op_expr == "gt" || op_expr == ">") {
2192 if_operator = "greaterThan";
2193 } else if (op_expr == "lt" || op_expr == "<") {
2194 if_operator = "lessThan";
2195 } else if (op_expr == "ge" || op_expr == ">=") {
2196 if_operator = "greaterThanOrEquals";
2197 } else if (op_expr == "le" || op_expr == "<=") {
2198 if_operator = "lessThanOrEquals";
2199 } else if (op_expr == "sw") {
2200 if_operator = "startsWith";
2201 } else if (op_expr == "ew") {
2202 if_operator = "endsWith";
2203 }
2204
2205 ifstmt += "<gsf:metadata name=\"";
2206 ifstmt += remove_bracket_bookends(lhs_expr);
2207 ifstmt += "\"/>";
2208
2209 ifstmt += "<gsf:when test=\"";
2210 ifstmt += if_operator; // the test operator
2211 ifstmt += "\" test-value=\"";
2212 ifstmt += remove_bracket_bookends(rhs_expr); // the test-value
2213 ifstmt += "\">";
2214 }
2215 else {
2216 ifstmt += "<gsf:metadata name=\"";
2217 ifstmt += remove_bracket_bookends(decision.text);
2218 ifstmt += "\"/>";
2219 ifstmt += "<gsf:when test=\"exists\">";
2220 }
2221 }
2222
2223 // if portion
2224 text_t if_body = "";
2225 while(ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
2226 if_body += transform_to_GS3_format (ifptr, nodeType);
2227 ifptr = ifptr->nextptr;
2228 }
2229 ifstmt += if_body;
2230 ifstmt += "</gsf:when>";
2231
2232 // else portion
2233 if(elseptr != NULL) {
2234
2235 ifstmt += "<gsf:otherwise>";
2236 text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
2237 while(elseptr != NULL) {
2238 else_body += transform_to_GS3_format (elseptr, nodeType);
2239 elseptr = elseptr->nextptr;
2240 }
2241 ifstmt += else_body;
2242 ifstmt += "</gsf:otherwise>";
2243 }
2244
2245 ifstmt += "</gsf:switch>";
2246 return ifstmt;
2247}
2248
2249
2250static text_t get_gs3_or (format_t *orptr, const text_t& nodeType) {
2251 text_t result = "<gsf:choose-metadata>";
2252
2253 while(orptr != NULL) {
2254 text_t or_body = transform_to_GS3_format (orptr, nodeType);
2255 if (!or_body.empty()) {
2256 result += or_body;
2257 }
2258
2259 orptr = orptr->nextptr;
2260 }
2261 result += "</gsf:choose-metadata>";
2262 return result;
2263}
2264
2265// what about all the <td>? Does that get stored in formatlistptr, such as under the ->text field?
2266text_t get_GS3_formatstring (format_t *formatlistptr, const text_t& nodeType) {
2267 text_t result;
2268
2269 while (formatlistptr != NULL) {
2270 result += transform_to_GS3_format(formatlistptr, nodeType);
2271 formatlistptr = formatlistptr->nextptr;
2272 }
2273
2274 return result;
2275}
2276
2277text_t transform_to_GS3_format (format_t *formatlistptr, const text_t& nodeType) {
2278 if (formatlistptr == NULL) return "";
2279
2280 switch (formatlistptr->command) {
2281 case comOID:
2282 return "<gsf:OID/>";
2283 case comTopOID:
2284 return "<gsf:metadata name='OID' select='root' />"; // for now try this
2285 case comRank:
2286 return "<gsf:rank/>";
2287 case comText:
2288 return formatlistptr->text; // [text]? or any string that is not a command or reserved
2289 case comLink:
2290 if(nodeType == "classifier") {
2291 return "<gsf:link type='classifier'>";
2292 } else { // if nodeType is document or not set
2293 return "<gsf:link type='document'>";
2294 }
2295 case comEndLink:
2296 return "</gsf:link>";
2297 case comHref:
2298 return "<gsf:lib name=\"href\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2299 case comIcon:
2300 if(nodeType == "classifier") {
2301 return "<gsf:icon type='classifier'/>";
2302 } else { // if nodeType is document or not set
2303 return "<gsf:icon type='document'/>";
2304 }
2305 case comNum:
2306 return "<gsf:lib name=\"num\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2307 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
2308 return "<gsf:lib name=\"RelatedDocuments\"/>"; // output comment marking this as deprecated or to be implemented for GS3 in gslib xslt
2309 case comSummary:
2310 return "<gsf:lib name=\"Summary\"/>"; // in gslib xslt output comment marking this as to be implemented for GS3
2311 // need to invent this for GS3 based on what GS2 does
2312 case comAssocLink:
2313 return "<gsf:link type='source'>";
2314 case comEndAssocLink:
2315 return "</gsf:link>";
2316 case comMeta:
2317 return "<gsf:metadata name=\"" + formatlistptr->meta.metaname + "\" />";//?
2318 case comDoc:
2319 return "<gsf:text/>";
2320 case comImage: // the cover img seems to be handled by some magic code in GS3
2321 return "<gsf:lib name=\"image\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2322 case comTOC:
2323 return "<gsf:lib name=\"TOC\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2324 // need to think about whether an equivalent actually exists
2325 // return "<gsf:option name=\"TOC\" value=\"true\"/>"; // this is wrong
2326 case comDocumentButtonDetach:
2327 return "<gsf:lib name=\"DocumentButtonDetach\"/>"; // output comment marking this as deprecated in gslib xslt
2328 case comDocumentButtonHighlight:
2329 return "<gsf:lib name=\"DocumentButtonHighlight\"/>"; // output comment marking this as deprecated in gslib xslt
2330 case comDocumentButtonExpandContents:
2331 return "<gsf:lib name=\"DocumentButtonExpandContents\"/>"; // output comment marking this as deprecated in gslib xslt
2332 case comDocumentButtonExpandText:
2333 return "<gsf:lib name=\"DocumentButtonExpandText\"/>"; // output comment marking this as deprecated in gslib xslt
2334 case comHighlight:
2335 return "<span class=\"highlight\">";
2336 break;
2337 case comEndHighlight:
2338 return "</span>";
2339 break;
2340 case comMetadataSpanWrap:
2341 metadata_wrap=true; metadata_wrap_type="span"; return "";
2342 break;
2343 case comEndMetadataSpanWrap:
2344 metadata_wrap=false; metadata_wrap_type=""; return "";
2345 break;
2346 case comMetadataDivWrap:
2347 metadata_wrap=true; metadata_wrap_type="div"; return "";
2348 break;
2349 case comEndMetadataDivWrap:
2350 metadata_wrap=false; metadata_wrap_type=""; return "";
2351 break;
2352 case comIf:
2353 if(formatlistptr->decision.meta.metaname == "numleafdocs") {
2354 if(nodeType == "classifier") {
2355 text_t if_body = "";
2356 while(formatlistptr->ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
2357 if_body += transform_to_GS3_format (formatlistptr->ifptr, nodeType);
2358 formatlistptr->ifptr = formatlistptr->ifptr->nextptr;
2359 }
2360 return if_body;
2361 } else if(nodeType == "document") {
2362 text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
2363 while(formatlistptr->elseptr != NULL) {
2364 else_body += transform_to_GS3_format (formatlistptr->elseptr, nodeType);
2365 formatlistptr->elseptr = formatlistptr->elseptr->nextptr;
2366 }
2367 return else_body;
2368 }
2369 }
2370
2371 // if nodeType not specified as classifier or document, or if the If test is not for numleafdocs' existence
2372 return get_gs3_if (formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr, nodeType);
2373 case comOr:
2374 return get_gs3_or (formatlistptr->orptr, nodeType);
2375 //return "<gsf:choose-metadata>"+get_gs3_or (formatlistptr->orptr, nodeType)+"</gsf:choose-metadata>";
2376 case comDocTermsFreqTotal:
2377 return "<gsf:lib name=\"DocTermsFreqTotal\"/>";
2378 case comCollection: // trying to get all the metadata for a collection. How is this done in GS3???
2379 return "<gsf:lib name=\"collection\"/>";
2380 }
2381 return "";
2382}
Note: See TracBrowser for help on using the repository browser.