source: main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp@ 30465

Last change on this file since 30465 was 29042, checked in by ak19, 10 years ago

First of 2 part commit for improving FormatConversion from GS2 to GS3. formatconverter.exe now takes an additional optional parameter which can be documentNode or classifierNode. This then determines what the formatconverter.exe does when it sees an If test on the existence of the numleafdocs variable, since a positive test applies only to classifierNodes, while a negative test applies only to documentNodes. Further, [link][icon][link] should output something slightly different for classifierNodes than for documentNodes.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 72.5 KB
Line 
1/**********************************************************************
2 *
3 * formattools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "formattools.h"
27#include "cgiutils.h"
28#include "gsdltools.h"
29#include "recptprototools.h"
30#include "OIDtools.h"
31#include "summarise.h"
32
33#include <assert.h>
34
// File-scope state used when metadata output is wrapped in an HTML element.
// metadata_wrap_type is read by wrap_metatext() as the name of the wrapping
// tag. Where metadata_wrap is consulted is not visible in this part of the
// file -- presumably it is the on/off switch for the wrapping (TODO confirm).
static bool metadata_wrap = false;
static text_t metadata_wrap_type = "";

// a few function prototypes

// Forward declarations of file-local helpers, so that they can be referenced
// before their definitions appear further down the file.

static text_t format_string (const text_t& collection, recptproto* collectproto,
                             ResultDocInfo_t &docinfo, displayclass &disp,
                             format_t *formatlistptr, text_tmap &options,
                             ostream& logout);

// parse_action() and parse_string() call each other, hence the prototype.
static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
                          format_t *formatlistptr, text_tset &metadata, bool &getParents);

static text_t format_summary (const text_t& collection, recptproto* collectproto,
                              ResultDocInfo_t &docinfo, displayclass &disp,
                              text_tmap &options, ostream& logout);

static text_t format_text (const text_t& collection, recptproto* collectproto,
                           ResultDocInfo_t &docinfo, displayclass &disp,
                           text_tmap &options, ostream& logout);

static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
                              recptproto* collectproto, ResultDocInfo_t &docinfo,
                              displayclass &disp, text_tmap &options,
                              ostream &logout);

static text_t transform_to_GS3_format (format_t *formatlistptr, const text_t& nodeType);
62
63void metadata_t::clear() {
64 metaname.clear();
65 metacommand = mNone;
66 mqualifier.parent = pNone;
67 mqualifier.sibling = sNone;
68 mqualifier.child = cNone;
69 pre_tree_traverse.clear();
70 parentoptions.clear();
71 siblingoptions.clear();
72 childoptions.clear();
73}
74
75void decision_t::clear() {
76 command = dMeta;
77 meta.clear();
78 text.clear();
79}
80
81format_t::~format_t()
82{
83 if (nextptr != NULL) delete nextptr;
84 if (ifptr != NULL) delete ifptr;
85 if (elseptr != NULL) delete elseptr;
86 if (orptr != NULL) delete orptr;
87}
88
89void format_t::clear() {
90 command = comText;
91 decision.clear();
92 text.clear();
93 meta.clear();
94 nextptr = NULL;
95 ifptr = NULL;
96 elseptr = NULL;
97 orptr = NULL;
98}
99
100void formatinfo_t::clear() {
101 DocumentImages = false;
102 DocumentTitles = true;
103 DocumentHeading = "{Or}{[parent(Top):Title],[Title],untitled}<br>";
104 DocumentContents = true;
105 DocumentArrowsBottom = true;
106 DocumentArrowsTop = false;
107 DocumentSearchResultLinks = false;
108 DocumentButtons.erase (DocumentButtons.begin(), DocumentButtons.end());
109 // DocumentButtons.push_back ("Expand Text");
110 // DocumentButtons.push_back ("Expand Contents");
111 DocumentButtons.push_back ("Detach");
112 DocumentButtons.push_back ("Highlight");
113 RelatedDocuments = "";
114 DocumentText = "[Text]";
115 formatstrings.erase (formatstrings.begin(), formatstrings.end());
116 DocumentUseHTML = false;
117 AllowExtendedOptions = false;
118}
119
120// simply checks to see if formatstring begins with a <td> tag
121bool is_table_content (const text_t &formatstring) {
122 text_t::const_iterator here = formatstring.begin();
123 text_t::const_iterator end = formatstring.end();
124
125 while (here != end) {
126 if (*here != ' ') {
127 if ((*here == '<') && ((here+3) < end)) {
128 if ((*(here+1) == 't' || *(here+1) == 'T') &&
129 (*(here+2) == 'd' || *(here+2) == 'D') &&
130 (*(here+3) == '>' || *(here+3) == ' '))
131 //|| *(here+3) == '\t' || *(here+3) == '\n'))
132 return true;
133 } else return false;
134 }
135 ++here;
136 }
137 return false;
138}
139
140bool is_table_content (const format_t *formatlistptr) {
141
142 if (formatlistptr == NULL) return false;
143
144 if (formatlistptr->command == comText)
145 return is_table_content (formatlistptr->text);
146
147 return false;
148}
149
150// returns false if key isn't in formatstringmap
151bool get_formatstring (const text_t &key, const text_tmap &formatstringmap,
152 text_t &formatstring) {
153
154 formatstring.clear();
155 text_tmap::const_iterator it = formatstringmap.find(key);
156 if (it == formatstringmap.end()) return false;
157 formatstring = (*it).second;
158 return true;
159}
160
161// tries to find "key1key2" then "key1" then "key2"
162bool get_formatstring (const text_t &key1, const text_t &key2,
163 const text_tmap &formatstringmap,
164 text_t &formatstring) {
165
166 formatstring.clear();
167 text_tmap::const_iterator it = formatstringmap.find(key1 + key2);
168 if (it != formatstringmap.end()) {
169 formatstring = (*it).second;
170 return true;
171 }
172 it = formatstringmap.find(key1);
173 if (it != formatstringmap.end()) {
174 formatstring = (*it).second;
175 return true;
176 }
177 it = formatstringmap.find(key2);
178 if (it != formatstringmap.end()) {
179 formatstring = (*it).second;
180 return true;
181 }
182 return false;
183}
184
185
186text_t remove_namespace(const text_t &meta_name) {
187 text_t::const_iterator end = meta_name.end();
188 text_t::const_iterator it = findchar(meta_name.begin(), end, '.');
189 if (it != end) {
190 return substr(it+1, end);
191 }
192
193 return meta_name;
194
195}
196// returns a date of form _format:date_(year, month, day)
197// input is date of type yyyy-?mm-?dd
198// at least the year must be present in date
199text_t format_date (const text_t &date) {
200
201 if (date.size() < 4) return "";
202
203 text_t::const_iterator datebegin = date.begin();
204
205 text_t year = substr (datebegin, datebegin+4);
206 int chars_seen_so_far = 4;
207 if (chars_seen_so_far == date.size()) return "_format:date_("+year+")";
208
209 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
210 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+")";
211
212 text_t month = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
213 int imonth = month.getint();
214 if (imonth <= 0 || imonth > 12) return "_format:date_("+year+")";
215
216 chars_seen_so_far += 2;
217 if (chars_seen_so_far == date.size()) return "_format:date_("+year+","+month+")";
218
219 if (date[chars_seen_so_far] == '-') ++chars_seen_so_far ;
220 if (date.size() < chars_seen_so_far+2) return "_format:date_("+year+","+month+")";
221
222 text_t day = substr (datebegin+chars_seen_so_far, datebegin+chars_seen_so_far+2);
223 if (day[0] == '0') day = substr (day.begin()+1, day.end());
224 int iday = day.getint();
225 if (iday <= 0 || iday > 31) return "_format:date_("+year+","+month+")";
226
227 return "_format:date_("+year+","+month+","+day+")";
228}
229
230// converts an iso639 language code to its English equivalent
231// should we be checking that the macro exists??
232text_t iso639 (const text_t &langcode) {
233 if (langcode.empty()) return "";
234 return "_iso639:iso639"+langcode+"_";
235}
236
237
238text_t get_href (const text_t &link) {
239
240 text_t href;
241
242 text_t::const_iterator here = findchar(link.begin(), link.end(), '"');
243 text_t::const_iterator end = link.end();
244 if (here == end) return g_EmptyText;
245
246 ++here;
247 while (here != end) {
248 if (*here == '"') break;
249 href.push_back(*here);
250 ++here;
251 }
252
253 return href;
254}
255
256//this function gets the information associated with the relation
257//metadata for the document associated with 'docinfo'. This relation
258//metadata consists of a line of pairs containing 'collection, document OID'
259//(this is the OID of the document related to the current document, and
260//the collection the related document belongs to). For each of these pairs
261//the title metadata is obtained and then an html link between the title
262//of the related doc and the document's position (the document will be
263//found in "<a href=\"_httpdocument_&c=collection&cl=search&d=OID">
264//(where collection is the related documents collection, and OID is the
265//related documents OID). A list of these html links are made for as many
266//related documents as there are. This list is then returned. If there are
267//no related documents available for the current document then the string
268//'.. no related documents .. ' is returned.
269text_t get_related_docs(const text_t& collection, recptproto* collectproto,
270 ResultDocInfo_t &docinfo, ostream& logout){
271
272 text_tset metadata;
273
274 //insert the metadata we wish to collect
275 metadata.insert("dc.Relation");
276 metadata.insert("Title");
277 metadata.insert("Subject"); //for emails, where title data doesn't apply
278
279 FilterResponse_t response;
280 text_t relation = ""; //string for displaying relation metadata
281 text_t relationTitle = ""; //the related documents Title (or subject)
282 text_t relationOID = ""; //the related documents OID
283
284 //get the information associated with the metadata for current doc
285 if (get_info (docinfo.OID, collection, "", metadata,
286 false, collectproto, response, logout)) {
287
288 //if the relation metadata exists, store for displaying
289 if(!response.docInfo[0].metadata["dc.Relation"].values.empty()){
290 relationOID += response.docInfo[0].metadata["dc.Relation"].values[0];
291
292 //split relation data into pairs of collectionname,ID number
293 text_tarray relationpairs;
294 splitchar (relationOID.begin(), relationOID.end(), ' ', relationpairs);
295
296 text_tarray::const_iterator currDoc = relationpairs.begin();
297 text_tarray::const_iterator lastDoc = relationpairs.end();
298
299 //iterate through the pairs to split and display
300 while(currDoc != lastDoc){
301
302 //split pairs into collectionname and ID
303 text_tarray relationdata;
304 splitchar ((*currDoc).begin(), (*currDoc).end(), ',', relationdata);
305
306 //get first element in the array (collection)
307 text_tarray::const_iterator doc_data = relationdata.begin();
308 text_t document_collection = *doc_data;
309 ++doc_data; //increment to get next item in array (oid)
310 text_t document_OID = *doc_data;
311
312 //create html link to related document
313 relation += "<a href=\"_httpdocument_&amp;c=" + document_collection;
314 relation += "&amp;cl=search&amp;d=" + document_OID;
315
316 //get the information associated with the metadata for related doc
317 if (get_info (document_OID, document_collection, "", metadata,
318 false, collectproto, response, logout)) {
319
320 //if title metadata doesn't exist, collect subject metadata
321 //if that doesn't exist, just call it 'related document'
322 if (!response.docInfo[0].metadata["Title"].values[0].empty())
323 relationTitle = response.docInfo[0].metadata["Title"].values[0];
324 else if (!response.docInfo[0].metadata["Subject"].values.empty())
325 relationTitle = response.docInfo[0].metadata["Subject"].values[0];
326 else relationTitle = "RELATED DOCUMENT";
327
328 }
329
330 //link the related document's title to its page
331 relation += "\">" + relationTitle + "</a>";
332 relation += " (" + document_collection + ")<br>";
333
334 ++currDoc;
335 }
336 }
337
338 }
339
340 if(relation.empty()) //no relation data for documnet
341 relation = ".. no related documents .. ";
342
343 return relation;
344}
345
346
347
348static void get_parent_options (text_t &instring, metadata_t &metaoption) {
349
350 assert (instring.size() > 7);
351 if (instring.size() <= 7) return;
352
353 text_t meta, com, op;
354 bool inbraces = false;
355 bool inquotes = false;
356 bool foundcolon = false;
357 text_t::const_iterator here = instring.begin()+6;
358 text_t::const_iterator end = instring.end();
359 while (here != end) {
360 if (foundcolon) meta.push_back (*here);
361 else if (*here == '(') inbraces = true;
362 else if (*here == ')') inbraces = false;
363 else if (*here == '\'' && !inquotes) inquotes = true;
364 else if (*here == '\'' && inquotes) inquotes = false;
365 else if (*here == ':' && !inbraces) foundcolon = true;
366 else if (inquotes) op.push_back (*here);
367 else com.push_back (*here);
368 ++here;
369 }
370
371 instring = meta;
372 if (com.empty())
373 metaoption.mqualifier.parent = pImmediate;
374 else if (com == "Top")
375 metaoption.mqualifier.parent = pTop;
376 else if (com == "All") {
377 metaoption.mqualifier.parent = pAll;
378 metaoption.parentoptions = op;
379 }
380}
381
382
383static void get_sibling_options (text_t &instring, metadata_t &metaoption) {
384
385 assert (instring.size() > 8);
386 if (instring.size() <= 8) return;
387 text_t meta, com, op;
388 bool inbraces = false;
389 bool inquotes = false;
390 bool foundcolon = false;
391 text_t::const_iterator here = instring.begin()+7;
392 text_t::const_iterator end = instring.end();
393 while (here != end) {
394 if (foundcolon) meta.push_back (*here);
395 else if (*here == '(') inbraces = true;
396 else if (*here == ')') inbraces = false;
397 else if (*here == '\'' && !inquotes) inquotes = true;
398 else if (*here == '\'' && inquotes) inquotes = false;
399 else if (*here == ':' && !inbraces) foundcolon = true;
400 else if (inquotes) op.push_back (*here);
401 else com.push_back (*here);
402 ++here;
403 }
404
405 instring = meta;
406 metaoption.siblingoptions.clear();
407
408 if (com.empty()) {
409 metaoption.mqualifier.sibling = sAll;
410 metaoption.siblingoptions = " ";
411 }
412 else if (com == "first") {
413 metaoption.mqualifier.sibling = sNum;
414 metaoption.siblingoptions = "0";
415 }
416 else if (com == "last") {
417 metaoption.mqualifier.sibling = sNum;
418 metaoption.siblingoptions = "-2"; // == last
419 }
420 else if (com.getint()>0) {
421 metaoption.mqualifier.sibling = sNum;
422 int pos = com.getint()-1;
423 metaoption.siblingoptions +=pos;
424 }
425 else {
426 metaoption.mqualifier.sibling = sAll;
427 metaoption.siblingoptions = op;
428 }
429}
430
431static void get_child_options (text_t &instring, metadata_t &metaoption) {
432
433 assert (instring.size() > 6);
434 if (instring.size() <= 6) return;
435 text_t meta, com, op;
436 bool inbraces = false;
437 bool inquotes = false;
438 bool foundcolon = false;
439 text_t::const_iterator here = instring.begin()+5;
440 text_t::const_iterator end = instring.end();
441 while (here != end) {
442 if (foundcolon) meta.push_back (*here);
443 else if (*here == '(') inbraces = true;
444 else if (*here == ')') inbraces = false;
445 else if (*here == '\'' && !inquotes) inquotes = true;
446 else if (*here == '\'' && inquotes) inquotes = false;
447 else if (*here == ':' && !inbraces) foundcolon = true;
448 else if (inquotes) op.push_back (*here);
449 else com.push_back (*here);
450 ++here;
451 }
452
453 instring = meta;
454 if (com.empty()) {
455 metaoption.mqualifier.child = cAll;
456 metaoption.childoptions = " ";
457 }
458 else if (com == "first") {
459 metaoption.mqualifier.child = cNum;
460 metaoption.childoptions = ".fc";
461 }
462 else if (com == "last") {
463 metaoption.mqualifier.child = cNum;
464 metaoption.childoptions = ".lc";
465 }
466 else if (com.getint()>0) {
467 metaoption.mqualifier.child = cNum;
468 metaoption.childoptions = "."+com;
469 }
470 else {
471 metaoption.mqualifier.child = cAll;
472 metaoption.childoptions = op;
473 }
474}
475
476
477static void get_truncate_options (text_t &instring, metadata_t &metaoption)
478{
479 assert (instring.size() > ((text_t) "truncate").size());
480 if (instring.size() <= ((text_t) "truncate").size()) return;
481 text_t meta, com;
482 bool inbraces = false;
483 bool foundcolon = false;
484 text_t::const_iterator here = instring.begin() + ((text_t) "truncate").size();
485 text_t::const_iterator end = instring.end();
486 while (here != end) {
487 if (foundcolon) meta.push_back (*here);
488 else if (*here == '(') inbraces = true;
489 else if (*here == ')') inbraces = false;
490 else if (*here == ':' && !inbraces) foundcolon = true;
491 else com.push_back (*here);
492 ++here;
493 }
494
495 instring = meta;
496
497 if (!com.empty())
498 {
499 metaoption.siblingoptions = com;
500 }
501 else
502 {
503 // Default is 100 characters if not specified
504 metaoption.siblingoptions = "100";
505 }
506}
507
508
509
510static void parse_meta (text_t &meta, metadata_t &metaoption,
511 text_tset &metadata, bool &getParents) {
512
513 // Look for the various format statement modifiers
514 // This needs to be done in a loop otherwise not all combinations will be accepted, but actually the order
515 // is irrelevant because this is not stored in metaoption.metacommand anyway
516 bool keep_trying = true;
517 while (keep_trying)
518 {
519 keep_trying = false;
520
521 if (meta.size() > ((text_t) "cgisafe:").size() && starts_with(meta, "cgisafe:"))
522 {
523 metaoption.metacommand |= mCgiSafe;
524 meta = substr(meta.begin() + ((text_t) "cgisafe:").size(), meta.end());
525 keep_trying = true;
526 }
527 if (meta.size() > ((text_t) "format:").size() && starts_with(meta, "format:"))
528 {
529 metaoption.metacommand |= mSpecial;
530 meta = substr(meta.begin() + ((text_t) "format:").size(), meta.end());
531 keep_trying = true;
532 }
533
534 // New "truncate" special formatting option
535 if (meta.size() > ((text_t) "truncate").size() && starts_with(meta, "truncate")) // No colons due to truncate(X)
536 {
537 metaoption.metacommand |= mTruncate;
538 get_truncate_options (meta, metaoption);
539 keep_trying = true;
540 }
541 // New "htmlsafe" special formatting option
542 if (meta.size() > ((text_t) "htmlsafe:").size() && starts_with(meta, "htmlsafe:"))
543 {
544 metaoption.metacommand |= mHTMLSafe;
545 meta = substr(meta.begin() + ((text_t) "htmlsafe:").size(), meta.end());
546 keep_trying = true;
547 }
548 // New "xmlsafe" special formatting option
549 if (meta.size() > ((text_t) "xmlsafe:").size() && starts_with(meta, "xmlsafe:"))
550 {
551 metaoption.metacommand |= mXMLSafe;
552 meta = substr(meta.begin() + ((text_t) "xmlsafe:").size(), meta.end());
553 keep_trying = true;
554 }
555 // New "dmsafe" special formatting option
556 if (meta.size() > ((text_t) "dmsafe:").size() && starts_with(meta, "dmsafe:"))
557 {
558 metaoption.metacommand |= mDMSafe;
559 meta = substr(meta.begin() + ((text_t) "dmsafe:").size(), meta.end());
560 keep_trying = true;
561 }
562 }
563
564 bool had_parent_or_child = true;
565 bool prev_was_parent = false;
566 bool prev_was_child = false;
567
568 while (had_parent_or_child) {
569 if (meta.size() > 7
570 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
571
572 // clear out sibling and child (cmd and options)
573 metaoption.metacommand &= ~(mChild|mSibling);
574 metaoption.childoptions.clear();
575 metaoption.siblingoptions.clear();
576
577 getParents = true;
578 metaoption.metacommand |= mParent;
579 get_parent_options (meta, metaoption);
580
581 if (prev_was_parent) {
582 metaoption.pre_tree_traverse += ".pr";
583 }
584 else if (prev_was_child) {
585 metaoption.pre_tree_traverse += ".fc";
586 }
587
588 prev_was_parent = true;
589 prev_was_child = false;
590 }
591 else if (meta.size() > 6
592 && (substr (meta.begin(), meta.begin()+5) == "child")) {
593
594 // clear out sibling and parent (cmd and options)
595 metaoption.metacommand &= ~(mParent|mSibling);
596 metaoption.parentoptions.clear();
597 metaoption.siblingoptions.clear();
598
599 metaoption.metacommand |= mChild;
600 get_child_options (meta, metaoption);
601 metadata.insert("contains");
602
603 if (prev_was_parent) {
604 metaoption.pre_tree_traverse += ".pr";
605 }
606 else if (prev_was_child) {
607 metaoption.pre_tree_traverse += ".fc";
608 }
609
610 prev_was_child = true;
611 prev_was_parent = false;
612 }
613 else {
614 prev_was_child = false;
615 prev_was_parent = false;
616 had_parent_or_child = false;
617 }
618 }
619
620 // parent/child can have sibling tacked on end also
621 if (meta.size() > 8 && (substr (meta.begin(), meta.begin()+7) == "sibling")) {
622 metaoption.metacommand |= mSibling;
623 get_sibling_options (meta, metaoption);
624 }
625
626 // check for ex. which may occur in format statements
627 // remove "ex." prefix, but only if there are no other metadata set qualifiers
628 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
629 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
630 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
631
632 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
633 meta = substr (meta.begin()+3, meta.end());
634 }
635 metadata.insert (meta);
636 metaoption.metaname = meta;
637}
638
639static void parse_coll_meta(text_t &meta, metadata_t &metaoption) {
640 if (meta == "collection") {
641 // no qualifiers
642 metaoption.metaname = g_EmptyText;
643 return;
644 }
645 meta = substr (meta.begin()+11, meta.end());
646 metaoption.metaname = meta;
647
648}
649
650static void parse_meta (text_t &meta, format_t *formatlistptr,
651 text_tset &metadata, bool &getParents) {
652
653 // check for ex. which may occur in format statements
654 // remove "ex." prefix, but only if there are no other metadata set qualifiers
655 // in the metaname, since we want to retain prefixes like "ex.dc." as-is
656 text_t::iterator period = findchar(meta.begin(), meta.end(), '.');
657 text_t::iterator lastperiod = findlastchar(meta.begin(), meta.end(), '.');
658
659 if (meta.size()>3 && (substr(meta.begin(), meta.begin()+3) == "ex.") && period == lastperiod) {
660 meta = substr (meta.begin()+3, meta.end());
661 }
662 if (meta == "link")
663 formatlistptr->command = comLink;
664 else if (meta == "/link")
665 formatlistptr->command = comEndLink;
666
667 // the metaname "srclink_file" is deprecated, use "srclinkFile"
668 else if (meta == "srclink") {
669 formatlistptr->command = comAssocLink;
670 formatlistptr->meta.metaname = "srclinkFile";
671 metadata.insert("srclinkFile");
672 }
673 else if (meta == "srchref") {
674 formatlistptr->command = comAssocLink;
675 formatlistptr->text = "href";
676 formatlistptr->meta.metaname = "srclinkFile";
677 metadata.insert("srclinkFile");
678 }
679 else if (meta == "/srclink") {
680 formatlistptr->command = comEndAssocLink;
681 formatlistptr->meta.metaname = "srclinkFile";
682 }
683 // and weblink etc
684 else if (meta == "href")
685 formatlistptr->command = comHref;
686
687 else if (meta == "num")
688 formatlistptr->command = comNum;
689
690 else if (meta == "icon")
691 formatlistptr->command = comIcon;
692
693 else if (meta == "Text")
694 formatlistptr->command = comDoc;
695
696 else if (meta == "RelatedDocuments")
697 formatlistptr->command = comRel;
698
699 else if (meta == "highlight")
700 formatlistptr->command = comHighlight;
701
702 else if (meta == "/highlight")
703 formatlistptr->command = comEndHighlight;
704
705 else if (meta == "metadata-spanwrap")
706 formatlistptr->command = comMetadataSpanWrap;
707
708 else if (meta == "/metadata-spanwrap")
709 formatlistptr->command = comEndMetadataSpanWrap;
710
711 else if (meta == "metadata-divwrap")
712 formatlistptr->command = comMetadataDivWrap;
713
714 else if (meta == "/metadata-divwrap")
715 formatlistptr->command = comEndMetadataDivWrap;
716
717 else if (meta == "Summary")
718 formatlistptr->command = comSummary;
719
720 else if (meta == "DocImage")
721 formatlistptr->command = comImage;
722
723 else if (meta == "DocTOC")
724 formatlistptr->command = comTOC;
725
726 else if (meta == "DocumentButtonDetach")
727 formatlistptr->command = comDocumentButtonDetach;
728
729 else if (meta == "DocumentButtonHighlight")
730 formatlistptr->command = comDocumentButtonHighlight;
731
732 else if (meta == "DocumentButtonExpandContents")
733 formatlistptr->command = comDocumentButtonExpandContents;
734
735 else if (meta == "DocumentButtonExpandText")
736 formatlistptr->command = comDocumentButtonExpandText;
737
738 else if (meta == "DocOID")
739 formatlistptr->command = comOID;
740 else if (meta == "DocTopOID")
741 formatlistptr->command = comTopOID;
742 else if (meta == "DocRank")
743 formatlistptr->command = comRank;
744 else if (meta == "DocTermsFreqTotal")
745 formatlistptr->command = comDocTermsFreqTotal;
746 else if (meta.size() >= 10 && (substr(meta.begin(), meta.begin()+10) == "collection")) {
747 formatlistptr->command = comCollection;
748 parse_coll_meta(meta, formatlistptr->meta);
749 }
750 else {
751 formatlistptr->command = comMeta;
752 parse_meta (meta, formatlistptr->meta, metadata, getParents);
753 }
754}
755
756
757static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
758 text_tset &metadata, bool &getParents) {
759
760 text_t text;
761 text_t::const_iterator here = formatstring.begin();
762 text_t::const_iterator end = formatstring.end();
763
764 while (here != end) {
765
766 if (*here == '\\') {
767 ++here;
768 if (here != end) text.push_back (*here);
769
770 } else if (*here == '{') {
771 if (!text.empty()) {
772 formatlistptr->command = comText;
773 formatlistptr->text = text;
774 formatlistptr->nextptr = new format_t();
775 formatlistptr = formatlistptr->nextptr;
776
777 text.clear();
778 }
779 if (parse_action (++here, end, formatlistptr, metadata, getParents)) {
780
781 formatlistptr->nextptr = new format_t();
782 formatlistptr = formatlistptr->nextptr;
783 if (here == end) break;
784 }
785 } else if (*here == '[') {
786 if (!text.empty()) {
787 formatlistptr->command = comText;
788 formatlistptr->text = text;
789 formatlistptr->nextptr = new format_t();
790 formatlistptr = formatlistptr->nextptr;
791
792 text.clear();
793 }
794 text_t meta;
795 ++here;
796 while (*here != ']') {
797 if (here == end) return false;
798 meta.push_back (*here);
799 ++here;
800 }
801 parse_meta (meta, formatlistptr, metadata, getParents);
802 formatlistptr->nextptr = new format_t();
803 formatlistptr = formatlistptr->nextptr;
804
805 } else
806 text.push_back (*here);
807
808 if (here != end) ++here;
809 }
810 if (!text.empty()) {
811 formatlistptr->command = comText;
812 formatlistptr->text = text;
813 formatlistptr->nextptr = new format_t();
814 formatlistptr = formatlistptr->nextptr;
815
816 }
817 return true;
818}
819
820
821static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
822 format_t *formatlistptr, text_tset &metadata, bool &getParents) {
823
824 text_t::const_iterator it = findchar (here, end, '}');
825 if (it == end) return false;
826
827 text_t com = substr (here, it);
828 here = findchar (it, end, '{');
829 if (here == end) return false;
830 else ++here;
831
832 if (com == "If" || com == "if" || com == "IF") formatlistptr->command = comIf;
833 else if (com == "Or" || com == "or" || com == "OR") formatlistptr->command = comOr;
834 else return false;
835
836 int commacount = 0;
837 text_t text;
838 while (here != end) {
839
840 if (*here == '\\') {
841 ++here;
842 if (here != end) text.push_back(*here);
843
844 }
845
846 else if (*here == ',' || *here == '}' || *here == '{') {
847
848 if (formatlistptr->command == comOr) {
849 // the {Or}{this, or this, or this, or this} statement
850 format_t *or_ptr;
851
852 // find the next unused orptr
853 if (formatlistptr->orptr == NULL) {
854 formatlistptr->orptr = new format_t();
855 or_ptr = formatlistptr->orptr;
856 } else {
857 or_ptr = formatlistptr->orptr;
858 while (or_ptr->nextptr != NULL)
859 or_ptr = or_ptr->nextptr;
860 or_ptr->nextptr = new format_t();
861 or_ptr = or_ptr->nextptr;
862 }
863
864 if (!text.empty())
865 {
866 if (!parse_string(text, or_ptr, metadata, getParents)) { return false; }
867 }
868
869 if (*here == '{')
870 {
871 // Supports: {Or}{[Booktitle],[Title],{If}{[XXXX],aaa,bbb}}
872 // but not : {Or}{[Booktitle],[Title]{If}{[XXXX],aaa,bbb}}
873 // The latter can always be re-written:
874 // {Or}{[Booktitle],{If}{[Title],[Title]{If}{[XXXX],aaa,bbb}}}
875
876 if (!text.empty()) // already used up allocated format_t
877 {
878 // => allocate new one for detected action
879 or_ptr->nextptr = new format_t();
880 or_ptr = or_ptr->nextptr;
881 }
882 if (!parse_action(++here, end, or_ptr, metadata, getParents))
883 {
884 return false;
885 }
886 }
887 else
888 {
889 if (*here == '}') break;
890 }
891 text.clear();
892
893 }
894
895 // Parse an {If}{decide,do,else} statement
896 else {
897
898 // Read the decision component.
899 if (commacount == 0) {
900 // Decsion can be a metadata element, or a piece of text.
901 // Originally Stefan's code, updated 25/10/2000 by Gordon.
902
903 text_t::const_iterator beginbracket = text.begin();
904 text_t::const_iterator endbracket = (text.end() - 1);
905
906 // Decision is based on a metadata element
907 if ((*beginbracket == '[') && (*endbracket == ']')) {
908 // Ignore the surrounding square brackets
909 text_t meta = substr (beginbracket+1, endbracket);
910 parse_meta (meta, formatlistptr->decision.meta, metadata, getParents);
911 ++commacount;
912 text.clear();
913 }
914
915 // Decision is a piece of text (probably a macro like _cgiargmode_).
916 else {
917
918 // hunt for any metadata in string, which might be uses in
919 // to test a condition, e.g. [Format] eq 'PDF'
920 format_t* dummyformat = new format_t();
921 // update which metadata fields needed
922 // (not interested in updatng formatlistptr)
923 parse_string (text, dummyformat, metadata, getParents);
924 delete dummyformat;
925
926 formatlistptr->decision.command = dText;
927 formatlistptr->decision.text = text;
928 ++commacount;
929 text.clear();
930 }
931 }
932
933 // Read the "then" and "else" components of the {If} statement.
934 else {
935 format_t** nextlistptr = NULL;
936 if (commacount == 1) {
937 nextlistptr = &formatlistptr->ifptr;
938 } else if (commacount == 2 ) {
939 nextlistptr = &formatlistptr->elseptr;
940 } else {
941 return false;
942 }
943
944 if (!text.empty()) {
945 if (*nextlistptr == NULL) {
946 *nextlistptr = new format_t();
947 } else {
948
949 // skip to the end of any format_t statements already added
950 while ((*nextlistptr)->nextptr != NULL)
951 {
952 nextlistptr = &(*nextlistptr)->nextptr;
953 }
954
955 (*nextlistptr)->nextptr = new format_t();
956 nextlistptr = &(*nextlistptr)->nextptr;
957 }
958
959 if (!parse_string (text, *nextlistptr, metadata, getParents))
960 {
961 return false;
962 }
963 text.clear();
964 }
965
966 if (*here == '{')
967 {
968 if (*nextlistptr == NULL) {
969 *nextlistptr = new format_t();
970 } else {
971 // skip to the end of any format_t statements already added
972 while ((*nextlistptr)->nextptr != NULL)
973 {
974 nextlistptr = &(*nextlistptr)->nextptr;
975 }
976
977 (*nextlistptr)->nextptr = new format_t();
978 nextlistptr = &(*nextlistptr)->nextptr;
979 }
980
981 if (!parse_action(++here, end, *nextlistptr, metadata, getParents))
982 {
983 return false;
984 }
985 }
986 else
987 {
988 if (*here == '}') break;
989 ++commacount;
990 }
991 }
992 }
993
994 } else text.push_back(*here);
995
996 if (here != end) ++here;
997 }
998
999 return true;
1000}
1001
1002
1003static text_t wrap_metatext(const text_t& metatext, const text_t& OID,
1004 const text_t metaname, int metapos=-1)
1005{
1006
1007 text_t tag_type = metadata_wrap_type;
1008 text_t editable_type = (metaname == "Text") ? "text" : "metadata";
1009
1010 text_t wrapped_metatext = "<" + tag_type + " ";
1011 wrapped_metatext += "class=\"editable-" + editable_type + "\" ";
1012
1013 wrapped_metatext += "docoid=\"" + OID + "\" ";
1014 wrapped_metatext += "metaname=\"" + metaname + "\"";
1015
1016 if (metapos>=0) {
1017 text_t metapos_str = metapos;
1018 wrapped_metatext += " metapos=\"" + metapos_str + "\"";
1019 }
1020
1021 wrapped_metatext += ">" + metatext + "</" + tag_type + ">";
1022
1023 return wrapped_metatext;
1024}
1025
1026
1027
1028bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
1029 text_tset &metadata, bool &getParents) {
1030
1031 formatlistptr->clear();
1032 getParents = false;
1033
1034 return (parse_string (formatstring, formatlistptr, metadata, getParents));
1035}
1036
1037// position -1 for all, -2 for the last, 0 for the first, or x for a particular piece
1038// metainfo has all the values for the metadata, meta has the request metadata name and options, position tells which values to get
1039
1040static text_t get_formatted_meta_text(const text_t& OID, MetadataInfo_t &metainfo, const metadata_t &meta, int position, bool no_cgisafe = false)
1041{
1042 text_t no_ns_metaname = remove_namespace(meta.metaname);
1043 text_t formatted_metatext;
1044 bool first = true;
1045
1046 const int start_i=0;
1047 const int end_i = metainfo.values.size()-1;
1048
1049 if (position == -1) { // all
1050 for (int i=start_i; i<=end_i; ++i) {
1051 if (!first) formatted_metatext += meta.siblingoptions;
1052
1053 text_t fresh_metatext;
1054
1055 if (meta.metacommand & mSpecial) {
1056 // special formatting
1057 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[i]);
1058 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[i]);
1059 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[i]+")";
1060 }
1061 else fresh_metatext = metainfo.values[i];
1062
1063 // New "truncate" special formatting option
1064 if (meta.metacommand & mTruncate)
1065 {
1066 int truncate_length = meta.siblingoptions.getint();
1067 text_t truncated_value = fresh_metatext;
1068 if (truncated_value.size() > truncate_length)
1069 {
1070 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1071 }
1072 fresh_metatext = truncated_value;
1073 }
1074 // New "xmlsafe" special formatting option
1075 if (meta.metacommand & mXMLSafe)
1076 {
1077 // Make it XML-safe
1078 text_t text_xml_safe = "";
1079 text_t::const_iterator text_iterator = fresh_metatext.begin();
1080 while (text_iterator != fresh_metatext.end())
1081 {
1082 if (*text_iterator == '&') text_xml_safe += "&amp;";
1083 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1084 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1085 else text_xml_safe.push_back(*text_iterator);
1086 text_iterator++;
1087 }
1088 fresh_metatext = text_xml_safe;
1089 }
1090 // New "htmlsafe" special formatting option
1091 if (meta.metacommand & mHTMLSafe)
1092 {
1093 // Make it HTML-safe
1094 text_t text_html_safe = "";
1095 text_t::const_iterator text_iterator = fresh_metatext.begin();
1096 while (text_iterator != fresh_metatext.end())
1097 {
1098 if (*text_iterator == '&') text_html_safe += "&amp;";
1099 else if (*text_iterator == '<') text_html_safe += "&lt;";
1100 else if (*text_iterator == '>') text_html_safe += "&gt;";
1101 else if (*text_iterator == '"') text_html_safe += "&quot;";
1102 else text_html_safe.push_back(*text_iterator);
1103 text_iterator++;
1104 }
1105 fresh_metatext = text_html_safe;
1106 }
1107 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1108 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1109 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1110 {
1111 // Make it macro-safe
1112 text_t text_dm_safe = dm_safe(fresh_metatext);
1113 fresh_metatext = text_dm_safe;
1114 }
1115
1116 if (metadata_wrap) {
1117 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,i);
1118 }
1119 formatted_metatext += fresh_metatext;
1120
1121 first = false;
1122
1123 }
1124 } else {
1125 if (position == -2) { // end
1126 position = end_i;
1127 } else if (position < start_i || position > end_i) {
1128 return "";
1129 }
1130
1131 text_t fresh_metatext;
1132 if (meta.metacommand & mSpecial) {
1133
1134 // special formatting
1135 if (no_ns_metaname == "Date") fresh_metatext = format_date (metainfo.values[position]);
1136 else if (no_ns_metaname == "Language") fresh_metatext = iso639(metainfo.values[position]);
1137 else fresh_metatext = "_format:"+meta.metaname+"_("+metainfo.values[position]+")";
1138 }
1139 else fresh_metatext = metainfo.values[position];
1140
1141 // New "truncate" special formatting option
1142 if (meta.metacommand & mTruncate)
1143 {
1144 int truncate_length = meta.siblingoptions.getint();
1145 text_t truncated_value = fresh_metatext;
1146 if (truncated_value.size() > truncate_length)
1147 {
1148 truncated_value = substr(truncated_value.begin(), truncated_value.begin() + truncate_length) + "... _texttruncated_";
1149 }
1150 fresh_metatext = truncated_value;
1151 }
1152 // New "xmlsafe" special formatting option
1153 if (meta.metacommand & mXMLSafe)
1154 {
1155 // Make it XML-safe
1156 text_t text_xml_safe = "";
1157 text_t::const_iterator text_iterator = fresh_metatext.begin();
1158 while (text_iterator != fresh_metatext.end())
1159 {
1160 if (*text_iterator == '&') text_xml_safe += "&amp;";
1161 else if (*text_iterator == '<') text_xml_safe += "&lt;";
1162 else if (*text_iterator == '>') text_xml_safe += "&gt;";
1163 else text_xml_safe.push_back(*text_iterator);
1164 text_iterator++;
1165 }
1166 fresh_metatext = text_xml_safe;
1167 }
1168 // New "htmlsafe" special formatting option
1169 if (meta.metacommand & mHTMLSafe)
1170 {
1171 // Make it HTML-safe
1172 text_t text_html_safe = "";
1173 text_t::const_iterator text_iterator = fresh_metatext.begin();
1174 while (text_iterator != fresh_metatext.end())
1175 {
1176 if (*text_iterator == '&') text_html_safe += "&amp;";
1177 else if (*text_iterator == '<') text_html_safe += "&lt;";
1178 else if (*text_iterator == '>') text_html_safe += "&gt;";
1179 else if (*text_iterator == '"') text_html_safe += "&quot;";
1180 else if (*text_iterator == '\'') text_html_safe += "&#39;";
1181 else if (*text_iterator == ',') text_html_safe += "&#44;";
1182 else text_html_safe.push_back(*text_iterator);
1183 text_iterator++;
1184 }
1185 fresh_metatext = text_html_safe;
1186 }
1187 // New "dmsafe" special formatting option (always apply to "srclinkFile" metadata)
1188 // (The metaname "srclink_file" is deprecated, use "srclinkFile")
1189 if (meta.metacommand & mDMSafe || meta.metaname == "srclinkFile")
1190 {
1191 // Make it macro-safe
1192 text_t text_dm_safe = dm_safe(fresh_metatext);
1193 fresh_metatext = text_dm_safe;
1194 }
1195
1196 if (metadata_wrap) {
1197 fresh_metatext = wrap_metatext(fresh_metatext,OID,meta.metaname,position);
1198 }
1199
1200 formatted_metatext += fresh_metatext;
1201 }
1202
1203 if (meta.metacommand & mCgiSafe && !no_cgisafe) return cgi_safe_unicode (formatted_metatext);
1204 else return formatted_metatext;
1205}
1206
1207static text_t get_parent_meta (ResultDocInfo_t &docinfo, const metadata_t &meta, int siblings_values)
1208{
1209
1210 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1211
1212 switch (meta.mqualifier.parent) {
1213 case pNone:
1214 return "Nothing!!";
1215 break;
1216
1217 case pImmediate:
1218 if (parent != NULL) {
1219 text_t parent_oid = get_parent(docinfo.OID);
1220 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1221 }
1222 break;
1223
1224 case pTop:
1225 if (parent != NULL) {
1226 text_t parent_oid = get_parent(docinfo.OID);
1227
1228 while (parent->parent != NULL) {
1229 parent = parent->parent;
1230 parent_oid = get_parent(parent_oid);
1231 }
1232 return get_formatted_meta_text(parent_oid,*parent, meta, siblings_values);
1233 }
1234 break;
1235
1236 case pAll:
1237 MetadataInfo_t *parent = docinfo.metadata[meta.metaname].parent;
1238 if (parent != NULL) {
1239 text_t parent_oid = get_parent(docinfo.OID);
1240
1241 text_tarray tmparray;
1242 while (parent != NULL) {
1243 tmparray.push_back (get_formatted_meta_text(parent_oid,*parent, meta, siblings_values, true)); // set no_cgisafe to true, as we'll do it once we have all the metadata
1244 parent = parent->parent;
1245 parent_oid = get_parent(parent_oid);
1246
1247 }
1248 // now join them up - use teh parent separator
1249 bool first = true;
1250 text_t tmp;
1251 text_tarray::reverse_iterator here = tmparray.rbegin();
1252 text_tarray::reverse_iterator end = tmparray.rend();
1253 while (here != end) {
1254 if (!first) tmp += meta.parentoptions;
1255 tmp += *here;
1256 first = false;
1257 ++here;
1258 }
1259 if (meta.metacommand & mCgiSafe) return cgi_safe_unicode (tmp);
1260 else return tmp;
1261 }
1262 }
1263 return "";
1264
1265}
1266
1267static text_t get_child_meta (const text_t& collection,
1268 recptproto* collectproto,
1269 ResultDocInfo_t &docinfo, displayclass &disp,
1270 const metadata_t &meta, text_tmap &options,
1271 ostream& logout, int siblings_values)
1272{
1273 if (docinfo.metadata["contains"].values[0].size()==0) return ""; // no children
1274
1275 const text_t& pre_tree_trav = meta.pre_tree_traverse;
1276 const text_t& child_metaname = meta.metaname;
1277 const text_t& child_field = meta.childoptions;
1278 text_tset child_metadata;
1279 child_metadata.insert(child_metaname);
1280
1281 FilterResponse_t child_response;
1282 if (meta.mqualifier.child == cNum) {
1283 // just one child
1284 //get the information associated with the metadata for child doc
1285 if (!get_info (docinfo.OID+pre_tree_trav+child_field, collection, "",
1286 child_metadata, false, collectproto, child_response,
1287 logout)) return ""; // invalid child number
1288
1289 if (child_response.docInfo.empty()) return false; // no info for the child
1290
1291 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1292 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1293
1294 text_t child_metavalue
1295 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1296 return expand_metadata(child_metavalue,collection,collectproto,
1297 child_docinfo,disp,options,logout);
1298 }
1299
1300
1301 if (meta.mqualifier.child != cAll) return false; // invalid qualifier
1302
1303
1304 if (!pre_tree_trav.empty()) {
1305 // need to get relevant "contains" metadata for new (e.g. pre tree trav) node
1306 FilterResponse_t trav_response;
1307
1308 text_tset trav_metadata;
1309 trav_metadata.insert("contains");
1310
1311 if (!get_info (docinfo.OID+pre_tree_trav, collection, "",
1312 trav_metadata, false, collectproto, trav_response,
1313 logout)) return ""; // invalid pre_tree_trav
1314
1315 if (trav_response.docInfo.empty()) return false; // no info for the pre_tree_trav OID
1316
1317 ResultDocInfo_t& trav_docinfo = trav_response.docInfo[0];
1318
1319 // use this for rest of routine
1320 docinfo = trav_docinfo;
1321 }
1322
1323 // we need to get all children
1324 text_t result = "";
1325 text_tarray children;
1326 text_t contains = docinfo.metadata["contains"].values[0];
1327 splitchar (contains.begin(), contains.end(), ';', children);
1328 text_tarray::const_iterator here = children.begin();
1329 text_tarray::const_iterator end = children.end();
1330 bool first = true;
1331 while (here !=end) {
1332 text_t oid = *here;
1333 here++;
1334 if (*(oid.begin()) == '"') translate_parent (oid, docinfo.OID);
1335
1336 //get the information associated with the metadata for child doc
1337 if (!get_info (oid, collection, "", child_metadata,
1338 false, collectproto, child_response, logout) ||
1339 child_response.docInfo.empty()) {
1340 first = false;
1341 continue;
1342 }
1343
1344
1345 ResultDocInfo_t& child_docinfo = child_response.docInfo[0];
1346 MetadataInfo_t& metaname_rec = child_docinfo.metadata[child_metaname];
1347
1348 text_t child_metavalue
1349 = get_formatted_meta_text(child_docinfo.OID,metaname_rec,meta,siblings_values);
1350
1351
1352 if (!first) result += child_field;
1353 first = false;
1354 // need to do this here cos otherwise we are in the wrong document
1355 text_t em = expand_metadata(child_metavalue,collection,collectproto,
1356 child_docinfo,disp,options,logout);
1357
1358 result += em;
1359 }
1360 return result;
1361
1362}
1363
1364static text_t get_meta (const text_t& collection, recptproto* collectproto,
1365 ResultDocInfo_t &docinfo, displayclass &disp,
1366 const metadata_t &meta, text_tmap &options,
1367 ostream& logout) {
1368
1369 // make sure we have the requested metadata
1370 MetadataInfo_tmap::iterator it = docinfo.metadata.find (meta.metaname);
1371 if (it == docinfo.metadata.end()) return "";
1372
1373 int siblings_values = 0; // default is no siblings, just the first metadata available
1374 if (meta.metacommand & mSibling) {
1375 if (meta.mqualifier.sibling == sAll) {
1376 siblings_values = -1; //all
1377 } else if (meta.mqualifier.sibling == sNum) {
1378 siblings_values = meta.siblingoptions.getint();
1379 }
1380 }
1381 if (meta.metacommand & mParent) {
1382 return get_parent_meta(docinfo,meta,siblings_values);
1383 }
1384
1385 else if (meta.metacommand & mChild) {
1386 return get_child_meta(collection,collectproto,docinfo,disp,meta,
1387 options,logout, siblings_values);
1388 }
1389 else if (meta.metacommand & mSibling) { // only siblings
1390 MetadataInfo_t& metaname_rec = docinfo.metadata[meta.metaname];
1391 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname],meta, siblings_values);
1392 }
1393 else {
1394
1395 // straightforward metadata request (nothing fancy)
1396
1397 text_t classifier_metaname = docinfo.classifier_metadata_type;
1398 int metaname_index
1399 = (classifier_metaname == meta.metaname) ? docinfo.classifier_metadata_offset : 0;
1400 return get_formatted_meta_text(docinfo.OID,docinfo.metadata[meta.metaname], meta, metaname_index);
1401 }
1402
1403 return "";
1404}
1405
1406static text_t get_or (const text_t& collection, recptproto* collectproto,
1407 ResultDocInfo_t &docinfo, displayclass &disp,
1408 format_t *orptr, text_tmap &options,
1409 ostream& logout) {
1410
1411 while (orptr != NULL) {
1412
1413 if (metadata_wrap) {
1414 // need to be a bit more careful about this
1415 // => test for it *without* spanwrap or divwrap, and if defined, then
1416 // got back and generate it again, this time with spanwrap/divwrap on
1417
1418 metadata_wrap = false;
1419 text_t test_tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1420 options, logout);
1421 metadata_wrap = true;
1422 if (!test_tmp.empty()) {
1423
1424 return format_string (collection,collectproto,docinfo, disp, orptr,
1425 options, logout);
1426 }
1427 }
1428 else {
1429 text_t tmp = format_string (collection,collectproto,docinfo, disp, orptr,
1430 options, logout);
1431 if (!tmp.empty()) return tmp;
1432 }
1433
1434 orptr = orptr->nextptr;
1435 }
1436 return "";
1437}
1438
// True for a space, tab, newline or carriage return; false for
// everything else.
static bool char_is_whitespace(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
1444
1445static int scan_over_whitespace(const text_t& outstring, const int start_pos)
1446{
1447 int pos = start_pos;
1448 while (pos<outstring.size()) {
1449 if (!char_is_whitespace(outstring[pos])) {
1450 break;
1451 }
1452 ++pos;
1453 }
1454
1455 return pos;
1456}
1457
1458static int rscan_over_whitespace(const text_t& outstring, const int start_pos)
1459{
1460 int pos = start_pos;
1461 while (pos>=0) {
1462 if (!char_is_whitespace(outstring[pos])) {
1463 break;
1464 }
1465 --pos;
1466 }
1467
1468 return pos;
1469}
1470
1471static int rscan_for_whitespace(const text_t& outstring, const int start_pos)
1472{
1473 int pos = start_pos;
1474 while (pos>=0) {
1475 if (char_is_whitespace(outstring[pos])) {
1476 break;
1477 }
1478 --pos;
1479 }
1480
1481 return pos;
1482}
1483
1484
1485static int rscan_for(const text_t& outstring, const int start_pos,
1486 const char find_c)
1487{
1488 int pos = start_pos;
1489 while (pos>=0) {
1490 char c = outstring[pos];
1491 if (outstring[pos] == find_c) {
1492 break;
1493 }
1494 --pos;
1495 }
1496
1497 return pos;
1498}
1499
1500text_t extract_substr(const text_t& outstring, const int start_pos,
1501 const int end_pos)
1502{
1503 text_t extracted_str;
1504 extracted_str.clear();
1505
1506 for (int pos=start_pos; pos<=end_pos; ++pos) {
1507 extracted_str.push_back(outstring[pos]);
1508 }
1509
1510 return extracted_str;
1511}
1512
1513
1514static text_t expand_potential_metadata(const text_t& collection,
1515 recptproto* collectproto,
1516 ResultDocInfo_t &docinfo,
1517 displayclass &disp,
1518 const text_t& intext,
1519 text_tmap &options,
1520 ostream& logout)
1521{
1522 text_t outtext;
1523
1524 // decide if dealing with metadata or text
1525
1526 text_t::const_iterator beginbracket = intext.begin();
1527 text_t::const_iterator endbracket = (intext.end() - 1);
1528
1529 // Decision is based on a metadata element
1530 if ((*beginbracket == '[') && (*endbracket == ']')) {
1531 // Ignore the surrounding square brackets
1532 text_t meta_text = substr (beginbracket+1, endbracket);
1533
1534 if (meta_text == "Text") {
1535 outtext = format_text(collection, collectproto, docinfo, disp, options, logout);
1536 }
1537 else {
1538
1539 text_tset metadata;
1540 bool getParents =false;
1541 metadata_t meta;
1542
1543 parse_meta (meta_text, meta, metadata, getParents);
1544 outtext
1545 = get_meta (collection,collectproto,docinfo,disp,meta,options,logout);
1546 }
1547
1548 }
1549 else {
1550 outtext = intext;
1551 }
1552
1553 return outtext;
1554}
1555
1556
1557static bool uses_expression(const text_t& collection, recptproto* collectproto,
1558 ResultDocInfo_t &docinfo,
1559 displayclass &disp,
1560 const text_t& outstring, text_t& lhs_expr,
1561 text_t& op_expr, text_t& rhs_expr,
1562 text_tmap &options,
1563 ostream& logout)
1564{
1565 // Note: the string may not be of the form: str1 op str2, however
1566 // to deterine this we have to process it on the assumption it is,
1567 // and if at any point an 'erroneous' value is encountered, return
1568 // false and let something else have a go at evaluating it
1569
1570 // Starting at the end of the string and working backwards ..
1571
1572 const int outstring_len = outstring.size();
1573
1574 // skip over white space
1575 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
1576
1577 if (rhs_end<=0) {
1578 // no meaningful text or (rhs_end==0) no room for operator
1579 return false;
1580 }
1581
1582 // check for ' or " and then scan over token
1583 const char potential_quote = outstring[rhs_end];
1584 int rhs_start=rhs_end;
1585 bool quoted = false;
1586
1587 if ((potential_quote == '\'') || (potential_quote == '\"')) {
1588 --rhs_end;
1589 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
1590 quoted = true;
1591 }
1592 else {
1593 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
1594 }
1595
1596 if ((rhs_end-rhs_start)<0) {
1597 // no meaningful rhs expression
1598 return false;
1599 }
1600
1601 // form rhs_expr
1602 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
1603
1604 // skip over white space
1605 const int to_whitespace = (quoted) ? 2 : 1;
1606
1607 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
1608 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
1609
1610 if ((op_end<0) && (op_start<0)) {
1611 // no meaningful expression operator
1612 return false;
1613 }
1614
1615 if (op_end-op_start<0) {
1616 // no meaningful expression operator
1617 return false;
1618 }
1619
1620 op_expr = extract_substr(outstring,op_start,op_end);
1621
1622
1623 // check for operator
1624 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
1625 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
1626
1627 // not a valid operator
1628 return false;
1629 }
1630
1631 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
1632 if (lhs_end<0) {
1633 // no meaningful lhs expression
1634 return false;
1635 }
1636
1637 int lhs_start = scan_over_whitespace(outstring,0);
1638
1639 // form lhs_expr from remainder of string
1640 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
1641
1642 // Now we know we have a valid expression, look up any
1643 // metadata terms
1644
1645 rhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1646 disp,rhs_expr,options,logout);
1647 lhs_expr = expand_potential_metadata(collection,collectproto,docinfo,
1648 disp,lhs_expr,options,logout);
1649
1650 return true;
1651}
1652
1653static bool eval_expression_true(const text_t& lhs_expr,const text_t& op_expr,
1654 const text_t& rhs_expr, ostream& logout)
1655{
1656 if (op_expr == "eq") return (lhs_expr == rhs_expr);
1657 else if (op_expr == "ne" ) return (lhs_expr != rhs_expr);
1658 else if (op_expr == "gt") return (lhs_expr > rhs_expr);
1659 else if (op_expr == "ge") return (lhs_expr >= rhs_expr);
1660 else if (op_expr == "lt") return (lhs_expr < rhs_expr);
1661 else if (op_expr == "le") return (lhs_expr <= rhs_expr);
1662 else if (op_expr == "==") return (lhs_expr.getint() == rhs_expr.getint());
1663 else if (op_expr == "!=") return (lhs_expr.getint() != rhs_expr.getint());
1664 else if (op_expr == ">") return (lhs_expr.getint() > rhs_expr.getint());
1665 else if (op_expr == ">=") return (lhs_expr.getint() >= rhs_expr.getint());
1666 else if (op_expr == "<") return (lhs_expr.getint() < rhs_expr.getint());
1667 else if (op_expr == "<=") return (lhs_expr.getint() <= rhs_expr.getint());
1668 else if (op_expr == "sw") return (starts_with(lhs_expr,rhs_expr));
1669 else if (op_expr == "ew") return (ends_with(lhs_expr,rhs_expr));
1670 else {
1671 logout << "Error: '" << op_expr << "' is not a recognised operator." << endl;
1672 }
1673
1674 return false;
1675}
1676
1677
1678static text_t get_if (const text_t& collection, recptproto* collectproto,
1679 ResultDocInfo_t &docinfo, displayclass &disp,
1680 const decision_t &decision,
1681 format_t *ifptr, format_t *elseptr,
1682 text_tmap &options, ostream& logout)
1683{
1684 // If the decision component is a metadata element, then evaluate it
1685 // to see whether we output the "then" or the "else" clause
1686 if (decision.command == dMeta) {
1687
1688 bool store_metadata_wrap = metadata_wrap;
1689 metadata_wrap = 0;
1690
1691 // temporarily suspend metadata-XXXwrap (if on) so can test if metadata item really exits or not
1692 bool metadata_exists
1693 = (get_meta (collection,collectproto,docinfo,disp,decision.meta,options,
1694 logout) != "");
1695
1696 metadata_wrap = store_metadata_wrap;
1697
1698 if (metadata_exists) {
1699 if (ifptr != NULL)
1700 return get_formatted_string (collection,collectproto,docinfo, disp, ifptr,
1701 options, logout);
1702 }
1703 else {
1704 if (elseptr != NULL)
1705 return get_formatted_string (collection,collectproto,docinfo, disp, elseptr,
1706 options, logout);
1707 }
1708 }
1709
1710 // If the decision component is text, then evaluate it (it is probably a
1711 // macro like _cgiargmode_) to decide what to output.
1712 else if (decision.command == dText) {
1713
1714 text_t outstring;
1715 disp.expandstring (decision.text, outstring);
1716
1717 // Check for if expression in form: str1 op str2
1718 // (such as [x] eq "y")
1719 text_t lhs_expr, op_expr, rhs_expr;
1720 if (uses_expression(collection,collectproto,docinfo, disp, outstring,lhs_expr,op_expr,rhs_expr, options,logout)) {
1721 if (eval_expression_true(lhs_expr,op_expr,rhs_expr,logout)) {
1722 if (ifptr != NULL) {
1723 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1724 options, logout);
1725 }
1726 else {
1727 return "";
1728 }
1729 } else {
1730 if (elseptr != NULL) {
1731 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1732 options, logout);
1733 }
1734 else {
1735 return "";
1736 }
1737 }
1738 }
1739
1740
1741 // This is a tad tricky. When we expand a string like _cgiargmode_, that is
1742 // a cgi argument macro that has not been set, it evaluates to itself.
1743 // Therefore, we have to say that a piece of text evaluates true if
1744 // it is non-empty and if it is a cgi argument evaulating to itself.
1745
1746 if ((outstring != "") && !((outstring == decision.text) && (outstring[0] == '_'))) {
1747 if (ifptr != NULL)
1748 return get_formatted_string (collection, collectproto, docinfo, disp, ifptr,
1749 options, logout);
1750 } else {
1751 if (elseptr != NULL)
1752 return get_formatted_string (collection, collectproto, docinfo, disp, elseptr,
1753 options, logout);
1754 }
1755 }
1756
1757 return "";
1758}
1759
1760bool includes_metadata(const text_t& text)
1761{
1762 text_t::const_iterator here = text.begin();
1763 text_t::const_iterator end = text.end();
1764
1765 char startbracket = '[';
1766 char endbracket = ']';
1767
1768 char bracket = startbracket;
1769 while (here != end) {
1770 if (*here == bracket) {
1771 if(bracket == startbracket) {
1772 // seen a [, next look for a ] to confirm it's metadata
1773 bracket = endbracket;
1774 } else if(bracket == endbracket) {
1775 // found [ ... ] in text, so we think it includes metadata
1776 return true;
1777 }
1778 }
1779 ++here;
1780 }
1781
1782 return false;
1783}
1784
1785static text_t expand_metadata(const text_t &metavalue, const text_t& collection,
1786 recptproto* collectproto,
1787 ResultDocInfo_t &docinfo,
1788 displayclass &disp, text_tmap &options,
1789 ostream &logout) {
1790
1791 if (includes_metadata(metavalue)) {
1792
1793 // text has embedded metadata in it => expand it
1794 FilterRequest_t request;
1795 FilterResponse_t response;
1796
1797 request.getParents = false;
1798
1799 format_t *expanded_formatlistptr = new format_t();
1800 parse_formatstring (metavalue, expanded_formatlistptr,
1801 request.fields, request.getParents);
1802
1803 // retrieve metadata
1804 get_info(docinfo.OID, collection, "", request.fields, request.getParents,
1805 collectproto, response, logout);
1806
1807 if (!response.docInfo.empty()) {
1808
1809 text_t expanded_metavalue
1810 = get_formatted_string(collection, collectproto,
1811 response.docInfo[0], disp, expanded_formatlistptr,
1812 options, logout);
1813
1814 return expanded_metavalue;
1815 }
1816 else {
1817 return metavalue;
1818 }
1819 }
1820 else {
1821
1822 return metavalue;
1823 }
1824}
1825
1826text_t get_collection_meta(const text_t& collection, recptproto* collectproto,
1827 displayclass &disp,
1828 text_t meta_name, ostream& logout) {
1829
1830 ColInfoResponse_t collectinfo;
1831 comerror_t err;
1832 collectproto->get_collectinfo (collection, collectinfo,err,logout);
1833 text_t meta_value = "";
1834 text_t lang;
1835 disp.expandstring("_cgiargl_",lang);
1836 if (lang.empty()) {
1837 lang = "en";
1838 }
1839
1840 if (err == noError) {
1841 meta_value = collectinfo.get_collectionmeta(meta_name, lang);
1842 }
1843 return meta_value;
1844
1845
1846}
1847text_t format_string (const text_t& collection, recptproto* collectproto,
1848 ResultDocInfo_t &docinfo, displayclass &disp,
1849 format_t *formatlistptr, text_tmap &options,
1850 ostream& logout) {
1851
1852 if (formatlistptr == NULL) return "";
1853
1854 switch (formatlistptr->command) {
1855 case comOID:
1856 return docinfo.OID;
1857 case comTopOID:
1858 {
1859 text_t top_id;
1860 get_top(docinfo.OID, top_id);
1861 return top_id;
1862 }
1863 case comRank:
1864 return text_t(docinfo.ranking);
1865 case comText:
1866 return formatlistptr->text;
1867 case comLink:
1868 return options["link"];
1869 case comEndLink:
1870 {
1871 if (options["link"].empty()) return "";
1872 else return "</a>";
1873 }
1874 case comHref:
1875 return get_href(options["link"]);
1876 case comIcon:
1877 return options["icon"];
1878 case comNum:
1879 return docinfo.result_num;
1880 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
1881 return get_related_docs(collection, collectproto, docinfo, logout);
1882
1883 case comSummary:
1884 return format_summary(collection, collectproto, docinfo, disp, options, logout);
1885 case comAssocLink:
1886 {
1887 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1888 if (!link_filename.empty()) {
1889 text_t href= expand_metadata(options["assocfilepath"], collection, collectproto, docinfo, disp, options, logout) + link_filename;
1890 if (formatlistptr->text == "href") {
1891 return href;
1892 }
1893 return "<a href=\""+ href + "\">";
1894 }
1895 return "";
1896 }
1897 case comEndAssocLink:
1898 {
1899 text_t link_filename = get_meta(collection, collectproto, docinfo, disp, formatlistptr->meta, options, logout);
1900 if (!link_filename.empty()) {
1901 return "</a>";
1902 }
1903 return "";
1904 }
1905 case comMeta:
1906 {
1907 const text_t& metavalue = get_meta (collection,collectproto, docinfo, disp,formatlistptr->meta,options,logout);
1908 return expand_metadata(metavalue, collection, collectproto, docinfo, disp, options, logout);
1909 }
1910
1911 case comDoc:
1912 return format_text(collection, collectproto, docinfo, disp, options, logout);
1913
1914 case comImage:
1915 return expand_metadata(options["DocImage"], collection, collectproto, docinfo, disp, options, logout);
1916 case comTOC:
1917 return options["DocTOC"];
1918 case comDocumentButtonDetach:
1919 return options["DocumentButtonDetach"];
1920 case comDocumentButtonHighlight:
1921 return options["DocumentButtonHighlight"];
1922 case comDocumentButtonExpandContents:
1923 return options["DocumentButtonExpandContents"];
1924 case comDocumentButtonExpandText:
1925 return options["DocumentButtonExpandText"];
1926 case comHighlight:
1927 if (options["highlight"] == "1") return "<b>";
1928 break;
1929 case comEndHighlight:
1930 if (options["highlight"] == "1") return "</b>";
1931 break;
1932 case comMetadataSpanWrap:
1933 metadata_wrap=true; metadata_wrap_type="span"; return "";
1934 break;
1935 case comEndMetadataSpanWrap:
1936 metadata_wrap=false; metadata_wrap_type=""; return "";
1937 break;
1938 case comMetadataDivWrap:
1939 metadata_wrap=true; metadata_wrap_type="div"; return "";
1940 break;
1941 case comEndMetadataDivWrap:
1942 metadata_wrap=false; metadata_wrap_type=""; return "";
1943 break;
1944 case comIf:
1945 return get_if (collection, collectproto, docinfo, disp,
1946 formatlistptr->decision, formatlistptr->ifptr,
1947 formatlistptr->elseptr, options, logout);
1948 case comOr:
1949 return get_or (collection,collectproto, docinfo, disp, formatlistptr->orptr,
1950 options, logout);
1951 case comDocTermsFreqTotal:
1952 return docinfo.num_terms_matched;
1953 case comCollection:
1954 if (formatlistptr->meta.metaname == g_EmptyText) {
1955 return collection;
1956 }
1957 return get_collection_meta(collection, collectproto, disp, formatlistptr->meta.metaname, logout);
1958
1959 }
1960 return "";
1961}
1962
1963text_t get_formatted_string (const text_t& collection, recptproto* collectproto,
1964 ResultDocInfo_t &docinfo, displayclass &disp,
1965 format_t *formatlistptr, text_tmap &options,
1966 ostream& logout) {
1967
1968 text_t ft;
1969 while (formatlistptr != NULL)
1970 {
1971 ft += format_string (collection, collectproto, docinfo, disp, formatlistptr,
1972 options, logout);
1973 formatlistptr = formatlistptr->nextptr;
1974 }
1975
1976 return ft;
1977}
1978
1979
1980// we have only preloaded the text in DocumentAction. But you may want
1981// to get the text in query, so copy what we have done with
1982// format_summary and get the text here. Probably is quite expensive?
1983text_t format_text (const text_t& collection, recptproto* collectproto,
1984 ResultDocInfo_t &docinfo, displayclass &disp,
1985 text_tmap &options, ostream& logout)
1986{
1987 text_t text;
1988
1989 if (!options["text"].empty()) {
1990 text = options["text"];
1991 }
1992 else {
1993 // get document text here
1994 DocumentRequest_t docrequest;
1995 DocumentResponse_t docresponse;
1996 comerror_t err;
1997 docrequest.OID = docinfo.OID;
1998 collectproto->get_document (collection, docrequest, docresponse, err, logout);
1999 text = docresponse.doc;
2000 }
2001
2002 if (metadata_wrap) {
2003 text = wrap_metatext(text,docinfo.OID,"Text");
2004 }
2005
2006 return text;
2007}
2008
2009/* FUNCTION NAME: format_summary
2010 * DESC: this is invoked when a [Summary] special metadata is processed.
2011 * RETURNS: a query-biased summary for the document */
2012
2013text_t format_summary (const text_t& collection, recptproto* collectproto,
2014 ResultDocInfo_t &docinfo, displayclass &disp,
2015 text_tmap &options, ostream& logout) {
2016
2017 // GRB: added code here to ensure that the cstr (and other collections)
2018 // uses the document metadata item Summary, rather than compressing
2019 // the text of the document, processed via the methods in
2020 // summarise.cpp
2021
2022 text_t summary;
2023
2024 if (docinfo.metadata.count("Summary") > 0 &&
2025 docinfo.metadata["Summary"].values.size() > 0) {
2026 summary = docinfo.metadata["Summary"].values[0];
2027 }
2028 else {
2029
2030 text_t textToSummarise, query;
2031
2032 if(options["text"].empty()) { // get document text
2033 DocumentRequest_t docrequest;
2034 DocumentResponse_t docresponse;
2035 comerror_t err;
2036 docrequest.OID = docinfo.OID;
2037 collectproto->get_document (collection, docrequest, docresponse, err, logout);
2038 textToSummarise = docresponse.doc;
2039 }
2040 else {
2041 // in practice, this would not happen, because text is only
2042 // loaded with the [Text] command
2043 textToSummarise = options["text"];
2044 }
2045
2046 disp.expandstring("_cgiargq_",query);
2047 summary = summarise(textToSummarise,query,80);
2048 //summary = substr(textToSummarise.begin(),textToSummarise.begin()+80);
2049 }
2050
2051 summary.replace("'","&#039;");
2052 summary.replace("\n","&#013;");
2053
2054 if (metadata_wrap) {
2055 summary = wrap_metatext(summary,docinfo.OID,"Summary");
2056 }
2057
2058 return summary;
2059}
2060
2061//-------------- GS3 related functions --------------
2062// copy of the other uses_expression function, but without using the extra GS2-runtime-specific parameters
2063static bool uses_expression(const text_t& outstring, text_t& lhs_expr,
2064 text_t& op_expr, text_t& rhs_expr)
2065{
2066 // Note: the string may not be of the form: str1 op str2, however
2067 // to deterine this we have to process it on the assumption it is,
2068 // and if at any point an 'erroneous' value is encountered, return
2069 // false and let something else have a go at evaluating it
2070
2071 // Starting at the end of the string and working backwards ..
2072
2073 const int outstring_len = outstring.size();
2074
2075 // skip over white space
2076 int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
2077
2078 if (rhs_end<=0) {
2079 // no meaningful text or (rhs_end==0) no room for operator
2080 return false;
2081 }
2082
2083 // check for ' or " and then scan over token
2084 const char potential_quote = outstring[rhs_end];
2085 int rhs_start=rhs_end;
2086 bool quoted = false;
2087
2088 if ((potential_quote == '\'') || (potential_quote == '\"')) {
2089 --rhs_end;
2090 rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
2091 quoted = true;
2092 }
2093 else {
2094 rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
2095 }
2096
2097 if ((rhs_end-rhs_start)<0) {
2098 // no meaningful rhs expression
2099 return false;
2100 }
2101
2102 // form rhs_expr
2103 rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
2104
2105 // skip over white space
2106 const int to_whitespace = (quoted) ? 2 : 1;
2107
2108 int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
2109 int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
2110
2111 if ((op_end<0) && (op_start<0)) {
2112 // no meaningful expression operator
2113 return false;
2114 }
2115
2116 if (op_end-op_start<0) {
2117 // no meaningful expression operator
2118 return false;
2119 }
2120
2121 op_expr = extract_substr(outstring,op_start,op_end);
2122
2123
2124 // check for operator
2125 if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
2126 (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") && (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
2127
2128 // not a valid operator
2129 return false;
2130 }
2131
2132 int lhs_end = rscan_over_whitespace(outstring,op_start-1);
2133 if (lhs_end<0) {
2134 // no meaningful lhs expression
2135 return false;
2136 }
2137
2138 int lhs_start = scan_over_whitespace(outstring,0);
2139
2140 // form lhs_expr from remainder of string
2141 lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
2142
2143 return true;
2144}
2145
2146// [ex.Title] -> ex.Title
2147static text_t remove_bracket_bookends(const text_t &str) {
2148
2149 if(str[0] == '[' && str[str.size()-1] == ']') {
2150 return substr (str.begin()+1, str.end()-1);
2151 } else {
2152 return str;
2153 }
2154}
2155
2156static text_t get_gs3_if (const decision_t &decision, format_t *ifptr, format_t *elseptr, const text_t& nodeType)
2157{
2158 text_t ifstmt ="<gsf:switch>";
2159
2160
2161 if (decision.command == dMeta) {
2162 ifstmt += "<gsf:metadata name=\"";
2163 ifstmt += remove_bracket_bookends(decision.meta.metaname);
2164 ifstmt += "\"/>";
2165 ifstmt += "<gsf:when test=\"exists\">";
2166 }
2167
2168 else { //if(decision.command == dText)
2169
2170 text_t outstring = decision.text;
2171
2172 // Check for if expression in form: str1 op str2
2173 // (such as [x] eq "y")
2174 text_t lhs_expr, op_expr, rhs_expr;
2175 if (uses_expression(outstring,lhs_expr,op_expr,rhs_expr)) {
2176
2177 text_t if_operator = op_expr;
2178 if (op_expr == "eq" || op_expr == "==") {
2179 if_operator = "equals";
2180 } else if (op_expr == "ne" || op_expr == "!=") {
2181 if_operator = "notEquals";
2182 } else if (op_expr == "gt" || op_expr == ">") {
2183 if_operator = "greaterThan";
2184 } else if (op_expr == "lt" || op_expr == "<") {
2185 if_operator = "lessThan";
2186 } else if (op_expr == "ge" || op_expr == ">=") {
2187 if_operator = "greaterThanOrEquals";
2188 } else if (op_expr == "le" || op_expr == "<=") {
2189 if_operator = "lessThanOrEquals";
2190 } else if (op_expr == "sw") {
2191 if_operator = "startsWith";
2192 } else if (op_expr == "ew") {
2193 if_operator = "endsWith";
2194 }
2195
2196 ifstmt += "<gsf:metadata name=\"";
2197 ifstmt += remove_bracket_bookends(lhs_expr);
2198 ifstmt += "\"/>";
2199
2200 ifstmt += "<gsf:when test=\"";
2201 ifstmt += if_operator; // the test operator
2202 ifstmt += "\" test-value=\"";
2203 ifstmt += remove_bracket_bookends(rhs_expr); // the test-value
2204 ifstmt += "\">";
2205 }
2206 else {
2207 ifstmt += "<gsf:metadata name=\"";
2208 ifstmt += remove_bracket_bookends(decision.text);
2209 ifstmt += "\"/>";
2210 ifstmt += "<gsf:when test=\"exists\">";
2211 }
2212 }
2213
2214 // if portion
2215 text_t if_body = "";
2216 while(ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
2217 if_body += transform_to_GS3_format (ifptr, nodeType);
2218 ifptr = ifptr->nextptr;
2219 }
2220 ifstmt += if_body;
2221 ifstmt += "</gsf:when>";
2222
2223 // else portion
2224 if(elseptr != NULL) {
2225
2226 ifstmt += "<gsf:otherwise>";
2227 text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
2228 while(elseptr != NULL) {
2229 else_body += transform_to_GS3_format (elseptr, nodeType);
2230 elseptr = elseptr->nextptr;
2231 }
2232 ifstmt += else_body;
2233 ifstmt += "</gsf:otherwise>";
2234 }
2235
2236 ifstmt += "</gsf:switch>";
2237 return ifstmt;
2238}
2239
2240
2241static text_t get_gs3_or (format_t *orptr, const text_t& nodeType) {
2242 text_t result = "<gsf:choose-metadata>";
2243
2244 while(orptr != NULL) {
2245 text_t or_body = transform_to_GS3_format (orptr, nodeType);
2246 if (!or_body.empty()) {
2247 result += or_body;
2248 }
2249
2250 orptr = orptr->nextptr;
2251 }
2252 result += "</gsf:choose-metadata>";
2253 return result;
2254}
2255
2256// what about all the <td>? Does that get stored in formatlistptr, such as under the ->text field?
2257text_t get_GS3_formatstring (format_t *formatlistptr, const text_t& nodeType) {
2258 text_t result;
2259
2260 while (formatlistptr != NULL) {
2261 result += transform_to_GS3_format(formatlistptr, nodeType);
2262 formatlistptr = formatlistptr->nextptr;
2263 }
2264
2265 return result;
2266}
2267
2268text_t transform_to_GS3_format (format_t *formatlistptr, const text_t& nodeType) {
2269 if (formatlistptr == NULL) return "";
2270
2271 switch (formatlistptr->command) {
2272 case comOID:
2273 return "<gsf:OID/>";
2274 case comTopOID:
2275 return "<gsf:metadata name='OID' select='root' />"; // for now try this
2276 case comRank:
2277 return "<gsf:rank/>";
2278 case comText:
2279 return formatlistptr->text; // [text]? or any string that is not a command or reserved
2280 case comLink:
2281 if(nodeType == "classifier") {
2282 return "<gsf:link type='classifier'>";
2283 } else { // if nodeType is document or not set
2284 return "<gsf:link type='document'>";
2285 }
2286 case comEndLink:
2287 return "</gsf:link>";
2288 case comHref:
2289 return "<gsf:lib name=\"href\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2290 case comIcon:
2291 if(nodeType == "classifier") {
2292 return "<gsf:icon type='classifier'/>";
2293 } else { // if nodeType is document or not set
2294 return "<gsf:icon type='document'/>";
2295 }
2296 case comNum:
2297 return "<gsf:lib name=\"num\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
2298 case comRel: //if [RelatedDocuments] appears in format string, collect relation data
2299 return "<gsf:lib name=\"RelatedDocuments\"/>"; // output comment marking this as deprecated or to be implemented for GS3 in gslib xslt
2300 case comSummary:
2301 return "<gsf:lib name=\"Summary\"/>"; // in gslib xslt output comment marking this as to be implemented for GS3
2302 // need to invent this for GS3 based on what GS2 does
2303 case comAssocLink:
2304 return "<gsf:link type='source'>";
2305 case comEndAssocLink:
2306 return "</gsf:link>";
2307 case comMeta:
2308 return "<gsf:metadata name=\"" + formatlistptr->meta.metaname + "\" />";//?
2309 case comDoc:
2310 return "<gsf:text/>";
2311 case comImage: // the cover img seems to be handled by some magic code in GS3
2312 return "<gsf:lib name=\"image\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2313 case comTOC:
2314 return "<gsf:lib name=\"TOC\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
2315 // need to think about whether an equivalent actually exists
2316 // return "<gsf:option name=\"TOC\" value=\"true\"/>"; // this is wrong
2317 case comDocumentButtonDetach:
2318 return "<gsf:lib name=\"DocumentButtonDetach\"/>"; // output comment marking this as deprecated in gslib xslt
2319 case comDocumentButtonHighlight:
2320 return "<gsf:lib name=\"DocumentButtonHighlight\"/>"; // output comment marking this as deprecated in gslib xslt
2321 case comDocumentButtonExpandContents:
2322 return "<gsf:lib name=\"DocumentButtonExpandContents\"/>"; // output comment marking this as deprecated in gslib xslt
2323 case comDocumentButtonExpandText:
2324 return "<gsf:lib name=\"DocumentButtonExpandText\"/>"; // output comment marking this as deprecated in gslib xslt
2325 case comHighlight:
2326 return "<span class=\"highlight\">";
2327 break;
2328 case comEndHighlight:
2329 return "</span>";
2330 break;
2331 case comMetadataSpanWrap:
2332 metadata_wrap=true; metadata_wrap_type="span"; return "";
2333 break;
2334 case comEndMetadataSpanWrap:
2335 metadata_wrap=false; metadata_wrap_type=""; return "";
2336 break;
2337 case comMetadataDivWrap:
2338 metadata_wrap=true; metadata_wrap_type="div"; return "";
2339 break;
2340 case comEndMetadataDivWrap:
2341 metadata_wrap=false; metadata_wrap_type=""; return "";
2342 break;
2343 case comIf:
2344 if(formatlistptr->decision.meta.metaname == "numleafdocs") {
2345 if(nodeType == "classifier") {
2346 text_t if_body = "";
2347 while(formatlistptr->ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
2348 if_body += transform_to_GS3_format (formatlistptr->ifptr, nodeType);
2349 formatlistptr->ifptr = formatlistptr->ifptr->nextptr;
2350 }
2351 return if_body;
2352 } else if(nodeType == "document") {
2353 text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
2354 while(formatlistptr->elseptr != NULL) {
2355 else_body += transform_to_GS3_format (formatlistptr->elseptr, nodeType);
2356 formatlistptr->elseptr = formatlistptr->elseptr->nextptr;
2357 }
2358 return else_body;
2359 }
2360 }
2361
2362 // if nodeType not specified as classifier or document, or if the If test is not for numleafdocs' existence
2363 return get_gs3_if (formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr, nodeType);
2364 case comOr:
2365 return get_gs3_or (formatlistptr->orptr, nodeType);
2366 //return "<gsf:choose-metadata>"+get_gs3_or (formatlistptr->orptr, nodeType)+"</gsf:choose-metadata>";
2367 case comDocTermsFreqTotal:
2368 return "<gsf:lib name=\"DocTermsFreqTotal\"/>";
2369 case comCollection: // trying to get all the metadata for a collection. How is this done in GS3???
2370 return "<gsf:lib name=\"collection\"/>";
2371 }
2372 return "";
2373}
Note: See TracBrowser for help on using the repository browser.