1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * formattools.cpp --
|
---|
4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * PUT COPYRIGHT NOTICE HERE
|
---|
7 | *
|
---|
8 | * $Id: formattools.cpp 422 1999-07-30 02:25:43Z sjboddie $
|
---|
9 | *
|
---|
10 | *********************************************************************/
|
---|
11 |
|
---|
12 | /*
|
---|
13 | $Log$
|
---|
14 | Revision 1.6 1999/07/30 02:25:42 sjboddie
|
---|
15 | made format_date function global
|
---|
16 |
|
---|
17 | Revision 1.5 1999/07/21 05:00:00 sjboddie
|
---|
18 | added some date formatting
|
---|
19 |
|
---|
20 | Revision 1.4 1999/07/20 03:02:15 sjboddie
|
---|
21 | added an [icon] option, added ability to call get_formatted_string
|
---|
22 | with icon and link arguments set
|
---|
23 |
|
---|
24 | Revision 1.3 1999/07/09 02:44:35 sjboddie
|
---|
25 | fixed parent(All) function so it only outputs parents and not current
|
---|
26 | level meta
|
---|
27 |
|
---|
28 | Revision 1.2 1999/07/08 20:48:33 rjmcnab
|
---|
29 | Added ability to print the result number
|
---|
30 |
|
---|
31 | Revision 1.1 1999/07/07 05:49:34 sjboddie
|
---|
32 | had another crack at the format string code - created a new formattools
|
---|
33 | module. It can now handle {If} and {Or} statements although there's a
|
---|
34 | bug preventing nested if's and or's.
|
---|
35 |
|
---|
36 | */
|
---|
37 |
|
---|
38 |
|
---|
39 | #include "formattools.h"
|
---|
40 |
|
---|
41 |
|
---|
42 | // a few function prototypes
|
---|
43 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr,
|
---|
44 | const text_t &link, const text_t &icon);
|
---|
45 |
|
---|
46 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
47 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
48 | text_tmap &metamap, int &metacount);
|
---|
49 |
|
---|
50 | void metadata_t::clear() {
|
---|
51 | metaname.clear();
|
---|
52 | metaindex = 0;
|
---|
53 | parentcommand = pNone;
|
---|
54 | parentindex = 0;
|
---|
55 | parentoptions.clear();
|
---|
56 | };
|
---|
57 |
|
---|
58 | void decision_t::clear() {
|
---|
59 | command = dMeta;
|
---|
60 | meta.clear();
|
---|
61 | };
|
---|
62 |
|
---|
63 | void format_t::clear() {
|
---|
64 | command = comText;
|
---|
65 | decision.clear();
|
---|
66 | text.clear();
|
---|
67 | meta.clear();
|
---|
68 | nextptr = NULL;
|
---|
69 | ifptr = NULL;
|
---|
70 | elseptr = NULL;
|
---|
71 | orptr = NULL;
|
---|
72 | };
|
---|
73 |
|
---|
74 | // returns a date of form _textmonthnn_ 31, 1999
|
---|
75 | // input is date of type 19991231
|
---|
76 | // at least the year must be present in date
|
---|
77 | text_t format_date (const text_t &date) {
|
---|
78 |
|
---|
79 | if (date.size() < 4) return "";
|
---|
80 |
|
---|
81 | text_t::const_iterator datebegin = date.begin();
|
---|
82 |
|
---|
83 | text_t year = substr (datebegin, datebegin+4);
|
---|
84 |
|
---|
85 | if (date.size() < 6) return year;
|
---|
86 |
|
---|
87 | text_t month = "_textmonth" + substr (datebegin+4, datebegin+6) + "_";
|
---|
88 | int imonth = month.getint();
|
---|
89 | if (imonth < 0 || imonth > 12) return year;
|
---|
90 |
|
---|
91 | if (date.size() < 8) return month + ", " + year;
|
---|
92 |
|
---|
93 | text_t day = substr (datebegin+6, datebegin+8);
|
---|
94 | if (day[0] == '0') day = substr (day.begin()+1, day.end());
|
---|
95 | int iday = day.getint();
|
---|
96 | if (iday < 0 || iday > 31) return month + ", " + year;
|
---|
97 |
|
---|
98 | return month + " " + day + ", " + year;
|
---|
99 | }
|
---|
100 |
|
---|
101 | static void get_parent_options (text_t &instring, metadata_t &metaoption) {
|
---|
102 |
|
---|
103 | metaoption.clear();
|
---|
104 | text_t meta, com, op;
|
---|
105 | bool inbraces = false;
|
---|
106 | bool inquotes = false;
|
---|
107 | bool foundcolon = false;
|
---|
108 | text_t::const_iterator here = instring.begin()+6;
|
---|
109 | text_t::const_iterator end = instring.end();
|
---|
110 | while (here != end) {
|
---|
111 | if (*here == '(') inbraces = true;
|
---|
112 | else if (*here == ')') inbraces = false;
|
---|
113 | else if (*here == '\'' && !inquotes) inquotes = true;
|
---|
114 | else if (*here == '\'' && inquotes) inquotes = false;
|
---|
115 | else if (*here == ':' && !inbraces) foundcolon = true;
|
---|
116 | else if (foundcolon) meta.push_back (*here);
|
---|
117 | else if (inquotes) op.push_back (*here);
|
---|
118 | else com.push_back (*here);
|
---|
119 | here ++;
|
---|
120 | }
|
---|
121 | instring = meta;
|
---|
122 | if (com.empty())
|
---|
123 | metaoption.parentcommand = pImmediate;
|
---|
124 | else if (com == "Top")
|
---|
125 | metaoption.parentcommand = pTop;
|
---|
126 | else if (is_number(com)) {
|
---|
127 | metaoption.parentcommand = pIndex;
|
---|
128 | metaoption.metaindex = com.getint();
|
---|
129 | } else if (com == "All") {
|
---|
130 | metaoption.parentcommand = pAll;
|
---|
131 | metaoption.parentoptions = op;
|
---|
132 | }
|
---|
133 | }
|
---|
134 |
|
---|
135 | static bool parse_meta (text_t &meta, int &count, decision_t &decision,
|
---|
136 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
137 |
|
---|
138 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
139 | getParents = true;
|
---|
140 | metadata_t metaoption;
|
---|
141 | get_parent_options (meta, metaoption);
|
---|
142 | decision.meta = metaoption;
|
---|
143 | }
|
---|
144 | text_tmap::const_iterator it;
|
---|
145 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
146 | decision.meta.metaindex = (*it).second.getint();
|
---|
147 | } else {
|
---|
148 | metamap[meta] = count;
|
---|
149 | metadata.push_back (meta);
|
---|
150 | decision.meta.metaindex = count;
|
---|
151 | count ++;
|
---|
152 | }
|
---|
153 | return true;
|
---|
154 | }
|
---|
155 |
|
---|
156 | static bool parse_meta (text_t &meta, int &count, format_t *formatlistptr,
|
---|
157 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
158 |
|
---|
159 | if (meta == "link")
|
---|
160 | formatlistptr->command = comLink;
|
---|
161 | else if (meta == "/link")
|
---|
162 | formatlistptr->command = comEndLink;
|
---|
163 |
|
---|
164 | else if (meta == "num")
|
---|
165 | formatlistptr->command = comNum;
|
---|
166 |
|
---|
167 | else if (meta == "icon")
|
---|
168 | formatlistptr->command = comIcon;
|
---|
169 |
|
---|
170 | else {
|
---|
171 | formatlistptr->command = comMeta;
|
---|
172 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
173 | getParents = true;
|
---|
174 | metadata_t metaoption;
|
---|
175 | get_parent_options (meta, metaoption);
|
---|
176 | formatlistptr->meta = metaoption;
|
---|
177 | }
|
---|
178 | text_tmap::const_iterator it;
|
---|
179 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
180 | formatlistptr->meta.metaindex = (*it).second.getint();
|
---|
181 | } else {
|
---|
182 | metamap[meta] = count;
|
---|
183 | metadata.push_back (meta);
|
---|
184 | formatlistptr->meta.metaindex = count;
|
---|
185 | count ++;
|
---|
186 | }
|
---|
187 | formatlistptr->meta.metaname = meta;
|
---|
188 | }
|
---|
189 | return true;
|
---|
190 | }
|
---|
191 |
|
---|
192 | static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
|
---|
193 | text_tarray &metadata, bool &getParents,
|
---|
194 | text_tmap &metamap, int &metacount) {
|
---|
195 |
|
---|
196 | text_t text;
|
---|
197 | text_t::const_iterator here = formatstring.begin();
|
---|
198 | text_t::const_iterator end = formatstring.end();
|
---|
199 |
|
---|
200 | while (here != end) {
|
---|
201 |
|
---|
202 | if (*here == '\\')
|
---|
203 | text.push_back (*(++here));
|
---|
204 |
|
---|
205 | else if (*here == '{') {
|
---|
206 | if (!text.empty()) {
|
---|
207 | formatlistptr->command = comText;
|
---|
208 | formatlistptr->text = text;
|
---|
209 | formatlistptr->nextptr = new format_t();
|
---|
210 | formatlistptr = formatlistptr->nextptr;
|
---|
211 |
|
---|
212 | text.clear();
|
---|
213 | }
|
---|
214 | if (parse_action (++here, end, formatlistptr, metadata,
|
---|
215 | getParents, metamap, metacount)) {
|
---|
216 | formatlistptr->nextptr = new format_t();
|
---|
217 | formatlistptr = formatlistptr->nextptr;
|
---|
218 | if (here == end) break;
|
---|
219 | }
|
---|
220 | } else if (*here == '[') {
|
---|
221 | if (!text.empty()) {
|
---|
222 | formatlistptr->command = comText;
|
---|
223 | formatlistptr->text = text;
|
---|
224 | formatlistptr->nextptr = new format_t();
|
---|
225 | formatlistptr = formatlistptr->nextptr;
|
---|
226 |
|
---|
227 | text.clear();
|
---|
228 | }
|
---|
229 | text_t meta;
|
---|
230 | here ++;
|
---|
231 | while (*here != ']') {
|
---|
232 | if (here == end) return false;
|
---|
233 | meta.push_back (*here);
|
---|
234 | here ++;
|
---|
235 | }
|
---|
236 | if (parse_meta (meta, metacount, formatlistptr, metadata, getParents, metamap)) {
|
---|
237 | formatlistptr->nextptr = new format_t();
|
---|
238 | formatlistptr = formatlistptr->nextptr;
|
---|
239 | }
|
---|
240 |
|
---|
241 | } else
|
---|
242 | text.push_back (*here);
|
---|
243 |
|
---|
244 | here ++;
|
---|
245 | }
|
---|
246 | if (!text.empty()) {
|
---|
247 | formatlistptr->command = comText;
|
---|
248 | formatlistptr->text = text;
|
---|
249 | formatlistptr->nextptr = new format_t();
|
---|
250 | formatlistptr = formatlistptr->nextptr;
|
---|
251 |
|
---|
252 | }
|
---|
253 | return true;
|
---|
254 | }
|
---|
255 |
|
---|
256 |
|
---|
257 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
258 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
259 | text_tmap &metamap, int &metacount) {
|
---|
260 |
|
---|
261 | text_t::const_iterator it = findchar (here, end, '}');
|
---|
262 | if (it == end) return false;
|
---|
263 |
|
---|
264 | text_t com = substr (here, it);
|
---|
265 | here = findchar (it, end, '{');
|
---|
266 | if (here == end) return false;
|
---|
267 | else here ++;
|
---|
268 |
|
---|
269 | if (com == "If") formatlistptr->command = comIf;
|
---|
270 | else if (com == "Or") formatlistptr->command = comOr;
|
---|
271 | else return false;
|
---|
272 |
|
---|
273 | int curlycount = 0;
|
---|
274 | int commacount = 0;
|
---|
275 | text_t text;
|
---|
276 | while (here != end) {
|
---|
277 | if (*here == '{') {curlycount ++; text.push_back(*here);}
|
---|
278 | else if (*here == '}' && curlycount > 0) {
|
---|
279 | curlycount --;
|
---|
280 | text.push_back(*here);
|
---|
281 | }
|
---|
282 |
|
---|
283 | else if ((*here == ',' || *here == '}') && curlycount <= 0) {
|
---|
284 |
|
---|
285 | if (formatlistptr->command == comOr) {
|
---|
286 | // the {Or}{this, or this, or this, or this} statement
|
---|
287 | // or'ed statements may be either [metadata] or plain text
|
---|
288 | format_t *or_ptr;
|
---|
289 |
|
---|
290 | // find the next unused orptr
|
---|
291 | if (formatlistptr->orptr == NULL) {
|
---|
292 | formatlistptr->orptr = new format_t();
|
---|
293 | or_ptr = formatlistptr->orptr;
|
---|
294 | } else {
|
---|
295 | or_ptr = formatlistptr->orptr;
|
---|
296 | while (or_ptr->nextptr != NULL)
|
---|
297 | or_ptr = or_ptr->nextptr;
|
---|
298 | or_ptr->nextptr = new format_t();
|
---|
299 | or_ptr = or_ptr->nextptr;
|
---|
300 | }
|
---|
301 |
|
---|
302 | text_t::const_iterator beginbracket = text.begin();
|
---|
303 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
304 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
305 | // it's metadata
|
---|
306 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
307 | parse_meta (meta, metacount, or_ptr, metadata, getParents, metamap);
|
---|
308 |
|
---|
309 | } else {
|
---|
310 | // assume it's plain text
|
---|
311 | or_ptr->command = comText;
|
---|
312 | or_ptr->text = text;
|
---|
313 | }
|
---|
314 | text.clear();
|
---|
315 |
|
---|
316 | } else {
|
---|
317 | // the {If}{decide,do,else} statement
|
---|
318 | if (commacount == 0) {
|
---|
319 | // If decision only supports metadata at present
|
---|
320 |
|
---|
321 | // remove the surrounding square brackets
|
---|
322 | text_t::const_iterator beginbracket = text.begin();
|
---|
323 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
324 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
325 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
326 | decision_t decision;
|
---|
327 | parse_meta (meta, metacount, formatlistptr->decision,
|
---|
328 | metadata, getParents, metamap);
|
---|
329 | commacount ++;
|
---|
330 | text.clear();
|
---|
331 | }
|
---|
332 |
|
---|
333 | } else if (commacount == 1) {
|
---|
334 | formatlistptr->ifptr = new format_t();
|
---|
335 | parse_string (text, formatlistptr->ifptr, metadata,
|
---|
336 | getParents, metamap, metacount);
|
---|
337 | commacount ++;
|
---|
338 | text.clear();
|
---|
339 |
|
---|
340 | } else if (commacount == 2) {
|
---|
341 | formatlistptr->elseptr = new format_t();
|
---|
342 | parse_string (text, formatlistptr->elseptr, metadata,
|
---|
343 | getParents, metamap, metacount);
|
---|
344 | commacount ++;
|
---|
345 | text.clear();
|
---|
346 | }
|
---|
347 | }
|
---|
348 | if (*here == '}') break;
|
---|
349 | }
|
---|
350 |
|
---|
351 | else
|
---|
352 | text.push_back(*here);
|
---|
353 |
|
---|
354 | here ++;
|
---|
355 | }
|
---|
356 |
|
---|
357 | return true;
|
---|
358 | }
|
---|
359 |
|
---|
360 |
|
---|
361 | bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
|
---|
362 | text_tarray &metadata, bool &getParents) {
|
---|
363 |
|
---|
364 | formatlistptr->clear();
|
---|
365 | metadata.erase (metadata.begin(), metadata.end());
|
---|
366 | getParents = false;
|
---|
367 |
|
---|
368 | text_tmap metamap;
|
---|
369 | int metacount = 0;
|
---|
370 | return (parse_string (formatstring, formatlistptr, metadata,
|
---|
371 | getParents, metamap, metacount));
|
---|
372 | }
|
---|
373 |
|
---|
374 |
|
---|
375 | // note: all the format_date stuff is assuming that all Date metadata is going to
|
---|
376 | // be of the form yyyymmdd, this is of course, crap ;)
|
---|
377 |
|
---|
378 | static text_t get_meta (const ResultDocInfo_t &docinfo, const metadata_t &meta) {
|
---|
379 | int metasize = docinfo.metadata.size();
|
---|
380 | int mindex = meta.metaindex;
|
---|
381 | if (metasize < 1 || metasize <= mindex) return "";
|
---|
382 | int valuesize = docinfo.metadata[mindex].values.size();
|
---|
383 |
|
---|
384 | switch (meta.parentcommand) {
|
---|
385 | case pNone:
|
---|
386 | if (meta.metaname == "Date")
|
---|
387 | return format_date (docinfo.metadata[mindex].values.back());
|
---|
388 | return docinfo.metadata[mindex].values.back();
|
---|
389 |
|
---|
390 | case pImmediate:
|
---|
391 | if (valuesize > 1) {
|
---|
392 | if (meta.metaname == "Date")
|
---|
393 | return format_date (docinfo.metadata[mindex].values[metasize-2]);
|
---|
394 | return docinfo.metadata[mindex].values[metasize-2];
|
---|
395 | }
|
---|
396 | break;
|
---|
397 |
|
---|
398 | case pTop:
|
---|
399 | if (valuesize > 1) {
|
---|
400 | if (meta.metaname == "Date")
|
---|
401 | return format_date (docinfo.metadata[mindex].values[0]);
|
---|
402 | return docinfo.metadata[mindex].values[0];
|
---|
403 | }
|
---|
404 | break;
|
---|
405 |
|
---|
406 | case pIndex:
|
---|
407 | if (valuesize > meta.parentindex) {
|
---|
408 | if (meta.metaname == "Date")
|
---|
409 | return format_date (docinfo.metadata[mindex].values[meta.parentindex]);
|
---|
410 | return docinfo.metadata[mindex].values[meta.parentindex];
|
---|
411 | }
|
---|
412 | break;
|
---|
413 |
|
---|
414 | case pAll:
|
---|
415 | bool first = true;
|
---|
416 | text_t tmp;
|
---|
417 | if (valuesize > 1) {
|
---|
418 | text_tarray::const_iterator here = docinfo.metadata[mindex].values.begin();
|
---|
419 | // don't want last value as that's the value of the current level (i.e. it's
|
---|
420 | // not a parent
|
---|
421 | text_tarray::const_iterator end = docinfo.metadata[mindex].values.end() - 1;
|
---|
422 | while (here != end) {
|
---|
423 | if (!first) tmp += meta.parentoptions;
|
---|
424 | if (meta.metaname == "Date") tmp += format_date (*here);
|
---|
425 | else tmp += *here;
|
---|
426 | first = false;
|
---|
427 | here ++;
|
---|
428 | }
|
---|
429 | }
|
---|
430 | return tmp;
|
---|
431 | }
|
---|
432 | return "";
|
---|
433 | }
|
---|
434 |
|
---|
435 | static text_t get_or (const ResultDocInfo_t &docinfo, format_t *orptr,
|
---|
436 | const text_t &link, const text_t &icon) {
|
---|
437 |
|
---|
438 | text_t tmp;
|
---|
439 | while (orptr != NULL) {
|
---|
440 |
|
---|
441 | tmp = format_string (docinfo, orptr, link, icon);
|
---|
442 | if (!tmp.empty()) return tmp;
|
---|
443 |
|
---|
444 | orptr = orptr->nextptr;
|
---|
445 | }
|
---|
446 | return "";
|
---|
447 | }
|
---|
448 |
|
---|
449 | static text_t get_if (const ResultDocInfo_t &docinfo, const decision_t &decision,
|
---|
450 | format_t *ifptr, format_t *elseptr, const text_t &link,
|
---|
451 | const text_t &icon) {
|
---|
452 |
|
---|
453 | // not much of a choice yet ...
|
---|
454 | if (decision.command == dMeta) {
|
---|
455 | if (get_meta (docinfo, decision.meta) != "") {
|
---|
456 | if (ifptr != NULL)
|
---|
457 | return get_formatted_string (docinfo, ifptr, link, icon);
|
---|
458 | }
|
---|
459 | else {
|
---|
460 | if (elseptr != NULL)
|
---|
461 | return get_formatted_string (docinfo, elseptr, link, icon);
|
---|
462 | }
|
---|
463 | }
|
---|
464 | return "";
|
---|
465 | }
|
---|
466 |
|
---|
467 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr,
|
---|
468 | const text_t &link, const text_t &icon) {
|
---|
469 |
|
---|
470 | if (formatlistptr == NULL) return "";
|
---|
471 |
|
---|
472 | switch (formatlistptr->command) {
|
---|
473 | case comText:
|
---|
474 | return formatlistptr->text;
|
---|
475 | case comLink:
|
---|
476 | return link;
|
---|
477 | case comEndLink:
|
---|
478 | return "</a>";
|
---|
479 | case comIcon:
|
---|
480 | return icon;
|
---|
481 | case comNum:
|
---|
482 | return docinfo.result_num;
|
---|
483 | case comMeta:
|
---|
484 | return get_meta (docinfo, formatlistptr->meta);
|
---|
485 | case comIf:
|
---|
486 | return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr,
|
---|
487 | formatlistptr->elseptr, link, icon);
|
---|
488 | case comOr:
|
---|
489 | return get_or (docinfo, formatlistptr->orptr, link, icon);
|
---|
490 | }
|
---|
491 | return "";
|
---|
492 | }
|
---|
493 |
|
---|
494 |
|
---|
495 |
|
---|
496 | text_t get_formatted_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr,
|
---|
497 | const text_t &link, const text_t &icon) {
|
---|
498 |
|
---|
499 | text_t ft;
|
---|
500 | while (formatlistptr != NULL) {
|
---|
501 | ft += format_string (docinfo, formatlistptr, link, icon);
|
---|
502 | formatlistptr = formatlistptr->nextptr;
|
---|
503 | }
|
---|
504 | return ft;
|
---|
505 | }
|
---|
506 |
|
---|
507 |
|
---|
508 | text_t get_formatted_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr) {
|
---|
509 |
|
---|
510 | text_t link = "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
|
---|
511 | text_t icon = "_icontext_";
|
---|
512 |
|
---|
513 | text_t ft;
|
---|
514 | while (formatlistptr != NULL) {
|
---|
515 | ft += format_string (docinfo, formatlistptr, link, icon);
|
---|
516 | formatlistptr = formatlistptr->nextptr;
|
---|
517 | }
|
---|
518 | return ft;
|
---|
519 | }
|
---|
520 |
|
---|
521 |
|
---|
522 |
|
---|
523 |
|
---|