1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * formattools.cpp --
|
---|
4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * PUT COPYRIGHT NOTICE HERE
|
---|
7 | *
|
---|
8 | * $Id: formattools.cpp 359 1999-07-09 02:44:35Z sjboddie $
|
---|
9 | *
|
---|
10 | *********************************************************************/
|
---|
11 |
|
---|
12 | /*
|
---|
13 | $Log$
|
---|
14 | Revision 1.3 1999/07/09 02:44:35 sjboddie
|
---|
15 | fixed parent(All) function so it only outputs parents and not current
|
---|
16 | level meta
|
---|
17 |
|
---|
18 | Revision 1.2 1999/07/08 20:48:33 rjmcnab
|
---|
19 | Added ability to print the result number
|
---|
20 |
|
---|
21 | Revision 1.1 1999/07/07 05:49:34 sjboddie
|
---|
22 | had another crack at the format string code - created a new formattools
|
---|
23 | module. It can now handle {If} and {Or} statements although there's a
|
---|
24 | bug preventing nested if's and or's.
|
---|
25 |
|
---|
26 | */
|
---|
27 |
|
---|
28 |
|
---|
29 | #include "formattools.h"
|
---|
30 |
|
---|
31 |
|
---|
32 |
|
---|
33 |
|
---|
34 |
|
---|
35 | // a few function prototypes
|
---|
36 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr);
|
---|
37 |
|
---|
38 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
39 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
40 | text_tmap &metamap, int &metacount);
|
---|
41 |
|
---|
42 |
|
---|
43 |
|
---|
44 | void metadata_t::clear() {
|
---|
45 | metaindex = 0;
|
---|
46 | parentcommand = pNone;
|
---|
47 | parentindex = 0;
|
---|
48 | parentoptions.clear();
|
---|
49 | };
|
---|
50 |
|
---|
51 | void decision_t::clear() {
|
---|
52 | command = dMeta;
|
---|
53 | meta.clear();
|
---|
54 | };
|
---|
55 |
|
---|
56 |
|
---|
57 | void format_t::clear() {
|
---|
58 | command = comText;
|
---|
59 | decision.clear();
|
---|
60 | text.clear();
|
---|
61 | meta.clear();
|
---|
62 | nextptr = NULL;
|
---|
63 | ifptr = NULL;
|
---|
64 | elseptr = NULL;
|
---|
65 | orptr = NULL;
|
---|
66 | };
|
---|
67 |
|
---|
68 |
|
---|
69 |
|
---|
70 |
|
---|
71 | static void get_parent_options (text_t &instring, metadata_t &metaoption) {
|
---|
72 |
|
---|
73 | metaoption.clear();
|
---|
74 | text_t meta, com, op;
|
---|
75 | bool inbraces = false;
|
---|
76 | bool inquotes = false;
|
---|
77 | bool foundcolon = false;
|
---|
78 | text_t::const_iterator here = instring.begin()+6;
|
---|
79 | text_t::const_iterator end = instring.end();
|
---|
80 | while (here != end) {
|
---|
81 | if (*here == '(') inbraces = true;
|
---|
82 | else if (*here == ')') inbraces = false;
|
---|
83 | else if (*here == '\'' && !inquotes) inquotes = true;
|
---|
84 | else if (*here == '\'' && inquotes) inquotes = false;
|
---|
85 | else if (*here == ':' && !inbraces) foundcolon = true;
|
---|
86 | else if (foundcolon) meta.push_back (*here);
|
---|
87 | else if (inquotes) op.push_back (*here);
|
---|
88 | else com.push_back (*here);
|
---|
89 | here ++;
|
---|
90 | }
|
---|
91 | instring = meta;
|
---|
92 | if (com.empty())
|
---|
93 | metaoption.parentcommand = pImmediate;
|
---|
94 | else if (com == "Top")
|
---|
95 | metaoption.parentcommand = pTop;
|
---|
96 | else if (is_number(com)) {
|
---|
97 | metaoption.parentcommand = pIndex;
|
---|
98 | metaoption.metaindex = com.getint();
|
---|
99 | } else if (com == "All") {
|
---|
100 | metaoption.parentcommand = pAll;
|
---|
101 | metaoption.parentoptions = op;
|
---|
102 | }
|
---|
103 | }
|
---|
104 |
|
---|
105 | static bool parse_meta (text_t &meta, int &count, decision_t &decision,
|
---|
106 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
107 |
|
---|
108 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
109 | getParents = true;
|
---|
110 | metadata_t metaoption;
|
---|
111 | get_parent_options (meta, metaoption);
|
---|
112 | decision.meta = metaoption;
|
---|
113 | }
|
---|
114 | text_tmap::const_iterator it;
|
---|
115 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
116 | decision.meta.metaindex = (*it).second.getint();
|
---|
117 | } else {
|
---|
118 | metamap[meta] = count;
|
---|
119 | metadata.push_back (meta);
|
---|
120 | decision.meta.metaindex = count;
|
---|
121 | count ++;
|
---|
122 | }
|
---|
123 | return true;
|
---|
124 | }
|
---|
125 |
|
---|
126 | static bool parse_meta (text_t &meta, int &count, format_t *formatlistptr,
|
---|
127 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
128 |
|
---|
129 | if (meta == "link")
|
---|
130 | formatlistptr->command = comLink;
|
---|
131 | else if (meta == "/link")
|
---|
132 | formatlistptr->command = comEndLink;
|
---|
133 |
|
---|
134 | else if (meta == "num")
|
---|
135 | formatlistptr->command = comNum;
|
---|
136 |
|
---|
137 | else {
|
---|
138 | formatlistptr->command = comMeta;
|
---|
139 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
140 | getParents = true;
|
---|
141 | metadata_t metaoption;
|
---|
142 | get_parent_options (meta, metaoption);
|
---|
143 | formatlistptr->meta = metaoption;
|
---|
144 | }
|
---|
145 | text_tmap::const_iterator it;
|
---|
146 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
147 | (formatlistptr->meta).metaindex = (*it).second.getint();
|
---|
148 | } else {
|
---|
149 | metamap[meta] = count;
|
---|
150 | metadata.push_back (meta);
|
---|
151 | (formatlistptr->meta).metaindex = count;
|
---|
152 | count ++;
|
---|
153 | }
|
---|
154 | }
|
---|
155 | return true;
|
---|
156 | }
|
---|
157 |
|
---|
158 | static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
|
---|
159 | text_tarray &metadata, bool &getParents,
|
---|
160 | text_tmap &metamap, int &metacount) {
|
---|
161 |
|
---|
162 | text_t text;
|
---|
163 | text_t::const_iterator here = formatstring.begin();
|
---|
164 | text_t::const_iterator end = formatstring.end();
|
---|
165 |
|
---|
166 | while (here != end) {
|
---|
167 |
|
---|
168 | if (*here == '\\')
|
---|
169 | text.push_back (*(++here));
|
---|
170 |
|
---|
171 | else if (*here == '{') {
|
---|
172 | if (!text.empty()) {
|
---|
173 | formatlistptr->command = comText;
|
---|
174 | formatlistptr->text = text;
|
---|
175 | formatlistptr->nextptr = new format_t();
|
---|
176 | formatlistptr = formatlistptr->nextptr;
|
---|
177 |
|
---|
178 | text.clear();
|
---|
179 | }
|
---|
180 | if (parse_action (++here, end, formatlistptr, metadata,
|
---|
181 | getParents, metamap, metacount)) {
|
---|
182 | formatlistptr->nextptr = new format_t();
|
---|
183 | formatlistptr = formatlistptr->nextptr;
|
---|
184 | if (here == end) break;
|
---|
185 | }
|
---|
186 | } else if (*here == '[') {
|
---|
187 | if (!text.empty()) {
|
---|
188 | formatlistptr->command = comText;
|
---|
189 | formatlistptr->text = text;
|
---|
190 | formatlistptr->nextptr = new format_t();
|
---|
191 | formatlistptr = formatlistptr->nextptr;
|
---|
192 |
|
---|
193 | text.clear();
|
---|
194 | }
|
---|
195 | text_t meta;
|
---|
196 | here ++;
|
---|
197 | while (*here != ']') {
|
---|
198 | if (here == end) return false;
|
---|
199 | meta.push_back (*here);
|
---|
200 | here ++;
|
---|
201 | }
|
---|
202 | if (parse_meta (meta, metacount, formatlistptr, metadata, getParents, metamap)) {
|
---|
203 | formatlistptr->nextptr = new format_t();
|
---|
204 | formatlistptr = formatlistptr->nextptr;
|
---|
205 | }
|
---|
206 |
|
---|
207 | } else
|
---|
208 | text.push_back (*here);
|
---|
209 |
|
---|
210 | here ++;
|
---|
211 | }
|
---|
212 | if (!text.empty()) {
|
---|
213 | formatlistptr->command = comText;
|
---|
214 | formatlistptr->text = text;
|
---|
215 | formatlistptr->nextptr = new format_t();
|
---|
216 | formatlistptr = formatlistptr->nextptr;
|
---|
217 |
|
---|
218 | }
|
---|
219 | return true;
|
---|
220 | }
|
---|
221 |
|
---|
222 |
|
---|
223 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
224 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
225 | text_tmap &metamap, int &metacount) {
|
---|
226 |
|
---|
227 | text_t::const_iterator it = findchar (here, end, '}');
|
---|
228 | if (it == end) return false;
|
---|
229 |
|
---|
230 | text_t com = substr (here, it);
|
---|
231 | here = findchar (it, end, '{');
|
---|
232 | if (here == end) return false;
|
---|
233 | else here ++;
|
---|
234 |
|
---|
235 | if (com == "If") formatlistptr->command = comIf;
|
---|
236 | else if (com == "Or") formatlistptr->command = comOr;
|
---|
237 | else return false;
|
---|
238 |
|
---|
239 | int curlycount = 0;
|
---|
240 | int commacount = 0;
|
---|
241 | text_t text;
|
---|
242 | while (here != end) {
|
---|
243 | if (*here == '{') {curlycount ++; text.push_back(*here);}
|
---|
244 | else if (*here == '}' && curlycount > 0) {
|
---|
245 | curlycount --;
|
---|
246 | text.push_back(*here);
|
---|
247 | }
|
---|
248 |
|
---|
249 | else if ((*here == ',' || *here == '}') && curlycount <= 0) {
|
---|
250 |
|
---|
251 | if (formatlistptr->command == comOr) {
|
---|
252 | // the {Or}{this, or this, or this, or this} statement
|
---|
253 | // or'ed statements may be either [metadata] or plain text
|
---|
254 | format_t *or_ptr;
|
---|
255 |
|
---|
256 | // find the next unused orptr
|
---|
257 | if (formatlistptr->orptr == NULL) {
|
---|
258 | formatlistptr->orptr = new format_t();
|
---|
259 | or_ptr = formatlistptr->orptr;
|
---|
260 | } else {
|
---|
261 | or_ptr = formatlistptr->orptr;
|
---|
262 | while (or_ptr->nextptr != NULL)
|
---|
263 | or_ptr = or_ptr->nextptr;
|
---|
264 | or_ptr->nextptr = new format_t();
|
---|
265 | or_ptr = or_ptr->nextptr;
|
---|
266 | }
|
---|
267 |
|
---|
268 | text_t::const_iterator beginbracket = text.begin();
|
---|
269 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
270 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
271 | // it's metadata
|
---|
272 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
273 | parse_meta (meta, metacount, or_ptr, metadata, getParents, metamap);
|
---|
274 |
|
---|
275 | } else {
|
---|
276 | // assume it's plain text
|
---|
277 | or_ptr->command = comText;
|
---|
278 | or_ptr->text = text;
|
---|
279 | }
|
---|
280 | text.clear();
|
---|
281 |
|
---|
282 | } else {
|
---|
283 | // the {If}{decide,do,else} statement
|
---|
284 | if (commacount == 0) {
|
---|
285 | // If decision only supports metadata at present
|
---|
286 |
|
---|
287 | // remove the surrounding square brackets
|
---|
288 | text_t::const_iterator beginbracket = text.begin();
|
---|
289 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
290 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
291 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
292 | decision_t decision;
|
---|
293 | parse_meta (meta, metacount, formatlistptr->decision,
|
---|
294 | metadata, getParents, metamap);
|
---|
295 | commacount ++;
|
---|
296 | text.clear();
|
---|
297 | }
|
---|
298 |
|
---|
299 | } else if (commacount == 1) {
|
---|
300 | formatlistptr->ifptr = new format_t();
|
---|
301 | parse_string (text, formatlistptr->ifptr, metadata,
|
---|
302 | getParents, metamap, metacount);
|
---|
303 | commacount ++;
|
---|
304 | text.clear();
|
---|
305 |
|
---|
306 | } else if (commacount == 2) {
|
---|
307 | formatlistptr->elseptr = new format_t();
|
---|
308 | parse_string (text, formatlistptr->elseptr, metadata,
|
---|
309 | getParents, metamap, metacount);
|
---|
310 | commacount ++;
|
---|
311 | text.clear();
|
---|
312 | }
|
---|
313 | }
|
---|
314 | if (*here == '}') break;
|
---|
315 | }
|
---|
316 |
|
---|
317 | else
|
---|
318 | text.push_back(*here);
|
---|
319 |
|
---|
320 | here ++;
|
---|
321 | }
|
---|
322 |
|
---|
323 | return true;
|
---|
324 | }
|
---|
325 |
|
---|
326 |
|
---|
327 | bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
|
---|
328 | text_tarray &metadata, bool &getParents) {
|
---|
329 |
|
---|
330 | formatlistptr->clear();
|
---|
331 | metadata.erase (metadata.begin(), metadata.end());
|
---|
332 | getParents = false;
|
---|
333 |
|
---|
334 | text_tmap metamap;
|
---|
335 | int metacount = 0;
|
---|
336 | return (parse_string (formatstring, formatlistptr, metadata,
|
---|
337 | getParents, metamap, metacount));
|
---|
338 | }
|
---|
339 |
|
---|
340 |
|
---|
341 |
|
---|
342 | static text_t get_meta (const ResultDocInfo_t &docinfo, const metadata_t &meta) {
|
---|
343 | int metasize = docinfo.metadata.size();
|
---|
344 | int mindex = meta.metaindex;
|
---|
345 | if (metasize < 1 || metasize <= mindex) return "";
|
---|
346 | int valuesize = docinfo.metadata[mindex].values.size();
|
---|
347 |
|
---|
348 | switch (meta.parentcommand) {
|
---|
349 | case pNone:
|
---|
350 | return docinfo.metadata[mindex].values.back();
|
---|
351 |
|
---|
352 | case pImmediate:
|
---|
353 | if (valuesize > 1)
|
---|
354 | return docinfo.metadata[mindex].values[metasize-2];
|
---|
355 | break;
|
---|
356 |
|
---|
357 | case pTop:
|
---|
358 | if (valuesize > 1)
|
---|
359 | return docinfo.metadata[mindex].values[0];
|
---|
360 | break;
|
---|
361 |
|
---|
362 | case pIndex:
|
---|
363 | if (valuesize > meta.parentindex)
|
---|
364 | return docinfo.metadata[mindex].values[meta.parentindex];
|
---|
365 | break;
|
---|
366 |
|
---|
367 | case pAll:
|
---|
368 | bool first = true;
|
---|
369 | text_t tmp;
|
---|
370 | if (valuesize > 1) {
|
---|
371 | text_tarray::const_iterator here = docinfo.metadata[mindex].values.begin();
|
---|
372 | // don't want last value as that's the value of the current level (i.e. it's
|
---|
373 | // not a parent
|
---|
374 | text_tarray::const_iterator end = docinfo.metadata[mindex].values.end() - 1;
|
---|
375 | while (here != end) {
|
---|
376 | if (!first) tmp += meta.parentoptions;
|
---|
377 | tmp += *here;
|
---|
378 | first = false;
|
---|
379 | here ++;
|
---|
380 | }
|
---|
381 | }
|
---|
382 | return tmp;
|
---|
383 | }
|
---|
384 | return "";
|
---|
385 | }
|
---|
386 |
|
---|
387 | static text_t get_or (const ResultDocInfo_t &docinfo, format_t *orptr) {
|
---|
388 |
|
---|
389 | text_t tmp;
|
---|
390 | while (orptr != NULL) {
|
---|
391 |
|
---|
392 | tmp = format_string (docinfo, orptr);
|
---|
393 | if (!tmp.empty()) return tmp;
|
---|
394 |
|
---|
395 | orptr = orptr->nextptr;
|
---|
396 | }
|
---|
397 | return "";
|
---|
398 | }
|
---|
399 |
|
---|
400 | static text_t get_if (const ResultDocInfo_t &docinfo, const decision_t &decision,
|
---|
401 | format_t *ifptr, format_t *elseptr) {
|
---|
402 |
|
---|
403 | // not much of a choice yet ...
|
---|
404 | if (decision.command == dMeta) {
|
---|
405 | if (get_meta (docinfo, decision.meta) != "") {
|
---|
406 | if (ifptr != NULL)
|
---|
407 | return get_formatted_string (docinfo, ifptr);
|
---|
408 | }
|
---|
409 | else {
|
---|
410 | if (elseptr != NULL)
|
---|
411 | return get_formatted_string (docinfo, elseptr);
|
---|
412 | }
|
---|
413 | }
|
---|
414 | return "";
|
---|
415 | }
|
---|
416 |
|
---|
417 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr) {
|
---|
418 |
|
---|
419 | if (formatlistptr == NULL) return "";
|
---|
420 |
|
---|
421 | switch (formatlistptr->command) {
|
---|
422 | case comText:
|
---|
423 | return formatlistptr->text;
|
---|
424 | case comLink:
|
---|
425 | return "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
|
---|
426 | case comEndLink:
|
---|
427 | return "</a>";
|
---|
428 | case comNum:
|
---|
429 | return docinfo.result_num;
|
---|
430 | case comMeta:
|
---|
431 | return get_meta (docinfo, formatlistptr->meta);
|
---|
432 | case comIf:
|
---|
433 | return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr);
|
---|
434 | case comOr:
|
---|
435 | return get_or (docinfo, formatlistptr->orptr);
|
---|
436 | }
|
---|
437 | return "";
|
---|
438 | }
|
---|
439 |
|
---|
440 |
|
---|
441 |
|
---|
442 | text_t get_formatted_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr) {
|
---|
443 |
|
---|
444 | text_t ft;
|
---|
445 | while (formatlistptr != NULL) {
|
---|
446 | ft += format_string (docinfo, formatlistptr);
|
---|
447 | formatlistptr = formatlistptr->nextptr;
|
---|
448 | }
|
---|
449 | return ft;
|
---|
450 | }
|
---|
451 |
|
---|
452 |
|
---|
453 |
|
---|
454 |
|
---|