1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * formattools.cpp --
|
---|
4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * PUT COPYRIGHT NOTICE HERE
|
---|
7 | *
|
---|
8 | * $Id: formattools.cpp 354 1999-07-08 20:48:33Z rjmcnab $
|
---|
9 | *
|
---|
10 | *********************************************************************/
|
---|
11 |
|
---|
12 | /*
|
---|
13 | $Log$
|
---|
14 | Revision 1.2 1999/07/08 20:48:33 rjmcnab
|
---|
15 | Added ability to print the result number
|
---|
16 |
|
---|
17 | Revision 1.1 1999/07/07 05:49:34 sjboddie
|
---|
18 | had another crack at the format string code - created a new formattools
|
---|
19 | module. It can now handle {If} and {Or} statements although there's a
|
---|
20 | bug preventing nested if's and or's.
|
---|
21 |
|
---|
22 | */
|
---|
23 |
|
---|
24 |
|
---|
25 | #include "formattools.h"
|
---|
26 |
|
---|
27 |
|
---|
28 |
|
---|
29 |
|
---|
30 |
|
---|
31 | // a few function prototypes
|
---|
32 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr);
|
---|
33 |
|
---|
34 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
35 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
36 | text_tmap &metamap, int &metacount);
|
---|
37 |
|
---|
38 |
|
---|
39 |
|
---|
40 | void metadata_t::clear() {
|
---|
41 | metaindex = 0;
|
---|
42 | parentcommand = pNone;
|
---|
43 | parentindex = 0;
|
---|
44 | parentoptions.clear();
|
---|
45 | };
|
---|
46 |
|
---|
47 | void decision_t::clear() {
|
---|
48 | command = dMeta;
|
---|
49 | meta.clear();
|
---|
50 | };
|
---|
51 |
|
---|
52 |
|
---|
53 | void format_t::clear() {
|
---|
54 | command = comText;
|
---|
55 | decision.clear();
|
---|
56 | text.clear();
|
---|
57 | meta.clear();
|
---|
58 | nextptr = NULL;
|
---|
59 | ifptr = NULL;
|
---|
60 | elseptr = NULL;
|
---|
61 | orptr = NULL;
|
---|
62 | };
|
---|
63 |
|
---|
64 |
|
---|
65 |
|
---|
66 |
|
---|
67 | static void get_parent_options (text_t &instring, metadata_t &metaoption) {
|
---|
68 |
|
---|
69 | metaoption.clear();
|
---|
70 | text_t meta, com, op;
|
---|
71 | bool inbraces = false;
|
---|
72 | bool inquotes = false;
|
---|
73 | bool foundcolon = false;
|
---|
74 | text_t::const_iterator here = instring.begin()+6;
|
---|
75 | text_t::const_iterator end = instring.end();
|
---|
76 | while (here != end) {
|
---|
77 | if (*here == '(') inbraces = true;
|
---|
78 | else if (*here == ')') inbraces = false;
|
---|
79 | else if (*here == '\'' && !inquotes) inquotes = true;
|
---|
80 | else if (*here == '\'' && inquotes) inquotes = false;
|
---|
81 | else if (*here == ':' && !inbraces) foundcolon = true;
|
---|
82 | else if (foundcolon) meta.push_back (*here);
|
---|
83 | else if (inquotes) op.push_back (*here);
|
---|
84 | else com.push_back (*here);
|
---|
85 | here ++;
|
---|
86 | }
|
---|
87 | instring = meta;
|
---|
88 | if (com.empty())
|
---|
89 | metaoption.parentcommand = pImmediate;
|
---|
90 | else if (com == "Top")
|
---|
91 | metaoption.parentcommand = pTop;
|
---|
92 | else if (is_number(com)) {
|
---|
93 | metaoption.parentcommand = pIndex;
|
---|
94 | metaoption.metaindex = com.getint();
|
---|
95 | } else if (com == "All") {
|
---|
96 | metaoption.parentcommand = pAll;
|
---|
97 | metaoption.parentoptions = op;
|
---|
98 | }
|
---|
99 | }
|
---|
100 |
|
---|
101 | static bool parse_meta (text_t &meta, int &count, decision_t &decision,
|
---|
102 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
103 |
|
---|
104 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
105 | getParents = true;
|
---|
106 | metadata_t metaoption;
|
---|
107 | get_parent_options (meta, metaoption);
|
---|
108 | decision.meta = metaoption;
|
---|
109 | }
|
---|
110 | text_tmap::const_iterator it;
|
---|
111 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
112 | decision.meta.metaindex = (*it).second.getint();
|
---|
113 | } else {
|
---|
114 | metamap[meta] = count;
|
---|
115 | metadata.push_back (meta);
|
---|
116 | decision.meta.metaindex = count;
|
---|
117 | count ++;
|
---|
118 | }
|
---|
119 | return true;
|
---|
120 | }
|
---|
121 |
|
---|
122 | static bool parse_meta (text_t &meta, int &count, format_t *formatlistptr,
|
---|
123 | text_tarray &metadata, bool &getParents, text_tmap &metamap) {
|
---|
124 |
|
---|
125 | if (meta == "link")
|
---|
126 | formatlistptr->command = comLink;
|
---|
127 | else if (meta == "/link")
|
---|
128 | formatlistptr->command = comEndLink;
|
---|
129 |
|
---|
130 | else if (meta == "num")
|
---|
131 | formatlistptr->command = comNum;
|
---|
132 |
|
---|
133 | else {
|
---|
134 | formatlistptr->command = comMeta;
|
---|
135 | if (meta.size() > 7 && (substr (meta.begin(), meta.begin()+6) == "parent")) {
|
---|
136 | getParents = true;
|
---|
137 | metadata_t metaoption;
|
---|
138 | get_parent_options (meta, metaoption);
|
---|
139 | formatlistptr->meta = metaoption;
|
---|
140 | }
|
---|
141 | text_tmap::const_iterator it;
|
---|
142 | if ((it = metamap.find(meta)) != metamap.end()) {
|
---|
143 | (formatlistptr->meta).metaindex = (*it).second.getint();
|
---|
144 | } else {
|
---|
145 | metamap[meta] = count;
|
---|
146 | metadata.push_back (meta);
|
---|
147 | (formatlistptr->meta).metaindex = count;
|
---|
148 | count ++;
|
---|
149 | }
|
---|
150 | }
|
---|
151 | return true;
|
---|
152 | }
|
---|
153 |
|
---|
154 | static bool parse_string (const text_t &formatstring, format_t *formatlistptr,
|
---|
155 | text_tarray &metadata, bool &getParents,
|
---|
156 | text_tmap &metamap, int &metacount) {
|
---|
157 |
|
---|
158 | text_t text;
|
---|
159 | text_t::const_iterator here = formatstring.begin();
|
---|
160 | text_t::const_iterator end = formatstring.end();
|
---|
161 |
|
---|
162 | while (here != end) {
|
---|
163 |
|
---|
164 | if (*here == '\\')
|
---|
165 | text.push_back (*(++here));
|
---|
166 |
|
---|
167 | else if (*here == '{') {
|
---|
168 | if (!text.empty()) {
|
---|
169 | formatlistptr->command = comText;
|
---|
170 | formatlistptr->text = text;
|
---|
171 | formatlistptr->nextptr = new format_t();
|
---|
172 | formatlistptr = formatlistptr->nextptr;
|
---|
173 |
|
---|
174 | text.clear();
|
---|
175 | }
|
---|
176 | if (parse_action (++here, end, formatlistptr, metadata,
|
---|
177 | getParents, metamap, metacount)) {
|
---|
178 | formatlistptr->nextptr = new format_t();
|
---|
179 | formatlistptr = formatlistptr->nextptr;
|
---|
180 | if (here == end) break;
|
---|
181 | }
|
---|
182 | } else if (*here == '[') {
|
---|
183 | if (!text.empty()) {
|
---|
184 | formatlistptr->command = comText;
|
---|
185 | formatlistptr->text = text;
|
---|
186 | formatlistptr->nextptr = new format_t();
|
---|
187 | formatlistptr = formatlistptr->nextptr;
|
---|
188 |
|
---|
189 | text.clear();
|
---|
190 | }
|
---|
191 | text_t meta;
|
---|
192 | here ++;
|
---|
193 | while (*here != ']') {
|
---|
194 | if (here == end) return false;
|
---|
195 | meta.push_back (*here);
|
---|
196 | here ++;
|
---|
197 | }
|
---|
198 | if (parse_meta (meta, metacount, formatlistptr, metadata, getParents, metamap)) {
|
---|
199 | formatlistptr->nextptr = new format_t();
|
---|
200 | formatlistptr = formatlistptr->nextptr;
|
---|
201 | }
|
---|
202 |
|
---|
203 | } else
|
---|
204 | text.push_back (*here);
|
---|
205 |
|
---|
206 | here ++;
|
---|
207 | }
|
---|
208 | if (!text.empty()) {
|
---|
209 | formatlistptr->command = comText;
|
---|
210 | formatlistptr->text = text;
|
---|
211 | formatlistptr->nextptr = new format_t();
|
---|
212 | formatlistptr = formatlistptr->nextptr;
|
---|
213 |
|
---|
214 | }
|
---|
215 | return true;
|
---|
216 | }
|
---|
217 |
|
---|
218 |
|
---|
219 | static bool parse_action (text_t::const_iterator &here, const text_t::const_iterator &end,
|
---|
220 | format_t *formatlistptr, text_tarray &metadata, bool &getParents,
|
---|
221 | text_tmap &metamap, int &metacount) {
|
---|
222 |
|
---|
223 | text_t::const_iterator it = findchar (here, end, '}');
|
---|
224 | if (it == end) return false;
|
---|
225 |
|
---|
226 | text_t com = substr (here, it);
|
---|
227 | here = findchar (it, end, '{');
|
---|
228 | if (here == end) return false;
|
---|
229 | else here ++;
|
---|
230 |
|
---|
231 | if (com == "If") formatlistptr->command = comIf;
|
---|
232 | else if (com == "Or") formatlistptr->command = comOr;
|
---|
233 | else return false;
|
---|
234 |
|
---|
235 | int curlycount = 0;
|
---|
236 | int commacount = 0;
|
---|
237 | text_t text;
|
---|
238 | while (here != end) {
|
---|
239 | if (*here == '{') {curlycount ++; text.push_back(*here);}
|
---|
240 | else if (*here == '}' && curlycount > 0) {
|
---|
241 | curlycount --;
|
---|
242 | text.push_back(*here);
|
---|
243 | }
|
---|
244 |
|
---|
245 | else if ((*here == ',' || *here == '}') && curlycount <= 0) {
|
---|
246 |
|
---|
247 | if (formatlistptr->command == comOr) {
|
---|
248 | // the {Or}{this, or this, or this, or this} statement
|
---|
249 | // or'ed statements may be either [metadata] or plain text
|
---|
250 | format_t *or_ptr;
|
---|
251 |
|
---|
252 | // find the next unused orptr
|
---|
253 | if (formatlistptr->orptr == NULL) {
|
---|
254 | formatlistptr->orptr = new format_t();
|
---|
255 | or_ptr = formatlistptr->orptr;
|
---|
256 | } else {
|
---|
257 | or_ptr = formatlistptr->orptr;
|
---|
258 | while (or_ptr->nextptr != NULL)
|
---|
259 | or_ptr = or_ptr->nextptr;
|
---|
260 | or_ptr->nextptr = new format_t();
|
---|
261 | or_ptr = or_ptr->nextptr;
|
---|
262 | }
|
---|
263 |
|
---|
264 | text_t::const_iterator beginbracket = text.begin();
|
---|
265 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
266 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
267 | // it's metadata
|
---|
268 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
269 | parse_meta (meta, metacount, or_ptr, metadata, getParents, metamap);
|
---|
270 |
|
---|
271 | } else {
|
---|
272 | // assume it's plain text
|
---|
273 | or_ptr->command = comText;
|
---|
274 | or_ptr->text = text;
|
---|
275 | }
|
---|
276 | text.clear();
|
---|
277 |
|
---|
278 | } else {
|
---|
279 | // the {If}{decide,do,else} statement
|
---|
280 | if (commacount == 0) {
|
---|
281 | // If decision only supports metadata at present
|
---|
282 |
|
---|
283 | // remove the surrounding square brackets
|
---|
284 | text_t::const_iterator beginbracket = text.begin();
|
---|
285 | text_t::const_iterator endbracket = (text.end() - 1);
|
---|
286 | if ((*beginbracket == '[') && (*endbracket == ']')) {
|
---|
287 | text_t meta = substr (beginbracket+1, endbracket);
|
---|
288 | decision_t decision;
|
---|
289 | parse_meta (meta, metacount, formatlistptr->decision,
|
---|
290 | metadata, getParents, metamap);
|
---|
291 | commacount ++;
|
---|
292 | text.clear();
|
---|
293 | }
|
---|
294 |
|
---|
295 | } else if (commacount == 1) {
|
---|
296 | formatlistptr->ifptr = new format_t();
|
---|
297 | parse_string (text, formatlistptr->ifptr, metadata,
|
---|
298 | getParents, metamap, metacount);
|
---|
299 | commacount ++;
|
---|
300 | text.clear();
|
---|
301 |
|
---|
302 | } else if (commacount == 2) {
|
---|
303 | formatlistptr->elseptr = new format_t();
|
---|
304 | parse_string (text, formatlistptr->elseptr, metadata,
|
---|
305 | getParents, metamap, metacount);
|
---|
306 | commacount ++;
|
---|
307 | text.clear();
|
---|
308 | }
|
---|
309 | }
|
---|
310 | if (*here == '}') break;
|
---|
311 | }
|
---|
312 |
|
---|
313 | else
|
---|
314 | text.push_back(*here);
|
---|
315 |
|
---|
316 | here ++;
|
---|
317 | }
|
---|
318 |
|
---|
319 | return true;
|
---|
320 | }
|
---|
321 |
|
---|
322 |
|
---|
323 | bool parse_formatstring (const text_t &formatstring, format_t *formatlistptr,
|
---|
324 | text_tarray &metadata, bool &getParents) {
|
---|
325 |
|
---|
326 | formatlistptr->clear();
|
---|
327 | metadata.erase (metadata.begin(), metadata.end());
|
---|
328 | getParents = false;
|
---|
329 |
|
---|
330 | text_tmap metamap;
|
---|
331 | int metacount = 0;
|
---|
332 | return (parse_string (formatstring, formatlistptr, metadata,
|
---|
333 | getParents, metamap, metacount));
|
---|
334 | }
|
---|
335 |
|
---|
336 |
|
---|
337 |
|
---|
338 | static text_t get_meta (const ResultDocInfo_t &docinfo, const metadata_t &meta) {
|
---|
339 | int metasize = docinfo.metadata.size();
|
---|
340 | int mindex = meta.metaindex;
|
---|
341 | if (metasize < 1 || metasize <= mindex) return "";
|
---|
342 | int valuesize = docinfo.metadata[mindex].values.size();
|
---|
343 |
|
---|
344 | switch (meta.parentcommand) {
|
---|
345 | case pNone:
|
---|
346 | return docinfo.metadata[mindex].values.back();
|
---|
347 |
|
---|
348 | case pImmediate:
|
---|
349 | if (valuesize > 1)
|
---|
350 | return docinfo.metadata[mindex].values[metasize-2];
|
---|
351 | break;
|
---|
352 |
|
---|
353 | case pTop:
|
---|
354 | if (valuesize > 1)
|
---|
355 | return docinfo.metadata[mindex].values[0];
|
---|
356 | break;
|
---|
357 |
|
---|
358 | case pIndex:
|
---|
359 | if (valuesize > meta.parentindex)
|
---|
360 | return docinfo.metadata[mindex].values[meta.parentindex];
|
---|
361 | break;
|
---|
362 |
|
---|
363 | case pAll:
|
---|
364 | bool first = true;
|
---|
365 | text_t tmp;
|
---|
366 | text_tarray::const_iterator here = docinfo.metadata[mindex].values.begin();
|
---|
367 | text_tarray::const_iterator end = docinfo.metadata[mindex].values.end();
|
---|
368 | while (here != end) {
|
---|
369 | if (!first) tmp += meta.parentoptions;
|
---|
370 | tmp += *here;
|
---|
371 | first = false;
|
---|
372 | here ++;
|
---|
373 | }
|
---|
374 | return tmp;
|
---|
375 | }
|
---|
376 | return "";
|
---|
377 | }
|
---|
378 |
|
---|
379 | static text_t get_or (const ResultDocInfo_t &docinfo, format_t *orptr) {
|
---|
380 |
|
---|
381 | text_t tmp;
|
---|
382 | while (orptr != NULL) {
|
---|
383 |
|
---|
384 | tmp = format_string (docinfo, orptr);
|
---|
385 | if (!tmp.empty()) return tmp;
|
---|
386 |
|
---|
387 | orptr = orptr->nextptr;
|
---|
388 | }
|
---|
389 | return "";
|
---|
390 | }
|
---|
391 |
|
---|
392 | static text_t get_if (const ResultDocInfo_t &docinfo, const decision_t &decision,
|
---|
393 | format_t *ifptr, format_t *elseptr) {
|
---|
394 |
|
---|
395 | // not much of a choice yet ...
|
---|
396 | if (decision.command == dMeta) {
|
---|
397 | if (get_meta (docinfo, decision.meta) != "") {
|
---|
398 | if (ifptr != NULL)
|
---|
399 | return get_formatted_string (docinfo, ifptr);
|
---|
400 | }
|
---|
401 | else {
|
---|
402 | if (elseptr != NULL)
|
---|
403 | return get_formatted_string (docinfo, elseptr);
|
---|
404 | }
|
---|
405 | }
|
---|
406 | return "";
|
---|
407 | }
|
---|
408 |
|
---|
409 | static text_t format_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr) {
|
---|
410 |
|
---|
411 | if (formatlistptr == NULL) return "";
|
---|
412 |
|
---|
413 | switch (formatlistptr->command) {
|
---|
414 | case comText:
|
---|
415 | return formatlistptr->text;
|
---|
416 | case comLink:
|
---|
417 | return "<a href=\"_httpdocument_&cl=search&d=" + docinfo.OID + "\">";
|
---|
418 | case comEndLink:
|
---|
419 | return "</a>";
|
---|
420 | case comNum:
|
---|
421 | return docinfo.result_num;
|
---|
422 | case comMeta:
|
---|
423 | return get_meta (docinfo, formatlistptr->meta);
|
---|
424 | case comIf:
|
---|
425 | return get_if (docinfo, formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr);
|
---|
426 | case comOr:
|
---|
427 | return get_or (docinfo, formatlistptr->orptr);
|
---|
428 | }
|
---|
429 | return "";
|
---|
430 | }
|
---|
431 |
|
---|
432 |
|
---|
433 |
|
---|
434 | text_t get_formatted_string (const ResultDocInfo_t &docinfo, format_t *formatlistptr) {
|
---|
435 |
|
---|
436 | text_t ft;
|
---|
437 | while (formatlistptr != NULL) {
|
---|
438 | ft += format_string (docinfo, formatlistptr);
|
---|
439 | formatlistptr = formatlistptr->nextptr;
|
---|
440 | }
|
---|
441 | return ft;
|
---|
442 | }
|
---|
443 |
|
---|
444 |
|
---|
445 |
|
---|
446 |
|
---|