1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * gdbmclass.cpp --
|
---|
4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * PUT COPYRIGHT NOTICE HERE
|
---|
7 | *
|
---|
8 | * $Id: gdbmclass.cpp 125 1999-01-25 03:59:40Z sjboddie $
|
---|
9 | *
|
---|
10 | *********************************************************************/
|
---|
11 |
|
---|
12 | /*
|
---|
13 | $Log$
|
---|
14 | Revision 1.5 1999/01/25 03:59:40 sjboddie
|
---|
15 | fixed a bug in sorting code
|
---|
16 |
|
---|
17 | Revision 1.4 1999/01/21 21:20:08 sjboddie
|
---|
18 | removed unused collection parameter from several functions
|
---|
19 |
|
---|
20 | Revision 1.3 1999/01/19 01:38:15 rjmcnab
|
---|
21 |
|
---|
22 | Made the source more portable.
|
---|
23 |
|
---|
24 | Revision 1.2 1999/01/12 01:51:01 rjmcnab
|
---|
25 |
|
---|
26 | Standard header.
|
---|
27 |
|
---|
28 | Revision 1.1 1999/01/08 09:02:15 rjmcnab
|
---|
29 |
|
---|
30 | Moved from src/library.
|
---|
31 |
|
---|
32 | */
|
---|
33 |
|
---|
34 |
|
---|
35 | #include "text_t.h"
|
---|
36 | #include "gdbmclass.h"
|
---|
37 | #include "unitool.h"
|
---|
38 | #include "gsdlunicode.h"
|
---|
39 | #include "fileutil.h"
|
---|
40 | #include <ctype.h>
|
---|
41 | #include <string.h>
|
---|
42 |
|
---|
43 | #if defined(GSDL_NEED_STRINGS_H)
|
---|
44 | #include <strings.h>
|
---|
45 | #endif
|
---|
46 |
|
---|
47 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
48 | # include <ospace\std\algorithm>
|
---|
49 | #elif defined(GSDL_USE_STL_H)
|
---|
50 | # if defined(GSDL_USE_ALGO_H)
|
---|
51 | # include <algo.h>
|
---|
52 | # else
|
---|
53 | # include <algorithm.h>
|
---|
54 | # endif
|
---|
55 | #else
|
---|
56 | # include <algorithm>
|
---|
57 | #endif
|
---|
58 |
|
---|
59 |
|
---|
// Case-insensitive comparison of two NUL-terminated C strings.
// The result is whatever the platform routine reports: _stricmp when
// __WIN32__ is defined, strcasecmp otherwise (negative, zero or positive).
static int compare_str (const char *e1, const char *e2) {
#ifndef __WIN32__
  const int order = strcasecmp(e1, e2);
#else
  const int order = _stricmp(e1, e2);
#endif
  return order;
}
|
---|
67 |
|
---|
// qsort-style comparator: e1 and e2 each point at a char* element of the
// array being sorted. The two strings they refer to are compared without
// regard to case (_stricmp under __WIN32__, strcasecmp otherwise).
static int compare_str (const void *e1, const void *e2) {
  const char *lhs = *static_cast<const char * const *>(e1);
  const char *rhs = *static_cast<const char * const *>(e2);
#ifndef __WIN32__
  return strcasecmp(lhs, rhs);
#else
  return _stricmp(lhs, rhs);
#endif
}
|
---|
75 |
|
---|
76 |
|
---|
77 |
|
---|
78 | void gdbm_info::clear () {
|
---|
79 | docnum = 0; // 'd'
|
---|
80 | title.clear(); // 't'
|
---|
81 | parent.clear(); // 'p'
|
---|
82 | classification.clear(); // 'x'
|
---|
83 | contents.clear(); // 'c'
|
---|
84 | jobnum.clear(); // 'j'
|
---|
85 | OID.clear(); // 'o'
|
---|
86 | author.clear(); // 'a'
|
---|
87 | source.clear(); // 's'
|
---|
88 | date.clear(); // 'i'
|
---|
89 | }
|
---|
90 |
|
---|
91 |
|
---|
92 |
|
---|
93 |
|
---|
94 |
|
---|
95 | // returns 0 if failed, 1 if opened
|
---|
96 | int gdbmclass::opendatabase (const text_t &filename) {
|
---|
97 | text_t data_location;
|
---|
98 | int block_size = 0;
|
---|
99 |
|
---|
100 | if (gdbmfile != NULL) {
|
---|
101 | if (openfile == filename) return 1;
|
---|
102 | else closedatabase ();
|
---|
103 | }
|
---|
104 |
|
---|
105 | openfile = filename;
|
---|
106 |
|
---|
107 | char *namebuffer = filename.getcstr();
|
---|
108 | gdbmfile = gdbm_open (namebuffer, block_size, GDBM_READER, 00664, NULL);
|
---|
109 | delete namebuffer;
|
---|
110 |
|
---|
111 | if (gdbmfile == NULL && logout != NULL) {
|
---|
112 | outconvertclass text_t2ascii;
|
---|
113 | (*logout) << text_t2ascii << "database open failed on: " << filename << "\n";
|
---|
114 | }
|
---|
115 |
|
---|
116 | return (gdbmfile != NULL);
|
---|
117 | }
|
---|
118 |
|
---|
119 |
|
---|
120 | void gdbmclass::closedatabase () {
|
---|
121 | if (gdbmfile == NULL) return;
|
---|
122 |
|
---|
123 | gdbm_close (gdbmfile);
|
---|
124 | gdbmfile = NULL;
|
---|
125 | openfile.clear();
|
---|
126 | }
|
---|
127 |
|
---|
128 |
|
---|
129 | // returns 0 on success, -1 on failure
|
---|
130 | // key and collection aren't references as they might be aliases to
|
---|
131 | // something in info
|
---|
132 | int gdbmclass::getinfo (text_t key, gdbm_info &info) {
|
---|
133 | text_t data;
|
---|
134 |
|
---|
135 | if (!getkeydata (key, data)) return -1;
|
---|
136 | text_t::iterator here = data.begin ();
|
---|
137 | text_t::iterator end = data.end ();
|
---|
138 |
|
---|
139 | text_t ikey, ivalue;
|
---|
140 | info.clear (); // reset info
|
---|
141 |
|
---|
142 | while (getinfoline(here, end, ikey, ivalue)) {
|
---|
143 | if (ikey == "d") { info.docnum = ivalue.getint(); }
|
---|
144 | else if (ikey == "t") { info.title = ivalue; }
|
---|
145 | else if (ikey == "p") { info.parent = ivalue; }
|
---|
146 | else if (ikey == "x") { info.classification = ivalue; }
|
---|
147 | else if (ikey == "c") { info.contents = ivalue; }
|
---|
148 | else if (ikey == "j") { info.jobnum = ivalue; }
|
---|
149 | else if (ikey == "o") { info.OID = ivalue; }
|
---|
150 | else if (ikey == "a") { info.author = ivalue; }
|
---|
151 | else if (ikey == "s") { info.source = ivalue; }
|
---|
152 | else if (ikey == "i") { info.date = ivalue; }
|
---|
153 | }
|
---|
154 |
|
---|
155 | return 0;
|
---|
156 | }
|
---|
157 |
|
---|
158 |
|
---|
159 | // returns 1 if the key exists
|
---|
160 | int gdbmclass::exists (text_t key) {
|
---|
161 | text_t data;
|
---|
162 | return getkeydata (key, data);
|
---|
163 | }
|
---|
164 |
|
---|
165 |
|
---|
166 | // returns 1 if successful
|
---|
167 | int gdbmclass::getkeydata (text_t key, text_t &data) {
|
---|
168 | datum key_data;
|
---|
169 | datum return_data;
|
---|
170 |
|
---|
171 | if (gdbmfile == NULL) return 0;
|
---|
172 |
|
---|
173 | // get a utf-8 encoded c string of the unicode key
|
---|
174 | key_data.dptr = (to_utf8(key)).getcstr();
|
---|
175 | if (key_data.dptr == NULL) {
|
---|
176 | if (logout != NULL) (*logout) << "gdbmclass: out of memory\n";
|
---|
177 | return 0;
|
---|
178 | }
|
---|
179 | key_data.dsize = strlen (key_data.dptr);
|
---|
180 |
|
---|
181 | // fetch the result
|
---|
182 | return_data = gdbm_fetch (gdbmfile, key_data);
|
---|
183 | delete key_data.dptr;
|
---|
184 |
|
---|
185 | if (return_data.dptr == NULL) return 0;
|
---|
186 |
|
---|
187 | data.setcarr (return_data.dptr, return_data.dsize);
|
---|
188 | free (return_data.dptr);
|
---|
189 | data = to_uni(data); // convert to unicode
|
---|
190 |
|
---|
191 | return 1;
|
---|
192 | }
|
---|
193 |
|
---|
194 |
|
---|
195 | // parses a line of the form <key>value\n
|
---|
196 | // returns 1 if successful
|
---|
197 | int gdbmclass::getinfoline (text_t::iterator &here, text_t::iterator end,
|
---|
198 | text_t &key, text_t &value) {
|
---|
199 | key.clear();
|
---|
200 | value.clear();
|
---|
201 |
|
---|
202 | // ignore white space
|
---|
203 | while (here != end && is_unicode_space (*here)) here++;
|
---|
204 |
|
---|
205 | // get the '<'
|
---|
206 | if (here == end || *here != '<') return 0;
|
---|
207 | here++;
|
---|
208 |
|
---|
209 | // get the key
|
---|
210 | while (here != end && *here != '>') {
|
---|
211 | key.push_back(*here);
|
---|
212 | here++;
|
---|
213 | }
|
---|
214 |
|
---|
215 | // get the '>'
|
---|
216 | if (here == end || *here != '>') return 0;
|
---|
217 | here++;
|
---|
218 |
|
---|
219 | // get the value
|
---|
220 | while (here != end && *here != '\n') {
|
---|
221 | value.push_back(*here);
|
---|
222 | here++;
|
---|
223 | }
|
---|
224 |
|
---|
225 | return 1;
|
---|
226 | }
|
---|
227 |
|
---|
228 |
|
---|
229 |
|
---|
230 |
|
---|
231 |
|
---|
232 | // a few useful functions
|
---|
233 |
|
---|
234 | //////////////////////////////////////////////////////////////////////////////////////////
|
---|
235 | // functions for testing classification strings
|
---|
236 |
|
---|
237 |
|
---|
238 | // returns 1 if targetdoc is top level of a book (i.e. =~ /B\.\d+$/) - otherwise 0;
|
---|
239 | int is_top_level (const text_t &targetdoc) {
|
---|
240 |
|
---|
241 | text_t::const_iterator here = targetdoc.begin();
|
---|
242 | text_t::const_iterator end = targetdoc.end();
|
---|
243 |
|
---|
244 | // look for the 'B'
|
---|
245 | here = findchar (here, end, 'B');
|
---|
246 |
|
---|
247 | // there must be exactly one dot after the 'B'
|
---|
248 | if ((here != end) && (countchar (here, end, '.') == 1))
|
---|
249 | return 1;
|
---|
250 |
|
---|
251 | return 0;
|
---|
252 | }
|
---|
253 |
|
---|
254 | // returns 1 if targetdoc is any level of a book (i.e. contains 'B') - otherwise 0
|
---|
255 | int is_book (const text_t &targetdoc) {
|
---|
256 |
|
---|
257 | text_t::const_iterator here = targetdoc.begin();
|
---|
258 | text_t::const_iterator end = targetdoc.end();
|
---|
259 |
|
---|
260 | here = findchar (here, end, 'B');
|
---|
261 | if (here != end) return 1;
|
---|
262 | return 0;
|
---|
263 | }
|
---|
264 |
|
---|
265 | // returns (in book_top) the top level of the book in targetdoc
|
---|
266 | void get_book_top (const text_t &targetdoc, text_t &book_top) {
|
---|
267 |
|
---|
268 | text_t::const_iterator here = targetdoc.begin();
|
---|
269 | text_t::const_iterator end = targetdoc.end();
|
---|
270 |
|
---|
271 | book_top.clear();
|
---|
272 |
|
---|
273 | // look for the 'B'
|
---|
274 | here = findchar (here, end, 'B');
|
---|
275 |
|
---|
276 | // copy up to the second '.'
|
---|
277 | int founddot = 0;
|
---|
278 | while (here != end) {
|
---|
279 | if (*here == '.') {
|
---|
280 | if (founddot) return;
|
---|
281 | founddot = 1;
|
---|
282 | }
|
---|
283 | book_top.push_back(*here);
|
---|
284 | here++;
|
---|
285 | }
|
---|
286 | }
|
---|
287 |
|
---|
288 | // returns (in book) the book section part of the classification
|
---|
289 | // contained in targetdoc
|
---|
290 | void get_book (const text_t &targetdoc, text_t &book) {
|
---|
291 |
|
---|
292 | text_t::const_iterator here = targetdoc.begin();
|
---|
293 | text_t::const_iterator end = targetdoc.end();
|
---|
294 |
|
---|
295 | book.clear ();
|
---|
296 |
|
---|
297 | // look for the 'B'
|
---|
298 | here = findchar (here, end, 'B');
|
---|
299 |
|
---|
300 | // copy the rest of the string
|
---|
301 | while (here != end) {
|
---|
302 | book.push_back(*here);
|
---|
303 | here ++;
|
---|
304 | }
|
---|
305 | }
|
---|
306 |
|
---|
307 | // get_parent_section removes the last part from section (i.e.=~ s/\.\d+$//)
|
---|
308 | void get_parent_section (text_t §ion) {
|
---|
309 | int founddot = 0;
|
---|
310 | text_t::iterator end;
|
---|
311 | while (!founddot && !section.empty()) {
|
---|
312 | end = section.end();
|
---|
313 | end --;
|
---|
314 | if (*end == '.') founddot = 1;
|
---|
315 | section.pop_back();
|
---|
316 | }
|
---|
317 | }
|
---|
318 |
|
---|
319 | // same as above but also returns ths child section that's removed
|
---|
320 | void get_parent_section (text_t &parentsection, text_t &childsection) {
|
---|
321 | int founddot = 0;
|
---|
322 | text_t tmp;
|
---|
323 | childsection.clear();
|
---|
324 | text_t::iterator end;
|
---|
325 | while (!founddot && !parentsection.empty()) {
|
---|
326 | end = parentsection.end();
|
---|
327 | end --;
|
---|
328 | if (*end == '.') founddot = 1;
|
---|
329 | else tmp.push_back(*end); childsection = tmp + childsection; tmp.clear();
|
---|
330 | parentsection.pop_back();
|
---|
331 | }
|
---|
332 | }
|
---|
333 |
|
---|
334 | // count_dots returns the number of dots ('.') there are
|
---|
335 | // in a range of a targetdoc string
|
---|
336 | int count_dots(text_t::const_iterator first, text_t::const_iterator last) {
|
---|
337 | return countchar (first, last, '.');
|
---|
338 | }
|
---|
339 |
|
---|
340 | int count_dots (const text_t &targetdoc) {
|
---|
341 | return count_dots(targetdoc.begin(), targetdoc.end());
|
---|
342 | }
|
---|
343 |
|
---|
344 | // returns 1 if targetdoc is a first level descendant
|
---|
345 | // (i.e. B.n.1, B.n.1.1, B.n.1.1.1 etc.) - otherwise 0
|
---|
346 | int is_section_top(const text_t &targetdoc) {
|
---|
347 | text_t::const_iterator here = targetdoc.begin();
|
---|
348 | text_t::const_iterator end = targetdoc.end();
|
---|
349 |
|
---|
350 | // look for the 'B'
|
---|
351 | here = findchar (here, end, 'B');
|
---|
352 | here = findchar (here, end, '.');
|
---|
353 | if (here != end) here++; // skip over the '.'
|
---|
354 | here = findchar (here, end, '.');
|
---|
355 |
|
---|
356 | // make sure that all '.' are followed by a '1'
|
---|
357 | while (here != end) {
|
---|
358 | if (*here != '.') return 0;
|
---|
359 | here ++;
|
---|
360 |
|
---|
361 | if (here != end) {
|
---|
362 | if (*here != '1') return 0;
|
---|
363 | here ++;
|
---|
364 | }
|
---|
365 | }
|
---|
366 | return 1;
|
---|
367 | }
|
---|
368 |
|
---|
369 | // seperate_parts seperates targetdoc into its classification and booksection
|
---|
370 | // if classification isn't supplied it gets the first classification for the
|
---|
371 | // book from the gdbm
|
---|
372 | // if booksection doesn't exist it remains blank
|
---|
373 | void separate_parts(const text_t &targetdoc, gdbmclass &gdbm,
|
---|
374 | text_t &classification, text_t &booksection) {
|
---|
375 |
|
---|
376 | split_targetdoc (targetdoc, classification, booksection);
|
---|
377 |
|
---|
378 | if (classification.empty()) {
|
---|
379 | // no classification included so get first one for this book
|
---|
380 | gdbm_info info;
|
---|
381 | text_t book_top;
|
---|
382 | vector<text_t> classarray;
|
---|
383 | get_book_top (targetdoc, book_top);
|
---|
384 | gdbm.getinfo(book_top, info);
|
---|
385 | splitstring (info.classification, classarray);
|
---|
386 | if (!classarray.empty()) classification = classarray[0];
|
---|
387 | else classification = "C.1";
|
---|
388 | }
|
---|
389 | }
|
---|
390 |
|
---|
391 | // split_targetdoc splits up a string containing a classification
|
---|
392 | // and book (or one or the other)
|
---|
393 | void split_targetdoc(const text_t &targetdoc, text_t &classification,
|
---|
394 | text_t &booksection) {
|
---|
395 |
|
---|
396 | classification.clear ();
|
---|
397 | booksection.clear();
|
---|
398 |
|
---|
399 | text_t::const_iterator here = targetdoc.begin();
|
---|
400 | text_t::const_iterator end = targetdoc.end();
|
---|
401 |
|
---|
402 | // copy everything up to the first 'B'
|
---|
403 | while (here != end) {
|
---|
404 | if (*here == 'B') break;
|
---|
405 | classification.push_back(*here);
|
---|
406 | here++;
|
---|
407 | }
|
---|
408 |
|
---|
409 | // remove middle '.'
|
---|
410 | if (!classification.empty() &&
|
---|
411 | classification[classification.size()-1] == '.')
|
---|
412 | classification.pop_back();
|
---|
413 |
|
---|
414 | // copy the rest of the string
|
---|
415 | while (here != end) {
|
---|
416 | booksection.push_back(*here);
|
---|
417 | here++;
|
---|
418 | }
|
---|
419 | }
|
---|
420 |
|
---|
421 | // splitstring splits a colon seperated string into an array
|
---|
422 | void splitstring (const text_t &string, vector<text_t> &array) {
|
---|
423 | splitchar (string.begin(), string.end(), ':', array);
|
---|
424 | }
|
---|
425 |
|
---|
426 | // get_parents returns the parents array containing all the parents of the
|
---|
427 | // document specified by classification and booksection
|
---|
428 | void get_parents (const text_t &targetdoc, vector<text_t> &parents)
|
---|
429 | {
|
---|
430 | text_t::const_iterator here = targetdoc.begin ();
|
---|
431 | text_t::const_iterator end = targetdoc.end ();
|
---|
432 |
|
---|
433 | text_t currentparent;
|
---|
434 | text_t newsuffixpart;
|
---|
435 | text_t newsuffix;
|
---|
436 | bool first = true;
|
---|
437 | while (here != end)
|
---|
438 | {
|
---|
439 | // if there is a newsuffix add it to the current parent
|
---|
440 | // and add that parent to the parents vector
|
---|
441 | if (!newsuffix.empty())
|
---|
442 | {
|
---|
443 | currentparent += newsuffix;
|
---|
444 | parents.push_back (currentparent);
|
---|
445 | }
|
---|
446 |
|
---|
447 | // keep getting suffixes until one is found which starts with
|
---|
448 | // a number
|
---|
449 | newsuffix.clear();
|
---|
450 | do
|
---|
451 | {
|
---|
452 | here = getdelimitstr (here, end, '.', newsuffixpart);
|
---|
453 | if (!first) newsuffix.push_back ('.');
|
---|
454 | first = false;
|
---|
455 | newsuffix += newsuffixpart;
|
---|
456 | }
|
---|
457 | while ((here != end) && !newsuffixpart.empty() &&
|
---|
458 | (newsuffixpart[0] < '0' || newsuffixpart[0] > '9'));
|
---|
459 | }
|
---|
460 | }
|
---|
461 |
|
---|
462 |
|
---|
463 | // get_siblings returns the siblings array containing all the siblings of the current
|
---|
464 | // classification or booksection
|
---|
465 | void get_siblings (const text_t &classification, const text_t &booksection,
|
---|
466 | gdbmclass &gdbm, vector<text_t> &siblings) {
|
---|
467 |
|
---|
468 | gdbm_info info;
|
---|
469 |
|
---|
470 | if (booksection.empty() && classification.size() == 1) {
|
---|
471 | // top level classification has no siblings
|
---|
472 | return;
|
---|
473 |
|
---|
474 | } else if (booksection.empty()) {
|
---|
475 | // get classification siblings
|
---|
476 | gdbm.getinfo(classification, info);
|
---|
477 | gdbm.getinfo(info.parent, info); // info is now parent info
|
---|
478 | splitstring(info.contents, siblings);
|
---|
479 | return;
|
---|
480 |
|
---|
481 | } else {
|
---|
482 | // get book section siblings
|
---|
483 | if (is_top_level(booksection)) {
|
---|
484 | // top level of book so siblings are children of classification
|
---|
485 | gdbm.getinfo(classification, info);
|
---|
486 | splitstring(info.contents, siblings);
|
---|
487 |
|
---|
488 | // add classifications to book sections
|
---|
489 | for (unsigned int i = 0; i < siblings.size(); i++) {
|
---|
490 | if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
|
---|
491 | }
|
---|
492 |
|
---|
493 | } else {
|
---|
494 | // siblings come from immediate parent
|
---|
495 | gdbm.getinfo(booksection, info);
|
---|
496 | gdbm.getinfo(info.parent, info); // info is now parent info
|
---|
497 | splitstring(info.contents, siblings);
|
---|
498 |
|
---|
499 | // add classifications to book sections
|
---|
500 | for (unsigned int i = 0; i < siblings.size(); i++) {
|
---|
501 | if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
|
---|
502 | }
|
---|
503 | }
|
---|
504 | }
|
---|
505 | }
|
---|
506 |
|
---|
507 | // compares section 1 and section 2 and returns 1 if section2 belongs to
|
---|
508 | // the same chapter as section1 (i.e. is sibling of or child of or child of sibling)
|
---|
509 | int are_same_chapter(text_t section1, text_t section2)
|
---|
510 | {
|
---|
511 | get_parent_section(section1);
|
---|
512 |
|
---|
513 | while (!section2.empty()) {
|
---|
514 | get_parent_section(section2);
|
---|
515 | if (section2 == section1) return 1;
|
---|
516 | }
|
---|
517 | return 0;
|
---|
518 | }
|
---|
519 |
|
---|
520 | // get_first_section gets the first section from a colon separated
|
---|
521 | // list (instring)
|
---|
522 | void get_first_section(const text_t &instring, text_t &returnstring) {
|
---|
523 |
|
---|
524 | returnstring.clear();
|
---|
525 |
|
---|
526 | text_t::const_iterator here = instring.begin();
|
---|
527 | text_t::const_iterator end = instring.end();
|
---|
528 |
|
---|
529 | while (here != end) {
|
---|
530 | if (*here == ':') return;
|
---|
531 | returnstring.push_back(*here);
|
---|
532 | here ++;
|
---|
533 | }
|
---|
534 | }
|
---|
535 |
|
---|
536 |
|
---|
537 | // removes html tags from string - everything after < will be removed
|
---|
538 | // if < occurs without >
|
---|
539 | void remove_tags (text_t &text)
|
---|
540 | {
|
---|
541 | text_t::const_iterator here = text.begin ();
|
---|
542 | text_t::const_iterator end = text.end ();
|
---|
543 | int found = 0;
|
---|
544 | text_t tmp;
|
---|
545 |
|
---|
546 | while (here != end) {
|
---|
547 | if (*here == '<') {found = 1; here ++; continue;}
|
---|
548 | if (*here == '>') {found = 0; here ++; continue;}
|
---|
549 |
|
---|
550 | if (!found) tmp.push_back(*here);
|
---|
551 | here ++;
|
---|
552 | }
|
---|
553 | text = tmp;
|
---|
554 | }
|
---|
555 |
|
---|
556 | // checks text to see if it is a number (i.e. contains only 0-9)
|
---|
557 | // returns 1 if true, 0 if false
|
---|
558 | int is_number (text_t &text) {
|
---|
559 |
|
---|
560 | text_t::const_iterator here = text.begin();
|
---|
561 | text_t::const_iterator end = text.end();
|
---|
562 |
|
---|
563 | while (here != end) {
|
---|
564 | if ((*here!='0') && (*here!='1') && (*here!='2') &&
|
---|
565 | (*here!='3') && (*here!='4') && (*here!='5') &&
|
---|
566 | (*here!='6') && (*here!='7') && (*here!='8') &&
|
---|
567 | (*here!='9')) return 0;
|
---|
568 | here ++;
|
---|
569 | }
|
---|
570 | return 1;
|
---|
571 | }
|
---|
572 |
|
---|
573 | // functions related to sorting
|
---|
574 |
|
---|
575 | // returns whatever comes after ':#:' in str
|
---|
576 | // -- this is a nasty hack that I'm sure Rodger will want to change ;-)
|
---|
577 | text_t get_section_str(const text_t &str) {
|
---|
578 |
|
---|
579 | text_t ret;
|
---|
580 | int found = 0;
|
---|
581 |
|
---|
582 | text_t::const_iterator here = str.begin();
|
---|
583 | text_t::const_iterator end = str.end();
|
---|
584 |
|
---|
585 | while (here != end) {
|
---|
586 | if (found) {
|
---|
587 | ret.push_back(*here);
|
---|
588 | } else {
|
---|
589 | here = findchar (here, end, ':');
|
---|
590 | if ((*(here+1) == '#') && (*(here+2) == ':')) {
|
---|
591 | found = 1;
|
---|
592 | here = here+2;
|
---|
593 | }
|
---|
594 | }
|
---|
595 | here ++;
|
---|
596 | }
|
---|
597 | return ret;
|
---|
598 | }
|
---|
599 |
|
---|
600 | // removes leading spaces and leading 'the' 'a' and 'an'
|
---|
601 | // from string
|
---|
602 | void alphabetize_string_english (text_t &text) {
|
---|
603 |
|
---|
604 | if (text.empty()) return;
|
---|
605 |
|
---|
606 | text_t firstword;
|
---|
607 | char *word;
|
---|
608 |
|
---|
609 | text_t::iterator here = text.begin();
|
---|
610 | text_t::const_iterator end = text.end();
|
---|
611 |
|
---|
612 | if ((*here != ' ') && (*here != 'a') && (*here != 'A') &&
|
---|
613 | (*here != 't') && (*here != 'T')) return;
|
---|
614 |
|
---|
615 | int foundchar = 0;
|
---|
616 | while (here != end) {
|
---|
617 | if (*here == ' ' && !foundchar) {here ++; continue;}
|
---|
618 | if (*here == ' ' && foundchar) {
|
---|
619 | text.erase(text.begin(), (here+1));
|
---|
620 | break;
|
---|
621 | }
|
---|
622 | foundchar ++;
|
---|
623 | if (foundchar == 1) {
|
---|
624 | getdelimitstr (here, end, ' ', firstword);
|
---|
625 | word = firstword.getcstr();
|
---|
626 | if ((compare_str(word, "the") != 0) && (compare_str(word, "a") != 0) &&
|
---|
627 | (compare_str(word, "an") != 0)) break;
|
---|
628 | }
|
---|
629 | here ++;
|
---|
630 | }
|
---|
631 | delete word;
|
---|
632 | }
|
---|
633 |
|
---|
634 | // removes leading space, puts last name before
|
---|
635 | // any preceeding names
|
---|
636 | void alphabetize_string_name (text_t &text) {
|
---|
637 |
|
---|
638 | if (text.empty()) return;
|
---|
639 |
|
---|
640 | text_t lastname;
|
---|
641 | char *lname;
|
---|
642 | vector<text_t> words;
|
---|
643 | splitchar (text.begin(), text.end(), ' ', words);
|
---|
644 | lastname = words.back();
|
---|
645 | words.pop_back();
|
---|
646 | lname = lastname.getcstr();
|
---|
647 |
|
---|
648 | while ((compare_str(lname, "jnr") == 0) || (compare_str(lname, "snr") == 0) ||
|
---|
649 | (compare_str(lname, "esq") == 0)) {
|
---|
650 | lastname = words.back();
|
---|
651 | words.pop_back();
|
---|
652 | lname = lastname.getcstr();
|
---|
653 | }
|
---|
654 |
|
---|
655 | text.clear();
|
---|
656 | joinchar (words, ' ', text);
|
---|
657 | text = lastname + text;
|
---|
658 | }
|
---|
659 |
|
---|
// Appends a copy of str to the malloc-managed string array of *len entries.
// array may be NULL when *len is 0. On success the (possibly moved) array
// is returned and *len is incremented; the new entry is a strdup copy.
// If len or str is NULL, or memory cannot be obtained, the original array
// is returned unchanged and *len is left as it was.
char ** string_add (char **array, int *len, char *str) {
  if (len == NULL || str == NULL) return array;

  // copy the string first so that a failure leaves the array untouched
  char *copy = strdup(str);
  if (copy == NULL) return array;

  char **grown = static_cast<char**>(realloc(array, (*len+1)*sizeof(char*)));
  if (grown == NULL) {
    // realloc leaves the old block valid on failure
    free (copy);
    return array;
  }

  grown[*len] = copy;
  (*len) ++;

  return grown;
}
|
---|
669 |
|
---|
670 | void string_sort (char **array, int len) {
|
---|
671 | qsort((void*)array, (unsigned int)(len), sizeof(char*), compare_str);
|
---|
672 | }
|
---|
673 |
|
---|
// Releases, with free, each of the first len strings of array and then the
// array itself.
void string_free (char **array, int len) {
  char **slot = array;
  for (int remaining = len; remaining > 0; --remaining, ++slot)
    free (*slot);

  free (array);
}
|
---|
679 |
|
---|
680 | // returns a date of form _dec_ 31, 1999
|
---|
681 | // input is date of type 19991231
|
---|
682 | // at least the year must be present in date
|
---|
683 | text_t format_date (const text_t &date) {
|
---|
684 | text_t::const_iterator here = date.begin();
|
---|
685 | text_t::const_iterator end = date.end();
|
---|
686 |
|
---|
687 | text_t year, month, day, dreturn;
|
---|
688 | int i;
|
---|
689 |
|
---|
690 | for (i = 0; i < 4 && here != end; i++) {
|
---|
691 | year.push_back(*here);
|
---|
692 | here ++;
|
---|
693 | }
|
---|
694 | if (year.empty()) return "";
|
---|
695 |
|
---|
696 | for (i = 0; i < 2 && here != end; i++) {
|
---|
697 | month.push_back(*here);
|
---|
698 | here ++;
|
---|
699 | }
|
---|
700 | for (i = 0; i < 2 && here != end; i++) {
|
---|
701 | day.push_back(*here);
|
---|
702 | here ++;
|
---|
703 | }
|
---|
704 |
|
---|
705 | if (!month.empty()) format_month(month);
|
---|
706 |
|
---|
707 | if (!day.empty()) format_day(day);
|
---|
708 |
|
---|
709 | if (!month.empty()) {
|
---|
710 | dreturn += month + " ";
|
---|
711 | if (!day.empty()) {
|
---|
712 | dreturn += day + ", ";
|
---|
713 | }
|
---|
714 | }
|
---|
715 | dreturn += year;
|
---|
716 | return dreturn;
|
---|
717 | }
|
---|
718 |
|
---|
719 | void format_month (text_t &month) {
|
---|
720 | if (month == "01") month = "_jan_";
|
---|
721 | else if (month == "02") month = "_feb_";
|
---|
722 | else if (month == "03") month = "_mar_";
|
---|
723 | else if (month == "04") month = "_apr_";
|
---|
724 | else if (month == "05") month = "_may_";
|
---|
725 | else if (month == "06") month = "_jun_";
|
---|
726 | else if (month == "07") month = "_jul_";
|
---|
727 | else if (month == "08") month = "_aug_";
|
---|
728 | else if (month == "09") month = "_sep_";
|
---|
729 | else if (month == "10") month = "_oct_";
|
---|
730 | else if (month == "11") month = "_nov_";
|
---|
731 | else if (month == "12") month = "_dec_";
|
---|
732 | else month.clear();
|
---|
733 | }
|
---|
734 |
|
---|
735 | void format_day(text_t &day) {
|
---|
736 | if (day[0] == '0') {
|
---|
737 | char tmp = day[1];
|
---|
738 | day.clear();
|
---|
739 | day.push_back(tmp);
|
---|
740 | }
|
---|
741 | }
|
---|
742 |
|
---|