source: trunk/gsdl/src/library/gdbmclass.cpp@ 22

Last change on this file since 22 was 22, checked in by sjboddie, 25 years ago

Added functionality to allow books to be sorted on the fly rather than being displayed only in the order they were built.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
Line 
1#include "text_t.h"
2#include "gdbmclass.h"
3#include <ctype.h>
4
5#ifndef USE_OBJECTSPACE
6# include <algorithm>
7#else
8# include <ospace\std\algorithm>
9#endif
10
11
12void gdbm_info::clear () {
13 d = 0;
14 t.clear();
15 p.clear();
16 x.clear();
17 c.clear();
18 j.clear();
19 o.clear();
20 a.clear();
21}
22
23// checks if key is present in infodb.
24// returns 1 on success 0 on failure
25int gdbmclass::exists (text_t key, text_t collection) {
26
27 datum key_data;
28 datum return_data;
29
30 // try to open the database if it's not already open
31 if (!open_database(collection)) {
32 if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
33 return 0;
34 }
35
36 // sanity check
37 if (gdbm_file == NULL) {
38 if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
39 return 0;
40 }
41
42 char *db_key = key.getcstr();
43
44 if (db_key == NULL) {
45 if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
46 return 0;
47 }
48
49 key_data.dptr = db_key;
50 key_data.dsize = strlen (db_key);
51 return_data = gdbm_fetch (gdbm_file, key_data);
52 delete db_key;
53 db_key = NULL;
54
55 if (return_data.dptr == NULL) {
56 if (logout != NULL) {
57 (*logout) << "gdbmclass: null data pointer\n";
58 outconvertclass text_t2ascii;
59 (*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
60 }
61 return 0;
62 }
63 free (return_data.dptr);
64 return 1;
65}
66
67
68// returns 0 on success, -1 on failure
69// key and collection aren't references as they might be aliases to
70// something in info
71int gdbmclass::getinfo (text_t key, text_t collection, gdbm_info &info) {
72
73 info.clear (); // reset info
74
75 datum key_data;
76 datum return_data;
77 char ikey[256];
78 char ivalue[16384];
79 int pos = 0;
80
81 // try to open the database if it's not already open
82 if (!open_database(collection)) {
83 if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
84 return -1;
85 }
86
87 // sanity check
88 if (gdbm_file == NULL) {
89 if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
90 return -1;
91 }
92
93 char *db_key = key.getcstr();
94
95 if (db_key == NULL) {
96 if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
97 return -1;
98 }
99
100 key_data.dptr = db_key;
101 key_data.dsize = strlen (db_key);
102 return_data = gdbm_fetch (gdbm_file, key_data);
103 delete db_key;
104 db_key = NULL;
105
106 if (return_data.dptr == NULL) {
107 if (logout != NULL) {
108 (*logout) << "gdbmclass: null data pointer\n";
109 outconvertclass text_t2ascii;
110 (*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
111 }
112 return -1;
113 }
114
115 while ((pos = get_infoline(return_data.dptr, return_data.dsize,
116 pos, ikey, ivalue)) >= 0) {
117
118 if (strcmp (ikey, "d") == 0) {
119 info.d = atoi(ivalue);
120
121 } else if (strcmp (ikey, "p") == 0) {
122 info.p = ivalue;
123
124 } else if (strcmp (ikey, "t") == 0) {
125 info.t = ivalue;
126
127 } else if (strcmp (ikey, "x") == 0) {
128 info.x = ivalue;
129
130 } else if (strcmp (ikey, "c") == 0) {
131 info.c = ivalue;
132
133 } else if (strcmp (ikey, "j") == 0) {
134 info.j = ivalue;
135
136 } else if (strcmp (ikey, "o") == 0) {
137 info.o = ivalue;
138
139 } else if (strcmp (ikey, "a") == 0) {
140 info.a = ivalue;
141 }
142 }
143
144 free (return_data.dptr);
145
146 return 0;
147}
148
149void gdbmclass::setgdbmhome (const text_t &thegdbmhome)
150{
151 gdbmhome = thegdbmhome;
152}
153
154// parses a line of the form <key>value\n
155// returns next position, -1 if there was nothing left to process
156// in the string, -2 if there was an error
157int gdbmclass::get_infoline (char *str, int len, int pos, char *key, char *value) {
158 int keylen = 0;
159 int valuelen = 0;
160
161 key[0] = '\0';
162 value[0] = '\0';
163
164 // ignore white space
165 while (pos < len && isspace(str[pos])) pos++;
166
167 // get the '<'
168 if (pos >= len) return -1;
169 if (str[pos] != '<') return -2;
170 pos++;
171
172 // get the key
173 while (pos < len && str[pos] != '>')
174 key[keylen++] = str[pos++];
175
176 key[keylen] = '\0';
177
178 // get the '>'
179 if (pos >= len || str[pos] != '>') return -2;
180 pos++;
181
182 // get the value
183 while (pos < len && str[pos] != '\n')
184 value[valuelen++] = str[pos++];
185
186 value[valuelen] = '\0';
187
188 return pos;
189}
190
191
192// returns 0 if failed, 1 if opened
193int gdbmclass::open_database (const text_t &collection) {
194 text_t data_location, text_dir;
195 int block_size = 0;
196
197 if (gdbm_file != NULL) {
198 if (collection != gdbm_open_name) {
199 close_database();
200 }
201 }
202 if (gdbm_file == NULL) {
203 gdbm_open_name = collection;
204 gettextsuffix (collection, text_dir);
205
206#ifdef __WIN32__
207 data_location = gdbmhome + "\\" + text_dir;
208#else
209 data_location = gdbmhome + "/" + text_dir;
210#endif
211
212#ifdef _LITTLE_ENDIAN
213 data_location += ".ldb"; // little endian version of the gdbm database
214#else
215 data_location += ".bdb"; // big endian version on the gdbm database
216#endif
217 char *namebuffer = data_location.getcstr();
218 gdbm_file = gdbm_open (namebuffer, block_size, GDBM_READER, 00664, NULL);
219 delete namebuffer;
220 }
221 return (gdbm_file != NULL);
222}
223
224
225void gdbmclass::close_database () {
226
227 if (gdbm_file == NULL) return;
228
229 gdbm_close (gdbm_file);
230 gdbm_file = NULL;
231}
232
233
234
235// a few useful functions
236
237//////////////////////////////////////////////////////////////////////////////////////////
238// functions for testing classification strings
239
240
241// returns 1 if targetdoc is top level of a book (i.e. =~ /B\.\d+$/) - otherwise 0;
242int is_top_level (const text_t &targetdoc) {
243
244 text_t::const_iterator here = targetdoc.begin();
245 text_t::const_iterator end = targetdoc.end();
246
247 // look for the 'B'
248 here = findchar (here, end, 'B');
249
250 // there must be exactly one dot after the 'B'
251 if ((here != end) && (countchar (here, end, '.') == 1))
252 return 1;
253
254 return 0;
255}
256
257// returns 1 if targetdoc is any level of a book (i.e. contains 'B') - otherwise 0
258int is_book (const text_t &targetdoc) {
259
260 text_t::const_iterator here = targetdoc.begin();
261 text_t::const_iterator end = targetdoc.end();
262
263 here = findchar (here, end, 'B');
264 if (here != end) return 1;
265 return 0;
266}
267
268// returns (in book_top) the top level of the book in targetdoc
269void get_book_top (const text_t &targetdoc, text_t &book_top) {
270
271 text_t::const_iterator here = targetdoc.begin();
272 text_t::const_iterator end = targetdoc.end();
273
274 book_top.clear();
275
276 // look for the 'B'
277 here = findchar (here, end, 'B');
278
279 // copy up to the second '.'
280 int founddot = 0;
281 while (here != end) {
282 if (*here == '.') {
283 if (founddot) return;
284 founddot = 1;
285 }
286 book_top.push_back(*here);
287 here++;
288 }
289}
290
291// returns (in book) the book section part of the classification
292// contained in targetdoc
293void get_book (const text_t &targetdoc, text_t &book) {
294
295 text_t::const_iterator here = targetdoc.begin();
296 text_t::const_iterator end = targetdoc.end();
297
298 book.clear ();
299
300 // look for the 'B'
301 here = findchar (here, end, 'B');
302
303 // copy the rest of the string
304 while (here != end) {
305 book.push_back(*here);
306 here ++;
307 }
308}
309
310// get_parent_section removes the last part from section (i.e.=~ s/\.\d+$//)
311void get_parent_section (text_t &section) {
312 int founddot = 0;
313 text_t::iterator end;
314 while (!founddot && !section.empty()) {
315 end = section.end();
316 end --;
317 if (*end == '.') founddot = 1;
318 section.pop_back();
319 }
320}
321
322// same as above but also returns ths child section that's removed
323void get_parent_section (text_t &parentsection, text_t &childsection) {
324 int founddot = 0;
325 text_t tmp;
326 childsection.clear();
327 text_t::iterator end;
328 while (!founddot && !parentsection.empty()) {
329 end = parentsection.end();
330 end --;
331 if (*end == '.') founddot = 1;
332 else tmp.push_back(*end); childsection = tmp + childsection; tmp.clear();
333 parentsection.pop_back();
334 }
335}
336
337// count_dots returns the number of dots ('.') there are
338// in a range of a targetdoc string
339int count_dots(text_t::const_iterator first, text_t::const_iterator last) {
340 return countchar (first, last, '.');
341}
342
343int count_dots (const text_t &targetdoc) {
344 return count_dots(targetdoc.begin(), targetdoc.end());
345}
346
347// returns 1 if targetdoc is a first level descendant
348// (i.e. B.n.1, B.n.1.1, B.n.1.1.1 etc.) - otherwise 0
349int is_section_top(const text_t &targetdoc) {
350 text_t::const_iterator here = targetdoc.begin();
351 text_t::const_iterator end = targetdoc.end();
352
353 // look for the 'B'
354 here = findchar (here, end, 'B');
355 here = findchar (here, end, '.');
356 if (here != end) here++; // skip over the '.'
357 here = findchar (here, end, '.');
358
359 // make sure that all '.' are followed by a '1'
360 while (here != end) {
361 if (*here != '.') return 0;
362 here ++;
363
364 if (here != end) {
365 if (*here != '1') return 0;
366 here ++;
367 }
368 }
369 return 1;
370}
371
372// seperate_parts seperates targetdoc into its classification and booksection
373// if classification isn't supplied it gets the first classification for the
374// book from the gdbm
375// if booksection doesn't exist it remains blank
376void seperate_parts(const text_t &targetdoc, gdbmclass &gdbm, const text_t &collection,
377 text_t &classification, text_t &booksection) {
378
379 split_targetdoc (targetdoc, classification, booksection);
380
381 if (classification.empty()) {
382 // no classification included so get first one for this book
383 gdbm_info info;
384 text_t book_top;
385 vector<text_t> classarray;
386 get_book_top (targetdoc, book_top);
387 gdbm.getinfo(book_top, collection, info);
388 splitstring (info.x, classarray);
389 if (!classarray.empty()) classification = classarray[0];
390 else classification = "C.1";
391 }
392}
393
394// split_targetdoc splits up a string containing a classification
395// and book (or one or the other)
396void split_targetdoc(const text_t &targetdoc, text_t &classification,
397 text_t &booksection) {
398
399 classification.clear ();
400 booksection.clear();
401
402 text_t::const_iterator here = targetdoc.begin();
403 text_t::const_iterator end = targetdoc.end();
404
405 // copy everything up to the first 'B'
406 while (here != end) {
407 if (*here == 'B') break;
408 classification.push_back(*here);
409 here++;
410 }
411
412 // remove middle '.'
413 if (!classification.empty() &&
414 classification[classification.size()-1] == '.')
415 classification.pop_back();
416
417 // copy the rest of the string
418 while (here != end) {
419 booksection.push_back(*here);
420 here++;
421 }
422}
423
424// splitstring splits a colon seperated string into an array
425void splitstring (const text_t &string, vector<text_t> &array) {
426 splitchar (string.begin(), string.end(), ':', array);
427}
428
429// get_parents returns the parents array containing all the parents of the
430// document specified by classification and booksection
431void get_parents (const text_t &targetdoc, vector<text_t> &parents)
432{
433 text_t::const_iterator here = targetdoc.begin ();
434 text_t::const_iterator end = targetdoc.end ();
435
436 text_t currentparent;
437 text_t newsuffixpart;
438 text_t newsuffix;
439 bool first = true;
440 while (here != end)
441 {
442 // if there is a newsuffix add it to the current parent
443 // and add that parent to the parents vector
444 if (!newsuffix.empty())
445 {
446 currentparent += newsuffix;
447 parents.push_back (currentparent);
448 }
449
450 // keep getting suffixes until one is found which starts with
451 // a number
452 newsuffix.clear();
453 do
454 {
455 here = getdelimitstr (here, end, '.', newsuffixpart);
456 if (!first) newsuffix.push_back ('.');
457 first = false;
458 newsuffix += newsuffixpart;
459 }
460 while ((here != end) && !newsuffixpart.empty() &&
461 (newsuffixpart[0] < '0' || newsuffixpart[0] > '9'));
462 }
463}
464
465
466// get_siblings returns the siblings array containing all the siblings of the current
467// classification or booksection
468void get_siblings (const text_t &classification, const text_t &booksection,
469 gdbmclass &gdbm, const text_t &collection,
470 vector<text_t> &siblings) {
471
472 gdbm_info info;
473
474 if (booksection.empty() && classification.size() == 1) {
475 // top level classification has no siblings
476 return;
477
478 } else if (booksection.empty()) {
479 // get classification siblings
480 gdbm.getinfo(classification, collection, info);
481 gdbm.getinfo(info.p, collection, info); // info is now parent info
482 splitstring(info.c, siblings);
483 return;
484
485 } else {
486 // get book section siblings
487 if (is_top_level(booksection)) {
488 // top level of book so siblings are children of classification
489 gdbm.getinfo(classification, collection, info);
490 splitstring(info.c, siblings);
491
492 // add classifications to book sections
493 for (unsigned int i = 0; i < siblings.size(); i++) {
494 if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
495 }
496
497 } else {
498 // siblings come from immediate parent
499 gdbm.getinfo(booksection, collection, info);
500 gdbm.getinfo(info.p, collection, info); // info is now parent info
501 splitstring(info.c, siblings);
502
503 // add classifications to book sections
504 for (unsigned int i = 0; i < siblings.size(); i++) {
505 if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
506 }
507 }
508 }
509}
510
511// compares section 1 and section 2 and returns 1 if section2 belongs to
512// the same chapter as section1 (i.e. is sibling of or child of or child of sibling)
513int are_same_chapter(text_t section1, text_t section2)
514{
515 get_parent_section(section1);
516
517 while (!section2.empty()) {
518 get_parent_section(section2);
519 if (section2 == section1) return 1;
520 }
521 return 0;
522}
523
524// get_first_section gets the first section from a colon separated
525// list (instring)
526void get_first_section(const text_t &instring, text_t &returnstring) {
527
528 returnstring.clear();
529
530 text_t::const_iterator here = instring.begin();
531 text_t::const_iterator end = instring.end();
532
533 while (here != end) {
534 if (*here == ':') return;
535 returnstring.push_back(*here);
536 here ++;
537 }
538}
539
540
541// removes html tags from string - everything after < will be removed
542// if < occurs without >
543void remove_tags (text_t &text)
544{
545 text_t::const_iterator here = text.begin ();
546 text_t::const_iterator end = text.end ();
547 int found = 0;
548 text_t tmp;
549
550 while (here != end) {
551 if (*here == '<') {found = 1; here ++; continue;}
552 if (*here == '>') {found = 0; here ++; continue;}
553
554 if (!found) tmp.push_back(*here);
555 here ++;
556 }
557 text = tmp;
558}
559
560// checks text to see if it is a number (i.e. contains only 0-9)
561// returns 1 if true, 0 if false
562int is_number (text_t &text) {
563
564 text_t::const_iterator here = text.begin();
565 text_t::const_iterator end = text.end();
566
567 while (here != end) {
568 if ((*here!='0') && (*here!='1') && (*here!='2') &&
569 (*here!='3') && (*here!='4') && (*here!='5') &&
570 (*here!='6') && (*here!='7') && (*here!='8') &&
571 (*here!='9')) return 0;
572 here ++;
573 }
574 return 1;
575}
576
577// functions related to sorting
578
579// returns whatever comes after ':#:' in str
580// -- this is a nasty hack that I'm sure Rodger will want to change ;-)
581text_t get_section_str(const text_t &str) {
582
583 text_t ret;
584 int found = 0;
585
586 text_t::const_iterator here = str.begin();
587 text_t::const_iterator end = str.end();
588
589 while (here != end) {
590 if (found) {
591 ret.push_back(*here);
592 } else {
593 here = findchar (here, end, ':');
594 if ((*(here+1) == '#') && (*(here+2) == ':')) {
595 found = 1;
596 here = here+2;
597 }
598 }
599 here ++;
600 }
601 return ret;
602}
603
604// removes leading spaces and leading 'the' 'a' and 'an'
605// from string
606void alphabetize_string_english (text_t &text) {
607
608 if (text.empty()) return;
609
610 text_t firstword;
611 char *word;
612
613 text_t::iterator here = text.begin();
614 text_t::const_iterator end = text.end();
615
616 if ((*here != ' ') && (*here != 'a') && (*here != 'A') &&
617 (*here != 't') && (*here != 'T')) return;
618
619 int foundchar = 0;
620 while (here != end) {
621 if (*here == ' ' && !foundchar) {here ++; continue;}
622 if (*here == ' ' && foundchar) {
623 text.erase(text.begin(), (here+1));
624 break;
625 }
626 foundchar ++;
627 if (foundchar == 1) {
628 getdelimitstr (here, end, ' ', firstword);
629 word = firstword.getcstr();
630 if ((_stricmp(word, "the") != 0) && (_stricmp(word, "a") != 0) &&
631 (_stricmp(word, "an") != 0)) break;
632 }
633 here ++;
634 }
635 delete word;
636}
637
638// removes leading space, puts last name before
639// any preceeding names
640void alphabetize_string_name (text_t &text) {
641
642 if (text.empty()) return;
643
644 text_t lastname;
645 char *lname;
646 vector<text_t> words;
647 splitchar (text.begin(), text.end(), ' ', words);
648 lastname = words.back();
649 words.pop_back();
650 lname = lastname.getcstr();
651
652 while ((_stricmp(lname, "jnr") == 0) || (_stricmp(lname, "snr") == 0) ||
653 (_stricmp(lname, "esq") == 0)) {
654 lastname = words.back();
655 words.pop_back();
656 lname = lastname.getcstr();
657 }
658
659 text.clear();
660 joinchar (words, ' ', text);
661 text = lastname + text;
662}
663
// Appends a copy of str to a malloc'ed array of C strings.
// array holds *len strings on entry (it may be NULL when *len is 0).
// On success the array is grown by one slot, a strdup'ed copy of str
// is stored in it, *len is incremented and the (possibly moved) array
// is returned.
// If str or len is NULL, *len is negative, or memory runs out, the
// array and *len are left untouched and array is returned as is.
// Release the result with string_free.
char ** string_add (char **array, int *len, char *str) {
  if (len == NULL || str == NULL || *len < 0) return array;

  // copy the string first so that a failure leaves array untouched
  char *copy = strdup(str);
  if (copy == NULL) return array;

  char **ret = (char**)realloc(array, ((size_t)(*len) + 1) * sizeof(char*));
  if (ret == NULL) {
    // realloc failed: the old array is still valid, keep it
    free(copy);
    return array;
  }

  ret[*len] = copy;
  (*len) ++;

  return ret;
}
673
674void string_sort (char **array, int len) {
675 qsort((void*)array, (unsigned int)(len), sizeof(char*), compare_str);
676}
677
678static int compare_str (const void *e1, const void *e2) {
679 return _stricmp(*((char**)e1), *((char**)e2));
680}
681
// Releases an array of len strings: every string is passed to free
// and then the array itself is freed.
void string_free(char **array, int len) {
  int idx = 0;
  while (idx < len) {
    free (array[idx]);
    ++idx;
  }
  free (array);
}
Note: See TracBrowser for help on using the repository browser.