Context Navigation

source: trunk/gsdl/src/library/gdbmclass.cpp@ 22

Last change on this file since 22 was 22, checked in by sjboddie, 25 years ago
Added functionality to allow books to be sorted on the fly rather than being displayed only in the order they were built.
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 17.0 KB

Line
1	#include "text_t.h"
2	#include "gdbmclass.h"
3	#include <ctype.h>
4
5	#ifndef USE_OBJECTSPACE
6	# include <algorithm>
7	#else
8	# include <ospace\std\algorithm>
9	#endif
10
11
12	void gdbm_info::clear () {
13	d = 0;
14	t.clear();
15	p.clear();
16	x.clear();
17	c.clear();
18	j.clear();
19	o.clear();
20	a.clear();
21	}
22
23	// checks if key is present in infodb.
24	// returns 1 on success 0 on failure
25	int gdbmclass::exists (text_t key, text_t collection) {
26
27	datum key_data;
28	datum return_data;
29
30	// try to open the database if it's not already open
31	if (!open_database(collection)) {
32	if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
33	return 0;
34	}
35
36	// sanity check
37	if (gdbm_file == NULL) {
38	if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
39	return 0;
40	}
41
42	char *db_key = key.getcstr();
43
44	if (db_key == NULL) {
45	if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
46	return 0;
47	}
48
49	key_data.dptr = db_key;
50	key_data.dsize = strlen (db_key);
51	return_data = gdbm_fetch (gdbm_file, key_data);
52	delete db_key;
53	db_key = NULL;
54
55	if (return_data.dptr == NULL) {
56	if (logout != NULL) {
57	(*logout) << "gdbmclass: null data pointer\n";
58	outconvertclass text_t2ascii;
59	(*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
60	}
61	return 0;
62	}
63	free (return_data.dptr);
64	return 1;
65	}
66
67
68	// returns 0 on success, -1 on failure
69	// key and collection aren't references as they might be aliases to
70	// something in info
71	int gdbmclass::getinfo (text_t key, text_t collection, gdbm_info &info) {
72
73	info.clear (); // reset info
74
75	datum key_data;
76	datum return_data;
77	char ikey[256];
78	char ivalue[16384];
79	int pos = 0;
80
81	// try to open the database if it's not already open
82	if (!open_database(collection)) {
83	if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
84	return -1;
85	}
86
87	// sanity check
88	if (gdbm_file == NULL) {
89	if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
90	return -1;
91	}
92
93	char *db_key = key.getcstr();
94
95	if (db_key == NULL) {
96	if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
97	return -1;
98	}
99
100	key_data.dptr = db_key;
101	key_data.dsize = strlen (db_key);
102	return_data = gdbm_fetch (gdbm_file, key_data);
103	delete db_key;
104	db_key = NULL;
105
106	if (return_data.dptr == NULL) {
107	if (logout != NULL) {
108	(*logout) << "gdbmclass: null data pointer\n";
109	outconvertclass text_t2ascii;
110	(*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
111	}
112	return -1;
113	}
114
115	while ((pos = get_infoline(return_data.dptr, return_data.dsize,
116	pos, ikey, ivalue)) >= 0) {
117
118	if (strcmp (ikey, "d") == 0) {
119	info.d = atoi(ivalue);
120
121	} else if (strcmp (ikey, "p") == 0) {
122	info.p = ivalue;
123
124	} else if (strcmp (ikey, "t") == 0) {
125	info.t = ivalue;
126
127	} else if (strcmp (ikey, "x") == 0) {
128	info.x = ivalue;
129
130	} else if (strcmp (ikey, "c") == 0) {
131	info.c = ivalue;
132
133	} else if (strcmp (ikey, "j") == 0) {
134	info.j = ivalue;
135
136	} else if (strcmp (ikey, "o") == 0) {
137	info.o = ivalue;
138
139	} else if (strcmp (ikey, "a") == 0) {
140	info.a = ivalue;
141	}
142	}
143
144	free (return_data.dptr);
145
146	return 0;
147	}
148
149	void gdbmclass::setgdbmhome (const text_t &thegdbmhome)
150	{
151	gdbmhome = thegdbmhome;
152	}
153
154	// parses a line of the form <key>value\n
155	// returns next position, -1 if there was nothing left to process
156	// in the string, -2 if there was an error
157	int gdbmclass::get_infoline (char str, int len, int pos, char key, char *value) {
158	int keylen = 0;
159	int valuelen = 0;
160
161	key[0] = '\0';
162	value[0] = '\0';
163
164	// ignore white space
165	while (pos < len && isspace(str[pos])) pos++;
166
167	// get the '<'
168	if (pos >= len) return -1;
169	if (str[pos] != '<') return -2;
170	pos++;
171
172	// get the key
173	while (pos < len && str[pos] != '>')
174	key[keylen++] = str[pos++];
175
176	key[keylen] = '\0';
177
178	// get the '>'
179	if (pos >= len \|\| str[pos] != '>') return -2;
180	pos++;
181
182	// get the value
183	while (pos < len && str[pos] != '\n')
184	value[valuelen++] = str[pos++];
185
186	value[valuelen] = '\0';
187
188	return pos;
189	}
190
191
192	// returns 0 if failed, 1 if opened
193	int gdbmclass::open_database (const text_t &collection) {
194	text_t data_location, text_dir;
195	int block_size = 0;
196
197	if (gdbm_file != NULL) {
198	if (collection != gdbm_open_name) {
199	close_database();
200	}
201	}
202	if (gdbm_file == NULL) {
203	gdbm_open_name = collection;
204	gettextsuffix (collection, text_dir);
205
206	#ifdef __WIN32__
207	data_location = gdbmhome + "\\" + text_dir;
208	#else
209	data_location = gdbmhome + "/" + text_dir;
210	#endif
211
212	#ifdef _LITTLE_ENDIAN
213	data_location += ".ldb"; // little endian version of the gdbm database
214	#else
215	data_location += ".bdb"; // big endian version on the gdbm database
216	#endif
217	char *namebuffer = data_location.getcstr();
218	gdbm_file = gdbm_open (namebuffer, block_size, GDBM_READER, 00664, NULL);
219	delete namebuffer;
220	}
221	return (gdbm_file != NULL);
222	}
223
224
225	void gdbmclass::close_database () {
226
227	if (gdbm_file == NULL) return;
228
229	gdbm_close (gdbm_file);
230	gdbm_file = NULL;
231	}
232
233
234
235	// a few useful functions
236
237	//////////////////////////////////////////////////////////////////////////////////////////
238	// functions for testing classification strings
239
240
241	// returns 1 if targetdoc is top level of a book (i.e. =~ /B\.\d+$/) - otherwise 0;
242	int is_top_level (const text_t &targetdoc) {
243
244	text_t::const_iterator here = targetdoc.begin();
245	text_t::const_iterator end = targetdoc.end();
246
247	// look for the 'B'
248	here = findchar (here, end, 'B');
249
250	// there must be exactly one dot after the 'B'
251	if ((here != end) && (countchar (here, end, '.') == 1))
252	return 1;
253
254	return 0;
255	}
256
257	// returns 1 if targetdoc is any level of a book (i.e. contains 'B') - otherwise 0
258	int is_book (const text_t &targetdoc) {
259
260	text_t::const_iterator here = targetdoc.begin();
261	text_t::const_iterator end = targetdoc.end();
262
263	here = findchar (here, end, 'B');
264	if (here != end) return 1;
265	return 0;
266	}
267
268	// returns (in book_top) the top level of the book in targetdoc
269	void get_book_top (const text_t &targetdoc, text_t &book_top) {
270
271	text_t::const_iterator here = targetdoc.begin();
272	text_t::const_iterator end = targetdoc.end();
273
274	book_top.clear();
275
276	// look for the 'B'
277	here = findchar (here, end, 'B');
278
279	// copy up to the second '.'
280	int founddot = 0;
281	while (here != end) {
282	if (*here == '.') {
283	if (founddot) return;
284	founddot = 1;
285	}
286	book_top.push_back(*here);
287	here++;
288	}
289	}
290
291	// returns (in book) the book section part of the classification
292	// contained in targetdoc
293	void get_book (const text_t &targetdoc, text_t &book) {
294
295	text_t::const_iterator here = targetdoc.begin();
296	text_t::const_iterator end = targetdoc.end();
297
298	book.clear ();
299
300	// look for the 'B'
301	here = findchar (here, end, 'B');
302
303	// copy the rest of the string
304	while (here != end) {
305	book.push_back(*here);
306	here ++;
307	}
308	}
309
310	// get_parent_section removes the last part from section (i.e.=~ s/\.\d+$//)
311	void get_parent_section (text_t &section) {
312	int founddot = 0;
313	text_t::iterator end;
314	while (!founddot && !section.empty()) {
315	end = section.end();
316	end --;
317	if (*end == '.') founddot = 1;
318	section.pop_back();
319	}
320	}
321
322	// same as above but also returns ths child section that's removed
323	void get_parent_section (text_t &parentsection, text_t &childsection) {
324	int founddot = 0;
325	text_t tmp;
326	childsection.clear();
327	text_t::iterator end;
328	while (!founddot && !parentsection.empty()) {
329	end = parentsection.end();
330	end --;
331	if (*end == '.') founddot = 1;
332	else tmp.push_back(*end); childsection = tmp + childsection; tmp.clear();
333	parentsection.pop_back();
334	}
335	}
336
337	// count_dots returns the number of dots ('.') there are
338	// in a range of a targetdoc string
339	int count_dots(text_t::const_iterator first, text_t::const_iterator last) {
340	return countchar (first, last, '.');
341	}
342
343	int count_dots (const text_t &targetdoc) {
344	return count_dots(targetdoc.begin(), targetdoc.end());
345	}
346
347	// returns 1 if targetdoc is a first level descendant
348	// (i.e. B.n.1, B.n.1.1, B.n.1.1.1 etc.) - otherwise 0
349	int is_section_top(const text_t &targetdoc) {
350	text_t::const_iterator here = targetdoc.begin();
351	text_t::const_iterator end = targetdoc.end();
352
353	// look for the 'B'
354	here = findchar (here, end, 'B');
355	here = findchar (here, end, '.');
356	if (here != end) here++; // skip over the '.'
357	here = findchar (here, end, '.');
358
359	// make sure that all '.' are followed by a '1'
360	while (here != end) {
361	if (*here != '.') return 0;
362	here ++;
363
364	if (here != end) {
365	if (*here != '1') return 0;
366	here ++;
367	}
368	}
369	return 1;
370	}
371
372	// seperate_parts seperates targetdoc into its classification and booksection
373	// if classification isn't supplied it gets the first classification for the
374	// book from the gdbm
375	// if booksection doesn't exist it remains blank
376	void seperate_parts(const text_t &targetdoc, gdbmclass &gdbm, const text_t &collection,
377	text_t &classification, text_t &booksection) {
378
379	split_targetdoc (targetdoc, classification, booksection);
380
381	if (classification.empty()) {
382	// no classification included so get first one for this book
383	gdbm_info info;
384	text_t book_top;
385	vector<text_t> classarray;
386	get_book_top (targetdoc, book_top);
387	gdbm.getinfo(book_top, collection, info);
388	splitstring (info.x, classarray);
389	if (!classarray.empty()) classification = classarray[0];
390	else classification = "C.1";
391	}
392	}
393
394	// split_targetdoc splits up a string containing a classification
395	// and book (or one or the other)
396	void split_targetdoc(const text_t &targetdoc, text_t &classification,
397	text_t &booksection) {
398
399	classification.clear ();
400	booksection.clear();
401
402	text_t::const_iterator here = targetdoc.begin();
403	text_t::const_iterator end = targetdoc.end();
404
405	// copy everything up to the first 'B'
406	while (here != end) {
407	if (*here == 'B') break;
408	classification.push_back(*here);
409	here++;
410	}
411
412	// remove middle '.'
413	if (!classification.empty() &&
414	classification[classification.size()-1] == '.')
415	classification.pop_back();
416
417	// copy the rest of the string
418	while (here != end) {
419	booksection.push_back(*here);
420	here++;
421	}
422	}
423
424	// splitstring splits a colon seperated string into an array
425	void splitstring (const text_t &string, vector<text_t> &array) {
426	splitchar (string.begin(), string.end(), ':', array);
427	}
428
429	// get_parents returns the parents array containing all the parents of the
430	// document specified by classification and booksection
431	void get_parents (const text_t &targetdoc, vector<text_t> &parents)
432	{
433	text_t::const_iterator here = targetdoc.begin ();
434	text_t::const_iterator end = targetdoc.end ();
435
436	text_t currentparent;
437	text_t newsuffixpart;
438	text_t newsuffix;
439	bool first = true;
440	while (here != end)
441	{
442	// if there is a newsuffix add it to the current parent
443	// and add that parent to the parents vector
444	if (!newsuffix.empty())
445	{
446	currentparent += newsuffix;
447	parents.push_back (currentparent);
448	}
449
450	// keep getting suffixes until one is found which starts with
451	// a number
452	newsuffix.clear();
453	do
454	{
455	here = getdelimitstr (here, end, '.', newsuffixpart);
456	if (!first) newsuffix.push_back ('.');
457	first = false;
458	newsuffix += newsuffixpart;
459	}
460	while ((here != end) && !newsuffixpart.empty() &&
461	(newsuffixpart[0] < '0' \|\| newsuffixpart[0] > '9'));
462	}
463	}
464
465
466	// get_siblings returns the siblings array containing all the siblings of the current
467	// classification or booksection
468	void get_siblings (const text_t &classification, const text_t &booksection,
469	gdbmclass &gdbm, const text_t &collection,
470	vector<text_t> &siblings) {
471
472	gdbm_info info;
473
474	if (booksection.empty() && classification.size() == 1) {
475	// top level classification has no siblings
476	return;
477
478	} else if (booksection.empty()) {
479	// get classification siblings
480	gdbm.getinfo(classification, collection, info);
481	gdbm.getinfo(info.p, collection, info); // info is now parent info
482	splitstring(info.c, siblings);
483	return;
484
485	} else {
486	// get book section siblings
487	if (is_top_level(booksection)) {
488	// top level of book so siblings are children of classification
489	gdbm.getinfo(classification, collection, info);
490	splitstring(info.c, siblings);
491
492	// add classifications to book sections
493	for (unsigned int i = 0; i < siblings.size(); i++) {
494	if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
495	}
496
497	} else {
498	// siblings come from immediate parent
499	gdbm.getinfo(booksection, collection, info);
500	gdbm.getinfo(info.p, collection, info); // info is now parent info
501	splitstring(info.c, siblings);
502
503	// add classifications to book sections
504	for (unsigned int i = 0; i < siblings.size(); i++) {
505	if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
506	}
507	}
508	}
509	}
510
511	// compares section 1 and section 2 and returns 1 if section2 belongs to
512	// the same chapter as section1 (i.e. is sibling of or child of or child of sibling)
513	int are_same_chapter(text_t section1, text_t section2)
514	{
515	get_parent_section(section1);
516
517	while (!section2.empty()) {
518	get_parent_section(section2);
519	if (section2 == section1) return 1;
520	}
521	return 0;
522	}
523
524	// get_first_section gets the first section from a colon separated
525	// list (instring)
526	void get_first_section(const text_t &instring, text_t &returnstring) {
527
528	returnstring.clear();
529
530	text_t::const_iterator here = instring.begin();
531	text_t::const_iterator end = instring.end();
532
533	while (here != end) {
534	if (*here == ':') return;
535	returnstring.push_back(*here);
536	here ++;
537	}
538	}
539
540
541	// removes html tags from string - everything after < will be removed
542	// if < occurs without >
543	void remove_tags (text_t &text)
544	{
545	text_t::const_iterator here = text.begin ();
546	text_t::const_iterator end = text.end ();
547	int found = 0;
548	text_t tmp;
549
550	while (here != end) {
551	if (*here == '<') {found = 1; here ++; continue;}
552	if (*here == '>') {found = 0; here ++; continue;}
553
554	if (!found) tmp.push_back(*here);
555	here ++;
556	}
557	text = tmp;
558	}
559
560	// checks text to see if it is a number (i.e. contains only 0-9)
561	// returns 1 if true, 0 if false
562	int is_number (text_t &text) {
563
564	text_t::const_iterator here = text.begin();
565	text_t::const_iterator end = text.end();
566
567	while (here != end) {
568	if ((here!='0') && (here!='1') && (*here!='2') &&
569	(here!='3') && (here!='4') && (*here!='5') &&
570	(here!='6') && (here!='7') && (*here!='8') &&
571	(*here!='9')) return 0;
572	here ++;
573	}
574	return 1;
575	}
576
577	// functions related to sorting
578
579	// returns whatever comes after ':#:' in str
580	// -- this is a nasty hack that I'm sure Rodger will want to change ;-)
581	text_t get_section_str(const text_t &str) {
582
583	text_t ret;
584	int found = 0;
585
586	text_t::const_iterator here = str.begin();
587	text_t::const_iterator end = str.end();
588
589	while (here != end) {
590	if (found) {
591	ret.push_back(*here);
592	} else {
593	here = findchar (here, end, ':');
594	if (((here+1) == '#') && ((here+2) == ':')) {
595	found = 1;
596	here = here+2;
597	}
598	}
599	here ++;
600	}
601	return ret;
602	}
603
604	// removes leading spaces and leading 'the' 'a' and 'an'
605	// from string
606	void alphabetize_string_english (text_t &text) {
607
608	if (text.empty()) return;
609
610	text_t firstword;
611	char *word;
612
613	text_t::iterator here = text.begin();
614	text_t::const_iterator end = text.end();
615
616	if ((here != ' ') && (here != 'a') && (*here != 'A') &&
617	(here != 't') && (here != 'T')) return;
618
619	int foundchar = 0;
620	while (here != end) {
621	if (*here == ' ' && !foundchar) {here ++; continue;}
622	if (*here == ' ' && foundchar) {
623	text.erase(text.begin(), (here+1));
624	break;
625	}
626	foundchar ++;
627	if (foundchar == 1) {
628	getdelimitstr (here, end, ' ', firstword);
629	word = firstword.getcstr();
630	if ((_stricmp(word, "the") != 0) && (_stricmp(word, "a") != 0) &&
631	(_stricmp(word, "an") != 0)) break;
632	}
633	here ++;
634	}
635	delete word;
636	}
637
638	// removes leading space, puts last name before
639	// any preceeding names
640	void alphabetize_string_name (text_t &text) {
641
642	if (text.empty()) return;
643
644	text_t lastname;
645	char *lname;
646	vector<text_t> words;
647	splitchar (text.begin(), text.end(), ' ', words);
648	lastname = words.back();
649	words.pop_back();
650	lname = lastname.getcstr();
651
652	while ((_stricmp(lname, "jnr") == 0) \|\| (_stricmp(lname, "snr") == 0) \|\|
653	(_stricmp(lname, "esq") == 0)) {
654	lastname = words.back();
655	words.pop_back();
656	lname = lastname.getcstr();
657	}
658
659	text.clear();
660	joinchar (words, ' ', text);
661	text = lastname + text;
662	}
663
// Appends a copy of str to a malloc'ed array of C strings.
// array  the current array (may be NULL when *len is 0)
// len    in: number of entries in array; out: increased by one when
//        the string was added
// str    the string to copy (the copy is made with strdup)
// returns the array to use from now on; it may have moved, so the
// caller must replace its pointer with the returned one.
// If memory runs out nothing is added and *len is unchanged; the
// returned array is still valid.
char **string_add (char **array, int *len, char *str) {
  if (len == NULL || str == NULL) return array;

  // make room for one more pointer
  char **ret = (char **)realloc(array, (*len + 1) * sizeof(char *));
  if (ret == NULL) return array; // realloc leaves the old block intact

  char *copy = strdup(str);
  if (copy == NULL) return ret; // grown, but nothing stored in the new slot

  ret[*len] = copy;
  (*len) ++;

  return ret;
}
673
// qsort comparison function for an array of C strings: e1 and e2
// point at two elements of the array (i.e. at char pointers), which
// are compared without regard to case
static int compare_str (const void *e1, const void *e2) {
  const char *left = *static_cast<char * const *>(e1);
  const char *right = *static_cast<char * const *>(e2);
  return _stricmp(left, right);
}

// Sorts the len C strings of array in place, ignoring case.
// Only the pointers are moved, the strings themselves stay where
// they are.  Arrays with fewer than two entries need no sorting.
void string_sort (char **array, int len) {
  if (array == NULL || len < 2) return;
  qsort(array, static_cast<size_t>(len), sizeof(char *), compare_str);
}
681
// Releases an array of C strings: each of the first len entries is
// passed to free, and then the array itself is.
void string_free(char **array, int len) {
  int idx = 0;
  while (idx < len) {
    free (array[idx]);
    ++idx;
  }
  free (array);
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: