1 | #include "text_t.h"
|
---|
2 | #include "gdbmclass.h"
|
---|
3 | #include <ctype.h>
|
---|
4 |
|
---|
5 | #ifndef USE_OBJECTSPACE
|
---|
6 | # include <algorithm>
|
---|
7 | #else
|
---|
8 | # include <ospace\std\algorithm>
|
---|
9 | #endif
|
---|
10 |
|
---|
11 |
|
---|
12 | void gdbm_info::clear () {
|
---|
13 | d = 0;
|
---|
14 | t.clear();
|
---|
15 | p.clear();
|
---|
16 | x.clear();
|
---|
17 | c.clear();
|
---|
18 | j.clear();
|
---|
19 | o.clear();
|
---|
20 | a.clear();
|
---|
21 | }
|
---|
22 |
|
---|
23 | // checks if key is present in infodb.
|
---|
24 | // returns 1 on success 0 on failure
|
---|
25 | int gdbmclass::exists (text_t key, text_t collection) {
|
---|
26 |
|
---|
27 | datum key_data;
|
---|
28 | datum return_data;
|
---|
29 |
|
---|
30 | // try to open the database if it's not already open
|
---|
31 | if (!open_database(collection)) {
|
---|
32 | if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
|
---|
33 | return 0;
|
---|
34 | }
|
---|
35 |
|
---|
36 | // sanity check
|
---|
37 | if (gdbm_file == NULL) {
|
---|
38 | if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
|
---|
39 | return 0;
|
---|
40 | }
|
---|
41 |
|
---|
42 | char *db_key = key.getcstr();
|
---|
43 |
|
---|
44 | if (db_key == NULL) {
|
---|
45 | if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
|
---|
46 | return 0;
|
---|
47 | }
|
---|
48 |
|
---|
49 | key_data.dptr = db_key;
|
---|
50 | key_data.dsize = strlen (db_key);
|
---|
51 | return_data = gdbm_fetch (gdbm_file, key_data);
|
---|
52 | delete db_key;
|
---|
53 | db_key = NULL;
|
---|
54 |
|
---|
55 | if (return_data.dptr == NULL) {
|
---|
56 | if (logout != NULL) {
|
---|
57 | (*logout) << "gdbmclass: null data pointer\n";
|
---|
58 | outconvertclass text_t2ascii;
|
---|
59 | (*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
|
---|
60 | }
|
---|
61 | return 0;
|
---|
62 | }
|
---|
63 | free (return_data.dptr);
|
---|
64 | return 1;
|
---|
65 | }
|
---|
66 |
|
---|
67 |
|
---|
68 | // returns 0 on success, -1 on failure
|
---|
69 | // key and collection aren't references as they might be aliases to
|
---|
70 | // something in info
|
---|
71 | int gdbmclass::getinfo (text_t key, text_t collection, gdbm_info &info) {
|
---|
72 |
|
---|
73 | info.clear (); // reset info
|
---|
74 |
|
---|
75 | datum key_data;
|
---|
76 | datum return_data;
|
---|
77 | char ikey[256];
|
---|
78 | char ivalue[16384];
|
---|
79 | int pos = 0;
|
---|
80 |
|
---|
81 | // try to open the database if it's not already open
|
---|
82 | if (!open_database(collection)) {
|
---|
83 | if (logout != NULL) (*logout) << "gdbmclass: can't open database\n";
|
---|
84 | return -1;
|
---|
85 | }
|
---|
86 |
|
---|
87 | // sanity check
|
---|
88 | if (gdbm_file == NULL) {
|
---|
89 | if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
|
---|
90 | return -1;
|
---|
91 | }
|
---|
92 |
|
---|
93 | char *db_key = key.getcstr();
|
---|
94 |
|
---|
95 | if (db_key == NULL) {
|
---|
96 | if (logout != NULL) (*logout) << "gdbmclass: failed sanity check\n";
|
---|
97 | return -1;
|
---|
98 | }
|
---|
99 |
|
---|
100 | key_data.dptr = db_key;
|
---|
101 | key_data.dsize = strlen (db_key);
|
---|
102 | return_data = gdbm_fetch (gdbm_file, key_data);
|
---|
103 | delete db_key;
|
---|
104 | db_key = NULL;
|
---|
105 |
|
---|
106 | if (return_data.dptr == NULL) {
|
---|
107 | if (logout != NULL) {
|
---|
108 | (*logout) << "gdbmclass: null data pointer\n";
|
---|
109 | outconvertclass text_t2ascii;
|
---|
110 | (*logout) << text_t2ascii << "gdbmclass: key was \"" << key << "\"\n";
|
---|
111 | }
|
---|
112 | return -1;
|
---|
113 | }
|
---|
114 |
|
---|
115 | while ((pos = get_infoline(return_data.dptr, return_data.dsize,
|
---|
116 | pos, ikey, ivalue)) >= 0) {
|
---|
117 |
|
---|
118 | if (strcmp (ikey, "d") == 0) {
|
---|
119 | info.d = atoi(ivalue);
|
---|
120 |
|
---|
121 | } else if (strcmp (ikey, "p") == 0) {
|
---|
122 | info.p = ivalue;
|
---|
123 |
|
---|
124 | } else if (strcmp (ikey, "t") == 0) {
|
---|
125 | info.t = ivalue;
|
---|
126 |
|
---|
127 | } else if (strcmp (ikey, "x") == 0) {
|
---|
128 | info.x = ivalue;
|
---|
129 |
|
---|
130 | } else if (strcmp (ikey, "c") == 0) {
|
---|
131 | info.c = ivalue;
|
---|
132 |
|
---|
133 | } else if (strcmp (ikey, "j") == 0) {
|
---|
134 | info.j = ivalue;
|
---|
135 |
|
---|
136 | } else if (strcmp (ikey, "o") == 0) {
|
---|
137 | info.o = ivalue;
|
---|
138 |
|
---|
139 | } else if (strcmp (ikey, "a") == 0) {
|
---|
140 | info.a = ivalue;
|
---|
141 | }
|
---|
142 | }
|
---|
143 |
|
---|
144 | free (return_data.dptr);
|
---|
145 |
|
---|
146 | return 0;
|
---|
147 | }
|
---|
148 |
|
---|
149 | void gdbmclass::setgdbmhome (const text_t &thegdbmhome)
|
---|
150 | {
|
---|
151 | gdbmhome = thegdbmhome;
|
---|
152 | }
|
---|
153 |
|
---|
154 | // parses a line of the form <key>value\n
|
---|
155 | // returns next position, -1 if there was nothing left to process
|
---|
156 | // in the string, -2 if there was an error
|
---|
157 | int gdbmclass::get_infoline (char *str, int len, int pos, char *key, char *value) {
|
---|
158 | int keylen = 0;
|
---|
159 | int valuelen = 0;
|
---|
160 |
|
---|
161 | key[0] = '\0';
|
---|
162 | value[0] = '\0';
|
---|
163 |
|
---|
164 | // ignore white space
|
---|
165 | while (pos < len && isspace(str[pos])) pos++;
|
---|
166 |
|
---|
167 | // get the '<'
|
---|
168 | if (pos >= len) return -1;
|
---|
169 | if (str[pos] != '<') return -2;
|
---|
170 | pos++;
|
---|
171 |
|
---|
172 | // get the key
|
---|
173 | while (pos < len && str[pos] != '>')
|
---|
174 | key[keylen++] = str[pos++];
|
---|
175 |
|
---|
176 | key[keylen] = '\0';
|
---|
177 |
|
---|
178 | // get the '>'
|
---|
179 | if (pos >= len || str[pos] != '>') return -2;
|
---|
180 | pos++;
|
---|
181 |
|
---|
182 | // get the value
|
---|
183 | while (pos < len && str[pos] != '\n')
|
---|
184 | value[valuelen++] = str[pos++];
|
---|
185 |
|
---|
186 | value[valuelen] = '\0';
|
---|
187 |
|
---|
188 | return pos;
|
---|
189 | }
|
---|
190 |
|
---|
191 |
|
---|
192 | // returns 0 if failed, 1 if opened
|
---|
193 | int gdbmclass::open_database (const text_t &collection) {
|
---|
194 | text_t data_location, text_dir;
|
---|
195 | int block_size = 0;
|
---|
196 |
|
---|
197 | if (gdbm_file != NULL) {
|
---|
198 | if (collection != gdbm_open_name) {
|
---|
199 | close_database();
|
---|
200 | }
|
---|
201 | }
|
---|
202 | if (gdbm_file == NULL) {
|
---|
203 | gdbm_open_name = collection;
|
---|
204 | gettextsuffix (collection, text_dir);
|
---|
205 |
|
---|
206 | #ifdef __WIN32__
|
---|
207 | data_location = gdbmhome + "\\" + text_dir;
|
---|
208 | #else
|
---|
209 | data_location = gdbmhome + "/" + text_dir;
|
---|
210 | #endif
|
---|
211 |
|
---|
212 | #ifdef _LITTLE_ENDIAN
|
---|
213 | data_location += ".ldb"; // little endian version of the gdbm database
|
---|
214 | #else
|
---|
215 | data_location += ".bdb"; // big endian version on the gdbm database
|
---|
216 | #endif
|
---|
217 | char *namebuffer = data_location.getcstr();
|
---|
218 | gdbm_file = gdbm_open (namebuffer, block_size, GDBM_READER, 00664, NULL);
|
---|
219 | delete namebuffer;
|
---|
220 | }
|
---|
221 | return (gdbm_file != NULL);
|
---|
222 | }
|
---|
223 |
|
---|
224 |
|
---|
225 | void gdbmclass::close_database () {
|
---|
226 |
|
---|
227 | if (gdbm_file == NULL) return;
|
---|
228 |
|
---|
229 | gdbm_close (gdbm_file);
|
---|
230 | gdbm_file = NULL;
|
---|
231 | }
|
---|
232 |
|
---|
233 |
|
---|
234 |
|
---|
235 | // a few useful functions
|
---|
236 |
|
---|
237 | //////////////////////////////////////////////////////////////////////////////////////////
|
---|
238 | // functions for testing classification strings
|
---|
239 |
|
---|
240 |
|
---|
241 | // returns 1 if targetdoc is top level of a book (i.e. =~ /B\.\d+$/) - otherwise 0;
|
---|
242 | int is_top_level (const text_t &targetdoc) {
|
---|
243 |
|
---|
244 | text_t::const_iterator here = targetdoc.begin();
|
---|
245 | text_t::const_iterator end = targetdoc.end();
|
---|
246 |
|
---|
247 | // look for the 'B'
|
---|
248 | here = findchar (here, end, 'B');
|
---|
249 |
|
---|
250 | // there must be exactly one dot after the 'B'
|
---|
251 | if ((here != end) && (countchar (here, end, '.') == 1))
|
---|
252 | return 1;
|
---|
253 |
|
---|
254 | return 0;
|
---|
255 | }
|
---|
256 |
|
---|
257 | // returns 1 if targetdoc is any level of a book (i.e. contains 'B') - otherwise 0
|
---|
258 | int is_book (const text_t &targetdoc) {
|
---|
259 |
|
---|
260 | text_t::const_iterator here = targetdoc.begin();
|
---|
261 | text_t::const_iterator end = targetdoc.end();
|
---|
262 |
|
---|
263 | here = findchar (here, end, 'B');
|
---|
264 | if (here != end) return 1;
|
---|
265 | return 0;
|
---|
266 | }
|
---|
267 |
|
---|
268 | // returns (in book_top) the top level of the book in targetdoc
|
---|
269 | void get_book_top (const text_t &targetdoc, text_t &book_top) {
|
---|
270 |
|
---|
271 | text_t::const_iterator here = targetdoc.begin();
|
---|
272 | text_t::const_iterator end = targetdoc.end();
|
---|
273 |
|
---|
274 | book_top.clear();
|
---|
275 |
|
---|
276 | // look for the 'B'
|
---|
277 | here = findchar (here, end, 'B');
|
---|
278 |
|
---|
279 | // copy up to the second '.'
|
---|
280 | int founddot = 0;
|
---|
281 | while (here != end) {
|
---|
282 | if (*here == '.') {
|
---|
283 | if (founddot) return;
|
---|
284 | founddot = 1;
|
---|
285 | }
|
---|
286 | book_top.push_back(*here);
|
---|
287 | here++;
|
---|
288 | }
|
---|
289 | }
|
---|
290 |
|
---|
291 | // returns (in book) the book section part of the classification
|
---|
292 | // contained in targetdoc
|
---|
293 | void get_book (const text_t &targetdoc, text_t &book) {
|
---|
294 |
|
---|
295 | text_t::const_iterator here = targetdoc.begin();
|
---|
296 | text_t::const_iterator end = targetdoc.end();
|
---|
297 |
|
---|
298 | book.clear ();
|
---|
299 |
|
---|
300 | // look for the 'B'
|
---|
301 | here = findchar (here, end, 'B');
|
---|
302 |
|
---|
303 | // copy the rest of the string
|
---|
304 | while (here != end) {
|
---|
305 | book.push_back(*here);
|
---|
306 | here ++;
|
---|
307 | }
|
---|
308 | }
|
---|
309 |
|
---|
310 | // get_parent_section removes the last part from section (i.e.=~ s/\.\d+$//)
|
---|
311 | void get_parent_section (text_t §ion) {
|
---|
312 | int founddot = 0;
|
---|
313 | text_t::iterator end;
|
---|
314 | while (!founddot && !section.empty()) {
|
---|
315 | end = section.end();
|
---|
316 | end --;
|
---|
317 | if (*end == '.') founddot = 1;
|
---|
318 | section.pop_back();
|
---|
319 | }
|
---|
320 | }
|
---|
321 |
|
---|
322 | // same as above but also returns ths child section that's removed
|
---|
323 | void get_parent_section (text_t &parentsection, text_t &childsection) {
|
---|
324 | int founddot = 0;
|
---|
325 | text_t tmp;
|
---|
326 | childsection.clear();
|
---|
327 | text_t::iterator end;
|
---|
328 | while (!founddot && !parentsection.empty()) {
|
---|
329 | end = parentsection.end();
|
---|
330 | end --;
|
---|
331 | if (*end == '.') founddot = 1;
|
---|
332 | else tmp.push_back(*end); childsection = tmp + childsection; tmp.clear();
|
---|
333 | parentsection.pop_back();
|
---|
334 | }
|
---|
335 | }
|
---|
336 |
|
---|
337 | // count_dots returns the number of dots ('.') there are
|
---|
338 | // in a range of a targetdoc string
|
---|
339 | int count_dots(text_t::const_iterator first, text_t::const_iterator last) {
|
---|
340 | return countchar (first, last, '.');
|
---|
341 | }
|
---|
342 |
|
---|
343 | int count_dots (const text_t &targetdoc) {
|
---|
344 | return count_dots(targetdoc.begin(), targetdoc.end());
|
---|
345 | }
|
---|
346 |
|
---|
347 | // returns 1 if targetdoc is a first level descendant
|
---|
348 | // (i.e. B.n.1, B.n.1.1, B.n.1.1.1 etc.) - otherwise 0
|
---|
349 | int is_section_top(const text_t &targetdoc) {
|
---|
350 | text_t::const_iterator here = targetdoc.begin();
|
---|
351 | text_t::const_iterator end = targetdoc.end();
|
---|
352 |
|
---|
353 | // look for the 'B'
|
---|
354 | here = findchar (here, end, 'B');
|
---|
355 | here = findchar (here, end, '.');
|
---|
356 | if (here != end) here++; // skip over the '.'
|
---|
357 | here = findchar (here, end, '.');
|
---|
358 |
|
---|
359 | // make sure that all '.' are followed by a '1'
|
---|
360 | while (here != end) {
|
---|
361 | if (*here != '.') return 0;
|
---|
362 | here ++;
|
---|
363 |
|
---|
364 | if (here != end) {
|
---|
365 | if (*here != '1') return 0;
|
---|
366 | here ++;
|
---|
367 | }
|
---|
368 | }
|
---|
369 | return 1;
|
---|
370 | }
|
---|
371 |
|
---|
372 | // seperate_parts seperates targetdoc into its classification and booksection
|
---|
373 | // if classification isn't supplied it gets the first classification for the
|
---|
374 | // book from the gdbm
|
---|
375 | // if booksection doesn't exist it remains blank
|
---|
376 | void seperate_parts(const text_t &targetdoc, gdbmclass &gdbm, const text_t &collection,
|
---|
377 | text_t &classification, text_t &booksection) {
|
---|
378 |
|
---|
379 | split_targetdoc (targetdoc, classification, booksection);
|
---|
380 |
|
---|
381 | if (classification.empty()) {
|
---|
382 | // no classification included so get first one for this book
|
---|
383 | gdbm_info info;
|
---|
384 | text_t book_top;
|
---|
385 | vector<text_t> classarray;
|
---|
386 | get_book_top (targetdoc, book_top);
|
---|
387 | gdbm.getinfo(book_top, collection, info);
|
---|
388 | splitstring (info.x, classarray);
|
---|
389 | if (!classarray.empty()) classification = classarray[0];
|
---|
390 | else classification = "C.1";
|
---|
391 | }
|
---|
392 | }
|
---|
393 |
|
---|
394 | // split_targetdoc splits up a string containing a classification
|
---|
395 | // and book (or one or the other)
|
---|
396 | void split_targetdoc(const text_t &targetdoc, text_t &classification,
|
---|
397 | text_t &booksection) {
|
---|
398 |
|
---|
399 | classification.clear ();
|
---|
400 | booksection.clear();
|
---|
401 |
|
---|
402 | text_t::const_iterator here = targetdoc.begin();
|
---|
403 | text_t::const_iterator end = targetdoc.end();
|
---|
404 |
|
---|
405 | // copy everything up to the first 'B'
|
---|
406 | while (here != end) {
|
---|
407 | if (*here == 'B') break;
|
---|
408 | classification.push_back(*here);
|
---|
409 | here++;
|
---|
410 | }
|
---|
411 |
|
---|
412 | // remove middle '.'
|
---|
413 | if (!classification.empty() &&
|
---|
414 | classification[classification.size()-1] == '.')
|
---|
415 | classification.pop_back();
|
---|
416 |
|
---|
417 | // copy the rest of the string
|
---|
418 | while (here != end) {
|
---|
419 | booksection.push_back(*here);
|
---|
420 | here++;
|
---|
421 | }
|
---|
422 | }
|
---|
423 |
|
---|
424 | // splitstring splits a colon seperated string into an array
|
---|
425 | void splitstring (const text_t &string, vector<text_t> &array) {
|
---|
426 | splitchar (string.begin(), string.end(), ':', array);
|
---|
427 | }
|
---|
428 |
|
---|
429 | // get_parents returns the parents array containing all the parents of the
|
---|
430 | // document specified by classification and booksection
|
---|
431 | void get_parents (const text_t &targetdoc, vector<text_t> &parents)
|
---|
432 | {
|
---|
433 | text_t::const_iterator here = targetdoc.begin ();
|
---|
434 | text_t::const_iterator end = targetdoc.end ();
|
---|
435 |
|
---|
436 | text_t currentparent;
|
---|
437 | text_t newsuffixpart;
|
---|
438 | text_t newsuffix;
|
---|
439 | bool first = true;
|
---|
440 | while (here != end)
|
---|
441 | {
|
---|
442 | // if there is a newsuffix add it to the current parent
|
---|
443 | // and add that parent to the parents vector
|
---|
444 | if (!newsuffix.empty())
|
---|
445 | {
|
---|
446 | currentparent += newsuffix;
|
---|
447 | parents.push_back (currentparent);
|
---|
448 | }
|
---|
449 |
|
---|
450 | // keep getting suffixes until one is found which starts with
|
---|
451 | // a number
|
---|
452 | newsuffix.clear();
|
---|
453 | do
|
---|
454 | {
|
---|
455 | here = getdelimitstr (here, end, '.', newsuffixpart);
|
---|
456 | if (!first) newsuffix.push_back ('.');
|
---|
457 | first = false;
|
---|
458 | newsuffix += newsuffixpart;
|
---|
459 | }
|
---|
460 | while ((here != end) && !newsuffixpart.empty() &&
|
---|
461 | (newsuffixpart[0] < '0' || newsuffixpart[0] > '9'));
|
---|
462 | }
|
---|
463 | }
|
---|
464 |
|
---|
465 |
|
---|
466 | // get_siblings returns the siblings array containing all the siblings of the current
|
---|
467 | // classification or booksection
|
---|
468 | void get_siblings (const text_t &classification, const text_t &booksection,
|
---|
469 | gdbmclass &gdbm, const text_t &collection,
|
---|
470 | vector<text_t> &siblings) {
|
---|
471 |
|
---|
472 | gdbm_info info;
|
---|
473 |
|
---|
474 | if (booksection.empty() && classification.size() == 1) {
|
---|
475 | // top level classification has no siblings
|
---|
476 | return;
|
---|
477 |
|
---|
478 | } else if (booksection.empty()) {
|
---|
479 | // get classification siblings
|
---|
480 | gdbm.getinfo(classification, collection, info);
|
---|
481 | gdbm.getinfo(info.p, collection, info); // info is now parent info
|
---|
482 | splitstring(info.c, siblings);
|
---|
483 | return;
|
---|
484 |
|
---|
485 | } else {
|
---|
486 | // get book section siblings
|
---|
487 | if (is_top_level(booksection)) {
|
---|
488 | // top level of book so siblings are children of classification
|
---|
489 | gdbm.getinfo(classification, collection, info);
|
---|
490 | splitstring(info.c, siblings);
|
---|
491 |
|
---|
492 | // add classifications to book sections
|
---|
493 | for (unsigned int i = 0; i < siblings.size(); i++) {
|
---|
494 | if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
|
---|
495 | }
|
---|
496 |
|
---|
497 | } else {
|
---|
498 | // siblings come from immediate parent
|
---|
499 | gdbm.getinfo(booksection, collection, info);
|
---|
500 | gdbm.getinfo(info.p, collection, info); // info is now parent info
|
---|
501 | splitstring(info.c, siblings);
|
---|
502 |
|
---|
503 | // add classifications to book sections
|
---|
504 | for (unsigned int i = 0; i < siblings.size(); i++) {
|
---|
505 | if (is_book(siblings[i])) siblings[i] = classification + "." + siblings[i];
|
---|
506 | }
|
---|
507 | }
|
---|
508 | }
|
---|
509 | }
|
---|
510 |
|
---|
511 | // compares section 1 and section 2 and returns 1 if section2 belongs to
|
---|
512 | // the same chapter as section1 (i.e. is sibling of or child of or child of sibling)
|
---|
513 | int are_same_chapter(text_t section1, text_t section2)
|
---|
514 | {
|
---|
515 | get_parent_section(section1);
|
---|
516 |
|
---|
517 | while (!section2.empty()) {
|
---|
518 | get_parent_section(section2);
|
---|
519 | if (section2 == section1) return 1;
|
---|
520 | }
|
---|
521 | return 0;
|
---|
522 | }
|
---|
523 |
|
---|
524 | // get_first_section gets the first section from a colon separated
|
---|
525 | // list (instring)
|
---|
526 | void get_first_section(const text_t &instring, text_t &returnstring) {
|
---|
527 |
|
---|
528 | returnstring.clear();
|
---|
529 |
|
---|
530 | text_t::const_iterator here = instring.begin();
|
---|
531 | text_t::const_iterator end = instring.end();
|
---|
532 |
|
---|
533 | while (here != end) {
|
---|
534 | if (*here == ':') return;
|
---|
535 | returnstring.push_back(*here);
|
---|
536 | here ++;
|
---|
537 | }
|
---|
538 | }
|
---|
539 |
|
---|
540 |
|
---|
541 | // removes html tags from string - everything after < will be removed
|
---|
542 | // if < occurs without >
|
---|
543 | void remove_tags (text_t &text)
|
---|
544 | {
|
---|
545 | text_t::const_iterator here = text.begin ();
|
---|
546 | text_t::const_iterator end = text.end ();
|
---|
547 | int found = 0;
|
---|
548 | text_t tmp;
|
---|
549 |
|
---|
550 | while (here != end) {
|
---|
551 | if (*here == '<') {found = 1; here ++; continue;}
|
---|
552 | if (*here == '>') {found = 0; here ++; continue;}
|
---|
553 |
|
---|
554 | if (!found) tmp.push_back(*here);
|
---|
555 | here ++;
|
---|
556 | }
|
---|
557 | text = tmp;
|
---|
558 | }
|
---|
559 |
|
---|
560 | // checks text to see if it is a number (i.e. contains only 0-9)
|
---|
561 | // returns 1 if true, 0 if false
|
---|
562 | int is_number (text_t &text) {
|
---|
563 |
|
---|
564 | text_t::const_iterator here = text.begin();
|
---|
565 | text_t::const_iterator end = text.end();
|
---|
566 |
|
---|
567 | while (here != end) {
|
---|
568 | if ((*here!='0') && (*here!='1') && (*here!='2') &&
|
---|
569 | (*here!='3') && (*here!='4') && (*here!='5') &&
|
---|
570 | (*here!='6') && (*here!='7') && (*here!='8') &&
|
---|
571 | (*here!='9')) return 0;
|
---|
572 | here ++;
|
---|
573 | }
|
---|
574 | return 1;
|
---|
575 | }
|
---|
576 |
|
---|
577 | // functions related to sorting
|
---|
578 |
|
---|
579 | // returns whatever comes after ':#:' in str
|
---|
580 | // -- this is a nasty hack that I'm sure Rodger will want to change ;-)
|
---|
581 | text_t get_section_str(const text_t &str) {
|
---|
582 |
|
---|
583 | text_t ret;
|
---|
584 | int found = 0;
|
---|
585 |
|
---|
586 | text_t::const_iterator here = str.begin();
|
---|
587 | text_t::const_iterator end = str.end();
|
---|
588 |
|
---|
589 | while (here != end) {
|
---|
590 | if (found) {
|
---|
591 | ret.push_back(*here);
|
---|
592 | } else {
|
---|
593 | here = findchar (here, end, ':');
|
---|
594 | if ((*(here+1) == '#') && (*(here+2) == ':')) {
|
---|
595 | found = 1;
|
---|
596 | here = here+2;
|
---|
597 | }
|
---|
598 | }
|
---|
599 | here ++;
|
---|
600 | }
|
---|
601 | return ret;
|
---|
602 | }
|
---|
603 |
|
---|
604 | // removes leading spaces and leading 'the' 'a' and 'an'
|
---|
605 | // from string
|
---|
606 | void alphabetize_string_english (text_t &text) {
|
---|
607 |
|
---|
608 | if (text.empty()) return;
|
---|
609 |
|
---|
610 | text_t firstword;
|
---|
611 | char *word;
|
---|
612 |
|
---|
613 | text_t::iterator here = text.begin();
|
---|
614 | text_t::const_iterator end = text.end();
|
---|
615 |
|
---|
616 | if ((*here != ' ') && (*here != 'a') && (*here != 'A') &&
|
---|
617 | (*here != 't') && (*here != 'T')) return;
|
---|
618 |
|
---|
619 | int foundchar = 0;
|
---|
620 | while (here != end) {
|
---|
621 | if (*here == ' ' && !foundchar) {here ++; continue;}
|
---|
622 | if (*here == ' ' && foundchar) {
|
---|
623 | text.erase(text.begin(), (here+1));
|
---|
624 | break;
|
---|
625 | }
|
---|
626 | foundchar ++;
|
---|
627 | if (foundchar == 1) {
|
---|
628 | getdelimitstr (here, end, ' ', firstword);
|
---|
629 | word = firstword.getcstr();
|
---|
630 | if ((_stricmp(word, "the") != 0) && (_stricmp(word, "a") != 0) &&
|
---|
631 | (_stricmp(word, "an") != 0)) break;
|
---|
632 | }
|
---|
633 | here ++;
|
---|
634 | }
|
---|
635 | delete word;
|
---|
636 | }
|
---|
637 |
|
---|
638 | // removes leading space, puts last name before
|
---|
639 | // any preceeding names
|
---|
640 | void alphabetize_string_name (text_t &text) {
|
---|
641 |
|
---|
642 | if (text.empty()) return;
|
---|
643 |
|
---|
644 | text_t lastname;
|
---|
645 | char *lname;
|
---|
646 | vector<text_t> words;
|
---|
647 | splitchar (text.begin(), text.end(), ' ', words);
|
---|
648 | lastname = words.back();
|
---|
649 | words.pop_back();
|
---|
650 | lname = lastname.getcstr();
|
---|
651 |
|
---|
652 | while ((_stricmp(lname, "jnr") == 0) || (_stricmp(lname, "snr") == 0) ||
|
---|
653 | (_stricmp(lname, "esq") == 0)) {
|
---|
654 | lastname = words.back();
|
---|
655 | words.pop_back();
|
---|
656 | lname = lastname.getcstr();
|
---|
657 | }
|
---|
658 |
|
---|
659 | text.clear();
|
---|
660 | joinchar (words, ' ', text);
|
---|
661 | text = lastname + text;
|
---|
662 | }
|
---|
663 |
|
---|
// appends a copy of str to a malloc'd array of C strings
// array - the current array; may be NULL when *len is 0
// len   - number of entries in array; incremented on success
// str   - string to add; it is duplicated, the caller keeps ownership
// returns the (possibly moved) array, which must replace the caller's
// pointer. if str is NULL or memory runs out, array is returned
// unchanged and *len is not modified. the array and its strings are
// released with string_free
char ** string_add (char **array, int *len, char *str) {
  if (str == NULL) return array;

  // copy the string first so that a failure leaves array untouched
  char *copy = (char*)strdup(str);
  if (copy == NULL) return array;

  char **ret = (char**)realloc(array, ((size_t)*len + 1) * sizeof(char*));
  if (ret == NULL) {
    // realloc leaves the original block intact when it fails
    free(copy);
    return array;
  }

  ret[*len] = copy;
  (*len) ++;

  return ret;
}
|
---|
673 |
|
---|
674 | void string_sort (char **array, int len) {
|
---|
675 | qsort((void*)array, (unsigned int)(len), sizeof(char*), compare_str);
|
---|
676 | }
|
---|
677 |
|
---|
678 | static int compare_str (const void *e1, const void *e2) {
|
---|
679 | return _stricmp(*((char**)e1), *((char**)e2));
|
---|
680 | }
|
---|
681 |
|
---|
// releases an array of C strings: free() is called on each of the
// first len entries and then on the array itself
void string_free(char **array, int len) {
  char **slot = array;
  for (int remaining = len; remaining > 0; --remaining, ++slot) {
    free (*slot);
  }
  free (array);
}
|
---|