source: trunk/gsdl/src/phind/host/phindcgi.cpp@ 1809

Last change on this file since 1809 was 1809, checked in by paynter, 23 years ago

Sundry improvements. Better handling of empty prefixes & suffixes, full
text of a phrase no longer transmitted (only prefix & suffix), error
messages returned in XML when in xml mode.

  • Property svn:keywords set to Author Date Id Revision
File size: 22.4 KB
Line 
1/**********************************************************************
2 *
3 * phindcgi.cpp -- cgi program to serve phind phrase hierarchies
4 *
5 * Copyright 2000 Gordon W. Paynter
6 * Copyright 2000 The New Zealand Digital Library Project
7 *
8 *
9 * A component of the Greenstone digital library software
10 * from the New Zealand Digital Library Project at the
11 * University of Waikato, New Zealand.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *
27 *********************************************************************/
28
29/*
30 * phindcgi.cpp
31 *
32 * The program itself reads a request for a phrase's data from the
33 * QUERY_STRING variable, looks up the phrase (if necessary) in the MGPP
34 * pword database, then looks up the phrase's characteristics in the MGPP
35 * pdata database, and reports output to STDOUT as crude HTML or XML.
36 *
37 */
38
39
40#include <iostream.h>
41#include <fstream.h>
42#include <stdlib.h>
43#include <stdio.h>
44#include <assert.h>
45
46#include <vector.h>
47#include <algo.h>
48
49// Include MGPP functionality.
50#include <TextGet.h>
51#include <MGQuery.h>
52#include <Terms.h>
53#include <messages.h>
54#include <GSDLQueryParser.h>
55
56// Include GSDL's text_t object, which makes parsing cgi arguments easier.
57#include <text_t.h>
58// Note that GSDL stores strings as text_t objects (vectors of 16-bit short int),
59// while MGPP stores strings as UCArray objects (vectors of 8-bit unsigned char).
60
61
62
// Read the gsdlhome setting from the gsdlsite.cfg file.
63void get_gsdlsite_parameters(char *&gsdlhome);
64
// Parse the CGI query string into the collection name, the phrase number
// and/or phrase text, the expansion and document ranges, and the XML flag.
65void get_cgi_parameters(char *&collection,
66 unsigned long &phrasenumber, UCArray &phrasetext,
67 unsigned long &first_e, unsigned long &last_e,
68 unsigned long &first_d, unsigned long &last_d,
69 bool &XMLmode);
70
// Print entries first..last-1 of an expansion list, as XML or HTML rows.
71void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body,
72 TextData &textdata, vector <unsigned long> elist,
73 unsigned long first, unsigned long last);
74
// Print entries first..last-1 of a document list, as XML or HTML rows.
75void print_documents(bool XMLmode, char *basepath, char *cgi_script,
76 char *collection,
77 vector <unsigned long> docNums,
78 vector <unsigned long> docFreq,
79 unsigned long first, unsigned long last);
80
// Query the pword index for the phrase text and return the matching numbers.
81void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result);
82
// Read the text and the tf/ef/df counts of one phrase record.
83void get_phrase_freq_data(TextData &textdata, unsigned long phrase,
84 UCArray &word, unsigned long &tf,
85 unsigned long &ef, unsigned long &df);
86
// Read everything get_phrase_freq_data reads, plus the expansion list and
// the parallel document-number / document-frequency lists.
87void get_phrase_all_data(TextData &textdata, unsigned long phrase,
88 UCArray &word, unsigned long &tf,
89 unsigned long &ef, unsigned long &df,
90 vector <unsigned long> &el,
91 vector <unsigned long> &docnum,
92 vector <unsigned long> &docfrq);
93
// Split an expansion (word) around the phrase it expands (body).
94void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix);
// Test whether text occurs at position here; on success here is advanced
// past the match.
95bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end);
96
// Read the OID (hash) and title of one document record.
97void get_document_all_data(TextData &docdata, unsigned long docNum,
98 UCArray &title, UCArray &hash);
99
// Print an error page (XML or HTML) and exit(0).
100void cgi_error(bool XMLmode, char *message);
101
// Conversions from GSDL text_t values.
102void toUCArray(text_t &in, UCArray &out);
103unsigned long toLongInt(text_t &value);
104
104
105
106
107int main (int argc, char * argv[]) {
108
109
110 // the phrase to expand
111 unsigned long phrase = 0;
112 UCArray word;
113
114 // the frequency and occurances of the phrase
115 unsigned long tf, ef, df;
116 vector <unsigned long> el, docNums, docfreq;
117
118 // the number of occurances to display
119 unsigned long first_e, last_e, count_e, first_d, last_d, count_d;
120
121 // are we in XML mode (as opposed to HTML mode)
122 bool XMLmode = false;
123
124 // Read the gsdlsite.cfg file
125 char *gsdlhome = NULL;
126 get_gsdlsite_parameters(gsdlhome);
127
128 if (gsdlhome == NULL) {
129 cgi_error(XMLmode, "GSDLHOME not set in gsdlsite.cfg file.");
130 }
131
132 // Get command-line parameters
133 char *collection;
134 text_tmap param;
135 get_cgi_parameters(collection, phrase, word,
136 first_e, last_e, first_d, last_d, XMLmode);
137
138 if (collection == NULL) {
139 cgi_error(XMLmode, "No collection");
140 }
141
142 char basepath[FILENAME_MAX] = "";
143 strcat(basepath, gsdlhome);
144 strcat(basepath, "/collect/");
145 strcat(basepath, collection);
146 strcat(basepath, "/index/phind");
147
148 // If we don't know the phrase number, look itup
149 if (phrase == 0) {
150
151 if (word.empty()) {
152 cgi_error(XMLmode, "No phrase number or word.");
153 }
154
155 DocNumArray result;
156 find_phrase_number_from_word(basepath, word, result);
157
158 if (result.empty()) {
159 cgi_error(XMLmode, "The search term does not occur in the collection.");
160 exit(0);
161 } else {
162 phrase = result[0];
163 }
164 }
165
166 // Create a TextData object to read the phrase data (pdata)
167 TextData textdata;
168 char filename[FILENAME_MAX] = "pdata";
169 if (!textdata.LoadData (basepath, filename)) {
170 FatalError (1, "Couldn't load text information for \"%s\"", filename);
171 }
172 get_phrase_all_data(textdata, phrase, word, tf, ef, df, el, docNums, docfreq);
173
174
175 // Output the header
176 if (XMLmode) {
177 cout << "Content-type: text/plain" << endl << endl
178 << "<phinddata id=\"" << phrase
179 << "\" text=\"" << word
180 << "\" tf=\"" << tf
181 << "\" df=\"" << df
182 << "\" ef=\"" << ef
183 << "\">" << endl;
184 } else {
185 cout << "Content-type: text/html" << endl << endl
186 << "<html><head><title>" << word << "</title></head>" << endl
187 << "<body><center>" << endl
188 << "<p><h1>" << word << "</h1>" << endl
189 << "<p><b>"<< word << "</b> occurs "
190 << tf << " times in " << df << " documents" << endl;
191 }
192
193 // Output the expansions
194 if ((ef > 0) && (first_e < last_e)) {
195
196 // figure out the number of phrases to output
197 if (last_e > el.size()) {
198 last_e = el.size();
199 }
200 count_e = last_e - first_e;
201
202 // output expansions as XML
203 if (XMLmode) {
204 cout << "<expansionlist length=\"" << ef
205 << "\" start=\"" << first_e
206 << "\" end=\"" << last_e << "\">" << endl;
207
208 print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e);
209
210 cout << "</expansionlist>" << endl;
211 }
212
213 // output expansions as HTML
214 else {
215 if (count_e == el.size()) {
216 cout << "<p><b> " << count_e << " expansions</b>" << endl;
217 } else {
218 cout << "<p><b>" << count_e << " of " << ef << " expansions</b>" << endl;
219 }
220
221 cout << "<p><table border=0><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;
222 print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e);
223 cout << "</table>" << endl;
224
225 if (last_e < el.size()) {
226 cout << "<br><a href='" << argv[0]
227 << "?c=" << collection << "&n=" << phrase
228 << "&e=" << (last_e + 10) << "&d=" << last_d
229 << "&g=" << first_e << "&f=" << first_d
230 << "'>Get more phrases</a>"
231 << endl
232 << "<br><a href='" << argv[0]
233 << "?c=" << collection << "&n=" << phrase
234 << "&e=" << el.size() << "&d=" << last_d
235 << "&g=" << first_e << "&f=" << first_d
236 << "'>Get every phrase</a>"
237 << endl;
238 }
239 }
240 }
241
242 // Output the document occurances
243 if ((df > 0) && (first_d < last_d)) {
244
245 // figure out the phrases to output
246 if (last_d > docNums.size()) {
247 last_d = docNums.size();
248 }
249 count_d = last_d - first_d;
250
251 // output document list as XML
252 if (XMLmode) {
253 cout << "<documentlist length=\"" << df
254 << "\" start=\"" << first_d
255 << "\" end=\"" << last_d << "\">" << endl;
256
257 print_documents(XMLmode, basepath, "library", collection,
258 docNums, docfreq, first_d, last_d);
259
260 cout << "</documentlist>" << endl;
261 }
262
263 // output document list as HTML
264 else {
265
266 if (count_d == docNums.size()) {
267 cout << "<p><b> " << count_d << " documents</b>" << endl;
268 } else {
269 cout << "<p><b>" << count_d << " of " << df << " documents</b>" << endl;
270 }
271
272 cout << "<p><table><tr><th align=left>Document</th><th>freq</th></tr>" << endl;
273 print_documents(XMLmode, basepath, "library", collection,
274 docNums, docfreq, first_d, last_d);
275 cout << "</table>" << endl;
276
277 if (last_d < docNums.size()) {
278 cout << "<br><a href='" << argv[0]
279 << "?c=" << collection << "&n=" << phrase
280 << "&e=" << last_e << "&d=" << (last_d + 10)
281 << "&g=" << first_e << "&f=" << first_d
282 << "'>Get more documents</a>" << endl
283 << "<br><a href='" << argv[0]
284 << "?c=" << collection << "&n=" << phrase
285 << "&g=" << first_e
286 << "&e=" << last_e
287 << "&f=" << first_d
288 << "&d=" << docNums.size()
289 << "'>Get every document</a>" << endl;
290 }
291 }
292 }
293
294 // Close the document
295 if (XMLmode) {
296 cout << "</phinddata>" << endl;
297 } else {
298 cout << "</center></body></html>" << endl;
299 }
300
301 textdata.UnloadData ();
302 return 0;
303}
304
305
306// Print a list of expansions
307//
308// Given the textData and a list of phrase numbers, print out each of the
309// expansions.
310
311void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body,
312 TextData &textdata, vector <unsigned long> elist,
313 unsigned long first, unsigned long last) {
314
315 UCArray word;
316 unsigned long phrase, tf, df, ef;
317
318 UCArray suffix, prefix;
319
320 for (unsigned long e = first; e < last; e++) {
321
322 phrase = elist[e];
323 get_phrase_freq_data(textdata, phrase, word, tf, ef, df);
324
325 split_phrase(word, body, prefix, suffix);
326
327 if (XMLmode) {
328 // body is always the same as the text of the phrase, so no need to send it
329 cout << "<expansion num=\"" << e
330 << "\" id=\"" << phrase
331 << "\" prefix=\"" << prefix
332 << "\" suffix=\"" << suffix
333 << "\" tf=\"" << tf
334 << "\" df=\"" << df << "\"/>" << endl;
335 } else {
336 cout << "<tr valign=top><td align=right><a href='" << cgi_script
337 << "?c=" << collection << "&n=" << phrase << "'>" << prefix << "</a></td>"
338 << "<td align=center><a href='" << cgi_script
339 << "?c=" << collection << "&n=" << phrase << "'>" << body << "</a></td>"
340 << "<td align=left><a href='" << cgi_script
341 << "?c=" << collection << "&n=" << phrase << "'>" << suffix << "</a></td>"
342 << "<td>" << tf << "</td><td>" << df << "</td></tr>" << endl;
343 }
344 }
345}
346
347void print_documents(bool XMLmode, char *basepath, char *cgi_script, char *collection,
348 vector <unsigned long> docNums, vector <unsigned long> docFreq,
349 unsigned long first, unsigned long last) {
350
351 // Create a TextData object to read the document data
352 TextData docdata;
353 char filename[FILENAME_MAX] = "docs";
354 if (!docdata.LoadData (basepath, filename)) {
355 FatalError (1, "Couldn't load text information for \"%s\"", filename);
356 }
357
358 UCArray title, hash;
359 unsigned long freq, doc;
360
361 for (unsigned long d = first; d < last; d++) {
362 doc = docNums[d];
363 freq = docFreq[d];
364
365 get_document_all_data(docdata, doc, title, hash);
366
367 if (XMLmode) {
368 cout << "<document num=\"" << d
369 << "\" hash=\"" << hash
370 << "\" freq=\"" << freq
371 << "\" title=\"" << title << "\"/>" << endl;
372 } else {
373 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
374 << "&a=d&d=" << hash << "'>" << title << "</a>"
375 << "</td><td>" << freq << "</td></tr>"
376 << endl;
377 }
378 }
379}
380
381
382
383// Get the frequency data about a phrase
384//
385// The phrase is stored in textData as record phrase.
386// We retrieve:
387// word - the text of the phrase
388// tf - the total frequency of the phrase
389// ef - the expansion frequency of the phrase
390// df - the document frequency of the phrase
391
392void get_phrase_freq_data(TextData &textdata, unsigned long phrase,
393 UCArray &word, unsigned long &tf,
394 unsigned long &ef, unsigned long &df) {
395
396 UCArray text;
397 UCArray docLevel;
398 SetCStr(docLevel, "Document");
399
400 // Look the word up in the textData
401 if (!GetDocText (textdata, docLevel, phrase, text)) {
402 FatalError (1, "Error while trying to get document %u", phrase);
403 }
404
405 // Ignore everything up to the first colon
406 UCArray::iterator next = text.begin();
407 while (*next++ != ':');
408
409 // Get the word
410 word.clear();
411 for (; *next != ':'; next++) {
412 word.push_back(*next);
413 }
414
415 // Get total frequency
416 tf = 0;
417 for (next++; *next != ':'; next++) {
418 tf *= 10;
419 tf += (*next - '0');
420 }
421
422 // Get expansion frequency
423 ef = 0;
424 for (next++; *next != ':'; next++) {
425 ef *= 10;
426 ef += (*next - '0');
427 }
428
429 // Get document frequency
430 df = 0;
431 for (next++; *next != ':'; next++) {
432 df *= 10;
433 df += (*next - '0');
434 }
435}
436
437// Get all the data about a phrase
438//
439// The phrase is stored in textData as record phrase.
440// We retrieve:
441// word - the text od the phrase
442// tf - the total frequency of the phrase
443// ef - the expansion frequency of the phrase
444// df - the document frequency of the phrase
445// el - the list of phrases that are expansions of phrase
446// dl - the list of documents that contain phrase
447
448void get_phrase_all_data(TextData &textdata, unsigned long phrase,
449 UCArray &word, unsigned long &tf,
450 unsigned long &ef, unsigned long &df,
451 vector <unsigned long> &el,
452 vector <unsigned long> &docnum,
453 vector <unsigned long> &docfrq) {
454 UCArray text;
455 UCArray docLevel;
456 SetCStr(docLevel, "Document");
457
458 // Look thwe word up in the textData
459 if (!GetDocText (textdata, docLevel, phrase, text)) {
460 FatalError (1, "Error while trying to get phrase %u", phrase);
461 }
462
463 // Ignore everything up to the first colon
464 UCArray::iterator next = text.begin();
465 while (*next++ != ':');
466
467 // Get the word
468 word.clear();
469 for (; *next != ':'; next++) {
470 word.push_back(*next);
471 }
472
473 // Get total frequency
474 tf = 0;
475 for (next++; *next != ':'; next++) {
476 tf *= 10;
477 tf += (*next - '0');
478 }
479
480 // Get expansion frequency
481 ef = 0;
482 for (next++; *next != ':'; next++) {
483 ef *= 10;
484 ef += (*next - '0');
485 }
486
487 // Get document frequency
488 df = 0;
489 for (next++; *next != ':'; next++) {
490 df *= 10;
491 df += (*next - '0');
492 }
493
494 // Get expansion list
495 el.clear();
496 unsigned long e = 0;
497 for (next++; *next != ':'; next++) {
498 if (*next == ',') {
499 el.push_back(e);
500 e = 0;
501 } else {
502 e *= 10;
503 e += (*next - '0');
504 }
505 }
506 el.push_back(e);
507
508 // Get document list & the document frequency list
509 while (text.back() == '\n') {
510 text.pop_back();
511 }
512 text.push_back(';');
513 text.push_back(':');
514 docnum.clear();
515 docfrq.clear();
516 bool readnum = false;
517 unsigned long d = 0;
518 for (next++; *next != ':'; next++) {
519 if (*next == ',') {
520 docnum.push_back(d);
521 readnum = true;
522 d = 0;
523 } else if (*next == ';') {
524 if (readnum) {
525 docfrq.push_back(d);
526 } else {
527 docnum.push_back(d);
528 docfrq.push_back(1);
529 }
530 readnum = false;
531 d = 0;
532 } else {
533 d *= 10;
534 d += (*next - '0');
535 }
536 }
537}
538
539// Get all the data about a docment
540//
541// The document's detailes are stored in docData as record docNum.
542// We retrieve:
543// title - the document's title
544// hash - the documnt's unique OID
545
546void get_document_all_data(TextData &docdata, unsigned long docNum,
547 UCArray &title, UCArray &hash) {
548
549 UCArray text;
550 UCArray docLevel;
551 SetCStr(docLevel, "Document");
552
553 // Look the word up in the textData
554 if (!GetDocText (docdata, docLevel, docNum, text)) {
555 FatalError (1, "Error while trying to get document %u", docNum);
556 }
557
558 // Ignore everything up to the first colon
559 UCArray::iterator next = text.begin();
560 while (*next++ != '\t');
561
562 // Get the document OID (hash)
563 hash.clear();
564 for (; *next != '\t'; next++) {
565 hash.push_back(*next);
566 }
567
568 // Get the title
569 text.push_back('\n');
570 title.clear();
571 for (next++; *next != '\n'; next++) {
572 title.push_back(*next);
573 }
574}
575
576
// Read the gsdlhome setting from gsdlsite.cfg
//
// Opens gsdlsite.cfg in the current directory and scans it line by line for
// a line starting with "gsdlhome".  The text that follows the keyword and
// any spaces/tabs is returned in gsdlhome as a newly allocated,
// NUL-terminated string (allocated with new[]).  If the keyword occurs
// more than once the last occurrence wins; if it never occurs gsdlhome is
// left untouched.
//
// Writes a message to cerr and exits with status 1 if the file cannot be
// opened.

void get_gsdlsite_parameters(char *&gsdlhome) {

  // open the file
  ifstream gsdl("gsdlsite.cfg", ios::in);
  if (!gsdl) {
    cerr << "File gsdlsite.cfg could not be opened\n";
    exit(1);
  }

  // read each line of the file; the loop ends at end of file or on a line
  // too long for the buffer (which would otherwise never reach eof)
  char buffer[2000];
  char *found = NULL;
  while (gsdl.getline(buffer, sizeof(buffer), '\n')) {

    // only the gsdlhome variable is of interest
    if (strncmp(buffer, "gsdlhome", 8) != 0) {
      continue;
    }

    // find the start of the gsdlhome string
    int len = strlen(buffer);
    int i = 8;
    while (i < len && (buffer[i] == ' ' || buffer[i] == '\t')) {
      i++;
    }

    // store the gsdlhome string, with room for the terminating NUL
    delete [] found;
    found = new char[len - i + 1];
    strncpy(found, &(buffer[i]), len - i);
    found[len - i] = '\0';
  }

  if (found != NULL) {
    gsdlhome = found;
  }
}
606
607void get_cgi_parameters(char *&collection,
608 unsigned long &phrasenumber, UCArray &phrasetext,
609 unsigned long &first_e, unsigned long &last_e,
610 unsigned long &first_d, unsigned long &last_d,
611 bool &XMLmode) {
612
613
614 // set the default parameters
615 phrasenumber = 0;
616 phrasetext.clear();
617 first_e = 0;
618 last_e = 10;
619 first_d = 0;
620 last_d = 10;
621
622 // get the query string
623 char *request_method_str = getenv("REQUEST_METHOD");
624 char *query_string = getenv("QUERY_STRING");
625 text_t query;
626
627 if (request_method_str != NULL
628 && (strcmp(request_method_str, "GET") == 0)
629 && query_string != NULL) {
630 // GET cgi args from querystring
631 query = query_string;
632
633 } else {
634 // debugging from command line
635 cout << "? " << endl;
636 char query_input[1024];
637 cin.get(query_input, 1024, '\n');
638 query = query_input;
639 }
640
641 // extract out the key=value pairs
642 text_t::iterator here = query.begin();
643 text_t::iterator end = query.end();
644 text_t key, value;
645
646 while (here != end) {
647 // get the next key and value pair
648 here = getdelimitstr (here, end, '=', key);
649 here = getdelimitstr (here, end, '&', value);
650
651 // store this key=value pair
652 if (!key.empty() && !value.empty()) {
653
654 // c: the collection name
655 if (key[0] == 'c') {
656 UCArray tmp;
657 toUCArray(value, tmp);
658 collection = GetCStr(tmp);
659 }
660
661 // d: the last document number
662 else if (key[0] == 'd') {
663 last_d = toLongInt(value);
664 }
665
666 // e: the last expansion number
667 else if (key[0] == 'e') {
668 last_e = toLongInt(value);
669 }
670
671 // f: the first document number
672 else if (key[0] == 'f') {
673 first_d = toLongInt(value);
674 }
675
676 // g: the first expansion number
677 else if (key[0] == 'g') {
678 first_e = toLongInt(value);
679 }
680
681 // x: XML mode
682 else if (key[0] == 'x') {
683 XMLmode = true;
684 }
685
686 // n: the phrase number
687 else if (key[0] == 'n') {
688 phrasenumber = toLongInt(value);
689 }
690
691 // p: the phrase text
692 else if (key[0] == 'p') {
693 toUCArray(value, phrasetext);
694 }
695
696 }
697 }
698}
699
700
701// Find the phrase number of a word in the index file
702
703void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result) {
704
705 // Open the index file for searching
706 IndexData indexData;
707 char indexfilename[FILENAME_MAX] = "pword";
708 if (!indexData.LoadData (basepath, indexfilename)) {
709 FatalError (1, "Couldn't load index information for \"%s\"", indexfilename);
710 }
711
712 // set up the query object
713 QueryInfo queryInfo;
714 SetCStr (queryInfo.docLevel, "Document");
715 queryInfo.maxDocs = 5;
716 queryInfo.sortByRank = true;
717 queryInfo.exactWeights = false;
718 queryInfo.needRankInfo = true;
719 queryInfo.needTermFreqs = true;
720
721 // mode 1 = casefolded, unstemmed search
722 QueryNode *queryTree = ParseQuery(query, 1, 1);
723
724 // cout << "-- query --" << endl;
725 // PrintNode (cout, queryTree);
726
727 // perform the query
728 ExtQueryResult queryResult;
729 MGQuery (indexData, queryInfo, queryTree, queryResult);
730 // cout << "-- word lookup result -- " << endl << queryResult << endl ;
731
732 result.clear();
733 result = queryResult.docs;
734
735 // delete the query
736 if (queryTree != NULL) delete queryTree;
737}
738
739
740
741
742// cgi_error
743//
744// If for some reason we cannot proceed, output a simple error
745// page and exit(0) the program.
746
747void cgi_error(bool XMLmode, char *message) {
748
749 if (XMLmode) {
750 cout << "Content-type: text/plain" << endl << endl
751 << "<phinddata>" << endl
752 << "<phinderror>" << message << "</phinderror>" << endl
753 << "</phinddata>" << endl;
754 } else {
755 cout << "Content-type: text/html" << endl << endl
756 << "<html><head><title>phind error</title></head>" << endl
757 << "<body>" << endl
758 << "<p><h1>phind error</h1>"
759 << "<p> An error occured processing your request: <p><b>"
760 << message
761 << "</b></body></html>" << endl;
762 }
763 exit(0);
764}
765
766
767// split an expansion into prefix and suffix
768
769void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix) {
770
771 prefix.clear();
772 suffix.clear();
773
774 bool readingPrefix = true;
775 UCArray::iterator here = word.begin();
776 UCArray::iterator end = word.end();
777
778 while (here != end) {
779
780 // if we've not read all the prefix, add the next char to the prefix
781 if (readingPrefix) {
782 if (phrase_match(body, here, end)) {
783 readingPrefix = false;
784 // trim whitespace from end of prefix & start of suffix
785 if (!prefix.empty()) {
786 prefix.pop_back();
787 }
788 if ((here != end) && (*here == ' ')) {
789 here++;
790 }
791 } else {
792 prefix.push_back(*here);
793 here++;
794 }
795 }
796 // if we've finished with the prefix, update the suffix
797 else {
798 suffix.push_back(*here);
799 here++;
800 }
801 }
802}
803
804// phrase_match
805//
806// compare two strings, one represented as an UCArray, the other as two
807// UCArray iterators.
808//
809// Return true if the UCArray is the same as the phrase the iterators point
810// to for the length of the UCArray.
811
812bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end) {
813
814 UCArray::iterator one_here = text.begin();
815 UCArray::iterator one_end = text.end();
816 UCArray::iterator two_here = here;
817
818 // iterate over the length of the first string, comparing each element to
819 // the corresponding element in the second string.
820 while (one_here != one_end) {
821
822 if (two_here == end) {
823 return false;
824 } else if (*one_here != *two_here) {
825 return false;
826 }
827 one_here++;
828 two_here++;
829 }
830
831 here = two_here;
832 return true;
833}
834
835
836// Convert from text_t format
837//
838// Conversions from text_t to other types
839
840unsigned long toLongInt(text_t &value) {
841
842 unsigned long result = 0;
843
844 text_t::iterator here = value.begin();
845 text_t::iterator end = value.end();
846 while (here != end) {
847 result *= 10;
848 result += *here - '0';
849 here++;
850 }
851
852 return result;
853}
854
855void toUCArray(text_t &in, UCArray &out) {
856 out.clear();
857 text_t::iterator here = in.begin();
858 text_t::iterator end = in.end();
859 while (here != end) {
860 out.push_back((unsigned char) *here);
861 here++;
862 }
863}
864
Note: See TracBrowser for help on using the repository browser.