Changeset 12784
- Timestamp:
- 2006-09-20T09:53:39+12:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/recpt/querytools.cpp
r12771 r12784 28 28 #include "unitool.h" // for is_unicode_letdig 29 29 30 // sets the ct, qt, qto arguments 30 31 void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) { 31 32 … … 53 54 54 55 text_tmap::iterator check = cinfo->format.find("SearchTypes"); 55 text_t search_types = "plain,form";56 if(check != cinfo->format.end() ){56 text_t search_types; 57 if(check != cinfo->format.end() && !(*check).second.empty()){ 57 58 search_types = (*check).second; 58 if (search_types.empty()) { 59 search_types = "plain,form"; 60 } 61 } 59 } else { 60 // assume plain,form 61 if (args["qto"].empty()) args["qto"] = "3"; 62 if (args["qt"].empty()) { 63 int arg_qto = args.getintarg("qto"); 64 if (arg_qto > 1) { 65 args["qt"] = "1"; 66 } else { 67 args["qt"] = "0"; 68 } 69 } 70 return; 71 } 72 62 73 63 74 if (args["qto"].empty()) { … … 73 84 74 85 if (args["qt"].empty()) { 75 bool form_default = false;76 86 int arg_qto = args.getintarg("qto"); 77 87 if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) { … … 85 95 // request.filterResultOptions and request.fields (if required) should 86 96 // be set from the calling code 87 void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring, 97 void set_queryfilter_options (FilterRequest_t &request, 98 const text_t &querystring, 88 99 cgiargsclass &args) { 89 100 … … 162 173 } 163 174 164 void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1, 175 void set_queryfilter_options (FilterRequest_t &request, 176 const text_t &querystring1, 165 177 const text_t &querystring2, cgiargsclass &args) { 166 178 … … 212 224 } 213 225 214 void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) { 226 void set_more_queryfilter_options (FilterRequest_t &request, 227 cgiargsclass &args) { 215 228 216 229 OptionValue_t option; … … 238 251 } 239 252 // lucene 240 else if (indexer_type == 2) {253 else if (indexer_type == 2) { 241 254 return (character == '?' || character == '*' || character == '~' || 242 255 character == '^'); … … 245 258 } 246 259 260 // This function removes boolean operators from simple searches, and segments 261 // chinese characters if segment=true 247 262 void format_querystring (text_t &querystring, int querymode, bool segment) { 248 263 text_t formattedstring; 249 264 265 // advanced search, no segmenting, don't need to do anything 250 266 if (querymode == 1 && !segment) return; 251 267 … … 290 306 291 307 308 309 // search history tool 310 // also used for form query macros 311 text_t escape_quotes(const text_t &querystring) { 312 313 text_t::const_iterator here = querystring.begin(); 314 text_t::const_iterator end = querystring.end(); 315 316 text_t escquery = ""; 317 while (here != end) { 318 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here); 319 else if (*here == '\n' || *here == '\r') { 320 escquery.push_back(' '); 321 } else { 322 escquery +="\\\\"; 323 escquery.push_back(*here); 324 } 325 326 ++here; 327 } 328 return escquery; 329 330 } 331 332 // Parses the terms into words, and adds #si if necessary 333 text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold, 334 const int indexer_type) { 335 336 // the default stem and case are set to 0 if this is being used, so we are only adding on qualifiers if stem or fold is 1. 337 if (stem == "0" && fold == "0") { 338 return; 339 } 340 // this is only for mgpp collections, shouldn't be called for anything else 341 if (indexer_type != 1) { 342 return; 343 } 344 345 text_t outtext; 346 text_t word; 347 348 text_t::const_iterator here = terms.begin(); 349 text_t::const_iterator end = terms.end(); 350 351 while (here !=end) { 352 353 if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) { 354 // not word boundary 355 word.push_back(*here); 356 ++here; 357 } 358 else { 359 // found word boundary 360 if (!word.empty() ) { 361 if (starts_with(word, "NEAR") || starts_with(word, "WITHIN")) { 362 outtext += word; 363 word.clear(); 364 } 365 else { 366 word += "#"; 367 if (stem == "1") word += "s"; 368 if (fold == "1") word += "i"; 369 outtext += word; 370 word.clear(); 371 } 372 } 373 // this only used in advanced form, so we leave in boolean operators 374 if (*here == '\"' || *here == '&' || *here == '|' || *here == '!' || is_unicode_space(*here)) { 375 outtext.push_back(*here); 376 } 377 ++here; 378 } 379 } 380 381 // get last word 382 if (!word.empty()) { 383 word += "#"; 384 if (stem == "1") word += "s"; 385 if (fold == "1") word += "i"; 386 word += " "; 387 outtext += word; 388 } 389 return outtext; 390 } 391 392 393 // some query form parsing functions for use with mgpp & lucene 394 395 void parse_reg_query_form(text_t &querystring, cgiargsclass &args, bool segment) 396 { 397 querystring.clear(); 398 399 int argct = args.getintarg("ct"); 400 int argt = args.getintarg("t");// t=0 -and, t=1 - or 401 int argb = args.getintarg("b"); 402 403 text_t combine; 404 405 // lucene uses global combine, so only need this for mgpp 406 if (argct==1) { 407 if (argt == 0) combine = "&"; 408 else combine = "|"; 409 } 410 411 text_t field = args["fqf"]; 412 if (field.empty()) return; // no query 413 text_tarray fields; 414 splitchar(field.begin(), field.end(), ',', fields); 415 416 text_t value = args["fqv"]; 417 if (value.empty()) return; // somethings wrong 418 text_tarray values; 419 splitchar(value.begin(), value.end(), ',', values); 420 421 422 for (int i=0; i< values.size(); ++i) { 423 if (!values[i].empty()) { 424 text_t this_value = values[i]; 425 // remove operators for simple search, segments text if necessary 426 format_querystring(this_value, argb, segment); 427 // add tag info for this field (and other processing) 428 format_field_info(this_value, fields[i], argct, argt, argb); 429 // add into query string 430 if (argct == 2) { 431 // lucene 432 // we don't worry about AND/OR, cos this is done by defaultcombineoperator 433 querystring += this_value+" "; 434 } else { 435 // mgpp 436 if (!querystring.empty()) { 437 querystring += " "+ combine+ " "; 438 } 439 querystring += this_value; 440 } 441 } 442 } 443 } 444 445 446 void parse_adv_query_form(text_t &querystring, cgiargsclass &args, bool segment){ 447 querystring.clear(); 448 449 const int argct = args.getintarg("ct"); 450 int argt = 0;// arg t is either not used (lucene) or used for natural/ranked (mgpp), so we set it to 0 = AND, by default 451 int argb = args.getintarg("b"); 452 text_t combine; 453 if (argct==1) { 454 combine = "&"; 455 } 456 else { // lucene 457 combine = "AND"; 458 } 459 460 text_t field = args["fqf"]; 461 if (field.empty()) return; // no query 462 text_tarray fields; 463 splitchar(field.begin(), field.end(), ',', fields); 464 465 text_t value = args["fqv"]; 466 if (value.empty()) return; // somethings wrong 467 text_tarray values; 468 splitchar(value.begin(), value.end(), ',', values); 469 470 text_t comb = args["fqc"]; 471 if (comb.empty()) return; //somethings wrong 472 text_tarray combs; 473 splitchar(comb.begin(), comb.end(), ',', combs); 474 475 text_tarray stems; 476 text_tarray folds; 477 if (argct == 1) {// mgpp - lucene doesn't do stem/case 478 text_t stem = args["fqs"]; 479 if (stem.empty()) return; // somethings wrong 480 splitchar(stem.begin(), stem.end(), ',', stems); 481 482 text_t fold = args["fqk"]; 483 if (fold.empty()) return; // somethings wrong 484 splitchar(fold.begin(), fold.end(), ',', folds); 485 } 486 487 for(int i=0; i< values.size(); ++i) { 488 if (!values[i].empty()) { 489 if (i!=0) { 490 if (argct==1) { 491 if (combs[i-1]=="and") combine = "&"; 492 else if (combs[i-1]=="or")combine = "|"; 493 else if (combs[i-1]=="not")combine = "!"; 494 } 495 else { // lucene 496 if (combs[i-1]=="and") combine = "AND"; 497 else if (combs[i-1]=="or")combine = "OR"; 498 else if (combs[i-1]=="not")combine = "NOT"; 499 } 500 } 501 text_t this_value = values[i]; 502 // remove operators for simple search, segments text if necessary 503 format_querystring(this_value, argb, segment); 504 if (argct == 1) { // mgpp only 505 this_value = addstemcase(this_value, stems[i], folds[i], argct); 506 } 507 // add tag info for this field (and other processing) 508 format_field_info(this_value, fields[i], argct, argt, argb); 509 // add into query string 510 if (!querystring.empty()) { 511 querystring += " "+ combine+ " "; 512 } 513 querystring += this_value; 514 515 } 516 } 517 } 518 519 // Extended addqueryelem for Human Info project 520 void addqueryelem_ex(text_t &querystring, const text_t &tag, 521 const text_t &terms, const text_t &stem, 522 const text_t &fold, 523 const text_t& combine, const text_t& word_combine) { 524 525 if (!querystring.empty()) { // have to put and/or 526 querystring += " " + combine + " "; 527 } 528 text_t outtext; outtext.reserve(512); 529 text_t word; word.reserve(100); 530 //unsigned short c; 531 text_t::const_iterator here = terms.begin(); 532 text_t::const_iterator end = terms.end(); 533 bool inquote = false, firstword = true; 534 535 text_t word2; word2.reserve(256); 536 537 while (here !=end) { 538 if (is_unicode_space(*here)) { 539 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); } 540 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); } 541 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); } 542 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); } 543 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); } 544 if (inquote) { 545 word2.push_back(*here); 546 } 547 word.append(word2); word2.clear(); 548 549 if (!inquote && !word.empty() ) { 550 // found word boundary 551 552 if (stem == "1" || fold =="1") { 553 word += "#"; 554 if (stem == "1") word += "s"; 555 //else word += "u"; 556 557 if (fold == "1") word += "i"; 558 //else word += "c"; 559 } 560 if (firstword) { 561 firstword = false; 562 } else { 563 outtext += " " + word_combine + " "; 564 } 565 outtext += "[" + word + "]:"+tag; 566 word.clear(); 567 } 568 ++here; 569 } else if (*here == '\"') { 570 word2.push_back(*here); 571 inquote = !inquote; 572 ++here; 573 } else { 574 // not word boundary 575 word2.push_back(*here); 576 ++here; 577 } 578 } 579 580 // get last word 581 if (!word2.empty()) { 582 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); } 583 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); } 584 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); } 585 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); } 586 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); } 587 word.append(word2); word2.clear(); 588 589 if (stem == "1"|| fold == "1") { 590 word += "#"; 591 if (stem == "1") word += "s"; 592 //else word += "u"; 593 594 if (fold == "1") word += "i"; 595 //else word += "c"; 596 } 597 if (!outtext.empty()) outtext += " " + word_combine + " "; 598 outtext += "[" + word + "]:"+tag; 599 } 600 querystring += "(" + outtext + ")"; 601 } 602 603 void add_field_info(text_t &querystring, const text_t &tag, int type) { 604 605 if (tag == "" || tag == "ZZ") return; // do nothing 606 if (type == 1) { //mgpp 607 querystring = "["+querystring+"]:"+tag; 608 } else if (type == 2) { // lucene 609 querystring = tag+":("+querystring+")"; 610 } 611 612 } 613 614 615 void format_field_info_lucene(text_t &querystring, text_t tag, int argt, int argb) { 616 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields) 617 int type = 2; //lucene 618 619 if (argb==0) { // simple 620 // there will be no & or | as they should have already been removed 621 // just tag the entire thing 622 if (tag != "") { 623 add_field_info(querystring, tag, type); 624 } 625 return; 626 } 627 628 // need to replace & with &&, | with || 629 text_t::const_iterator here = querystring.begin(); 630 text_t::const_iterator end = querystring.end(); 631 632 text_t finalquery = ""; 633 while (here != end) { 634 if (*here == '&') { 635 finalquery.push_back('&'); 636 finalquery.push_back('&'); 637 while (*(here+1) == '&') { 638 ++here; 639 } 640 } 641 else if (*here == '|') { 642 finalquery.push_back('|'); 643 finalquery.push_back('|'); 644 while (*(here+1) == '|') { 645 ++here; 646 } 647 } 648 else { 649 finalquery.push_back(*here); 650 } 651 ++here; 652 } 653 querystring = finalquery; 654 add_field_info(querystring, tag, type); 655 } 656 657 658 void format_field_info_mgpp(text_t &querystring, text_t tag, int argt, int argb) { 659 660 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields) 661 if (tag == "" && argb == 1) { 662 return; // no field specifier, advanced mode, the query stays as written 663 } 664 665 int type = 1; // mgpp 666 667 bool simple_and = (argb==0 && argt==0); 668 text_t finalquery = ""; 669 text_t fieldpart =""; 670 text_t queryelem = ""; 671 bool in_phrase = false; 672 bool in_field = false; 673 674 text_t::const_iterator here = querystring.begin(); 675 text_t::const_iterator end = querystring.end(); 676 while (here != end) { 677 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) { 678 queryelem.push_back(*here); 679 } 680 else if (*here == '|') { 681 in_field = false; 682 } 683 else if (*here == '!' || *here == '(' || *here == ')') { 684 if (!in_phrase) { // ignore these if in_phrase 685 // output field, then output operator 686 in_field = false; 687 if (!queryelem.empty()) { 688 if (!simple_and && !fieldpart.empty()) { 689 add_field_info(fieldpart, tag, type); 690 finalquery += fieldpart; 691 finalquery.push_back(' '); 692 fieldpart.clear(); 693 } 694 fieldpart += queryelem; 695 } 696 if (!fieldpart.empty()) { 697 add_field_info(fieldpart, tag, type); 698 finalquery += fieldpart; 699 finalquery.push_back(' '); 700 } 701 fieldpart.clear(); 702 queryelem.clear(); 703 finalquery.push_back(*here); 704 finalquery.push_back(' '); 705 } 706 } 707 else if (*here == '"') { 708 queryelem.push_back(*here); 709 if (in_phrase == false) in_phrase = true; 710 else { 711 in_phrase = false; 712 } 713 } 714 715 // Found word boundary, in a phrase 716 else if (in_phrase) { 717 queryelem.push_back(*here); 718 } 719 // Found a word boundary 720 else { 721 if (!queryelem.empty()) { 722 if (queryelem == "&") { 723 in_field = true; 724 queryelem.clear(); 725 } 726 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) { 727 728 if (argb==1) { 729 // simple search, these not allowed 730 in_field = true; 731 fieldpart += queryelem; 732 fieldpart.push_back(' '); 733 } 734 queryelem.clear(); 735 736 } 737 else { 738 if (!simple_and && !in_field) { 739 if (!fieldpart.empty()) { 740 add_field_info(fieldpart, tag, type); 741 finalquery += fieldpart; 742 finalquery.push_back(' '); 743 fieldpart.clear(); 744 } 745 } 746 747 fieldpart += queryelem; 748 fieldpart.push_back(' '); 749 queryelem.clear(); 750 } 751 } 752 } 753 ++here; 754 } 755 // at the end 756 if (!queryelem.empty()) { 757 if (!simple_and && !in_field && !fieldpart.empty()) { 758 add_field_info(fieldpart, tag, type); 759 finalquery += fieldpart; 760 finalquery.push_back(' '); 761 fieldpart.clear(); 762 } 763 fieldpart += queryelem; 764 } 765 if (!fieldpart.empty()) { 766 add_field_info(fieldpart, tag, type); 767 finalquery += fieldpart; 768 fieldpart.clear(); 769 finalquery.push_back(' '); 770 } 771 772 querystring = finalquery; 773 } 774 775 776 void format_field_info(text_t &querystring, text_t tag, int argct, int argt, int argb) { 777 if (argct == 1) { 778 format_field_info_mgpp(querystring, tag, argt, argb); 779 } else if (argct == 2) { 780 format_field_info_lucene(querystring, tag, argt, argb); 781 } 782 } 783 784 void mgpp_adddateelem(text_t& querystring, const int date) 785 { 786 querystring.appendcstr(" ["); 787 if(date<0) { 788 querystring.appendcstr("bc"); 789 querystring.appendint((date*-1)); 790 } 791 else { 792 querystring.appendint(date); 793 } 794 querystring.appendcstr("]:CV"); 795 } 796 797 void lucene_adddateelem(text_t& querystring, const int date) 798 { 799 querystring.appendcstr(" CV:("); 800 if(date<0) { 801 querystring.appendcstr("bc"); 802 querystring.appendint((date*-1)); 803 } 804 else { 805 querystring.appendint(date); 806 } 807 querystring.appendcstr(")"); 808 } 809 810 292 811 void add_dates(text_t &querystring, int startdate, int enddate, 293 812 int startbc, int endbc, int ct) … … 352 871 353 872 } 354 355 // search history tool356 // also used for form query macros357 text_t escape_quotes(const text_t &querystring) {358 359 text_t::const_iterator here = querystring.begin();360 text_t::const_iterator end = querystring.end();361 362 text_t escquery = "";363 while (here != end) {364 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);365 else if (*here == '\n' || *here == '\r') {366 escquery.push_back(' ');367 } else {368 escquery +="\\\\";369 escquery.push_back(*here);370 }371 372 ++here;373 }374 return escquery;375 376 }377 378 // some query form parsing functions for use with mgpp & lucene379 380 void parse_reg_query_form(text_t &querystring, cgiargsclass &args)381 {382 querystring.clear();383 384 const int ct = args.getintarg("ct");385 int argt = args.getintarg("t");// t=0 -and, t=1 - or386 387 text_t combine;388 if (ct==1) {389 if (argt == 0) combine = "&";390 else combine = "|";391 }392 else { // lucene393 if (argt == 0) combine = "AND";394 else combine = "OR";395 }396 397 text_t field = args["fqf"];398 if (field.empty()) return; // no query399 text_tarray fields;400 splitchar(field.begin(), field.end(), ',', fields);401 402 text_t value = args["fqv"];403 if (value.empty()) return; // somethings wrong404 text_tarray values;405 splitchar(value.begin(), value.end(), ',', values);406 407 408 for (int i=0; i< values.size(); ++i) {409 if (!values[i].empty()) {410 if (ct == 1) {411 mgpp_addqueryelem(querystring, fields[i], values[i], combine);412 }413 else { // lucene414 lucene_addqueryelem(querystring, fields[i], values[i], combine);415 }416 }417 }418 419 }420 421 422 void parse_adv_query_form(text_t &querystring, cgiargsclass &args){423 424 querystring.clear();425 426 const int ct = args.getintarg("ct");427 text_t combine;428 if (ct==1) {429 combine = "&";430 }431 else { // lucene432 combine = "AND";433 }434 435 text_t field = args["fqf"];436 if (field.empty()) return; // no query437 text_tarray fields;438 splitchar(field.begin(), field.end(), ',', fields);439 440 text_t value = args["fqv"];441 if (value.empty()) return; // somethings wrong442 text_tarray values;443 splitchar(value.begin(), value.end(), ',', values);444 445 text_t stem = args["fqs"];446 if (stem.empty()) return; // somethings wrong447 text_tarray stems;448 splitchar(stem.begin(), stem.end(), ',', stems);449 450 text_t fold = args["fqk"];451 if (fold.empty()) return; // somethings wrong452 text_tarray folds;453 splitchar(fold.begin(), fold.end(), ',', folds);454 455 text_t comb = args["fqc"];456 if (comb.empty()) return; //somethings wrong457 text_tarray combs;458 splitchar(comb.begin(), comb.end(), ',', combs);459 460 for(int i=0; i< values.size(); ++i) {461 if (!values[i].empty()) {462 if (i!=0) {463 if (ct==1) {464 if (combs[i-1]=="and") combine = "&";465 else if (combs[i-1]=="or")combine = "|";466 else if (combs[i-1]=="not")combine = "!";467 }468 else { // lucene469 if (combs[i-1]=="and") combine = "AND";470 else if (combs[i-1]=="or")combine = "OR";471 else if (combs[i-1]=="not")combine = "NOT";472 }473 }474 text_t term = addstemcase(values[i], stems[i], folds[i], ct);475 mgpp_addqueryelem(querystring, fields[i], term, combine);476 }477 478 }479 }480 481 text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,482 const int indexer_type) {483 484 text_t outtext;485 text_t word;486 //unsigned short c;487 text_t::const_iterator here = terms.begin();488 text_t::const_iterator end = terms.end();489 490 while (here !=end) {491 492 if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {493 // not word boundary494 word.push_back(*here);495 ++here;496 }497 else {498 // found word boundary499 if (!word.empty() ) {500 if (stem == "1" || fold =="1") {501 word += "#";502 if (stem == "1") word += "s";503 //else word += "u";504 505 if (fold == "1") word += "i";506 //else word += "c";507 }508 509 word += " ";510 outtext += word;511 word.clear();512 }513 if (*here == '\"') {514 outtext.push_back(*here);515 }516 ++here;517 }518 }519 520 // get last word521 if (!word.empty()) {522 if (stem == "1"|| fold == "1") {523 word += "#";524 if (stem == "1") word += "s";525 //else word += "u";526 527 if (fold == "1") word += "i";528 //else word += "c";529 }530 word += " ";531 outtext += word;532 }533 return outtext;534 }535 536 537 void mgpp_adddateelem(text_t& querystring, const int date)538 {539 querystring.appendcstr(" [");540 if(date<0) {541 querystring.appendcstr("bc");542 querystring.appendint((date*-1));543 }544 else {545 querystring.appendint(date);546 }547 querystring.appendcstr("]:CV");548 }549 550 void lucene_adddateelem(text_t& querystring, const int date)551 {552 querystring.appendcstr(" CV:(");553 if(date<0) {554 querystring.appendcstr("bc");555 querystring.appendint((date*-1));556 }557 else {558 querystring.appendint(date);559 }560 querystring.appendcstr(")");561 }562 563 564 void mgpp_addqueryelem(text_t &querystring, text_t &tag,565 text_t &query, text_t &combine) {566 if (!querystring.empty()) { // have to put and/or567 querystring += " " + combine + " ";568 569 }570 if (tag=="ZZ" || tag=="") { // just add onto querystring571 querystring += query;572 }573 else {574 querystring += "["+query+"]:"+tag;575 }576 577 }578 579 void lucene_addqueryelem(text_t &querystring, text_t &tag,580 text_t &query, text_t &combine) {581 if (!querystring.empty()) { // have to put and/or582 querystring += " " + combine + " ";583 584 }585 if (tag=="ZZ" || tag=="") { // just add onto querystring586 querystring += query;587 }588 else {589 querystring += tag+":("+query+")";590 }591 }592 593 594 void addqueryelem_ex(text_t &querystring, const text_t &tag,595 const text_t &terms, const text_t &stem, const text_t &fold,596 const text_t& combine, const text_t& word_combine) {597 if (!querystring.empty()) { // have to put and/or598 querystring += " " + combine + " ";599 }600 text_t outtext; outtext.reserve(512);601 text_t word; word.reserve(100);602 //unsigned short c;603 text_t::const_iterator here = terms.begin();604 text_t::const_iterator end = terms.end();605 bool inquote = false, firstword = true;606 607 text_t word2; word2.reserve(256);608 609 while (here !=end) {610 if (is_unicode_space(*here)) {611 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }612 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }613 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }614 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }615 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }616 if (inquote) {617 word2.push_back(*here);618 }619 word.append(word2); word2.clear();620 621 if (!inquote && !word.empty() ) {622 // found word boundary623 624 if (stem == "1" || fold =="1") {625 word += "#";626 if (stem == "1") word += "s";627 //else word += "u";628 629 if (fold == "1") word += "i";630 //else word += "c";631 }632 if (firstword) {633 firstword = false;634 } else {635 outtext += " " + word_combine + " ";636 }637 outtext += "[" + word + "]:"+tag;638 word.clear();639 }640 ++here;641 } else if (*here == '\"') {642 word2.push_back(*here);643 inquote = !inquote;644 ++here;645 } else {646 // not word boundary647 word2.push_back(*here);648 ++here;649 }650 }651 652 // get last word653 if (!word2.empty()) {654 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }655 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }656 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }657 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }658 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }659 word.append(word2); word2.clear();660 661 if (stem == "1"|| fold == "1") {662 word += "#";663 if (stem == "1") word += "s";664 //else word += "u";665 666 if (fold == "1") word += "i";667 //else word += "c";668 }669 if (!outtext.empty()) outtext += " " + word_combine + " ";670 outtext += "[" + word + "]:"+tag;671 }672 querystring += "(" + outtext + ")";673 }674 675 676 void add_field_info(text_t &querystring, const text_t &tag, int type) {677 678 if (tag == "") return; // do nothing679 if (type == 1) { //mgpp680 querystring = "["+querystring+"]:"+tag;681 } else if (type == 2) { // lucene682 querystring = tag+":("+querystring+")";683 }684 685 }686 687 688 void format_field_info_lucene(text_t &querystring, cgiargsclass &args) {689 text_t tag = args["fqf"];690 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)691 int type = 2; //lucene692 int argt = args.getintarg("t");// t=0 -and, t=1 - or693 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced694 695 // lucene simple OR - the string stays as is, but may need field tag696 if (argb==0 && argt == 1) {697 // just tag the entire thing698 if (tag != "") {699 add_field_info(querystring, tag, type);700 }701 return;702 }703 bool in_phrase = false;704 705 text_t queryelem = "";706 text_t finalquery = "";707 708 // only add in + for simple AND search709 text_t combine = ((argb==0)? "+" : "");710 711 // for lucene, we need to change & to && and | to || if advanced search712 // we need to tag the entire string, if we have a field713 // if we are simple and search, then we put && in between words714 715 text_t::const_iterator here = querystring.begin();716 text_t::const_iterator end = querystring.end();717 while (here != end) {718 if (is_unicode_letdig(*here) || is_special_character(type, *here)) {719 queryelem.push_back(*here);720 }721 722 // Detect phrase starts/finishes723 else if (*here == '"') {724 queryelem.push_back(*here);725 if (in_phrase == false) in_phrase = true;726 else {727 finalquery += combine + queryelem;728 queryelem.clear();729 in_phrase = false;730 }731 }732 733 // Found word boundary, in a phrase734 else if (in_phrase) {735 queryelem.push_back(*here);736 }737 // Word boundary, but not in a phrase738 else {739 if (*here == '&') {740 queryelem.push_back('&');741 queryelem.push_back('&');742 } else if (*here == '|') {743 queryelem.push_back('|');744 queryelem.push_back('|');745 } else {746 if (!queryelem.empty()) {747 finalquery += combine + queryelem;748 queryelem.clear();749 }750 finalquery.push_back(*here);751 }752 }753 754 ++here;755 }756 757 // Get last element758 if (!queryelem.empty()) {759 finalquery += combine + queryelem;760 }761 762 add_field_info(finalquery, tag, type);763 querystring = finalquery;764 }765 766 void format_field_info_mgpp(text_t &querystring, cgiargsclass &args) {767 text_t tag = args["fqf"];768 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)769 770 int argt = args.getintarg("t");// t=0 -and, t=1 - or771 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced772 773 if (tag == "" && argb ==1) {774 return; // no field specifier, advanced mode, the query stays as written775 }776 777 int type = 1; // mgpp778 779 bool simple_and = (argb==0 && argt==0);780 text_t finalquery = "";781 text_t fieldpart ="";782 text_t queryelem = "";783 bool in_phrase = false;784 bool in_field = false;785 786 text_t::const_iterator here = querystring.begin();787 text_t::const_iterator end = querystring.end();788 while (here != end) {789 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) {790 queryelem.push_back(*here);791 }792 else if (*here == '|') {793 in_field = false;794 }795 else if (*here == '!' || *here == '(' || *here == ')') {796 if (!in_phrase) { // ignore these if in_phrase797 // output field, then output operator798 in_field = false;799 if (!queryelem.empty()) {800 if (!simple_and && !fieldpart.empty()) {801 add_field_info(fieldpart, tag, type);802 finalquery += fieldpart;803 finalquery.push_back(' ');804 fieldpart.clear();805 }806 fieldpart += queryelem;807 }808 if (!fieldpart.empty()) {809 add_field_info(fieldpart, tag, type);810 finalquery += fieldpart;811 finalquery.push_back(' ');812 }813 fieldpart.clear();814 queryelem.clear();815 finalquery.push_back(*here);816 finalquery.push_back(' ');817 }818 }819 else if (*here == '"') {820 queryelem.push_back(*here);821 if (in_phrase == false) in_phrase = true;822 else {823 in_phrase = false;824 }825 }826 827 // Found word boundary, in a phrase828 else if (in_phrase) {829 queryelem.push_back(*here);830 }831 // Found a word boundary832 else {833 if (!queryelem.empty()) {834 if (queryelem == "&") {835 in_field = true;836 queryelem.clear();837 }838 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {839 840 if (argb==1) {841 // simple search, these not allowed842 in_field = true;843 fieldpart += queryelem;844 fieldpart.push_back(' ');845 }846 queryelem.clear();847 848 }849 else {850 if (!simple_and && !in_field) {851 if (!fieldpart.empty()) {852 add_field_info(fieldpart, tag, type);853 finalquery += fieldpart;854 finalquery.push_back(' ');855 fieldpart.clear();856 }857 }858 859 fieldpart += queryelem;860 fieldpart.push_back(' ');861 queryelem.clear();862 }863 }864 }865 ++here;866 }867 // at the end868 if (!queryelem.empty()) {869 if (!simple_and && !in_field && !fieldpart.empty()) {870 add_field_info(fieldpart, tag, type);871 finalquery += fieldpart;872 finalquery.push_back(' ');873 fieldpart.clear();874 }875 fieldpart += queryelem;876 }877 if (!fieldpart.empty()) {878 add_field_info(fieldpart, tag, type);879 finalquery += fieldpart;880 fieldpart.clear();881 finalquery.push_back(' ');882 }883 884 querystring = finalquery;885 cerr << "final query = "<<finalquery<<endl;886 }887 888 void format_field_info(text_t &querystring, cgiargsclass &args) {889 int argct = args.getintarg("ct");890 if (argct == 1) {891 format_field_info_mgpp(querystring, args);892 } else if (argct == 2) {893 format_field_info_lucene(querystring, args);894 }895 }896
Note:
See TracChangeset
for help on using the changeset viewer.