Changeset 11765
- Timestamp:
- 2006-05-03T16:06:16+12:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/recpt/querytools.cpp
r11004 r11765 46 46 47 47 option.name = "MatchMode"; 48 option.value = (args.getintarg("t")) ? "some" : "all"; 48 // mgpp in advanced mode, always use some query 49 if (args.getintarg("ct") !=0 && args.getintarg("b") == 1) { 50 option.value = "some"; 51 } else { 52 option.value = (args.getintarg("t")) ? "some" : "all"; 53 } 49 54 request.filterOptions.push_back (option); 50 55 … … 285 290 } 286 291 287 // some query form parsing functions for use with mgpp 292 // some query form parsing functions for use with mgpp & lucene 288 293 289 294 void parse_reg_query_form(text_t &querystring, cgiargsclass &args) … … 606 611 } 607 612 608 void format_field_info(text_t &querystring, cgiargsclass &args) { 609 613 void format_field_info_lucene(text_t &querystring, cgiargsclass &args) { 610 614 text_t tag = args["fqf"]; 611 615 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields) 612 613 int argct = args.getintarg("ct"); 614 bool mgpp = (argct == 1); 615 bool lucene = (argct == 2); 616 617 if (mgpp && tag == "") { 618 return; // no field specifier: do nothing 619 } 620 616 int type = 2; //lucene 621 617 int argt = args.getintarg("t");// t=0 -and, t=1 - or 622 618 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced 623 619 624 bool simple_AND_search = (argb==0 && argt==0);625 bool simple_OR_search = (argb==0 && argt==1);626 627 if (mgpp && simple_AND_search) {628 // mgpp, simple AND search, tag the whole query string629 add_field_info(querystring, tag, argct);620 // lucene simple OR - the string stays as is, but may need field tag 621 if (argb==0 && argt == 1) { 622 // just tag the entire thing 623 if (tag != "") { 624 add_field_info(querystring, tag, type); 625 } 630 626 return; 631 627 } 632 // resulting mgpp case - we need to tag each individual term or phrase633 // TODO - allow AND. OR in query string and don't tag these words634 635 if (lucene && (simple_OR_search || argb == 1)) {636 // OR search or advanced search (here we assume that the user has added their term mods - don't need to add term mods637 if (tag != "") {638 // tag the whole string639 add_field_info(querystring, tag, argct);640 }641 return;642 }643 644 645 // if we have got here, we need to add in combiners (lucene) or646 // we need to tag each individual word (mgpp OR search - mgpp can't do OR inside a field)647 648 text_t combine = ((lucene)? "+" : "");649 650 text_t processed_querystring = "";651 text_t queryelement = "";652 653 628 bool in_phrase = false; 629 630 text_t queryelem = ""; 631 text_t finalquery = ""; 632 633 // only add in + for simple AND search 634 text_t combine = ((argb==0)? "+" : ""); 635 636 // for lucene, we need to change & to && and | to || if advanced search 637 // we need to tag the entire string, if we have a field 638 // if we are simple and search, then we put && in between words 639 654 640 text_t::const_iterator here = querystring.begin(); 655 641 text_t::const_iterator end = querystring.end(); 656 642 while (here != end) { 657 if (is_unicode_letdig(*here) || is_special_character( argct, *here)) {658 queryelem ent.push_back(*here);643 if (is_unicode_letdig(*here) || is_special_character(type, *here)) { 644 queryelem.push_back(*here); 659 645 } 660 646 661 647 // Detect phrase starts/finishes 662 648 else if (*here == '"') { 663 queryelem ent.push_back(*here);649 queryelem.push_back(*here); 664 650 if (in_phrase == false) in_phrase = true; 665 651 else { 666 if (mgpp) {add_field_info(queryelement, tag, argct);} 667 processed_querystring += combine + queryelement; 668 queryelement.clear(); 652 finalquery += combine + queryelem; 653 queryelem.clear(); 669 654 in_phrase = false; 670 655 } … … 673 658 // Found word boundary, in a phrase 674 659 else if (in_phrase) { 675 queryelem ent.push_back(*here);660 queryelem.push_back(*here); 676 661 } 677 662 // Word boundary, but not in a phrase 678 663 else { 679 if (!queryelement.empty()) { 680 if (mgpp) {add_field_info(queryelement, tag, argct);} 681 processed_querystring += combine + queryelement; 682 queryelement.clear(); 683 } 684 processed_querystring.push_back(*here); 664 if (*here == '&') { 665 queryelem.push_back('&'); 666 queryelem.push_back('&'); 667 } else if (*here == '|') { 668 queryelem.push_back('|'); 669 queryelem.push_back('|'); 670 } else { 671 if (!queryelem.empty()) { 672 finalquery += combine + queryelem; 673 queryelem.clear(); 674 } 675 finalquery.push_back(*here); 676 } 685 677 } 686 678 … … 689 681 690 682 // Get last element 691 if (!queryelement.empty()) { 692 if (mgpp) {add_field_info(queryelement, tag, argct);} 693 processed_querystring += combine + queryelement; 694 } 695 696 querystring = processed_querystring; 683 if (!queryelem.empty()) { 684 finalquery += combine + queryelem; 685 } 686 687 add_field_info(finalquery, tag, type); 688 querystring = finalquery; 689 cerr << "final query = "<<finalquery<<endl; 690 } 691 692 void format_field_info_mgpp(text_t &querystring, cgiargsclass &args) { 693 694 text_t tag = args["fqf"]; 695 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields) 697 696 698 if (lucene) { 699 // tag the whole query string 700 add_field_info(querystring, tag, argct); 701 } 702 } 703 704 697 int argt = args.getintarg("t");// t=0 -and, t=1 - or 698 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced 699 700 if (tag == "" && argb ==1) { 701 return; // no field specifier, advanced mode, the query stays as written 702 } 703 704 int type = 1; // mgpp 705 706 bool simple_and = (argb==0 && argt==0); 707 text_t finalquery = ""; 708 text_t fieldpart =""; 709 text_t queryelem = ""; 710 bool in_phrase = false; 711 bool in_field = false; 712 713 text_t::const_iterator here = querystring.begin(); 714 text_t::const_iterator end = querystring.end(); 715 while (here != end) { 716 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) { 717 queryelem.push_back(*here); 718 } 719 else if (*here == '|') { 720 in_field = false; 721 } 722 else if (*here == '!' || *here == '(' || *here == ')') { 723 if (!in_phrase) { // ignore these if in_phrase 724 // output field, then output operator 725 in_field = false; 726 if (!queryelem.empty()) { 727 if (!simple_and && !fieldpart.empty()) { 728 add_field_info(fieldpart, tag, type); 729 finalquery += fieldpart; 730 finalquery.push_back(' '); 731 fieldpart.clear(); 732 } 733 fieldpart += queryelem; 734 } 735 if (!fieldpart.empty()) { 736 add_field_info(fieldpart, tag, type); 737 finalquery += fieldpart; 738 finalquery.push_back(' '); 739 } 740 fieldpart.clear(); 741 queryelem.clear(); 742 finalquery.push_back(*here); 743 finalquery.push_back(' '); 744 } 745 } 746 else if (*here == '"') { 747 queryelem.push_back(*here); 748 if (in_phrase == false) in_phrase = true; 749 else { 750 in_phrase = false; 751 } 752 } 753 754 // Found word boundary, in a phrase 755 else if (in_phrase) { 756 queryelem.push_back(*here); 757 } 758 // Found a word boundary 759 else { 760 if (!queryelem.empty()) { 761 if (queryelem == "&") { 762 in_field = true; 763 queryelem.clear(); 764 } 765 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) { 766 767 if (argb==1) { 768 // simple search, these not allowed 769 in_field = true; 770 fieldpart += queryelem; 771 fieldpart.push_back(' '); 772 } 773 queryelem.clear(); 774 775 } 776 else { 777 if (!simple_and && !in_field) { 778 if (!fieldpart.empty()) { 779 add_field_info(fieldpart, tag, type); 780 finalquery += fieldpart; 781 finalquery.push_back(' '); 782 fieldpart.clear(); 783 } 784 } 785 786 fieldpart += queryelem; 787 fieldpart.push_back(' '); 788 queryelem.clear(); 789 } 790 } 791 } 792 ++here; 793 } 794 // at the end 795 if (!queryelem.empty()) { 796 if (!simple_and && !in_field && !fieldpart.empty()) { 797 add_field_info(fieldpart, tag, type); 798 finalquery += fieldpart; 799 finalquery.push_back(' '); 800 fieldpart.clear(); 801 } 802 fieldpart += queryelem; 803 } 804 if (!fieldpart.empty()) { 805 add_field_info(fieldpart, tag, type); 806 finalquery += fieldpart; 807 fieldpart.clear(); 808 finalquery.push_back(' '); 809 } 810 811 querystring = finalquery; 812 cerr << "final query = "<<finalquery<<endl; 813 } 814 815 void format_field_info(text_t &querystring, cgiargsclass &args) { 816 int argct = args.getintarg("ct"); 817 if (argct == 1) { 818 format_field_info_mgpp(querystring, args); 819 } else if (argct == 2) { 820 format_field_info_lucene(querystring, args); 821 } 822 } 823
Note:
See TracChangeset
for help on using the changeset viewer.