Changeset 10995 for trunk/gsdl
- Timestamp: 2005-12-15T16:57:58+13:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/recpt/querytools.cpp
r10411 r10995 612 612 void add_field_info(text_t &querystring, const text_t &tag, int type) { 613 613 614 if (tag == "") return; // do nothing 614 615 if (type == 1) { //mgpp 615 616 querystring = "["+querystring+"]:"+tag; … … 620 621 } 621 622 623 bool is_special_character(int indexer_type, unsigned short character) { 624 // mgpp 625 if (indexer_type == 1) { 626 return (character == '#' || character == '/' || character == '*'); 627 } 628 // lucene 629 else if (indexer_type ==2) { 630 return (character == '?' || character == '*' || character == '~' || 631 character == '^'); 632 } 633 return false; 634 } 622 635 623 636 void format_field_info(text_t &querystring, cgiargsclass &args) { 624 637 625 638 text_t tag = args["fqf"]; 626 if (tag == "ZZ" || tag == "") { 627 return; // do nothing 628 } 629 639 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields) 640 630 641 int argct = args.getintarg("ct"); 642 bool mgpp = (argct == 1); 643 bool lucene = (argct == 2); 644 645 if (mgpp && tag == "") { 646 return; // no field specifier: do nothing 647 } 648 631 649 int argt = args.getintarg("t");// t=0 -and, t=1 - or 632 650 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced 633 651 634 // Special code for Lucene 635 // The default operator for Lucene is "or", so we need to add "+" symbols when t == 0 636 // Also, we need to be careful not to mess up phrase searches 637 if (argct == 2) { 638 text_t processed_querystring = ""; 639 text_t queryelement = ""; 640 text_t combine = ((argt == 0) ? 
"+" : ""); 641 bool in_phrase = false; 642 text_t::const_iterator here = querystring.begin(); 643 text_t::const_iterator end = querystring.end(); 644 while (here != end) { 645 if (is_unicode_letdig(*here)) { 646 queryelement.push_back(*here); 647 } 648 649 // Detect phrase starts/finishes 650 else if (*here == '"') { 651 queryelement.push_back(*here); 652 if (in_phrase == false) in_phrase = true; 653 else if (in_phrase == true) { 654 add_field_info(queryelement, tag, argct); 655 processed_querystring += combine + queryelement; 656 queryelement.clear(); 657 in_phrase = false; 658 } 659 } 660 661 // Found word boundary 662 else if (in_phrase) { 663 queryelement.push_back(*here); 664 } 665 else { 666 if (!queryelement.empty()) { 667 add_field_info(queryelement, tag, argct); 668 processed_querystring += combine + queryelement; 669 queryelement.clear(); 670 } 671 processed_querystring.push_back(*here); 672 } 673 674 ++here; 675 } 676 677 // Get last element 678 if (!queryelement.empty()) { 679 add_field_info(queryelement, tag, argct); 680 processed_querystring += combine + queryelement; 681 } 682 683 querystring = processed_querystring; 684 return; 685 } 686 687 if (argb==0 && argt==0) { 688 // simple 'and' search - just put tag info round whole query string 652 bool simple_AND_search = (argb==0 && argt==0); 653 bool simple_OR_search = (argb==0 && argt==1); 654 655 if (mgpp && simple_AND_search) { 656 // mgpp, simple AND search, tag the whole query string 689 657 add_field_info(querystring, tag, argct); 690 658 return; 691 659 } 692 693 // we need to individually tag words 694 text_t outtext; 695 text_t word; 696 //unsigned short c; 660 // resulting mgpp case - we need to tag each individual term or phrase 661 // TODO - allow AND. 
OR in query string and don't tag these words 662 663 if (lucene && (simple_OR_search || argb == 1)) { 664 // OR search or advanced search (here we assume that the user has added their term mods - don't need to add term mods 665 if (tag != "") { 666 // tag the whole string 667 add_field_info(querystring, tag, argct); 668 } 669 return; 670 } 671 672 673 // if we have got here, we need to add in combiners (lucene) or 674 // we need to tag each individual word (mgpp OR search - mgpp can't do OR inside a field) 675 676 text_t combine = ((lucene)? "+" : ""); 677 678 text_t processed_querystring = ""; 679 text_t queryelement = ""; 680 681 bool in_phrase = false; 697 682 text_t::const_iterator here = querystring.begin(); 698 683 text_t::const_iterator end = querystring.end(); 699 700 while (here !=end) { 701 702 if (is_unicode_letdig(*here)|| *here == '#' || *here == '/' ) { 703 // include term modifiers in a word just in case 704 // not word boundary 705 word.push_back(*here); 706 ++here; 707 } 684 while (here != end) { 685 if (is_unicode_letdig(*here) || is_special_character(argct, *here)) { 686 queryelement.push_back(*here); 687 } 688 689 // Detect phrase starts/finishes 690 else if (*here == '"') { 691 queryelement.push_back(*here); 692 if (in_phrase == false) in_phrase = true; 693 else { 694 if (mgpp) {add_field_info(queryelement, tag, argct);} 695 processed_querystring += combine + queryelement; 696 queryelement.clear(); 697 in_phrase = false; 698 } 699 } 700 701 // Found word boundary, in a phrase 702 else if (in_phrase) { 703 queryelement.push_back(*here); 704 } 705 // Word boundary, but not in a phrase 708 706 else { 709 // found word boundary 710 if (!word.empty() ) { 711 add_field_info(word, tag, argct); 712 outtext += word; 713 word.clear(); 714 } 715 // everything else, we add into the query string 716 outtext.push_back(*here); 717 ++here; 718 } 719 } 720 721 // get last word 722 if (!word.empty()) { 723 add_field_info(word, tag, argct); 724 outtext += word; 
725 } 726 727 querystring = outtext; 728 } 729 707 if (!queryelement.empty()) { 708 if (mgpp) {add_field_info(queryelement, tag, argct);} 709 processed_querystring += combine + queryelement; 710 queryelement.clear(); 711 } 712 processed_querystring.push_back(*here); 713 } 714 715 ++here; 716 } 717 718 // Get last element 719 if (!queryelement.empty()) { 720 if (mgpp) {add_field_info(queryelement, tag, argct);} 721 processed_querystring += combine + queryelement; 722 } 723 724 querystring = processed_querystring; 725 726 if (lucene) { 727 // tag the whole query string 728 add_field_info(querystring, tag, argct); 729 } 730 } 731 732
Note: See TracChangeset for help on using the changeset viewer.