Changeset 30554
- Timestamp: 2016-06-02T14:20:42+12:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java
r30553 r30554 32 32 // General Java classes 33 33 import java.util.ArrayList; 34 import java.util.Arrays;35 34 import java.util.HashMap; 36 35 import java.util.HashSet; … … 122 121 } 123 122 124 String query_terms = (String) params.get("terms");125 logger.error("terms = "+query_terms);126 String query = (String) params.get("query");127 123 UserContext userContext = new UserContext(request); 128 124 … … 550 546 if (highlight_query_terms) 551 547 { 552 content = highlightQueryTermsOld(request, (Element) content); // highlightQueryTerms(query_terms, query, request.getOwnerDocument(), (Element) content); //request, (Element) content);548 content = highlightQueryTerms(request, (Element) content); 553 549 } 554 550 doc_nodes.item(i).appendChild(doc.importNode(content, true)); … … 601 597 dc_response_doc.removeChild(dc_response_doc_content); 602 598 603 dc_response_doc_content = highlightQueryTerms Old(request, dc_response_doc_content); //highlightQueryTerms(query_terms, query, request.getOwnerDocument(), dc_response_doc_content); //request, dc_response_doc_content);599 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content); 604 600 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true)); 605 601 } … … 843 839 * found in the text. 
844 840 */ 845 protected Element highlightQueryTerms(String terms, String performed_query, Document doc, Element dc_response_doc_content) { 846 logger.error("in highlight, terms = "+terms); 847 if (terms == null || performed_query == null) { 848 return dc_response_doc_content; 849 } 850 HashMap<String, HashSet<String>> term_to_variants_map = new HashMap<String, HashSet<String>>(); 851 HashSet<String> query_term_variants = new HashSet<String>(); 852 853 // terms in the form snail:snail,SNAILS,Snail;farm:farm,farming,Farming 854 String[] term_list = terms.split(";"); 855 for (int i=0; i<term_list.length; i++) { 856 String term_x = term_list[i]; 857 int colon_index = term_x.indexOf(';'); 858 String main_term; 859 String term_variants; 860 if (colon_index == -1) { 861 main_term = term_x; 862 term_variants = main_term; 863 } else { 864 main_term = term_x.substring(0, colon_index); 865 term_variants = term_x.substring(colon_index+1); 866 } 867 query_term_variants.add(main_term); 868 term_to_variants_map.put(main_term, new HashSet<String>(Arrays.asList(term_variants.split(",")))); 869 } 870 871 String content = GSXML.getNodeText(dc_response_doc_content); 872 873 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>(); 874 875 //Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query"); 876 //String performed_query = //GSXML.getNodeText(query_element) + " "; 877 878 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>(); 879 int term_start = 0; 880 boolean in_term = false; 881 boolean in_phrase = false; 882 for (int i = 0; i < performed_query.length(); i++) 883 { 884 char character = performed_query.charAt(i); 885 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character); 886 887 // Has a query term just started? 
888 if (in_term == false && is_character_letter_or_digit == true) 889 { 890 in_term = true; 891 term_start = i; 892 } 893 894 // Or has a term just finished? 895 else if (in_term == true && is_character_letter_or_digit == false) 896 { 897 in_term = false; 898 String term = performed_query.substring(term_start, i); 899 HashSet<String> phrase_query_p_term_x_variants = term_to_variants_map.get(term); 900 // Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term); 901 // if (term_element != null) 902 // { 903 904 // HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>(); 905 906 // NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList"); 907 // if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0) 908 // { 909 // String termValueU = null; 910 // String termValueL = null; 911 912 // if (term.length() > 1) 913 // { 914 // termValueU = term.substring(0, 1).toUpperCase() + term.substring(1); 915 // termValueL = term.substring(0, 1).toLowerCase() + term.substring(1); 916 // } 917 // else 918 // { 919 // termValueU = term.substring(0, 1).toUpperCase(); 920 // termValueL = term.substring(0, 1).toLowerCase(); 921 // } 922 923 // phrase_query_p_term_x_variants.add(termValueU); 924 // phrase_query_p_term_x_variants.add(termValueL); 925 // } 926 // else 927 // { 928 // for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) 929 // { 930 // Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j); 931 // String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT); 932 // for (int k = 0; k < term_equivalent_terms.length; k++) 933 // { 934 // phrase_query_p_term_x_variants.add(term_equivalent_terms[k]); 935 // } 936 // } 937 // } 938 if (phrase_query_p_term_x_variants != null) { 939 
phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants); 940 941 if (in_phrase == false) 942 { 943 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 944 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>(); 945 } 946 } 947 //} 948 } 949 // Watch for phrases (surrounded by quotes) 950 if (character == '\"') 951 { 952 // Has a phrase just started? 953 if (in_phrase == false) 954 { 955 in_phrase = true; 956 } 957 // Or has a phrase just finished? 958 else if (in_phrase == true) 959 { 960 in_phrase = false; 961 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list); 962 } 963 964 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>(); 965 } 966 } 967 968 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy); 969 } 970 protected Element highlightQueryTermsOld(Element request, Element dc_response_doc_content) 841 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) 971 842 { 972 843 Document doc = request.getOwnerDocument(); … … 1194 1065 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy) 1195 1066 { 1196 1197 logger.error("size = "+ query_term_variants.size());1198 1067 // Convert the content string to an array of characters for speed 1199 1068 char[] content_characters = new char[content.length()];
Note: See TracChangeset for help on using the changeset viewer.