Changeset 4287


Ignore:
Timestamp:
2003-05-19T16:00:47+12:00 (21 years ago)
Author:
kjdon
Message:

Added some code for highlighting search terms in the document. This involves a call to the query service, so it may not be a good way to do it. Phrase highlighting is not done properly.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java

    r4268 r4287  
    2727import org.w3c.dom.Element;
    2828import org.w3c.dom.Node;
     29import org.w3c.dom.Text;
    2930import org.w3c.dom.NodeList;
    3031
    3132// General Java classes
    3233import java.util.HashMap;
     34import java.util.HashSet;
    3335import java.io.File;
    3436
     
    236238    Element dc_response_doc = (Element) GSXML.getNodeByPath(dc_response_message, path);
    237239    Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
     240           
     241    boolean highlight_query_terms = true;
     242    if (highlight_query_terms) {
     243        dc_response_doc.removeChild(dc_response_doc_content);
     244       
     245        dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
     246        dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
     247    }
    238248    if (provide_annotations) {
    239249        // now we can modifiy the response doc if needed
     
    255265        path = GSPath.createPath(links);
    256266        dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
     267       
    257268        }
    258269    }
     
    355366       
    356367    }
     368
     369    /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or shoul dit send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all teh terms found in the text.
     370     */
     371    protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
     372
     373    // do the query again to get term info
     374    Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
     375    HashMap params = GSXML.extractParams(cgi_param_list, false);
     376   
     377    String service_name = (String)params.get(GSCGI.SERVICE_ARG);
     378    if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
     379        return dc_response_doc_content;
     380    }
     381    String collection = (String)params.get(GSCGI.COLLECTION_ARG);
     382    String lang = request.getAttribute(GSXML.LANG_ATT);
     383    String to = GSPath.appendLink(collection, service_name);
     384   
     385    Element mr_query_message = doc_.createElement(GSXML.MESSAGE_ELEM);
     386    Element mr_query_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
     387    mr_query_message.appendChild(mr_query_request);
     388   
     389    // paramList
     390    Element query_param_list = (Element)doc_.importNode(cgi_param_list, true);
     391    mr_query_request.appendChild(query_param_list);
     392
     393    // do the query
     394        Element mr_query_response = (Element)mr_.process(mr_query_message);
     395
     396    String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
     397    Element query_term_info_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
     398    if (query_term_info_list == null) {
     399        // no term info
     400        System.err.println("DocumentAction: Warning: No query term information.\n");
     401        return dc_response_doc_content;
     402    }
     403
     404    NodeList equivs = query_term_info_list.getElementsByTagName("equivTermList");
     405    HashSet all_terms = new HashSet();
     406    for (int i=0; i<equivs.getLength(); i++) {
     407       
     408        // get the terms
     409        String [] terms = GSXML.getAttributeValuesFromList((Element)equivs.item(i), GSXML.NAME_ATT);
     410        for (int j=0; j<terms.length; j++) {
     411       
     412        all_terms.add(terms[j]);
     413        }
     414    }
     415
     416    String content = GSXML.getNodeText(dc_response_doc_content);
     417
     418    StringBuffer temp = new StringBuffer();
     419    StringBuffer new_content = new StringBuffer();
     420
     421    for (int i=0; i<content.length(); i++) {
     422        char c = content.charAt(i);
     423        if (Character.isLetterOrDigit(c)) {
     424        // not word boundary
     425        temp.append(c);
     426        } else {
     427        // word boundary
     428        // add the last word if there was one
     429        if (temp.length()>0) {
     430            if (all_terms.contains(temp.toString())) {
     431            new_content.append("<annotation type='query_term'>"+temp+"</annotation>");
     432            } else {
     433            new_content.append(temp);
     434            }
     435            temp.delete(0, temp.length());
     436        }
     437        if (c=='<') {
     438            temp.append(c);
     439            i++;
     440            // skip over html
     441            while (i<content.length() && content.charAt(i)!='>') {
     442            temp.append(content.charAt(i));
     443            i++;
     444            }
     445            temp.append(content.charAt(i));
     446            new_content.append(GSXML.xmlSafe(temp.toString()));
     447            temp.delete(0, temp.length());
     448           
     449        } else {
     450            new_content.append(c);
     451        }
     452        }
     453    }
     454   
     455    String content_string = "<nodeContent>"+new_content.toString()+"</nodeContent>";
     456    Element content_elem = converter_.getDOM(content_string).getDocumentElement();
     457    return content_elem;
     458    }
    357459}
Note: See TracChangeset for help on using the changeset viewer.