Changeset 29986


Ignore:
Timestamp:
06/19/15 23:21:26 (6 years ago)
Author:
ak19
Message:

The getTerms() functionality previously used by the EmbeddedSolrServer has now been re-implemented for HttpSolrServer with the new custom Greenstone Solr RequestHandler class Greenstone3SearchHandler, which lives on the solr server side, in tomcat's solr webapp. The functionality has also been improved: a search such as "econom* cat" now works, by recursively calling setRewriteMethods on any PrefixQuery and WildcardQuery MultiTermQueries within an overall BooleanQuery, and BooleanQuery.TooManyClauses exceptions are now handled when the number of expanded terms is too large, such as for a search of a*.

Location:
gs3-extensions/solr/trunk/src
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/solr/trunk/src/build.xml

    r29932 r29986  
    130130    </jar>
    131131    <copy file="${build.home}/gs3-solr.jar" todir="${web.libdir}"/>
     132
     133    <!-- customisations to the http solr server -->
     134    <jar destfile="${build.home}/gs3-solrserver.jar">
     135      <fileset dir="${build.home}">
     136        <include name="org/greenstone/solrserver/**"/>
     137      </fileset>
     138      <manifest>
     139        <attribute name="Built-By" value="greenstone3" />
     140      </manifest>
     141    </jar>
     142    <!--<copy file="${build.home}/gs3-solrserver.jar" todir="${tomcat.dir}/webapps/solr/WEB-INF/lib"/>
     143
     144          But there's no guarantee the destination will exist at the beginning.
     145          The jar file also needs to go into solr.war. Which is taken care of in the add-service task.
     146          Instead, for testing, call ant compile-gs3-solrserver, which will copy it across to solr webapp.
     147      -->
    132148  </target>
    133149 
     
    188204    <copy todir="${tomcat.dir}/webapps" file="webapps/solr.war" />
    189205    <unwar src="${tomcat.dir}/webapps/solr.war" dest="${tomcat.dir}/webapps/solr"/>
    190     <echo>Copying xalan related jar files from ${web.libdir} into ${tomcat.dir}/webapps/solr.war</echo>
     206    <echo>Copying xalan related jar files, morphology and gs3-solrserver jars from ${web.libdir} into ${tomcat.dir}/webapps/solr.war</echo>
    191207    <copy todir="${tomcat.dir}/webapps/solr/WEB-INF/lib">
    192208      <filelist dir="${web.libdir}" files="${shared-xalan-jars}" />
     209      <file file="${build.home}/gs3-solrserver.jar" />
    193210      <filelist dir="lib/russianmorphology" files="${russian-morph-jars}" />
    194211    </copy>
     
    224241
    225242
    226   <target name="add-service" depends="copy-solr-web,copy-files,solr-for-tomcat,compile" description="Run this target to setup the Solr extension for Greenstone3" />
     243  <target name="add-service" depends="copy-solr-web,copy-files,compile,solr-for-tomcat" description="Run this target to setup the Solr extension for Greenstone3" />
    227244 
    228245  <target name="del-service" depends="del-files,del-solr-for-tomcat"
     
    284301  </target>
    285302 
     303  <target name="compile-gs3-solrserver" description="TEST Target to compile the solr server side gs3-solserver.jar for testing">
     304    <delete dir="build/org/greenstone/solrserver" />
     305    <delete file="build/gs3-solrserver.jar" />
     306    <antcall target="compile" />
     307    <copy file="${build.home}/gs3-solrserver.jar" todir="${tomcat.dir}/webapps/solr/WEB-INF/lib" />
     308  </target>
     309
    286310</project>
  • gs3-extensions/solr/trunk/src/conf/solrconfig.xml

    r29135 r29986  
    824824       queries across multiple shards
    825825    -->
    826   <requestHandler name="/select" class="solr.SearchHandler">
     826  <!--<requestHandler name="/select" class="solr.SearchHandler">-->
     827    <requestHandler name="/select" class="org.greenstone.solrserver.Greenstone3SearchHandler">
    827828    <!-- default values for query parameters can be specified, these
    828829         will be overridden by parameters in the request
     
    888889
    889890  <!-- A request handler that returns indented JSON by default -->
    890   <requestHandler name="/query" class="solr.SearchHandler">
     891  <!--<requestHandler name="/query" class="solr.SearchHandler">-->
     892  <requestHandler name="/query" class="org.greenstone.solrserver.Greenstone3SearchHandler">
    891893     <lst name="defaults">
    892894       <str name="echoParams">explicit</str>
  • gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java

    r29711 r29986  
    3636import java.util.HashSet;
    3737
     38import java.util.regex.Pattern;
     39import java.util.regex.Matcher;
     40
    3841import org.apache.log4j.Logger;
    3942import org.apache.solr.client.solrj.SolrQuery; // subclass of ModifiableSolrParams
     
    149152
    150153
    151     /** Extracts the query terms from the query string. The query string can be a boolean
     154    /**
     155     * UNUSED.
     156     * Back when we used the EmbeddedSolrServer, this getTerms method would expand the terms of a query.
     157     * Because of Solr/Lucene Index locking exceptions, we switched over to the HttpSolrServer instead
     158     * of the Embedded kind.
     159     *
     160     * The functionality of getTerms has been moved to
     161     * ../solrserver/Greenstone3SearchHandler.java, which will sit on the solrserver side (inside
     162     * tomcat's solr webapp).
     163     *
     164     * Extracts the query terms from the query string. The query string can be a boolean
    152165     * combination of the various search fields with their search terms or phrases
    153166     */
     
    371384        }
    372385
     386        // the solrserver will now
    373387        // get the individual terms that make up the query, then request solr to return the totaltermfreq for each term
    374         Term[] terms = getTerms(solrQuery, query_string);
    375         if(terms != null) {
    376             for(int i = 0; i < terms.length; i++) {
    377             Term term = terms[i];
    378             String field = term.field();
    379             String queryTerm = term.text();
    380             // totaltermfreq(TI, 'farming') termfreq(TI, 'farming')
    381            
    382             solrQuery.addField("totaltermfreq(" + field + ",'" + queryTerm + "')");
    383             solrQuery.addField("termfreq(" + field + ",'" + queryTerm + "')");
    384             }
    385         }
    386388
    387389        // do the query
     
    433435                    // solr returns each term's totaltermfreq, ttf, at the document level, even though
    434436                    // the ttf is the same for each document. So extract this information just for the first document
    435                     if(i == 0) { // first document
    436                        
    437                         if(terms != null) {
    438                         for(int j = 0; j < terms.length; j++) {
    439                             Term term = terms[j];
    440                             String field = term.field();
    441                             String queryTerm = term.text();
    442 
    443                             // totaltermfreq(TI, 'farming') termfreq(TI, 'farming')
    444                             Long totaltermfreq = (Long)doc.get("totaltermfreq("+field+",'"+queryTerm+"')");
    445                             Integer termfreq = (Integer)doc.get("termfreq("+field+",'"+queryTerm+"')");
    446 
    447                             //System.err.println("**** ttf = " + totaltermfreq);
    448                             //System.err.println("**** tf = " + termfreq);
    449                             //logger.info("**** ttf = " + totaltermfreq);
    450                             //logger.info("**** tf = " + termfreq);
     437                    if(i == 0) { // first document, all others repeat the same termfreq data
     438                        boolean foundTermInfo = false;
     439
     440                        Collection<String> fieldNames = doc.getFieldNames();
     441                        for(Iterator<String> it = fieldNames.iterator(); it.hasNext(); ) {
     442                        String fieldName = it.next(); // e.g. looking for totaltermfreq(ZZ,'economically')
     443                        //logger.info("@@@@ found fieldName " + fieldName);
    451444                       
    452                             solr_query_result.addTerm(queryTerm, field, (int) hits.getNumFound(), totaltermfreq.intValue()); // long totaltermfreq to int
    453                         }
    454                         } else { // no terms extracted from query_string
     445
     446                        if(fieldName.startsWith("totaltermfreq")) {
     447                           //|| fieldName.startsWith("termfreq")) {
     448                           
     449                            foundTermInfo = true;
     450
     451                            // e.g. totaltermfreq(TI,'farming')
     452                            // e.g. termfreq(TI,'farming')
     453                            Pattern pattern = Pattern.compile("(.*?termfreq)\\((.*?),'(.*?)'\\)");
     454                            Matcher matcher = pattern.matcher(fieldName);
     455                            String metaField, indexField, queryTerm;
     456                            while (matcher.find()) {
     457                            metaField = matcher.group(1); // termfreq or totaltermfreq
     458                            indexField = matcher.group(2); //ZZ, TI
     459                            queryTerm = matcher.group(3);
     460
     461                            //logger.info("\t@@@@ found field " + indexField);
     462                            //logger.info("\t@@@@ queryTerm " + queryTerm);
     463
     464                            // Finally, can ask for the totaltermfreq value for this
     465                            // searchterm in its indexed field:
     466                            // e.g. totaltermfreq(TI,'farming'), e.g. termfreq(TI,'farming')
     467                            Long totaltermfreq = (Long)doc.get("totaltermfreq("+indexField+",'"+queryTerm+"')");
     468                           
     469                            Integer termfreq = (Integer)doc.get("termfreq("+indexField+",'"+queryTerm+"')");
     470                           
     471                            //System.err.println("**** ttf = " + totaltermfreq);
     472                            //System.err.println("**** tf = " + termfreq);
     473                            //logger.info("**** ttf = " + totaltermfreq);
     474                            //logger.info("**** tf = " + termfreq);
     475                            solr_query_result.addTerm(queryTerm, indexField, (int) hits.getNumFound(), totaltermfreq.intValue()); // long totaltermfreq to int
     476                            }
     477                        }                       
     478                        }
     479                        if(!foundTermInfo) { // no terms extracted from query_string
    455480                        solr_query_result.addTerm(query_string, defaultField, (int) hits.getNumFound(), -1); // no terms
    456481                        }
Note: See TracChangeset for help on using the changeset viewer.