Context Navigation

← Previous Changeset
Next Changeset →

Changeset 12655

Timestamp:

2006-09-04T09:56:41+12:00 (18 years ago)

Author:

mdewsnip

Message:

Lucene now returns just the results we're interested in (i.e. those for a page), rather than all the results, making searching significantly faster in many cases. Many thanks to John Thompson and DL Consulting Ltd.

Location:

trunk/gsdl/src/colservr

Files:

5 edited

lucenequeryfilter.cpp (modified) (1 diff)
lucenesearch.cpp (modified) (1 diff)
queryfilter.cpp (modified) (1 diff)
queryinfo.cpp (modified) (4 diffs)
queryinfo.h (modified) (1 diff)

Legend:

Unmodified
Added
Removed

trunk/gsdl/src/colservr/lucenequeryfilter.cpp

-              r12421
+              r12655
     vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
+    if (endresults == -1) endresults = MAXNUMDOCS;
+    while (docorder_here != docorder_end) {
+      if (resultnum > endresults) break;
+    // Now handled by Lucene directly
+    //if (endresults == -1) endresults = MAXNUMDOCS;
+    while (docorder_here != docorder_end)
+      {
+        // Now handled by Lucene directly
+        //if (resultnum > endresults) break;
+      // translate the document number
+      if (!translate(gdbmptr, *docorder_here, trans_OID)) {
+    logout << text_t2ascii
+           << "warning: could not translate lucene document number \""
+           << *docorder_here << "\" to OID.\n\n";
+      } else {
+    docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
+    // see if there is a result for this number,
+    // if it is in the request set (or the request set is empty)
+    if (docset_here != queryresults.docs.docset.end() &&
+        (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
+      if (resultnum >= startresults) {
+        // add this document
+        resultdoc.OID = trans_OID;
+        resultdoc.result_num = resultnum;
+        resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
+        response.docInfo.push_back (resultdoc);
+      }
+      ++resultnum;
+    }
+      } // else
+      ++docorder_here;
+    }
+        // translate the document number
+        if (!translate(gdbmptr, *docorder_here, trans_OID))
+          {
+            logout << text_t2ascii
+                   << "warning: could not translate lucene document number \""
+                   << *docorder_here << "\" to OID.\n\n";
+          }
+        else
+          {
+            docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
+            // see if there is a result for this number,
+            // if it is in the request set (or the request set is empty)
+            if (docset_here != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, trans_OID)))
+              {
+                // Now handled by Lucene directly
+                //if (resultnum >= startresults) {
+                // add this document
+                resultdoc.OID = trans_OID;
+                resultdoc.result_num = resultnum;
+                resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
+                response.docInfo.push_back (resultdoc);
+                //}
+                ++resultnum;
+              }
+          } // else
+        ++docorder_here;
+      }
   } // if need matching docs
   // assemble the term results
   if (need_term_info(request.filterResultOptions)) {

trunk/gsdl/src/colservr/lucenesearch.cpp

-              r12419
+              r12655
+    }
+  // New code to allow Lucene to do paging of search results. This should
+  // substantially improve performance as we don't have to return all 12000
+  // hits if we only need the first 20!
+  if (queryparams.startresults && queryparams.endresults)
+    {
+      cerr << "Start Results: " << queryparams.startresults << endl;
+      cmd += (text_t)" -startresults " + queryparams.startresults;
+      cerr << "End Results: " << queryparams.endresults << endl;
+      cmd += (text_t)" -endresults " + queryparams.endresults;
+    }
   cerr << "Lucene command: " << cmd << endl;

trunk/gsdl/src/colservr/queryfilter.cpp

-              r12410
+              r12655
+  }
+  // Store the start and end results in the query too, as lucene now needs to
+  // pass them through to the Java
+  query.startresults = startresults;
+  query.endresults = endresults;
   // add the last query
   query_params.push_back (query);

trunk/gsdl/src/colservr/queryinfo.cpp

-              r12421
+              r12655
   sortfield.clear();
   fuzzysearch = 0; // 0 = not fuzzy, 1 = fuzzy
+  startresults = 1; // all
+  endresults = 10; // all
+}
 …
   sortfield = q.sortfield;
   fuzzysearch = q.fuzzysearch;
+  startresults = q.startresults;
+  endresults = q.endresults;
   return *this;
+}
 …
       (x.filterstring == y.filterstring) &&
           (x.sortfield == y.sortfield) &&
+          (x.fuzzysearch == y.fuzzysearch));
+          (x.fuzzysearch == y.fuzzysearch) &&
+          (x.startresults == y.startresults) &&
+          (x.startresults == y.startresults));
+}
 …
   outs << " sortfield = \"" << q.sortfield << "\"\n";
   outs << " fuzzysearch = \"" << q.fuzzysearch << "\"\n";
+  outs << " startresults = \"" << q.startresults << "\"\n";
+  outs << " endresults = \"" << q.endresults << "\"\n";
   outs << "\n";

trunk/gsdl/src/colservr/queryinfo.h

-              r12421
+              r12655
   text_t sortfield; // Field to use for sorting result set (currently used by lucene)
   int fuzzysearch; // Should search be fuzzy (only used by Lucene)
+  int startresults;
+  int endresults;
   queryparamclass ();

Note: See TracChangeset for help on using the changeset viewer.