Context Navigation

← Previous Changeset
Next Changeset →

Changeset 16947

Timestamp:

2008-08-21T15:13:41+12:00 (16 years ago)

Author:

mdewsnip

Message:

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

Files:

5 edited

gsdl/trunk/runtime-src/src/colservr/expat_resultset.cpp (modified) (2 diffs)
gsdl/trunk/runtime-src/src/colservr/lucenequeryfilter.cpp (modified) (1 diff)
gsdl/trunk/runtime-src/src/colservr/queryinfo.cpp (modified) (4 diffs)
gsdl/trunk/runtime-src/src/colservr/queryinfo.h (modified) (1 diff)
indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java (modified) (2 diffs)

Legend:

Unmodified
Added
Removed

gsdl/trunk/runtime-src/src/colservr/expat_resultset.cpp

-              r16915
+              r16947
     char* id = get_attribute(attributes,"id");
+    if (id!=NULL) {
+      int id_num = atoi(id);
+    if (id != NULL)
+    {
       docresultclass doc;
       doc.clear();
       doc.docnum = id_num;
+      doc.docid = id;
       doc.docweight = qrpack_ptr->match_count;
 …
+      }
       queryresult_ptr->docs.docset[doc.docnum] = doc;
       queryresult_ptr->docs.docorder.push_back(doc.docnum);
+      queryresult_ptr->docs.docset[doc.docid] = doc;
+      queryresult_ptr->docs.docorder.push_back(doc.docid);
       ++qrpack_ptr->match_count;

gsdl/trunk/runtime-src/src/colservr/lucenequeryfilter.cpp

-              r16915
+              r16947
   // assemble document results
+  if (need_matching_docs (request.filterResultOptions)) {
+  if (need_matching_docs (request.filterResultOptions))
+  {
+    // Loop through the query results (ordered by ranking)
     int resultnum = 1;
     ResultDocInfo_t resultdoc;
     text_t trans_OID;
     vector<text_t>::iterator docorder_here = queryresults.docs.docorder.begin();
     vector<text_t>::iterator docorder_end = queryresults.docs.docorder.end();
+    // Now handled by Lucene directly
     //if (endresults == -1) endresults = MAXNUMDOCS;
     while (docorder_here != docorder_end)
+    vector<text_t>::iterator docorder_iterator = queryresults.docs.docorder.begin();
+    while (docorder_iterator != queryresults.docs.docorder.end())
+    {
+      text_t doc_OID = (*docorder_iterator);
+      // logout << "Matching doc OID: " << doc_OID << endl;
+      // Make sure this result is in the docset, and either in the request set or the request set is empty
+      docresultmap::iterator doc_result = queryresults.docs.docset.find (doc_OID);
+      if (doc_result != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, doc_OID)))
+      {
+        // Now handled by Lucene directly
+        //if (resultnum > endresults) break;
+        // translate the document number
+        if (!translate(db_ptr, *docorder_here, trans_OID))
+          {
+            logout << text_t2ascii
+                   << "warning: could not translate lucene document number \""
+                   << *docorder_here << "\" to OID.\n\n";
+          }
+        else
+          {
+            docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
+            // see if there is a result for this number,
+            // if it is in the request set (or the request set is empty)
+            if (docset_here != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, trans_OID)))
+              {
+                // Now handled by Lucene directly
+                //if (resultnum >= startresults) {
+                // add this document
+                resultdoc.OID = trans_OID;
+                resultdoc.result_num = resultnum;
+                resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
+                resultdoc.num_terms_matched = (*docset_here).second.num_query_terms_matched;
+                response.docInfo.push_back (resultdoc);
+                //}
+                ++resultnum;
+              }
+          } // else
+        ++docorder_here;
+      }
+  } // if need matching docs
+    // Add the matching document
+    ResultDocInfo_t resultdoc;
+    resultdoc.OID = doc_OID;
+    resultdoc.result_num = resultnum;
+    resultdoc.ranking = (int)((*doc_result).second.docweight * 10000.0 + 0.5);
+    resultdoc.num_terms_matched = (*doc_result).second.num_query_terms_matched;
+    response.docInfo.push_back (resultdoc);
+    resultnum++;
+      }
+      docorder_iterator++;
+    }
+  }
   // assemble the term results

gsdl/trunk/runtime-src/src/colservr/queryinfo.cpp

-              r12868
+              r16947
 void docresultclass::clear () {
+  docid="";
   docnum=-1;
   docweight=0.0;
 …
 docresultclass &docresultclass::operator=(const docresultclass &d) {
+  docid = d.docid;
   docnum = d.docnum;
   docweight = d.docweight;
 …
 bool operator==(const docresultclass &x, const docresultclass &y) {
   return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
+  return ((x.docid == y.docid) && (x.docnum == y.docnum) && (x.docweight == y.docweight) &&
       (x.num_query_terms_matched == y.num_query_terms_matched) &&
       (x.num_phrase_match == y.num_phrase_match));
 …
 bool operator<(const docresultclass &x, const docresultclass &y) {
+  return ((x.docnum < y.docnum) ||
+      ((x.docnum == y.docnum) &&
+       ((x.docweight < y.docweight) ||
+        ((x.docweight == y.docweight) &&
+         ((x.num_query_terms_matched < y.num_query_terms_matched) ||
+          ((x.num_query_terms_matched == y.num_query_terms_matched) &&
+           ((x.num_phrase_match < y.num_phrase_match))))))));
+  return ((x.docid < y.docid) ||
+      ((x.docid == y.docid) &&
+       ((x.docnum < y.docnum) ||
+        ((x.docnum == y.docnum) &&
+         ((x.docweight < y.docweight) ||
+          ((x.docweight == y.docweight) &&
+           ((x.num_query_terms_matched < y.num_query_terms_matched) ||
+        ((x.num_query_terms_matched == y.num_query_terms_matched) &&
+         ((x.num_phrase_match < y.num_phrase_match))))))))));
+}

gsdl/trunk/runtime-src/src/colservr/queryinfo.h

r16445	r16947
115	115
116	116	struct docresultclass {
	117	text_t docid; // currently used by Lucene only
117	118	int docnum;
118	119	float docweight;

indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

r16912	r16947
203	203	Document doc = hits.doc(i - 1);
204	204	int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
205		lucene_query_result.addDoc(doc.get("~~node~~ID").trim(), hits.score(i-1), doc_term_freq);
	205	lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
206	206	}
207	207	}
…	…
221	221	Document doc = reader.document(lucene_doc_num);
222	222	int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
223		lucene_query_result.addDoc(doc.get("~~node~~ID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
	223	lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
224	224	}
225	225	}

Note: See TracChangeset for help on using the changeset viewer.