- Timestamp: 2011-10-07T11:36:07+13:00 (13 years ago)
- Location: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper
- Files: 7 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java
r24725 r24731 33 33 import org.apache.lucene.analysis.standard.*; 34 34 35 import org.apache.lucene.analysis.ASCIIFoldingFilter;36 35 37 import org.apache.lucene.util.Version; 38 39 40 class GS2Analyzer extends GS2StandardAnalyzer 36 class GS2Analyzer extends StandardAnalyzer 41 37 { 42 43 static Version matchVersion = Version.LUCENE_24;44 45 46 38 public GS2Analyzer() 47 39 { 48 super( matchVersion);40 super(); 49 41 } 50 51 42 52 43 public GS2Analyzer(Set stopWords) 53 44 { 54 super( matchVersion,stopWords);45 super(stopWords); 55 46 } 56 47 … … 58 49 public GS2Analyzer(String [] stopwords) 59 50 { 60 super(matchVersion,StopFilter.makeStopSet(stopwords)); 51 super(stopwords); 52 } 53 54 public TokenStream tokenStream(String fieldName, Reader reader) 55 { 56 TokenStream result = super.tokenStream(fieldName,reader); 57 result = new ISOLatin1AccentFilter(result); 58 59 return result; 61 60 } 62 61 63 @Override64 protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {65 final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);66 src.setMaxTokenLength(maxTokenLength);67 src.setReplaceInvalidAcronym(replaceInvalidAcronym);68 TokenStream tok = new StandardFilter(matchVersion, src);69 tok = new LowerCaseFilter(matchVersion, tok);70 tok = new StopFilter(matchVersion, tok, stopwords);71 62 72 // top it up with accent folding 73 tok = new ASCIIFoldingFilter(tok); 63 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { 64 TokenStream result = super.reusableTokenStream(fieldName,reader); 65 66 result = new ISOLatin1AccentFilter(result); 67 68 return result; 69 } 74 70 75 return new TokenStreamComponents(src, tok) {76 @Override77 protected boolean reset(final Reader reader) throws IOException {78 src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);79 return super.reset(reader);80 }81 };82 }83 71 84 72 } -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java
r24725 r24731 34 34 import org.apache.lucene.analysis.Analyzer; 35 35 import org.apache.lucene.document.Document; 36 import org.apache.lucene.index.IndexModifier; 36 37 import org.apache.lucene.index.IndexReader; 37 38 import org.apache.lucene.index.IndexWriter; -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java
r24725 r24731 33 33 34 34 import java.io.IOException; 35 import java.io.File; 36 //import org.apache.lucene.analysis.standard.StandardAnalyzer; 35 import org.apache.lucene.analysis.standard.StandardAnalyzer; 37 36 import org.apache.lucene.index.IndexWriter; 38 37 import org.apache.lucene.index.Term; 39 40 import org.apache.lucene.store.SimpleFSDirectory;41 import org.apache.lucene.index.IndexWriter.MaxFieldLength;42 38 43 39 … … 132 128 throws IOException 133 129 { 134 SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path)); 135 index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(), 136 MaxFieldLength.UNLIMITED); 130 index_writer = new IndexWriter(index_path, new StandardAnalyzer()); 137 131 } 138 132 … … 169 163 { 170 164 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")"); 171 debug("- Initial number of documents in index: " + index_writer. numDocs());165 debug("- Initial number of documents in index: " + index_writer.docCount()); 172 166 index_writer.deleteDocuments(new Term("nodeid", "" + node_id)); 173 debug("- Final number of documents in index: " + index_writer. numDocs());167 debug("- Final number of documents in index: " + index_writer.docCount()); 174 168 } 175 169 } -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java
r24725 r24731 33 33 34 34 import java.io.IOException; 35 import java.io.File;36 35 import java.util.Arrays; 37 36 import java.util.Enumeration; … … 39 38 40 39 import org.apache.lucene.analysis.Analyzer; 41 //import org.apache.lucene.analysis.standard.StandardAnalyzer;40 import org.apache.lucene.analysis.standard.StandardAnalyzer; 42 41 import org.apache.lucene.document.Document; 43 42 import org.apache.lucene.document.Field; 44 45 import org.apache.lucene.store.SimpleFSDirectory;46 import org.apache.lucene.index.IndexWriter.MaxFieldLength;47 43 48 44 … … 150 146 throws IOException 151 147 { 152 Analyzer analyzer = new GS2Analyzer();148 Analyzer analyzer = new StandardAnalyzer(); 153 149 // create an index in /tmp/index, overwriting an existing one: 154 150 index_modifier = new GS2IndexModifier(index_path, analyzer); … … 192 188 { 193 189 debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')"); 194 debug("- Initial number of documents in index: " + index_modifier. numDocs());190 debug("- Initial number of documents in index: " + index_modifier.docCount()); 195 191 // Retrieve the document requested 196 192 int doc_num = index_modifier.getDocNumByNodeID(node_id); … … 234 230 // We also have to initialize the nodeId value 235 231 // changed to use docOID --kjdon 236 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index. ANALYZED));232 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED)); 237 233 238 234 // Re-index document … … 300 296 for(int i = 0; i < values.size(); i++) 301 297 { 302 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index. 
ANALYZED));298 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED)); 303 299 } 304 300 values.clear(); … … 322 318 for(int i = 0; i < values.size(); i++) 323 319 { 324 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index. ANALYZED));320 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED)); 325 321 } 326 322 values.clear(); -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java
r24725 r24731 45 45 import org.apache.lucene.index.Term; 46 46 import org.apache.lucene.analysis.Analyzer; 47 48 import org.apache.lucene.store.SimpleFSDirectory;49 import org.apache.lucene.index.IndexWriter.MaxFieldLength;50 47 51 48 import java.util.Stack; … … 193 190 protected String file_id_ = null; 194 191 192 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 193 194 195 195 /** pass in true if want to create a new index, false if want to use the existing one */ 196 196 public Indexer (String doc_tag_level, File index_dir, boolean create) … … 206 206 reader.setFeature("http://xml.org/sax/features/validation", false); 207 207 208 SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath())); 209 210 analyzer_ = new GS2Analyzer(); // uses build in stop_word_set 211 212 writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED); 213 208 analyzer_ = new GS2Analyzer(stop_words); 209 210 writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create); 214 211 // by default, will only index 10,000 words per document 215 212 // Can throw out_of_memory errors … … 321 318 //String node_id = atts.getValue("gs2:id"); 322 319 //print(" " + qName + ": " + node_id + " (" + mode_ + ")" ); 323 //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index. NOT_ANALYZED));320 //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED)); 324 321 325 322 current_doc_oid_ = atts.getValue("gs2:docOID"); 326 323 print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" ); 327 current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index. NOT_ANALYZED));324 current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED)); 328 325 } 329 326 … … 362 359 if (qName.equals(indexable_current_node_)) 363 360 { 364 current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index. 
ANALYZED, Field.TermVector.YES));361 current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); 365 362 // The byXX fields are used for sorting search results 366 363 // We don't want to do that for Text or AllFields fields … … 368 365 if (!qName.equals("TX") && !qName.equals("ZZ")) 369 366 { 370 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index. NOT_ANALYZED, Field.TermVector.NO));367 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); 371 368 } 372 369 … … 475 472 { 476 473 debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")"); 477 debug("- Initial number of documents in index: " + writer_. numDocs());474 debug("- Initial number of documents in index: " + writer_.docCount()); 478 475 writer_.deleteDocuments(new Term("docOID", doc_id)); 479 debug("- Final number of documents in index: " + writer_. numDocs());476 debug("- Final number of documents in index: " + writer_.docCount()); 480 477 } 481 478 -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r24725 r24731 45 45 import org.apache.lucene.search.IndexSearcher; 46 46 import org.apache.lucene.search.Query; 47 import org.apache.lucene.search. TermRangeFilter;47 import org.apache.lucene.search.RangeFilter; 48 48 import org.apache.lucene.search.Searcher; 49 49 import org.apache.lucene.search.ScoreDoc; 50 50 import org.apache.lucene.search.Sort; 51 import org.apache.lucene.search.SortField;52 51 import org.apache.lucene.search.TopFieldDocs; 53 52 54 import org.apache.lucene.store.Directory; 55 import org.apache.lucene.store.FSDirectory; 56 import org.apache.lucene.util.Version; 57 58 public class GS2LuceneQuery extends SharedSoleneQuery 53 54 public class GS2LuceneQuery 59 55 { 60 protected String full_indexdir=""; 61 62 protected Sort sorter=new Sort(); 63 protected Filter filter = null; 64 65 protected static Version matchVersion = Version.LUCENE_24; 66 67 protected QueryParser query_parser = null; 68 protected QueryParser query_parser_no_stop_words = null; 69 protected Searcher searcher = null; 70 protected IndexReader reader = null; 71 56 57 58 static private String TEXTFIELD = "TX"; 59 60 // Use the standard set of English stop words by default 61 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 62 63 private String full_indexdir=""; 64 private String default_conjunction_operator = "OR"; 65 private String fuzziness = null; 66 private String sort_field = null; 67 private Sort sorter=new Sort(); 68 private String filter_string = null; 69 private Filter filter = null; 70 private int start_results=1; 71 private int end_results=Integer.MAX_VALUE; 72 73 private QueryParser query_parser = null; 74 private QueryParser query_parser_no_stop_words = null; 75 private Searcher searcher = null; 76 private IndexReader reader = null; 77 78 static private PrintWriter utf8out = null; 79 80 static 81 { 82 try { 83 OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8"); 84 utf8out = new PrintWriter(osw, true); 85 } 86 catch 
(UnsupportedEncodingException e) { 87 System.out.println(e); 88 } 89 } 90 91 72 92 public GS2LuceneQuery() { 73 super();74 93 75 94 // Create one query parser with the standard set of stop words, and one with none 76 95 77 query_parser = new QueryParser( matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set78 query_parser_no_stop_words = new QueryParser( matchVersion,TEXTFIELD, new GS2Analyzer(new String[] { }));96 query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words)); 97 query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { })); 79 98 } 80 99 81 100 82 101 public boolean initialise() { 83 84 if (!super.initialise()) {85 return false;86 }87 88 102 89 103 if (full_indexdir==null || full_indexdir.length()==-1){ … … 92 106 return false; 93 107 } 94 95 108 try { 96 Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 97 searcher = new IndexSearcher(full_indexdir_dir,true); 109 searcher = new IndexSearcher(full_indexdir); 98 110 reader = ((IndexSearcher) searcher).getIndexReader(); 99 111 … … 106 118 107 119 } 108 109 public void setIndexDir(String full_indexdir) {110 this.full_indexdir = full_indexdir;111 }112 113 public void setSortField(String sort_field) {114 super.setSortField(sort_field);115 116 if (sort_field == null) {117 this.sorter = new Sort();118 } else {119 this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!?120 }121 }122 123 public void setFilterString(String filter_string) {124 super.setFilterString(filter_string);125 this.filter = parseFilterString(filter_string);126 }127 128 public Filter getFilter() {129 return this.filter;130 }131 132 120 133 121 public LuceneQueryResult runQuery(String query_string) { … … 206 194 if (end_results == Integer.MAX_VALUE) { 207 195 // Perform the query (filter and sorter may be null) 208 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);209 
lucene_query_result.setTotalDocs(hits. totalHits);196 Hits hits = searcher.search(query, filter, sorter); 197 lucene_query_result.setTotalDocs(hits.length()); 210 198 211 199 // Output the matching documents 212 200 lucene_query_result.setStartResults(start_results); 213 lucene_query_result.setEndResults(hits. totalHits);214 215 for (int i = start_results; i <= hits. totalHits; i++) {216 int lucene_doc_num = hits. scoreDocs[i - 1].doc;217 Document doc = reader.document(lucene_doc_num);201 lucene_query_result.setEndResults(hits.length()); 202 203 for (int i = start_results; i <= hits.length(); i++) { 204 int lucene_doc_num = hits.id(i - 1); 205 Document doc = hits.doc(i - 1); 218 206 int doc_term_freq = 0; 219 207 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num)); … … 222 210 doc_term_freq = doc_term_freq_object.intValue(); 223 211 } 224 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score Docs[i-1].score, doc_term_freq);212 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq); 225 213 } 226 214 } … … 268 256 269 257 public void setDefaultConjunctionOperator(String default_conjunction_operator) { 270 super.setDefaultConjunctionOperator(default_conjunction_operator); 271 258 this.default_conjunction_operator = default_conjunction_operator.toUpperCase(); 272 259 if (default_conjunction_operator.equals("AND")) { 273 260 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); … … 278 265 } 279 266 } 280 281 267 268 public String getDefaultConjunctionOperator() { 269 return this.default_conjunction_operator; 270 } 271 272 public void setEndResults(int end_results) { 273 this.end_results = end_results; 274 } 275 public int getEndResults() { 276 return this.end_results; 277 } 278 279 public void setFilterString(String filter_string) { 280 this.filter_string = filter_string; 281 this.filter = parseFilterString(filter_string); 282 } 283 public String getFilterString() { 284 return 
this.filter_string ; 285 } 286 287 public Filter getFilter() { 288 return this.filter; 289 } 290 291 public void setIndexDir(String full_indexdir) { 292 this.full_indexdir = full_indexdir; 293 } 294 295 public void setFuzziness(String fuzziness) { 296 this.fuzziness = fuzziness; 297 } 298 public String getFuzziness() { 299 return this.fuzziness; 300 } 301 302 public void setSortField(String sort_field) { 303 this.sort_field = sort_field; 304 if (sort_field == null) { 305 this.sorter = new Sort(); 306 } else { 307 this.sorter = new Sort(sort_field); 308 } 309 } 310 public String getSortField() { 311 return this.sort_field; 312 } 313 314 public void setStartResults(int start_results) { 315 if (start_results < 1) { 316 start_results = 1; 317 } 318 this.start_results = start_results; 319 } 320 public int getStartResults() { 321 return this.start_results; 322 } 323 282 324 public void cleanUp() { 283 super.cleanUp();284 325 try { 285 326 if (searcher != null) { … … 291 332 } 292 333 293 294 protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 334 private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 295 335 throws java.io.IOException, org.apache.lucene.queryParser.ParseException 296 336 { … … 394 434 } 395 435 396 pr otectedFilter parseFilterString(String filter_string)436 private Filter parseFilterString(String filter_string) 397 437 { 398 438 Filter result = null; … … 405 445 String upper_term = matcher.group(4); 406 446 boolean include_upper = matcher.group(5).equals("]"); 407 result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);447 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 408 448 } 409 449 else { … … 412 452 return result; 413 453 } 414 415 454 455 456 protected void finalize() throws Throwable 457 { 458 try { 459 utf8out.flush(); 460 } finally { 461 
super.finalize(); 462 } 463 } 464 465 416 466 /** command line program and auxiliary methods */ 417 467 418 468 // Fairly self-explanatory I should hope 419 static pr otectedboolean query_result_caching_enabled = false;469 static private boolean query_result_caching_enabled = false; 420 470 421 471 422 472 static public void main (String args[]) 423 473 { 474 475 424 476 if (args.length == 0) { 425 477 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]"); … … 514 566 } 515 567 516 pr otectedstatic void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)568 private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string) 517 569 throws IOException 518 570 { … … 602 654 } 603 655 604 pr otectedstatic String fileSafe(String text)656 private static String fileSafe(String text) 605 657 { 606 658 StringBuffer file_safe_text = new StringBuffer(); -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java
r24725 r24731 29 29 import java.util.Vector; 30 30 31 /** Opportunity to fine tune QueryResult forlucene search31 /** a QueryResult class for a lucene search 32 32 * 33 33 */ 34 35 public class LuceneQueryResult extends SharedSoleneQueryResult { 36 37 // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class 34 public class LuceneQueryResult { 35 36 public static final int NO_ERROR = 0; 37 public static final int PARSE_ERROR = 1; 38 public static final int TOO_MANY_CLAUSES_ERROR = 2; 39 public static final int IO_ERROR = 3; 40 public static final int OTHER_ERROR = 4; 41 42 /** the list of DocInfo */ 43 protected Vector docs_=null; 44 /** the list of TermInfo */ 45 protected Vector terms_=null; 46 /** the list of stopwords found in the query */ 47 protected Vector stopwords_ = null; 48 /** the total number of docs found - not necessarily the size of docs_*/ 49 protected int total_num_docs_=0; 50 /** the start result number if we are retrieving only a portion of the results */ 51 protected int start_results_ = 0; 52 /** the end result number if we are retrieving only a portion of the results */ 53 protected int end_results_ = 0; 54 /** whether an error has occurred and what kind it is*/ 55 protected int error_ = NO_ERROR; 56 38 57 LuceneQueryResult() { 39 super(); 40 } 58 docs_ = new Vector(); 59 terms_ = new Vector(); 60 stopwords_ = new Vector(); 61 } 62 63 /** clear the info from the last query - should be called before setting any new docs/terms */ 64 public void clear() { 65 total_num_docs_=0; 66 docs_.clear(); 67 terms_.clear(); 68 stopwords_.clear(); 69 error_ = NO_ERROR; 70 } 71 72 /** returns the result as a String - useful for printing out results */ 73 public String toString() { 74 75 String result = ""; 76 result += "docs (ranks): "; 77 for (int i=0; i<docs_.size(); i++) { 78 result += ((DocInfo)docs_.elementAt(i)).toString()+", "; 79 } 80 result += "\nterms: "; 81 for (int i=0; i<terms_.size(); i++) { 82 result += 
((TermInfo)terms_.elementAt(i)).toString()+", "; 83 } 84 result += "\nactual number of docs found = "+total_num_docs_; 85 86 return result; 87 } 88 /** a shorter representation - just terms and total docs - not the 89 individual docnums and ranks */ 90 public String toShortString() { 91 String result = ""; 92 result += "\nterms: "; 93 for (int i=0; i<terms_.size(); i++) { 94 result += ((TermInfo)terms_.elementAt(i)).toString()+", "; 95 } 96 result += "\nactual number of docs found = "+total_num_docs_; 97 return result; 98 } 99 100 public void setTotalDocs(int num) { 101 total_num_docs_=num; 102 } 103 104 public void setStartResults(int start) { 105 start_results_ = start; 106 } 107 108 public void setEndResults(int end) { 109 end_results_ = end; 110 } 111 112 public void addDoc(String id, float rank, int termfreq) 113 { 114 docs_.add(new DocInfo(id, rank, termfreq)); 115 } 116 117 public void addTerm(String term, String field, int match, int freq) { 118 TermInfo ti = new TermInfo(); 119 ti.term_=term; 120 ti.field_=field; 121 ti.match_docs_=match; 122 ti.term_freq_=freq; 123 terms_.add(ti); 124 } 125 public void addStopWord(String stopword) { 126 stopwords_.add(stopword); 127 } 128 public Vector getDocs() { 129 return docs_; 130 } 131 132 public int getError() { 133 return error_; 134 } 135 136 public String getErrorString() { 137 if (error_ == PARSE_ERROR) { 138 return "PARSE_EXCEPTION"; 139 } 140 if (error_ == TOO_MANY_CLAUSES_ERROR) { 141 return "TOO_MANY_CLAUSES"; 142 } 143 if (error_ == IO_ERROR) { 144 return "IO_ERROR"; 145 } 146 if (error_ == NO_ERROR) { 147 return "NO_ERROR"; 148 } 149 return "UNKNOWN"; 150 } 151 152 public Vector getTerms() { 153 return terms_; 154 } 155 156 public Vector getStopWords() { 157 return stopwords_; 158 } 159 public int getTotalDocs() { 160 return total_num_docs_; 161 } 162 163 public void setError(int error) { 164 error_ = error; 165 } 166 167 public String getXMLString() { 168 StringBuffer buffer = new StringBuffer(); 169 170 
// terms 171 buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n"); 172 for (int i=0; i<terms_.size(); i++) { 173 buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n"); 174 } 175 176 // stopwords 177 for (int i=0; i<stopwords_.size(); i++) { 178 buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n"); 179 } 180 181 // results 182 buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n"); 183 buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n"); 184 buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n"); 185 186 for (int i=0; i< docs_.size(); i++) { 187 buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n"); 188 } 189 190 return buffer.toString(); 191 } 192 193 194 public class TermInfo { 195 196 /** the term itself */ 197 public String term_=null; 198 /** the field for which this term was queried */ 199 public String field_=null; 200 /** the number of documents containing this term */ 201 public int match_docs_=0; 202 /** overall term freq for this term */ 203 public int term_freq_=0; 204 205 public TermInfo() { 206 } 207 208 /** output the class as a string */ 209 public String toString() { 210 String result=""; 211 result +="<"+field_+">\""+term_+" docs("+match_docs_; 212 result +=")freq("+term_freq_+")"; 213 return result; 214 } 215 216 /** output as an XML element */ 217 public String toXMLString() { 218 return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />"; 219 } 220 } 221 222 223 public class DocInfo 224 { 225 public String id_ = ""; 226 public float rank_ = 0; 227 public int termfreq_ = 0; 228 229 public DocInfo (String id, float rank, int termfreq) 230 { 231 id_ = id; 232 rank_ = rank; 233 termfreq_ = termfreq; 234 } 235 236 public String toString() 237 { 238 return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")"; 239 } 240 241 public String toXMLString() 242 { 243 return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + 
"\" termfreq=\"" + termfreq_ + "\" />"; 244 } 245 } 246 247 248 // where should this go??? 249 public static String xmlSafe(String text) { 250 text = text.replaceAll("&","&amp;"); 251 text = text.replaceAll("<","&lt;"); 252 text = text.replaceAll(">","&gt;"); 253 text = text.replaceAll("'","&#039;"); 254 text = text.replaceAll("\\\"","&quot;"); 255 return text; 256 } 257 41 258 }
Note: See TracChangeset for help on using the changeset viewer.