- Timestamp: 2011-10-05T15:44:19+13:00 (13 years ago)
- Location: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone
- Files: 7 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java
r17804 r24725 33 33 import org.apache.lucene.analysis.standard.*; 34 34 35 import org.apache.lucene.analysis.ASCIIFoldingFilter; 35 36 36 class GS2Analyzer extends StandardAnalyzer 37 import org.apache.lucene.util.Version; 38 39 40 class GS2Analyzer extends GS2StandardAnalyzer 37 41 { 42 43 static Version matchVersion = Version.LUCENE_24; 44 45 38 46 public GS2Analyzer() 39 47 { 40 super( );48 super(matchVersion); 41 49 } 50 42 51 43 52 public GS2Analyzer(Set stopWords) 44 53 { 45 super( stopWords);54 super(matchVersion,stopWords); 46 55 } 47 56 … … 49 58 public GS2Analyzer(String [] stopwords) 50 59 { 51 super(stopwords); 52 } 53 54 public TokenStream tokenStream(String fieldName, Reader reader) 55 { 56 TokenStream result = super.tokenStream(fieldName,reader); 57 result = new ISOLatin1AccentFilter(result); 58 59 return result; 60 super(matchVersion,StopFilter.makeStopSet(stopwords)); 60 61 } 61 62 63 @Override 64 protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { 65 final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); 66 src.setMaxTokenLength(maxTokenLength); 67 src.setReplaceInvalidAcronym(replaceInvalidAcronym); 68 TokenStream tok = new StandardFilter(matchVersion, src); 69 tok = new LowerCaseFilter(matchVersion, tok); 70 tok = new StopFilter(matchVersion, tok, stopwords); 62 71 63 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { 64 TokenStream result = super.reusableTokenStream(fieldName,reader); 65 66 result = new ISOLatin1AccentFilter(result); 67 68 return result; 72 // top it up with accent folding 73 tok = new ASCIIFoldingFilter(tok); 74 75 return new TokenStreamComponents(src, tok) { 76 @Override 77 protected boolean reset(final Reader reader) throws IOException { 78 src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength); 79 return super.reset(reader); 80 } 81 }; 69 82 } 70 71 83 72 84 } -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java
r20731 r24725 34 34 import org.apache.lucene.analysis.Analyzer; 35 35 import org.apache.lucene.document.Document; 36 import org.apache.lucene.index.IndexModifier;37 36 import org.apache.lucene.index.IndexReader; 38 37 import org.apache.lucene.index.IndexWriter; -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java
r16583 r24725 33 33 34 34 import java.io.IOException; 35 import org.apache.lucene.analysis.standard.StandardAnalyzer; 35 import java.io.File; 36 //import org.apache.lucene.analysis.standard.StandardAnalyzer; 36 37 import org.apache.lucene.index.IndexWriter; 37 38 import org.apache.lucene.index.Term; 39 40 import org.apache.lucene.store.SimpleFSDirectory; 41 import org.apache.lucene.index.IndexWriter.MaxFieldLength; 38 42 39 43 … … 128 132 throws IOException 129 133 { 130 index_writer = new IndexWriter(index_path, new StandardAnalyzer()); 134 SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path)); 135 index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(), 136 MaxFieldLength.UNLIMITED); 131 137 } 132 138 … … 163 169 { 164 170 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")"); 165 debug("- Initial number of documents in index: " + index_writer. docCount());171 debug("- Initial number of documents in index: " + index_writer.numDocs()); 166 172 index_writer.deleteDocuments(new Term("nodeid", "" + node_id)); 167 debug("- Final number of documents in index: " + index_writer. docCount());173 debug("- Final number of documents in index: " + index_writer.numDocs()); 168 174 } 169 175 } -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java
r20731 r24725 33 33 34 34 import java.io.IOException; 35 import java.io.File; 35 36 import java.util.Arrays; 36 37 import java.util.Enumeration; … … 38 39 39 40 import org.apache.lucene.analysis.Analyzer; 40 import org.apache.lucene.analysis.standard.StandardAnalyzer;41 //import org.apache.lucene.analysis.standard.StandardAnalyzer; 41 42 import org.apache.lucene.document.Document; 42 43 import org.apache.lucene.document.Field; 44 45 import org.apache.lucene.store.SimpleFSDirectory; 46 import org.apache.lucene.index.IndexWriter.MaxFieldLength; 43 47 44 48 … … 146 150 throws IOException 147 151 { 148 Analyzer analyzer = new StandardAnalyzer();152 Analyzer analyzer = new GS2Analyzer(); 149 153 // create an index in /tmp/index, overwriting an existing one: 150 154 index_modifier = new GS2IndexModifier(index_path, analyzer); … … 188 192 { 189 193 debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')"); 190 debug("- Initial number of documents in index: " + index_modifier. docCount());194 debug("- Initial number of documents in index: " + index_modifier.numDocs()); 191 195 // Retrieve the document requested 192 196 int doc_num = index_modifier.getDocNumByNodeID(node_id); … … 230 234 // We also have to initialize the nodeId value 231 235 // changed to use docOID --kjdon 232 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index. TOKENIZED));236 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED)); 233 237 234 238 // Re-index document … … 296 300 for(int i = 0; i < values.size(); i++) 297 301 { 298 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index. 
TOKENIZED));302 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED)); 299 303 } 300 304 values.clear(); … … 318 322 for(int i = 0; i < values.size(); i++) 319 323 { 320 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index. TOKENIZED));324 document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED)); 321 325 } 322 326 values.clear(); -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java
r23194 r24725 45 45 import org.apache.lucene.index.Term; 46 46 import org.apache.lucene.analysis.Analyzer; 47 48 import org.apache.lucene.store.SimpleFSDirectory; 49 import org.apache.lucene.index.IndexWriter.MaxFieldLength; 47 50 48 51 import java.util.Stack; … … 190 193 protected String file_id_ = null; 191 194 192 static private String[] stop_words = GS2Analyzer.STOP_WORDS;193 194 195 195 /** pass in true if want to create a new index, false if want to use the existing one */ 196 196 public Indexer (String doc_tag_level, File index_dir, boolean create) … … 206 206 reader.setFeature("http://xml.org/sax/features/validation", false); 207 207 208 analyzer_ = new GS2Analyzer(stop_words); 209 210 writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create); 208 SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath())); 209 210 analyzer_ = new GS2Analyzer(); // uses build in stop_word_set 211 212 writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED); 213 211 214 // by default, will only index 10,000 words per document 212 215 // Can throw out_of_memory errors … … 318 321 //String node_id = atts.getValue("gs2:id"); 319 322 //print(" " + qName + ": " + node_id + " (" + mode_ + ")" ); 320 //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index. UN_TOKENIZED));323 //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED)); 321 324 322 325 current_doc_oid_ = atts.getValue("gs2:docOID"); 323 326 print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" ); 324 current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index. UN_TOKENIZED));327 current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED)); 325 328 } 326 329 … … 359 362 if (qName.equals(indexable_current_node_)) 360 363 { 361 current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index. 
TOKENIZED, Field.TermVector.YES));364 current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); 362 365 // The byXX fields are used for sorting search results 363 366 // We don't want to do that for Text or AllFields fields … … 365 368 if (!qName.equals("TX") && !qName.equals("ZZ")) 366 369 { 367 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index. UN_TOKENIZED, Field.TermVector.NO));370 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO)); 368 371 } 369 372 … … 472 475 { 473 476 debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")"); 474 debug("- Initial number of documents in index: " + writer_. docCount());477 debug("- Initial number of documents in index: " + writer_.numDocs()); 475 478 writer_.deleteDocuments(new Term("docOID", doc_id)); 476 debug("- Final number of documents in index: " + writer_. docCount());479 debug("- Final number of documents in index: " + writer_.numDocs()); 477 480 } 478 481 -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r20910 r24725 45 45 import org.apache.lucene.search.IndexSearcher; 46 46 import org.apache.lucene.search.Query; 47 import org.apache.lucene.search. RangeFilter;47 import org.apache.lucene.search.TermRangeFilter; 48 48 import org.apache.lucene.search.Searcher; 49 49 import org.apache.lucene.search.ScoreDoc; 50 50 import org.apache.lucene.search.Sort; 51 import org.apache.lucene.search.SortField; 51 52 import org.apache.lucene.search.TopFieldDocs; 52 53 53 54 public class GS2LuceneQuery 54 import org.apache.lucene.store.Directory; 55 import org.apache.lucene.store.FSDirectory; 56 import org.apache.lucene.util.Version; 57 58 public class GS2LuceneQuery extends SharedSoleneQuery 55 59 { 56 57 58 static private String TEXTFIELD = "TX"; 59 60 // Use the standard set of English stop words by default 61 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 62 63 private String full_indexdir=""; 64 private String default_conjunction_operator = "OR"; 65 private String fuzziness = null; 66 private String sort_field = null; 67 private Sort sorter=new Sort(); 68 private String filter_string = null; 69 private Filter filter = null; 70 private int start_results=1; 71 private int end_results=Integer.MAX_VALUE; 72 73 private QueryParser query_parser = null; 74 private QueryParser query_parser_no_stop_words = null; 75 private Searcher searcher = null; 76 private IndexReader reader = null; 77 78 static private PrintWriter utf8out = null; 79 80 static 81 { 82 try { 83 OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8"); 84 utf8out = new PrintWriter(osw, true); 85 } 86 catch (UnsupportedEncodingException e) { 87 System.out.println(e); 88 } 89 } 90 91 60 protected String full_indexdir=""; 61 62 protected Sort sorter=new Sort(); 63 protected Filter filter = null; 64 65 protected static Version matchVersion = Version.LUCENE_24; 66 67 protected QueryParser query_parser = null; 68 protected QueryParser query_parser_no_stop_words = null; 69 protected Searcher searcher 
= null; 70 protected IndexReader reader = null; 71 92 72 public GS2LuceneQuery() { 73 super(); 93 74 94 75 // Create one query parser with the standard set of stop words, and one with none 95 76 96 query_parser = new QueryParser( TEXTFIELD, new GS2Analyzer(stop_words));97 query_parser_no_stop_words = new QueryParser( TEXTFIELD, new GS2Analyzer(new String[] { }));77 query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set 78 query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { })); 98 79 } 99 80 100 81 101 82 public boolean initialise() { 83 84 if (!super.initialise()) { 85 return false; 86 } 87 102 88 103 89 if (full_indexdir==null || full_indexdir.length()==-1){ … … 106 92 return false; 107 93 } 94 108 95 try { 109 searcher = new IndexSearcher(full_indexdir); 96 Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 97 searcher = new IndexSearcher(full_indexdir_dir,true); 110 98 reader = ((IndexSearcher) searcher).getIndexReader(); 111 99 … … 118 106 119 107 } 108 109 public void setIndexDir(String full_indexdir) { 110 this.full_indexdir = full_indexdir; 111 } 112 113 public void setSortField(String sort_field) { 114 super.setSortField(sort_field); 115 116 if (sort_field == null) { 117 this.sorter = new Sort(); 118 } else { 119 this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!? 120 } 121 } 122 123 public void setFilterString(String filter_string) { 124 super.setFilterString(filter_string); 125 this.filter = parseFilterString(filter_string); 126 } 127 128 public Filter getFilter() { 129 return this.filter; 130 } 131 120 132 121 133 public LuceneQueryResult runQuery(String query_string) { … … 194 206 if (end_results == Integer.MAX_VALUE) { 195 207 // Perform the query (filter and sorter may be null) 196 Hits hits = searcher.search(query, filter, sorter);197 lucene_query_result.setTotalDocs(hits. 
length());208 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 209 lucene_query_result.setTotalDocs(hits.totalHits); 198 210 199 211 // Output the matching documents 200 212 lucene_query_result.setStartResults(start_results); 201 lucene_query_result.setEndResults(hits. length());202 203 for (int i = start_results; i <= hits. length(); i++) {204 int lucene_doc_num = hits. id(i - 1);205 Document doc = hits.doc(i - 1);213 lucene_query_result.setEndResults(hits.totalHits); 214 215 for (int i = start_results; i <= hits.totalHits; i++) { 216 int lucene_doc_num = hits.scoreDocs[i - 1].doc; 217 Document doc = reader.document(lucene_doc_num); 206 218 int doc_term_freq = 0; 207 219 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num)); … … 210 222 doc_term_freq = doc_term_freq_object.intValue(); 211 223 } 212 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score (i-1), doc_term_freq);224 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq); 213 225 } 214 226 } … … 256 268 257 269 public void setDefaultConjunctionOperator(String default_conjunction_operator) { 258 this.default_conjunction_operator = default_conjunction_operator.toUpperCase(); 270 super.setDefaultConjunctionOperator(default_conjunction_operator); 271 259 272 if (default_conjunction_operator.equals("AND")) { 260 273 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); … … 265 278 } 266 279 } 267 268 public String getDefaultConjunctionOperator() { 269 return this.default_conjunction_operator; 270 } 271 272 public void setEndResults(int end_results) { 273 this.end_results = end_results; 274 } 275 public int getEndResults() { 276 return this.end_results; 277 } 278 279 public void setFilterString(String filter_string) { 280 this.filter_string = filter_string; 281 this.filter = parseFilterString(filter_string); 282 } 283 public String getFilterString() { 284 return this.filter_string ; 285 } 286 
287 public Filter getFilter() { 288 return this.filter; 289 } 290 291 public void setIndexDir(String full_indexdir) { 292 this.full_indexdir = full_indexdir; 293 } 294 295 public void setFuzziness(String fuzziness) { 296 this.fuzziness = fuzziness; 297 } 298 public String getFuzziness() { 299 return this.fuzziness; 300 } 301 302 public void setSortField(String sort_field) { 303 this.sort_field = sort_field; 304 if (sort_field == null) { 305 this.sorter = new Sort(); 306 } else { 307 this.sorter = new Sort(sort_field); 308 } 309 } 310 public String getSortField() { 311 return this.sort_field; 312 } 313 314 public void setStartResults(int start_results) { 315 if (start_results < 1) { 316 start_results = 1; 317 } 318 this.start_results = start_results; 319 } 320 public int getStartResults() { 321 return this.start_results; 322 } 323 280 281 324 282 public void cleanUp() { 283 super.cleanUp(); 325 284 try { 326 285 if (searcher != null) { … … 332 291 } 333 292 334 private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 293 294 protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 335 295 throws java.io.IOException, org.apache.lucene.queryParser.ParseException 336 296 { … … 434 394 } 435 395 436 pr ivateFilter parseFilterString(String filter_string)396 protected Filter parseFilterString(String filter_string) 437 397 { 438 398 Filter result = null; … … 445 405 String upper_term = matcher.group(4); 446 406 boolean include_upper = matcher.group(5).equals("]"); 447 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);407 result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 448 408 } 449 409 else { … … 452 412 return result; 453 413 } 454 455 456 protected void finalize() throws Throwable457 {458 try {459 utf8out.flush();460 } finally {461 super.finalize();462 }463 }464 465 414 415 466 
416 /** command line program and auxiliary methods */ 467 417 468 418 // Fairly self-explanatory I should hope 469 static pr ivateboolean query_result_caching_enabled = false;419 static protected boolean query_result_caching_enabled = false; 470 420 471 421 472 422 static public void main (String args[]) 473 423 { 474 475 476 424 if (args.length == 0) { 477 425 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]"); … … 566 514 } 567 515 568 pr ivatestatic void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)516 protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string) 569 517 throws IOException 570 518 { … … 654 602 } 655 603 656 pr ivatestatic String fileSafe(String text)604 protected static String fileSafe(String text) 657 605 { 658 606 StringBuffer file_safe_text = new StringBuffer(); -
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java
r16912 r24725 29 29 import java.util.Vector; 30 30 31 /** a QueryResult class for alucene search31 /** Opportunity to fine tune QueryResult for lucene search 32 32 * 33 33 */ 34 public class LuceneQueryResult { 34 35 public class LuceneQueryResult extends SharedSoleneQueryResult { 35 36 36 public static final int NO_ERROR = 0; 37 public static final int PARSE_ERROR = 1; 38 public static final int TOO_MANY_CLAUSES_ERROR = 2; 39 public static final int IO_ERROR = 3; 40 public static final int OTHER_ERROR = 4; 41 42 /** the list of DocInfo */ 43 protected Vector docs_=null; 44 /** the list of TermInfo */ 45 protected Vector terms_=null; 46 /** the list of stopwords found in the query */ 47 protected Vector stopwords_ = null; 48 /** the total number of docs found - not necessarily the size of docs_*/ 49 protected int total_num_docs_=0; 50 /** the start result number if we are retrieving only a portion of the results */ 51 protected int start_results_ = 0; 52 /** the end result number if we are retrieving only a portion of the results */ 53 protected int end_results_ = 0; 54 /** whether an error has occurred and what kind it is*/ 55 protected int error_ = NO_ERROR; 56 37 // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class 57 38 LuceneQueryResult() { 58 docs_ = new Vector(); 59 terms_ = new Vector(); 60 stopwords_ = new Vector(); 61 } 62 63 /** clear the info from the last query - should be called before setting any new docs/terms */ 64 public void clear() { 65 total_num_docs_=0; 66 docs_.clear(); 67 terms_.clear(); 68 stopwords_.clear(); 69 error_ = NO_ERROR; 70 } 71 72 /** returns the result as a String - useful for printing out results */ 73 public String toString() { 74 75 String result = ""; 76 result += "docs (ranks): "; 77 for (int i=0; i<docs_.size(); i++) { 78 result += ((DocInfo)docs_.elementAt(i)).toString()+", "; 79 } 80 result += "\nterms: "; 81 for (int i=0; i<terms_.size(); i++) { 82 result += 
((TermInfo)terms_.elementAt(i)).toString()+", "; 83 } 84 result += "\nactual number of docs found = "+total_num_docs_; 85 86 return result; 87 } 88 /** a shorter representation - just terms and total docs - not the 89 individual docnums and ranks */ 90 public String toShortString() { 91 String result = ""; 92 result += "\nterms: "; 93 for (int i=0; i<terms_.size(); i++) { 94 result += ((TermInfo)terms_.elementAt(i)).toString()+", "; 95 } 96 result += "\nactual number of docs found = "+total_num_docs_; 97 return result; 98 } 99 100 public void setTotalDocs(int num) { 101 total_num_docs_=num; 102 } 103 104 public void setStartResults(int start) { 105 start_results_ = start; 106 } 107 108 public void setEndResults(int end) { 109 end_results_ = end; 110 } 111 112 public void addDoc(String id, float rank, int termfreq) 113 { 114 docs_.add(new DocInfo(id, rank, termfreq)); 115 } 116 117 public void addTerm(String term, String field, int match, int freq) { 118 TermInfo ti = new TermInfo(); 119 ti.term_=term; 120 ti.field_=field; 121 ti.match_docs_=match; 122 ti.term_freq_=freq; 123 terms_.add(ti); 124 } 125 public void addStopWord(String stopword) { 126 stopwords_.add(stopword); 127 } 128 public Vector getDocs() { 129 return docs_; 130 } 131 132 public int getError() { 133 return error_; 134 } 135 136 public String getErrorString() { 137 if (error_ == PARSE_ERROR) { 138 return "PARSE_EXCEPTION"; 139 } 140 if (error_ == TOO_MANY_CLAUSES_ERROR) { 141 return "TOO_MANY_CLAUSES"; 142 } 143 if (error_ == IO_ERROR) { 144 return "IO_ERROR"; 145 } 146 if (error_ == NO_ERROR) { 147 return "NO_ERROR"; 148 } 149 return "UNKNOWN"; 150 } 151 152 public Vector getTerms() { 153 return terms_; 154 } 155 156 public Vector getStopWords() { 157 return stopwords_; 158 } 159 public int getTotalDocs() { 160 return total_num_docs_; 161 } 162 163 public void setError(int error) { 164 error_ = error; 165 } 166 167 public String getXMLString() { 168 StringBuffer buffer = new StringBuffer(); 169 170 
// terms 171 buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n"); 172 for (int i=0; i<terms_.size(); i++) { 173 buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n"); 174 } 175 176 // stopwords 177 for (int i=0; i<stopwords_.size(); i++) { 178 buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n"); 179 } 180 181 // results 182 buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n"); 183 buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n"); 184 buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n"); 185 186 for (int i=0; i< docs_.size(); i++) { 187 buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n"); 188 } 189 190 return buffer.toString(); 191 } 192 193 194 public class TermInfo { 195 196 /** the term itself */ 197 public String term_=null; 198 /** the field for which this term was queried */ 199 public String field_=null; 200 /** the number of documents containing this term */ 201 public int match_docs_=0; 202 /** overall term freq for this term */ 203 public int term_freq_=0; 204 205 public TermInfo() { 206 } 207 208 /** output the class as a string */ 209 public String toString() { 210 String result=""; 211 result +="<"+field_+">\""+term_+" docs("+match_docs_; 212 result +=")freq("+term_freq_+")"; 213 return result; 214 } 215 216 /** output as an XML element */ 217 public String toXMLString() { 218 return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />"; 219 } 220 } 221 222 223 public class DocInfo 224 { 225 public String id_ = ""; 226 public float rank_ = 0; 227 public int termfreq_ = 0; 228 229 public DocInfo (String id, float rank, int termfreq) 230 { 231 id_ = id; 232 rank_ = rank; 233 termfreq_ = termfreq; 234 } 235 236 public String toString() 237 { 238 return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")"; 239 } 240 241 public String toXMLString() 242 { 243 return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + 
"\" termfreq=\"" + termfreq_ + "\" />"; 244 } 245 } 246 247 248 // where should this go??? 249 public static String xmlSafe(String text) { 250 text = text.replaceAll("&","&amp;"); 251 text = text.replaceAll("<","&lt;"); 252 text = text.replaceAll(">","&gt;"); 253 text = text.replaceAll("'","&#039;"); 254 text = text.replaceAll("\\\"","&quot;"); 255 return text; 256 } 257 39 super(); 40 } 258 41 }
Note: See TracChangeset for help on using the changeset viewer.