Show
Ignore:
Timestamp:
05.10.2011 15:44:19 (9 years ago)
Author:
davidb
Message:

Restructuring of Lucene version 2.x and 3.x to make it easier to control which one is used

Location:
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone
Files:
7 modified
1 moved

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java

    r17804 r24725  
    3333import org.apache.lucene.analysis.standard.*; 
    3434 
     35import org.apache.lucene.analysis.ASCIIFoldingFilter; 
    3536 
    36 class GS2Analyzer extends StandardAnalyzer  
     37import org.apache.lucene.util.Version; 
     38 
     39 
     40class GS2Analyzer extends GS2StandardAnalyzer  
    3741{ 
     42     
     43    static Version matchVersion = Version.LUCENE_24; 
     44 
     45 
    3846    public GS2Analyzer()  
    3947    { 
    40     super(); 
     48    super(matchVersion); 
    4149    } 
     50     
    4251 
    4352    public GS2Analyzer(Set stopWords)  
    4453    { 
    45     super(stopWords); 
     54    super(matchVersion,stopWords); 
    4655    } 
    4756 
     
    4958    public GS2Analyzer(String [] stopwords)  
    5059    { 
    51     super(stopwords); 
    52     } 
    53      
    54     public TokenStream tokenStream(String fieldName, Reader reader)  
    55     { 
    56     TokenStream result = super.tokenStream(fieldName,reader); 
    57     result = new ISOLatin1AccentFilter(result); 
    58  
    59     return result;   
     60    super(matchVersion,StopFilter.makeStopSet(stopwords)); 
    6061    } 
    6162 
     63  @Override 
     64  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { 
     65    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); 
     66    src.setMaxTokenLength(maxTokenLength); 
     67    src.setReplaceInvalidAcronym(replaceInvalidAcronym); 
     68    TokenStream tok = new StandardFilter(matchVersion, src); 
     69    tok = new LowerCaseFilter(matchVersion, tok); 
     70    tok = new StopFilter(matchVersion, tok, stopwords); 
    6271 
    63   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { 
    64       TokenStream result = super.reusableTokenStream(fieldName,reader); 
    65        
    66       result = new ISOLatin1AccentFilter(result); 
    67        
    68       return result; 
     72    // top it up with accent folding 
     73    tok = new ASCIIFoldingFilter(tok); 
     74 
     75    return new TokenStreamComponents(src, tok) { 
     76      @Override 
     77      protected boolean reset(final Reader reader) throws IOException { 
     78        src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength); 
     79        return super.reset(reader); 
     80      } 
     81    }; 
    6982  } 
    70  
    7183 
    7284} 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java

    r20731 r24725  
    3434import org.apache.lucene.analysis.Analyzer; 
    3535import org.apache.lucene.document.Document; 
    36 import org.apache.lucene.index.IndexModifier; 
    3736import org.apache.lucene.index.IndexReader; 
    3837import org.apache.lucene.index.IndexWriter; 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java

    r16583 r24725  
    3333 
    3434import java.io.IOException; 
    35 import org.apache.lucene.analysis.standard.StandardAnalyzer; 
     35import java.io.File; 
     36//import org.apache.lucene.analysis.standard.StandardAnalyzer; 
    3637import org.apache.lucene.index.IndexWriter; 
    3738import org.apache.lucene.index.Term; 
     39 
     40import org.apache.lucene.store.SimpleFSDirectory; 
     41import org.apache.lucene.index.IndexWriter.MaxFieldLength; 
    3842 
    3943 
     
    128132        throws IOException 
    129133    { 
    130     index_writer = new IndexWriter(index_path, new StandardAnalyzer()); 
     134    SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path)); 
     135    index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),  
     136                       MaxFieldLength.UNLIMITED); 
    131137    } 
    132138 
     
    163169    { 
    164170        debug("GS2LuceneDelete.deleteDocument(" + node_id + ")"); 
    165         debug("- Initial number of documents in index: " + index_writer.docCount()); 
     171        debug("- Initial number of documents in index: " + index_writer.numDocs()); 
    166172    index_writer.deleteDocuments(new Term("nodeid", "" + node_id)); 
    167         debug("- Final number of documents in index: " + index_writer.docCount()); 
     173        debug("- Final number of documents in index: " + index_writer.numDocs()); 
    168174    } 
    169175} 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java

    r20731 r24725  
    3333 
    3434import java.io.IOException; 
     35import java.io.File; 
    3536import java.util.Arrays; 
    3637import java.util.Enumeration; 
     
    3839 
    3940import org.apache.lucene.analysis.Analyzer; 
    40 import org.apache.lucene.analysis.standard.StandardAnalyzer; 
     41//import org.apache.lucene.analysis.standard.StandardAnalyzer; 
    4142import org.apache.lucene.document.Document; 
    4243import org.apache.lucene.document.Field; 
     44 
     45import org.apache.lucene.store.SimpleFSDirectory; 
     46import org.apache.lucene.index.IndexWriter.MaxFieldLength; 
    4347 
    4448 
     
    146150        throws IOException 
    147151    { 
    148         Analyzer analyzer = new StandardAnalyzer(); 
     152        Analyzer analyzer = new GS2Analyzer(); 
    149153        // create an index in /tmp/index, overwriting an existing one: 
    150154        index_modifier = new GS2IndexModifier(index_path, analyzer); 
     
    188192    { 
    189193        debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')"); 
    190         debug("- Initial number of documents in index: " + index_modifier.docCount()); 
     194        debug("- Initial number of documents in index: " + index_modifier.numDocs()); 
    191195        // Retrieve the document requested 
    192196        int doc_num = index_modifier.getDocNumByNodeID(node_id); 
     
    230234                // We also have to initialize the nodeId value 
    231235        // changed to use docOID --kjdon 
    232                 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED)); 
     236                document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED)); 
    233237 
    234238                // Re-index document 
     
    296300                for(int i = 0; i < values.size(); i++) 
    297301                    { 
    298                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED)); 
     302                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED)); 
    299303                    } 
    300304                values.clear(); 
     
    318322                for(int i = 0; i < values.size(); i++) 
    319323                    { 
    320                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED)); 
     324                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED)); 
    321325                    } 
    322326                values.clear(); 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

    r23194 r24725  
    4545import org.apache.lucene.index.Term; 
    4646import org.apache.lucene.analysis.Analyzer; 
     47 
     48import org.apache.lucene.store.SimpleFSDirectory; 
     49import org.apache.lucene.index.IndexWriter.MaxFieldLength; 
    4750 
    4851import java.util.Stack; 
     
    190193    protected String file_id_ = null; 
    191194 
    192     static private String[] stop_words = GS2Analyzer.STOP_WORDS; 
    193  
    194  
    195195    /** pass in true if want to create a new index, false if want to use the existing one */ 
    196196    public Indexer (String doc_tag_level, File index_dir, boolean create)  
     
    206206        reader.setFeature("http://xml.org/sax/features/validation", false); 
    207207 
    208         analyzer_ = new GS2Analyzer(stop_words); 
    209  
    210         writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create); 
     208        SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath())); 
     209 
     210        analyzer_ = new GS2Analyzer(); // uses build in stop_word_set 
     211 
     212        writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED); 
     213                        
    211214        // by default, will only index 10,000 words per document 
    212215        // Can throw out_of_memory errors 
     
    318321        //String node_id = atts.getValue("gs2:id"); 
    319322        //print(" " + qName + ": " + node_id + " (" + mode_ + ")" ); 
    320         //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED)); 
     323        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
    321324         
    322325        current_doc_oid_ = atts.getValue("gs2:docOID"); 
    323326        print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" ); 
    324         current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED)); 
     327        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED)); 
    325328        } 
    326329         
     
    359362        if (qName.equals(indexable_current_node_)) 
    360363            { 
    361             current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); 
     364            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); 
    362365            // The byXX fields are used for sorting search results 
    363366            // We don't want to do that for Text or AllFields fields 
     
    365368            if (!qName.equals("TX") && !qName.equals("ZZ")) 
    366369                { 
    367                 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO)); 
     370                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO)); 
    368371                } 
    369372             
     
    472475    { 
    473476        debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")"); 
    474         debug("- Initial number of documents in index: " + writer_.docCount()); 
     477        debug("- Initial number of documents in index: " + writer_.numDocs()); 
    475478        writer_.deleteDocuments(new Term("docOID", doc_id)); 
    476         debug("- Final number of documents in index: " + writer_.docCount()); 
     479        debug("- Final number of documents in index: " + writer_.numDocs()); 
    477480    } 
    478481 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r20910 r24725  
    4545import org.apache.lucene.search.IndexSearcher; 
    4646import org.apache.lucene.search.Query; 
    47 import org.apache.lucene.search.RangeFilter; 
     47import org.apache.lucene.search.TermRangeFilter; 
    4848import org.apache.lucene.search.Searcher; 
    4949import org.apache.lucene.search.ScoreDoc; 
    5050import org.apache.lucene.search.Sort; 
     51import org.apache.lucene.search.SortField; 
    5152import org.apache.lucene.search.TopFieldDocs; 
    5253 
    53  
    54 public class GS2LuceneQuery 
     54import org.apache.lucene.store.Directory; 
     55import org.apache.lucene.store.FSDirectory; 
     56import org.apache.lucene.util.Version; 
     57 
     58public class GS2LuceneQuery extends SharedSoleneQuery 
    5559{ 
    56  
    57  
    58     static private String TEXTFIELD = "TX"; 
    59  
    60     // Use the standard set of English stop words by default 
    61     static private String[] stop_words = GS2Analyzer.STOP_WORDS; 
    62  
    63     private String full_indexdir=""; 
    64     private String default_conjunction_operator = "OR"; 
    65     private String fuzziness = null; 
    66     private String sort_field = null; 
    67     private Sort sorter=new Sort(); 
    68     private String filter_string = null; 
    69     private Filter filter = null; 
    70     private int start_results=1; 
    71     private int end_results=Integer.MAX_VALUE; 
    72  
    73     private QueryParser query_parser = null; 
    74     private QueryParser query_parser_no_stop_words = null; 
    75     private Searcher searcher = null; 
    76     private IndexReader reader = null; 
    77  
    78     static private PrintWriter utf8out = null; 
    79  
    80     static 
    81     { 
    82     try { 
    83         OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8"); 
    84         utf8out = new PrintWriter(osw, true); 
    85     } 
    86         catch (UnsupportedEncodingException e) {  
    87         System.out.println(e);  
    88     } 
    89     } 
    90  
    91      
     60    protected String full_indexdir=""; 
     61 
     62    protected Sort sorter=new Sort(); 
     63    protected Filter filter = null; 
     64 
     65    protected static Version matchVersion = Version.LUCENE_24; 
     66 
     67    protected QueryParser query_parser = null; 
     68    protected QueryParser query_parser_no_stop_words = null; 
     69    protected Searcher searcher = null; 
     70    protected IndexReader reader = null; 
     71 
    9272    public GS2LuceneQuery() { 
     73    super(); 
    9374 
    9475    // Create one query parser with the standard set of stop words, and one with none 
    9576 
    96     query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words)); 
    97         query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { })); 
     77    query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set 
     78        query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { })); 
    9879    } 
    9980     
    10081     
    10182    public boolean initialise() { 
     83 
     84    if (!super.initialise()) { 
     85        return false; 
     86    } 
     87 
    10288 
    10389        if (full_indexdir==null || full_indexdir.length()==-1){ 
     
    10692        return false; 
    10793        } 
     94 
    10895        try { 
    109             searcher = new IndexSearcher(full_indexdir); 
     96        Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 
     97            searcher = new IndexSearcher(full_indexdir_dir,true); 
    11098            reader = ((IndexSearcher) searcher).getIndexReader(); 
    11199         
     
    118106 
    119107    } 
     108 
     109    public void setIndexDir(String full_indexdir) { 
     110    this.full_indexdir = full_indexdir; 
     111    } 
     112 
     113    public void setSortField(String sort_field) { 
     114    super.setSortField(sort_field); 
     115 
     116    if (sort_field == null) { 
     117        this.sorter = new Sort(); 
     118    } else { 
     119        this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!? 
     120    } 
     121    } 
     122 
     123    public void setFilterString(String filter_string) { 
     124    super.setFilterString(filter_string); 
     125    this.filter = parseFilterString(filter_string); 
     126    } 
     127 
     128    public Filter getFilter() { 
     129    return this.filter; 
     130    } 
     131 
    120132     
    121133    public LuceneQueryResult runQuery(String query_string) { 
     
    194206        if (end_results == Integer.MAX_VALUE) { 
    195207        // Perform the query (filter and sorter may be null) 
    196         Hits hits = searcher.search(query, filter, sorter); 
    197         lucene_query_result.setTotalDocs(hits.length()); 
     208        TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 
     209        lucene_query_result.setTotalDocs(hits.totalHits); 
    198210 
    199211        // Output the matching documents 
    200212        lucene_query_result.setStartResults(start_results); 
    201         lucene_query_result.setEndResults(hits.length()); 
    202  
    203         for (int i = start_results; i <= hits.length(); i++) { 
    204             int lucene_doc_num = hits.id(i - 1); 
    205             Document doc = hits.doc(i - 1); 
     213        lucene_query_result.setEndResults(hits.totalHits); 
     214 
     215        for (int i = start_results; i <= hits.totalHits; i++) { 
     216            int lucene_doc_num = hits.scoreDocs[i - 1].doc; 
     217            Document doc = reader.document(lucene_doc_num); 
    206218            int doc_term_freq = 0; 
    207219            Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num)); 
     
    210222            doc_term_freq = doc_term_freq_object.intValue(); 
    211223            } 
    212             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq); 
     224            lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq); 
    213225        } 
    214226        } 
     
    256268 
    257269    public void setDefaultConjunctionOperator(String default_conjunction_operator) { 
    258     this.default_conjunction_operator = default_conjunction_operator.toUpperCase(); 
     270    super.setDefaultConjunctionOperator(default_conjunction_operator); 
     271 
    259272    if (default_conjunction_operator.equals("AND")) { 
    260273        query_parser.setDefaultOperator(query_parser.AND_OPERATOR); 
     
    265278    } 
    266279    } 
    267      
    268     public String getDefaultConjunctionOperator() { 
    269     return this.default_conjunction_operator; 
    270     } 
    271      
    272     public void setEndResults(int end_results) { 
    273     this.end_results = end_results; 
    274     } 
    275     public int getEndResults() { 
    276     return this.end_results; 
    277     } 
    278          
    279     public void setFilterString(String filter_string) { 
    280     this.filter_string = filter_string; 
    281     this.filter = parseFilterString(filter_string); 
    282     } 
    283     public String getFilterString() { 
    284     return this.filter_string ; 
    285     } 
    286      
    287     public Filter getFilter() { 
    288     return this.filter; 
    289     } 
    290  
    291     public void setIndexDir(String full_indexdir) { 
    292     this.full_indexdir = full_indexdir; 
    293     } 
    294      
    295     public void setFuzziness(String fuzziness) { 
    296     this.fuzziness = fuzziness; 
    297     } 
    298     public String getFuzziness() { 
    299     return this.fuzziness; 
    300     } 
    301      
    302     public void setSortField(String sort_field) { 
    303     this.sort_field = sort_field; 
    304     if (sort_field == null) { 
    305         this.sorter = new Sort(); 
    306     } else { 
    307         this.sorter = new Sort(sort_field); 
    308     } 
    309     } 
    310     public String getSortField() { 
    311     return this.sort_field; 
    312     } 
    313          
    314     public void setStartResults(int start_results) { 
    315     if (start_results < 1) { 
    316         start_results = 1; 
    317     } 
    318     this.start_results = start_results; 
    319     } 
    320     public int getStartResults() { 
    321     return this.start_results; 
    322     } 
    323          
     280      
     281        
    324282    public void cleanUp() { 
     283    super.cleanUp(); 
    325284    try { 
    326285        if (searcher != null) { 
     
    332291    } 
    333292 
    334     private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 
     293 
     294    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 
    335295    throws java.io.IOException, org.apache.lucene.queryParser.ParseException 
    336296    { 
     
    434394    } 
    435395 
    436     private Filter parseFilterString(String filter_string) 
     396    protected Filter parseFilterString(String filter_string) 
    437397    { 
    438398    Filter result = null; 
     
    445405        String upper_term = matcher.group(4); 
    446406        boolean include_upper = matcher.group(5).equals("]"); 
    447         result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 
     407        result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 
    448408    } 
    449409    else { 
     
    452412    return result; 
    453413    } 
    454  
    455  
    456     protected void finalize() throws Throwable  
    457     { 
    458     try { 
    459         utf8out.flush();  
    460     } finally { 
    461         super.finalize(); 
    462     } 
    463     } 
    464  
    465414     
     415 
    466416    /** command line program and auxiliary methods */ 
    467417 
    468418    // Fairly self-explanatory I should hope 
    469     static private boolean query_result_caching_enabled = false; 
     419    static protected boolean query_result_caching_enabled = false; 
    470420 
    471421 
    472422    static public void main (String args[]) 
    473423    { 
    474  
    475  
    476424    if (args.length == 0) { 
    477425        System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]"); 
     
    566514    } 
    567515 
    568     private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)  
     516    protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)  
    569517    throws IOException 
    570518    { 
     
    654602    } 
    655603     
    656     private static String fileSafe(String text) 
     604    protected static String fileSafe(String text) 
    657605    { 
    658606    StringBuffer file_safe_text = new StringBuffer(); 
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r16912 r24725  
    2929import java.util.Vector; 
    3030 
    31 /** a QueryResult class for a lucene search 
     31/** Opportunity to fine tune QueryResult for lucene search 
    3232 * 
    3333 */ 
    34 public class LuceneQueryResult { 
     34 
     35public class LuceneQueryResult extends SharedSoleneQueryResult { 
    3536     
    36     public static final int NO_ERROR = 0; 
    37     public static final int PARSE_ERROR = 1; 
    38     public static final int TOO_MANY_CLAUSES_ERROR = 2; 
    39     public static final int IO_ERROR = 3; 
    40     public static final int OTHER_ERROR = 4; 
    41      
    42     /** the list of DocInfo */ 
    43     protected Vector docs_=null; 
    44     /** the list of TermInfo */ 
    45     protected Vector terms_=null; 
    46     /** the list of stopwords found in the query */ 
    47     protected Vector stopwords_ = null; 
    48     /** the total number of docs found - not necessarily the size of docs_*/ 
    49     protected int total_num_docs_=0; 
    50     /** the start result number if we are retrieving only a portion of the results */ 
    51     protected int start_results_ = 0; 
    52     /** the end result number if we are retrieving only a portion of the results */ 
    53     protected int end_results_ = 0; 
    54     /** whether an error has occurred and what kind it is*/ 
    55     protected int error_ = NO_ERROR; 
    56  
     37    // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class 
    5738    LuceneQueryResult() { 
    58     docs_ = new Vector(); 
    59     terms_ = new Vector(); 
    60     stopwords_ = new Vector(); 
    61     } 
    62      
    63     /** clear the info from the last query - should be called before setting any new docs/terms */ 
    64     public void clear() { 
    65     total_num_docs_=0; 
    66     docs_.clear(); 
    67     terms_.clear(); 
    68     stopwords_.clear(); 
    69     error_ = NO_ERROR; 
    70     } 
    71  
    72     /** returns the result as a String - useful for printing out results */ 
    73     public String toString() { 
    74      
    75     String result = ""; 
    76     result += "docs (ranks): "; 
    77     for (int i=0; i<docs_.size(); i++) { 
    78         result += ((DocInfo)docs_.elementAt(i)).toString()+", "; 
    79     } 
    80     result += "\nterms: "; 
    81     for (int i=0; i<terms_.size(); i++) { 
    82         result += ((TermInfo)terms_.elementAt(i)).toString()+", "; 
    83     } 
    84     result += "\nactual number of docs found = "+total_num_docs_; 
    85      
    86     return result; 
    87     } 
    88     /** a shorter representation - just terms and total docs - not the  
    89     individual docnums and ranks */ 
    90     public String toShortString() { 
    91     String result = ""; 
    92     result += "\nterms: "; 
    93     for (int i=0; i<terms_.size(); i++) { 
    94         result += ((TermInfo)terms_.elementAt(i)).toString()+", "; 
    95     } 
    96     result += "\nactual number of docs found = "+total_num_docs_; 
    97     return result; 
    98     } 
    99      
    100     public void setTotalDocs(int num) { 
    101     total_num_docs_=num; 
    102     } 
    103      
    104     public void setStartResults(int start) { 
    105     start_results_ = start; 
    106     } 
    107  
    108     public void setEndResults(int end) { 
    109     end_results_ = end; 
    110     } 
    111  
    112     public void addDoc(String id, float rank, int termfreq) 
    113     { 
    114     docs_.add(new DocInfo(id, rank, termfreq)); 
    115     } 
    116      
    117     public void addTerm(String term, String field, int match, int freq) { 
    118     TermInfo ti = new TermInfo(); 
    119     ti.term_=term; 
    120     ti.field_=field; 
    121     ti.match_docs_=match; 
    122     ti.term_freq_=freq; 
    123     terms_.add(ti); 
    124     } 
    125     public void addStopWord(String stopword) { 
    126     stopwords_.add(stopword); 
    127     } 
    128     public Vector getDocs() { 
    129     return docs_; 
    130     } 
    131      
    132     public int getError() { 
    133     return error_; 
    134     } 
    135      
    136     public String getErrorString() { 
    137     if (error_ == PARSE_ERROR) { 
    138         return "PARSE_EXCEPTION"; 
    139     } 
    140     if (error_ == TOO_MANY_CLAUSES_ERROR) { 
    141         return "TOO_MANY_CLAUSES"; 
    142     } 
    143     if (error_ == IO_ERROR) { 
    144         return "IO_ERROR"; 
    145     } 
    146     if (error_ == NO_ERROR) { 
    147         return "NO_ERROR"; 
    148     } 
    149     return "UNKNOWN"; 
    150     } 
    151  
    152     public Vector getTerms() { 
    153     return terms_; 
    154     } 
    155      
    156     public Vector getStopWords() { 
    157     return stopwords_; 
    158     } 
    159     public int getTotalDocs() { 
    160     return total_num_docs_; 
    161     } 
    162      
    163     public void setError(int error) { 
    164     error_ = error; 
    165     } 
    166      
    167     public String getXMLString() { 
    168     StringBuffer buffer = new StringBuffer(); 
    169  
    170     // terms 
    171     buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n"); 
    172     for (int i=0; i<terms_.size(); i++) { 
    173         buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n"); 
    174     } 
    175  
    176     // stopwords 
    177     for (int i=0; i<stopwords_.size(); i++) { 
    178         buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n"); 
    179     } 
    180      
    181     // results 
    182     buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n"); 
    183     buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n"); 
    184     buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n"); 
    185      
    186     for (int i=0; i< docs_.size(); i++) { 
    187         buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n"); 
    188     } 
    189  
    190     return buffer.toString(); 
    191     } 
    192  
    193   
    194     public class TermInfo { 
    195      
    196     /** the term itself */ 
    197     public String term_=null; 
    198     /** the field for which this term was queried */ 
    199     public String field_=null; 
    200     /** the number of documents containing this term */ 
    201     public int match_docs_=0; 
    202     /** overall term freq for this term */ 
    203     public int term_freq_=0; 
    204      
    205     public TermInfo() { 
    206     } 
    207      
    208     /** output the class as a string */ 
    209     public String toString() { 
    210         String result=""; 
    211         result +="<"+field_+">\""+term_+" docs("+match_docs_; 
    212         result +=")freq("+term_freq_+")"; 
    213         return result; 
    214     } 
    215  
    216     /** output as an XML element */ 
    217     public String toXMLString() { 
    218         return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />"; 
    219     } 
    220     } 
    221  
    222  
    223     public class DocInfo 
    224     { 
    225     public String id_ = ""; 
    226     public float rank_ = 0; 
    227     public int termfreq_ = 0; 
    228  
    229     public DocInfo (String id, float rank, int termfreq) 
    230     { 
    231         id_ = id; 
    232         rank_ = rank; 
    233         termfreq_ = termfreq; 
    234     } 
    235  
    236     public String toString() 
    237     { 
    238         return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")"; 
    239     } 
    240  
    241     public String toXMLString() 
    242     { 
    243         return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />"; 
    244     } 
    245     } 
    246  
    247  
    248     // where should this go??? 
    249     public static String xmlSafe(String text) { 
    250     text = text.replaceAll("&","&amp;amp;"); 
    251     text = text.replaceAll("<","&amp;lt;"); 
    252     text = text.replaceAll(">","&amp;gt;"); 
    253     text = text.replaceAll("'","&amp;#039;"); 
    254     text = text.replaceAll("\\\"","&amp;quot;"); 
    255     return text; 
    256     } 
    257   
     39    super(); 
     40    }  
    25841}