Timestamp:
07.10.2011 11:36:07
Author:
sjm84
Message:

Lucene 3.x version of the code was accidentally committed; rolling back to the 2.x-compatible version.

Location:
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper
Files:
7 modified

Legend:

    unprefixed  unmodified context
    -           removed in r24731 (the Lucene 3.x code)
    +           added in r24731 (the restored 2.x-compatible code)
    …           unchanged lines omitted
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java

r24725 → r24731

      import org.apache.lucene.analysis.standard.*;

    - import org.apache.lucene.analysis.ASCIIFoldingFilter;

    - import org.apache.lucene.util.Version;
    -
    -
    - class GS2Analyzer extends GS2StandardAnalyzer
    + class GS2Analyzer extends StandardAnalyzer
      {
    -
    -     static Version matchVersion = Version.LUCENE_24;
    -
    -
          public GS2Analyzer()
          {
    -         super(matchVersion);
    +         super();
          }
    -

          public GS2Analyzer(Set stopWords)
          {
    -         super(matchVersion, stopWords);
    +         super(stopWords);
          }

          public GS2Analyzer(String[] stopwords)
          {
    -         super(matchVersion, StopFilter.makeStopSet(stopwords));
    +         super(stopwords);
    +     }
    +
    +     public TokenStream tokenStream(String fieldName, Reader reader)
    +     {
    +         TokenStream result = super.tokenStream(fieldName, reader);
    +         result = new ISOLatin1AccentFilter(result);
    +
    +         return result;
          }

    -     @Override
    -     protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    -         final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    -         src.setMaxTokenLength(maxTokenLength);
    -         src.setReplaceInvalidAcronym(replaceInvalidAcronym);
    -         TokenStream tok = new StandardFilter(matchVersion, src);
    -         tok = new LowerCaseFilter(matchVersion, tok);
    -         tok = new StopFilter(matchVersion, tok, stopwords);
    -
    -         // top it up with accent folding
    -         tok = new ASCIIFoldingFilter(tok);
    +     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    +         TokenStream result = super.reusableTokenStream(fieldName, reader);
    +
    +         result = new ISOLatin1AccentFilter(result);
    +
    +         return result;
    +     }

    -         return new TokenStreamComponents(src, tok) {
    -             @Override
    -             protected boolean reset(final Reader reader) throws IOException {
    -                 src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
    -                 return super.reset(reader);
    -             }
    -         };
    -     }

      }
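Note: the reverted class returns to the Lucene 2.x analyzer pattern of overriding tokenStream()/reusableTokenStream() and decorating the superclass stream, here with ISOLatin1AccentFilter (the 2.x predecessor of ASCIIFoldingFilter) so accented and unaccented spellings match. A minimal sketch of driving such an analyzer through the 2.4-era TokenStream API; the field name "TX" and the sample text are illustrative only, and the demo would need to live in the same package because GS2Analyzer is package-private:

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;

    public class AnalyzerDemo {
        public static void main(String[] args) throws Exception {
            Analyzer analyzer = new GS2Analyzer();
            // Accent folding means "Café" should come out as the term "cafe".
            TokenStream ts = analyzer.tokenStream("TX", new StringReader("Café déjà vu"));
            // Lucene 2.x-era iteration: next() returns null when the stream is exhausted.
            for (Token t = ts.next(); t != null; t = ts.next()) {
                System.out.println(t.termText());
            }
            ts.close();
        }
    }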
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java

r24725 → r24731

      import org.apache.lucene.analysis.Analyzer;
      import org.apache.lucene.document.Document;
    + import org.apache.lucene.index.IndexModifier;
      import org.apache.lucene.index.IndexReader;
      import org.apache.lucene.index.IndexWriter;
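Note: IndexModifier exists only in the Lucene 2.x line (deprecated in 2.9, removed in 3.x), so restoring this import is part of pinning the wrapper to the 2.x API. A minimal usage sketch of the 2.x IndexModifier that GS2IndexModifier builds on; the index path and field values are invented for illustration:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexModifier;
    import org.apache.lucene.index.Term;

    public class IndexModifierDemo {
        public static void main(String[] args) throws Exception {
            // true => create a new index at this (illustrative) path
            IndexModifier modifier =
                new IndexModifier("/tmp/demo-index", new StandardAnalyzer(), true);

            Document doc = new Document();
            doc.add(new Field("docOID", "HASH0123", Field.Store.YES, Field.Index.UN_TOKENIZED));
            modifier.addDocument(doc);

            System.out.println("docs in index: " + modifier.docCount());
            modifier.deleteDocuments(new Term("docOID", "HASH0123"));
            modifier.close();
        }
    }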
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java

r24725 → r24731

      import java.io.IOException;
    - import java.io.File;
    - //import org.apache.lucene.analysis.standard.StandardAnalyzer;
    + import org.apache.lucene.analysis.standard.StandardAnalyzer;
      import org.apache.lucene.index.IndexWriter;
      import org.apache.lucene.index.Term;
    -
    - import org.apache.lucene.store.SimpleFSDirectory;
    - import org.apache.lucene.index.IndexWriter.MaxFieldLength;

    …

          throws IOException
      {
    -     SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
    -     index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
    -                                    MaxFieldLength.UNLIMITED);
    +     index_writer = new IndexWriter(index_path, new StandardAnalyzer());
      }

    …

      {
          debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
    -     debug("- Initial number of documents in index: " + index_writer.numDocs());
    +     debug("- Initial number of documents in index: " + index_writer.docCount());
          index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
    -     debug("- Final number of documents in index: " + index_writer.numDocs());
    +     debug("- Final number of documents in index: " + index_writer.docCount());
      }
  }
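Note: two reverts here: the IndexWriter goes back to the 2.x constructor that takes a path string (the Directory/MaxFieldLength form is 2.9/3.x), and the document count goes back to docCount() (numDocs() is the later name). A sketch of the restored deletion flow, with an invented path and node id:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    public class DeleteDemo {
        public static void main(String[] args) throws Exception {
            IndexWriter writer = new IndexWriter("/tmp/demo-index", new StandardAnalyzer());
            System.out.println("initial count: " + writer.docCount());
            // Deletions are expressed as a Term match, exactly as in deleteDocument() above.
            writer.deleteDocuments(new Term("nodeid", "42"));
            System.out.println("final count: " + writer.docCount());
            writer.close();
        }
    }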
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java

r24725 → r24731

      import java.io.IOException;
    - import java.io.File;
      import java.util.Arrays;
      import java.util.Enumeration;

      import org.apache.lucene.analysis.Analyzer;
    - //import org.apache.lucene.analysis.standard.StandardAnalyzer;
    + import org.apache.lucene.analysis.standard.StandardAnalyzer;
      import org.apache.lucene.document.Document;
      import org.apache.lucene.document.Field;
    -
    - import org.apache.lucene.store.SimpleFSDirectory;
    - import org.apache.lucene.index.IndexWriter.MaxFieldLength;

    …

          throws IOException
      {
    -     Analyzer analyzer = new GS2Analyzer();
    +     Analyzer analyzer = new StandardAnalyzer();
          // create an index in /tmp/index, overwriting an existing one:
          index_modifier = new GS2IndexModifier(index_path, analyzer);

    …

      {
          debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')");
    -     debug("- Initial number of documents in index: " + index_modifier.numDocs());
    +     debug("- Initial number of documents in index: " + index_modifier.docCount());
          // Retrieve the document requested
          int doc_num = index_modifier.getDocNumByNodeID(node_id);

    …

          // We also have to initialize the nodeId value
          // changed to use docOID --kjdon
    -     document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED));
    +     document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED));

          // Re-index document

    …

          for (int i = 0; i < values.size(); i++)
          {
    -         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
    +         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
          }
          values.clear();

    …

          for (int i = 0; i < values.size(); i++)
          {
    -         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
    +         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
          }
          values.clear();
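Note: all of the Field.Index changes in this file are the same rename: Lucene 2.4 introduced ANALYZED/NOT_ANALYZED and 3.x removed the old TOKENIZED/UN_TOKENIZED names, so a 2.x-compatible tree must use the old constants. A small sketch of the restored field construction; the field names mirror the ones used above, the values are invented:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    public class FieldDemo {
        public static void main(String[] args) {
            Document doc = new Document();
            // TOKENIZED: value is passed through the analyzer before indexing
            doc.add(new Field("TX", "some body text", Field.Store.NO, Field.Index.TOKENIZED));
            // UN_TOKENIZED: value is indexed verbatim as a single term (good for ids)
            doc.add(new Field("docOID", "HASH0123", Field.Store.YES, Field.Index.UN_TOKENIZED));
            System.out.println(doc);
        }
    }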
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

r24725 → r24731

      import org.apache.lucene.index.Term;
      import org.apache.lucene.analysis.Analyzer;
    -
    - import org.apache.lucene.store.SimpleFSDirectory;
    - import org.apache.lucene.index.IndexWriter.MaxFieldLength;

      import java.util.Stack;

    …

      protected String file_id_ = null;

    + static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    +
    +
      /** pass in true if want to create a new index, false if want to use the existing one */
      public Indexer (String doc_tag_level, File index_dir, boolean create)

    …

          reader.setFeature("http://xml.org/sax/features/validation", false);

    -     SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
    -
    -     analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
    -
    -     writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
    +     analyzer_ = new GS2Analyzer(stop_words);
    +
    +     writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
          // by default, will only index 10,000 words per document
          // Can throw out_of_memory errors

    …

          //String node_id = atts.getValue("gs2:id");
          //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
    -     //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    +     //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));

          current_doc_oid_ = atts.getValue("gs2:docOID");
          print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
    -     current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
    +     current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
      }

    …

      if (qName.equals(indexable_current_node_))
      {
    -     current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    +     current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
          // The byXX fields are used for sorting search results
          // We don't want to do that for Text or AllFields fields

    …

          if (!qName.equals("TX") && !qName.equals("ZZ"))
          {
    -         current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    +         current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
          }

    …

      {
          debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
    -     debug("- Initial number of documents in index: " + writer_.numDocs());
    +     debug("- Initial number of documents in index: " + writer_.docCount());
          writer_.deleteDocuments(new Term("docOID", doc_id));
    -     debug("- Final number of documents in index: " + writer_.numDocs());
    +     debug("- Final number of documents in index: " + writer_.docCount());
      }
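Note: besides the constant renames, the Indexer constructor reverts to the 2.x three-argument IndexWriter(path, analyzer, create) and to passing an explicit stop-word list into GS2Analyzer. A sketch of that 2.x indexing pattern, including a term-vector field like the indexable ones above; the setMaxFieldLength() call is my assumption about how the 10,000-words-per-document default mentioned in the comments would be lifted, not part of this changeset:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;

    public class IndexerDemo {
        public static void main(String[] args) throws Exception {
            // true => build a fresh index at this (illustrative) path
            IndexWriter writer = new IndexWriter("/tmp/demo-index", new StandardAnalyzer(), true);
            // Assumed follow-up to the "10,000 words per document" comment above.
            writer.setMaxFieldLength(Integer.MAX_VALUE);

            Document doc = new Document();
            doc.add(new Field("docOID", "HASH0123", Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("TX", "full text of the section", Field.Store.NO,
                              Field.Index.TOKENIZED, Field.TermVector.YES));
            writer.addDocument(doc);
            writer.close();
        }
    }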
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

r24725 → r24731

      import org.apache.lucene.search.IndexSearcher;
      import org.apache.lucene.search.Query;
    - import org.apache.lucene.search.TermRangeFilter;
    + import org.apache.lucene.search.RangeFilter;
      import org.apache.lucene.search.Searcher;
      import org.apache.lucene.search.ScoreDoc;
      import org.apache.lucene.search.Sort;
    - import org.apache.lucene.search.SortField;
      import org.apache.lucene.search.TopFieldDocs;

    - import org.apache.lucene.store.Directory;
    - import org.apache.lucene.store.FSDirectory;
    - import org.apache.lucene.util.Version;
    -
    - public class GS2LuceneQuery extends SharedSoleneQuery
    +
    + public class GS2LuceneQuery
      {
    -     protected String full_indexdir = "";
    -
    -     protected Sort sorter = new Sort();
    -     protected Filter filter = null;
    -
    -     protected static Version matchVersion = Version.LUCENE_24;
    -
    -     protected QueryParser query_parser = null;
    -     protected QueryParser query_parser_no_stop_words = null;
    -     protected Searcher searcher = null;
    -     protected IndexReader reader = null;
    -
    +     static private String TEXTFIELD = "TX";
    +
    +     // Use the standard set of English stop words by default
    +     static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    +
    +     private String full_indexdir = "";
    +     private String default_conjunction_operator = "OR";
    +     private String fuzziness = null;
    +     private String sort_field = null;
    +     private Sort sorter = new Sort();
    +     private String filter_string = null;
    +     private Filter filter = null;
    +     private int start_results = 1;
    +     private int end_results = Integer.MAX_VALUE;
    +
    +     private QueryParser query_parser = null;
    +     private QueryParser query_parser_no_stop_words = null;
    +     private Searcher searcher = null;
    +     private IndexReader reader = null;
    +
    +     static private PrintWriter utf8out = null;
    +
    +     static
    +     {
    +         try {
    +             OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8");
    +             utf8out = new PrintWriter(osw, true);
    +         }
    +         catch (UnsupportedEncodingException e) {
    +             System.out.println(e);
    +         }
    +     }
    +
          public GS2LuceneQuery() {
    -         super();

              // Create one query parser with the standard set of stop words, and one with none

    -         query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
    -         query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
    +         query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
    +         query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
          }

          public boolean initialise() {
    -
    -         if (!super.initialise()) {
    -             return false;
    -         }
    -
              if (full_indexdir == null || full_indexdir.length() == -1) {

    …

                  return false;
              }
    -
              try {
    -             Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
    -             searcher = new IndexSearcher(full_indexdir_dir, true);
    +             searcher = new IndexSearcher(full_indexdir);
                  reader = ((IndexSearcher) searcher).getIndexReader();

    …

          }
    -
    -     public void setIndexDir(String full_indexdir) {
    -         this.full_indexdir = full_indexdir;
    -     }
    -
    -     public void setSortField(String sort_field) {
    -         super.setSortField(sort_field);
    -
    -         if (sort_field == null) {
    -             this.sorter = new Sort();
    -         } else {
    -             this.sorter = new Sort(new SortField(sort_field, SortField.STRING)); // **** can do better than this?!?
    -         }
    -     }
    -
    -     public void setFilterString(String filter_string) {
    -         super.setFilterString(filter_string);
    -         this.filter = parseFilterString(filter_string);
    -     }
    -
    -     public Filter getFilter() {
    -         return this.filter;
    -     }
    -
          public LuceneQueryResult runQuery(String query_string) {

    …

              if (end_results == Integer.MAX_VALUE) {
                  // Perform the query (filter and sorter may be null)
    -             TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    -             lucene_query_result.setTotalDocs(hits.totalHits);
    +             Hits hits = searcher.search(query, filter, sorter);
    +             lucene_query_result.setTotalDocs(hits.length());

                  // Output the matching documents
                  lucene_query_result.setStartResults(start_results);
    -             lucene_query_result.setEndResults(hits.totalHits);
    -
    -             for (int i = start_results; i <= hits.totalHits; i++) {
    -                 int lucene_doc_num = hits.scoreDocs[i - 1].doc;
    -                 Document doc = reader.document(lucene_doc_num);
    +             lucene_query_result.setEndResults(hits.length());
    +
    +             for (int i = start_results; i <= hits.length(); i++) {
    +                 int lucene_doc_num = hits.id(i - 1);
    +                 Document doc = hits.doc(i - 1);
                      int doc_term_freq = 0;
                      Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));

    …

                      doc_term_freq = doc_term_freq_object.intValue();
                  }
    -             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
    +             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
              }
              }

    …

          public void setDefaultConjunctionOperator(String default_conjunction_operator) {
    -         super.setDefaultConjunctionOperator(default_conjunction_operator);
    -
    +         this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
              if (default_conjunction_operator.equals("AND")) {
                  query_parser.setDefaultOperator(query_parser.AND_OPERATOR);

    …

              }
          }
    -
    -
    +
    +     public String getDefaultConjunctionOperator() {
    +         return this.default_conjunction_operator;
    +     }
    +
    +     public void setEndResults(int end_results) {
    +         this.end_results = end_results;
    +     }
    +     public int getEndResults() {
    +         return this.end_results;
    +     }
    +
    +     public void setFilterString(String filter_string) {
    +         this.filter_string = filter_string;
    +         this.filter = parseFilterString(filter_string);
    +     }
    +     public String getFilterString() {
    +         return this.filter_string;
    +     }
    +
    +     public Filter getFilter() {
    +         return this.filter;
    +     }
    +
    +     public void setIndexDir(String full_indexdir) {
    +         this.full_indexdir = full_indexdir;
    +     }
    +
    +     public void setFuzziness(String fuzziness) {
    +         this.fuzziness = fuzziness;
    +     }
    +     public String getFuzziness() {
    +         return this.fuzziness;
    +     }
    +
    +     public void setSortField(String sort_field) {
    +         this.sort_field = sort_field;
    +         if (sort_field == null) {
    +             this.sorter = new Sort();
    +         } else {
    +             this.sorter = new Sort(sort_field);
    +         }
    +     }
    +     public String getSortField() {
    +         return this.sort_field;
    +     }
    +
    +     public void setStartResults(int start_results) {
    +         if (start_results < 1) {
    +             start_results = 1;
    +         }
    +         this.start_results = start_results;
    +     }
    +     public int getStartResults() {
    +         return this.start_results;
    +     }
    +
          public void cleanUp() {
    -         super.cleanUp();
              try {
                  if (searcher != null) {

    …

          }

    -     protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    +     private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
              throws java.io.IOException, org.apache.lucene.queryParser.ParseException
          {

    …

          }

    -     protected Filter parseFilterString(String filter_string)
    +     private Filter parseFilterString(String filter_string)
          {
              Filter result = null;

    …

              String upper_term = matcher.group(4);
              boolean include_upper = matcher.group(5).equals("]");
    -         result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
    +         result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
          }
          else {

    …

              return result;
          }
    -
    +
    +     protected void finalize() throws Throwable
    +     {
    +         try {
    +             utf8out.flush();
    +         } finally {
    +             super.finalize();
    +         }
    +     }
    +
          /** command line program and auxiliary methods */

          // Fairly self-explanatory I should hope
    -     static protected boolean query_result_caching_enabled = false;
    +     static private boolean query_result_caching_enabled = false;

          static public void main (String args[])
          {
    +
              if (args.length == 0) {
                  System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");

    …

          }

    -     protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
    +     private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
              throws IOException
          {

    …

          }

    -     protected static String fileSafe(String text)
    +     private static String fileSafe(String text)
          {
              StringBuffer file_safe_text = new StringBuffer();
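Note: the core of this file's revert is the search call itself: Lucene 3.x pages through TopFieldDocs/ScoreDoc, while 2.x returns a lazily-loaded Hits object indexed by result position. A minimal sketch of the restored 2.x search loop; the index path and query term are invented:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Hits;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.TermQuery;

    public class SearchDemo {
        public static void main(String[] args) throws Exception {
            IndexSearcher searcher = new IndexSearcher("/tmp/demo-index");
            Query query = new TermQuery(new Term("TX", "greenstone"));

            // 2.x API: filter and sorter may be null/default, as in runQuery() above
            Hits hits = searcher.search(query, null, new Sort());
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                System.out.println(hits.id(i) + "\t" + hits.score(i) + "\t" + doc.get("docOID"));
            }
            searcher.close();
        }
    }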
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

r24725 → r24731

      import java.util.Vector;

    - /** Opportunity to fine tune QueryResult for lucene search
    + /** a QueryResult class for a lucene search
       *
       */
    -
    - public class LuceneQueryResult extends SharedSoleneQueryResult {
    -
    -     // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class
    + public class LuceneQueryResult {
    +
    +     public static final int NO_ERROR = 0;
    +     public static final int PARSE_ERROR = 1;
    +     public static final int TOO_MANY_CLAUSES_ERROR = 2;
    +     public static final int IO_ERROR = 3;
    +     public static final int OTHER_ERROR = 4;
    +
    +     /** the list of DocInfo */
    +     protected Vector docs_ = null;
    +     /** the list of TermInfo */
    +     protected Vector terms_ = null;
    +     /** the list of stopwords found in the query */
    +     protected Vector stopwords_ = null;
    +     /** the total number of docs found - not necessarily the size of docs_ */
    +     protected int total_num_docs_ = 0;
    +     /** the start result number if we are retrieving only a portion of the results */
    +     protected int start_results_ = 0;
    +     /** the end result number if we are retrieving only a portion of the results */
    +     protected int end_results_ = 0;
    +     /** whether an error has occurred and what kind it is */
    +     protected int error_ = NO_ERROR;
    +
          LuceneQueryResult() {
    -         super();
    -     }
    +         docs_ = new Vector();
    +         terms_ = new Vector();
    +         stopwords_ = new Vector();
    +     }
    +
    +     /** clear the info from the last query - should be called before setting any new docs/terms */
    +     public void clear() {
    +         total_num_docs_ = 0;
    +         docs_.clear();
    +         terms_.clear();
    +         stopwords_.clear();
    +         error_ = NO_ERROR;
    +     }
    +
    +     /** returns the result as a String - useful for printing out results */
    +     public String toString() {
    +         String result = "";
    +         result += "docs (ranks): ";
    +         for (int i = 0; i < docs_.size(); i++) {
    +             result += ((DocInfo)docs_.elementAt(i)).toString() + ", ";
    +         }
    +         result += "\nterms: ";
    +         for (int i = 0; i < terms_.size(); i++) {
    +             result += ((TermInfo)terms_.elementAt(i)).toString() + ", ";
    +         }
    +         result += "\nactual number of docs found = " + total_num_docs_;
    +         return result;
    +     }
    +
    +     /** a shorter representation - just terms and total docs - not the
    +         individual docnums and ranks */
    +     public String toShortString() {
    +         String result = "";
    +         result += "\nterms: ";
    +         for (int i = 0; i < terms_.size(); i++) {
    +             result += ((TermInfo)terms_.elementAt(i)).toString() + ", ";
    +         }
    +         result += "\nactual number of docs found = " + total_num_docs_;
    +         return result;
    +     }
    +
    +     public void setTotalDocs(int num) {
    +         total_num_docs_ = num;
    +     }
    +
    +     public void setStartResults(int start) {
    +         start_results_ = start;
    +     }
    +
    +     public void setEndResults(int end) {
    +         end_results_ = end;
    +     }
    +
    +     public void addDoc(String id, float rank, int termfreq)
    +     {
    +         docs_.add(new DocInfo(id, rank, termfreq));
    +     }
    +
    +     public void addTerm(String term, String field, int match, int freq) {
    +         TermInfo ti = new TermInfo();
    +         ti.term_ = term;
    +         ti.field_ = field;
    +         ti.match_docs_ = match;
    +         ti.term_freq_ = freq;
    +         terms_.add(ti);
    +     }
    +
    +     public void addStopWord(String stopword) {
    +         stopwords_.add(stopword);
    +     }
    +
    +     public Vector getDocs() {
    +         return docs_;
    +     }
    +
    +     public int getError() {
    +         return error_;
    +     }
    +
    +     public String getErrorString() {
    +         if (error_ == PARSE_ERROR) {
    +             return "PARSE_EXCEPTION";
    +         }
    +         if (error_ == TOO_MANY_CLAUSES_ERROR) {
    +             return "TOO_MANY_CLAUSES";
    +         }
    +         if (error_ == IO_ERROR) {
    +             return "IO_ERROR";
    +         }
    +         if (error_ == NO_ERROR) {
    +             return "NO_ERROR";
    +         }
    +         return "UNKNOWN";
    +     }
    +
    +     public Vector getTerms() {
    +         return terms_;
    +     }
    +
    +     public Vector getStopWords() {
    +         return stopwords_;
    +     }
    +
    +     public int getTotalDocs() {
    +         return total_num_docs_;
    +     }
    +
    +     public void setError(int error) {
    +         error_ = error;
    +     }
    +
    +     public String getXMLString() {
    +         StringBuffer buffer = new StringBuffer();
    +
    +         // terms
    +         buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n");
    +         for (int i = 0; i < terms_.size(); i++) {
    +             buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString() + "\n");
    +         }
    +
    +         // stopwords
    +         for (int i = 0; i < stopwords_.size(); i++) {
    +             buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i) + "\" />\n");
    +         }
    +
    +         // results
    +         buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n");
    +         buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n");
    +         buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n");
    +
    +         for (int i = 0; i < docs_.size(); i++) {
    +             buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString() + "\n");
    +         }
    +
    +         return buffer.toString();
    +     }
    +
    +     public class TermInfo {
    +
    +         /** the term itself */
    +         public String term_ = null;
    +         /** the field for which this term was queried */
    +         public String field_ = null;
    +         /** the number of documents containing this term */
    +         public int match_docs_ = 0;
    +         /** overall term freq for this term */
    +         public int term_freq_ = 0;
    +
    +         public TermInfo() {
    +         }
    +
    +         /** output the class as a string */
    +         public String toString() {
    +             String result = "";
    +             result += "<" + field_ + ">\"" + term_ + " docs(" + match_docs_;
    +             result += ")freq(" + term_freq_ + ")";
    +             return result;
    +         }
    +
    +         /** output as an XML element */
    +         public String toXMLString() {
    +             return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />";
    +         }
    +     }
    +
    +     public class DocInfo
    +     {
    +         public String id_ = "";
    +         public float rank_ = 0;
    +         public int termfreq_ = 0;
    +
    +         public DocInfo (String id, float rank, int termfreq)
    +         {
    +             id_ = id;
    +             rank_ = rank;
    +             termfreq_ = termfreq;
    +         }
    +
    +         public String toString()
    +         {
    +             return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")";
    +         }
    +
    +         public String toXMLString()
    +         {
    +             return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />";
    +         }
    +     }
    +
    +     // where should this go???
    +     public static String xmlSafe(String text) {
    +         text = text.replaceAll("&", "&amp;amp;");
    +         text = text.replaceAll("<", "&amp;lt;");
    +         text = text.replaceAll(">", "&amp;gt;");
    +         text = text.replaceAll("'", "&amp;#039;");
    +         text = text.replaceAll("\\\"", "&amp;quot;");
    +         return text;
    +     }
      }
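Note: the class reverts from a thin subclass of SharedSoleneQueryResult to the original self-contained result holder. A short sketch of populating and serializing it; the constructor is package-private, so this would have to live in org.greenstone.LuceneWrapper, and the ids, scores, and frequencies are made up:

    public class QueryResultDemo {
        public static void main(String[] args) {
            LuceneQueryResult result = new LuceneQueryResult();
            result.setTotalDocs(2);
            result.setStartResults(1);
            result.setEndResults(2);
            result.addDoc("HASH0123", 0.87f, 5);
            result.addDoc("HASH4567", 0.42f, 2);
            result.addTerm("greenstone", "TX", 2, 7);

            System.out.println(result.toString());     // human-readable dump
            System.out.println(result.getXMLString()); // XML form used by the wrapper
        }
    }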