Timestamp:
2011-10-05T15:44:19+13:00
Author:
davidb
Message:

Restructuring of Lucene version 2.x and 3.x to make it easier to control which one is used

Location:
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone
Files:
7 edited
1 moved

Legend:

  +  line added in r24725
  -  line removed
     (unprefixed lines are unchanged context; … marks elided lines)
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java

    r17804 → r24725

     import org.apache.lucene.analysis.standard.*;
     
    +import org.apache.lucene.analysis.ASCIIFoldingFilter;
     
    -class GS2Analyzer extends StandardAnalyzer
    +import org.apache.lucene.util.Version;
    +
    +
    +class GS2Analyzer extends GS2StandardAnalyzer
     {
    +
    +    static Version matchVersion = Version.LUCENE_24;
    +
    +
         public GS2Analyzer()
         {
    -        super();
    +        super(matchVersion);
         }
    +
     
         public GS2Analyzer(Set stopWords)
         {
    -        super(stopWords);
    +        super(matchVersion, stopWords);
         }
     
    …
         public GS2Analyzer(String [] stopwords)
         {
    -        super(stopwords);
    -    }
    -
    -    public TokenStream tokenStream(String fieldName, Reader reader)
    -    {
    -        TokenStream result = super.tokenStream(fieldName, reader);
    -        result = new ISOLatin1AccentFilter(result);
    -
    -        return result;
    +        super(matchVersion, StopFilter.makeStopSet(stopwords));
         }
     
    +    @Override
    +    protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    +        final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    +        src.setMaxTokenLength(maxTokenLength);
    +        src.setReplaceInvalidAcronym(replaceInvalidAcronym);
    +        TokenStream tok = new StandardFilter(matchVersion, src);
    +        tok = new LowerCaseFilter(matchVersion, tok);
    +        tok = new StopFilter(matchVersion, tok, stopwords);
     
    -    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    -        TokenStream result = super.reusableTokenStream(fieldName, reader);
    -
    -        result = new ISOLatin1AccentFilter(result);
    -
    -        return result;
    +        // top it up with accent folding
    +        tok = new ASCIIFoldingFilter(tok);
    +
    +        return new TokenStreamComponents(src, tok) {
    +            @Override
    +            protected boolean reset(final Reader reader) throws IOException {
    +                src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
    +                return super.reset(reader);
    +            }
    +        };
         }
    -
     
     }
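
    The rewrite above moves GS2Analyzer from overriding tokenStream()/reusableTokenStream() to the Lucene 3.x createComponents() pattern, with ASCIIFoldingFilter replacing the deprecated ISOLatin1AccentFilter at the end of the chain. A minimal sketch of how the folding behaviour could be checked (hypothetical test code, not part of this changeset; it sits in the same package because GS2Analyzer is package-private):

        package org.greenstone.LuceneWrapper;

        import java.io.StringReader;
        import org.apache.lucene.analysis.TokenStream;
        import org.apache.lucene.analysis.tokenattributes.TermAttribute;

        class GS2AnalyzerCheck {
            public static void main(String[] args) throws Exception {
                GS2Analyzer analyzer = new GS2Analyzer();
                // Run accented text through the full chain:
                // StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter -> ASCIIFoldingFilter
                TokenStream ts = analyzer.tokenStream("TX", new StringReader("Café Zürich"));
                TermAttribute term = ts.addAttribute(TermAttribute.class);
                while (ts.incrementToken()) {
                    System.out.println(term.term()); // expected: "cafe", then "zurich"
                }
                ts.close();
            }
        }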
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java

    r20731 → r24725

     import org.apache.lucene.analysis.Analyzer;
     import org.apache.lucene.document.Document;
    -import org.apache.lucene.index.IndexModifier;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
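
    Only the import changes here: org.apache.lucene.index.IndexModifier was deprecated and later dropped, so GS2IndexModifier now builds on IndexWriter alone. As a hedged sketch (placeholder names, not code from this commit), the usual replacement for an IndexModifier edit cycle is IndexWriter.updateDocument(), which performs the delete-by-term and re-add in one call:

        // Sketch only: index_dir, analyzer, oid and doc are placeholders.
        IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(index_dir)),
                                             analyzer, false, MaxFieldLength.UNLIMITED);
        writer.updateDocument(new Term("docOID", oid), doc); // delete old version, add new one
        writer.close();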
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java

    r16583 → r24725

     
     import java.io.IOException;
    -import org.apache.lucene.analysis.standard.StandardAnalyzer;
    +import java.io.File;
    +//import org.apache.lucene.analysis.standard.StandardAnalyzer;
     import org.apache.lucene.index.IndexWriter;
     import org.apache.lucene.index.Term;
    +
    +import org.apache.lucene.store.SimpleFSDirectory;
    +import org.apache.lucene.index.IndexWriter.MaxFieldLength;
     
     
    …
             throws IOException
         {
    -        index_writer = new IndexWriter(index_path, new StandardAnalyzer());
    +        SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
    +        index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
    +                                       MaxFieldLength.UNLIMITED);
         }
     
    …
         {
             debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
    -        debug("- Initial number of documents in index: " + index_writer.docCount());
    +        debug("- Initial number of documents in index: " + index_writer.numDocs());
             index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
    -        debug("- Final number of documents in index: " + index_writer.docCount());
    +        debug("- Final number of documents in index: " + index_writer.numDocs());
         }
     }
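
    Note the docCount() → numDocs() switch in the debug output: docCount() was deprecated in Lucene 2.9 in favour of two more precise counters. A short illustration of the distinction (stock Lucene API behaviour, not code from this commit):

        index_writer.deleteDocuments(new Term("nodeid", "42"));
        System.out.println(index_writer.numDocs()); // live documents only; reflects the delete
        System.out.println(index_writer.maxDoc());  // still counts deleted docs until a merge purges them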
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java

    r20731 → r24725

     
     import java.io.IOException;
    +import java.io.File;
     import java.util.Arrays;
     import java.util.Enumeration;
    …
     
     import org.apache.lucene.analysis.Analyzer;
    -import org.apache.lucene.analysis.standard.StandardAnalyzer;
    +//import org.apache.lucene.analysis.standard.StandardAnalyzer;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    +
    +import org.apache.lucene.store.SimpleFSDirectory;
    +import org.apache.lucene.index.IndexWriter.MaxFieldLength;
     
     
    …
             throws IOException
         {
    -        Analyzer analyzer = new StandardAnalyzer();
    +        Analyzer analyzer = new GS2Analyzer();
             // create an index in /tmp/index, overwriting an existing one:
             index_modifier = new GS2IndexModifier(index_path, analyzer);
    …
         {
             debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')");
    -        debug("- Initial number of documents in index: " + index_modifier.docCount());
    +        debug("- Initial number of documents in index: " + index_modifier.numDocs());
             // Retrieve the document requested
             int doc_num = index_modifier.getDocNumByNodeID(node_id);
    …
                     // We also have to initialize the nodeId value
                     // changed to use docOID --kjdon
    -                document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED));
    +                document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED));
     
                     // Re-index document
    …
                     for(int i = 0; i < values.size(); i++)
                         {
    -                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    +                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
                         }
                     values.clear();
    …
                     for(int i = 0; i < values.size(); i++)
                         {
    -                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    +                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
                         }
                     values.clear();
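
    The TOKENIZED → ANALYZED edits track a straight rename: Lucene 2.4 deprecated Field.Index.TOKENIZED/UN_TOKENIZED in favour of ANALYZED/NOT_ANALYZED, and 3.0 removed the old names. Side by side (illustrative values only):

        // Before (pre-2.4 names):
        doc.add(new Field(field, value, Field.Store.YES, Field.Index.TOKENIZED));
        // After (2.4+ names, same behaviour):
        doc.add(new Field(field, value, Field.Store.YES, Field.Index.ANALYZED));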
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

    r23194 → r24725

     import org.apache.lucene.index.Term;
     import org.apache.lucene.analysis.Analyzer;
    +
    +import org.apache.lucene.store.SimpleFSDirectory;
    +import org.apache.lucene.index.IndexWriter.MaxFieldLength;
     
     import java.util.Stack;
    …
         protected String file_id_ = null;
     
    -    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    -
    -
         /** pass in true if want to create a new index, false if want to use the existing one */
         public Indexer (String doc_tag_level, File index_dir, boolean create)
    …
             reader.setFeature("http://xml.org/sax/features/validation", false);
     
    -        analyzer_ = new GS2Analyzer(stop_words);
    -
    -        writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
    +        SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
    +
    +        analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
    +
    +        writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
    +
             // by default, will only index 10,000 words per document
             // Can throw out_of_memory errors
    …
             //String node_id = atts.getValue("gs2:id");
             //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
    -        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));
    +        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
     
             current_doc_oid_ = atts.getValue("gs2:docOID");
             print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
    -        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
    +        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
     
    …
             if (qName.equals(indexable_current_node_))
                 {
    -            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
    +            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
                 // The byXX fields are used for sorting search results
                 // We don't want to do that for Text or AllFields fields
    …
                 if (!qName.equals("TX") && !qName.equals("ZZ"))
                     {
    -                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
    +                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                     }
     
    …
         {
             debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
    -        debug("- Initial number of documents in index: " + writer_.docCount());
    +        debug("- Initial number of documents in index: " + writer_.numDocs());
             writer_.deleteDocuments(new Term("docOID", doc_id));
    -        debug("- Final number of documents in index: " + writer_.docCount());
    +        debug("- Final number of documents in index: " + writer_.numDocs());
         }
     
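
    The indexer keeps its existing dual-field scheme, just with the renamed constants: each indexable node gets an ANALYZED field for searching plus a NOT_ANALYZED "byXX" copy, because sorting needs a single untokenized term per document. The pattern in isolation (field names follow the diff; values are placeholders):

        // Searchable, tokenized copy with term vectors for term-frequency reporting
        current_doc_.add(new Field(qName, current_contents_, Field.Store.NO,
                                   Field.Index.ANALYZED, Field.TermVector.YES));
        // Untokenized "by" copy used purely for sorting search results
        current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO,
                                   Field.Index.NOT_ANALYZED, Field.TermVector.NO));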
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r20910 → r24725

     import org.apache.lucene.search.IndexSearcher;
     import org.apache.lucene.search.Query;
    -import org.apache.lucene.search.RangeFilter;
    +import org.apache.lucene.search.TermRangeFilter;
     import org.apache.lucene.search.Searcher;
     import org.apache.lucene.search.ScoreDoc;
     import org.apache.lucene.search.Sort;
    +import org.apache.lucene.search.SortField;
     import org.apache.lucene.search.TopFieldDocs;
     
    -
    -public class GS2LuceneQuery
    +import org.apache.lucene.store.Directory;
    +import org.apache.lucene.store.FSDirectory;
    +import org.apache.lucene.util.Version;
    +
    +public class GS2LuceneQuery extends SharedSoleneQuery
     {
    -
    -
    -    static private String TEXTFIELD = "TX";
    -
    -    // Use the standard set of English stop words by default
    -    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    -
    -    private String full_indexdir="";
    -    private String default_conjunction_operator = "OR";
    -    private String fuzziness = null;
    -    private String sort_field = null;
    -    private Sort sorter=new Sort();
    -    private String filter_string = null;
    -    private Filter filter = null;
    -    private int start_results=1;
    -    private int end_results=Integer.MAX_VALUE;
    -
    -    private QueryParser query_parser = null;
    -    private QueryParser query_parser_no_stop_words = null;
    -    private Searcher searcher = null;
    -    private IndexReader reader = null;
    -
    -    static private PrintWriter utf8out = null;
    -
    -    static
    -    {
    -        try {
    -            OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8");
    -            utf8out = new PrintWriter(osw, true);
    -        }
    -        catch (UnsupportedEncodingException e) {
    -            System.out.println(e);
    -        }
    -    }
    -
    -
    +    protected String full_indexdir="";
    +
    +    protected Sort sorter=new Sort();
    +    protected Filter filter = null;
    +
    +    protected static Version matchVersion = Version.LUCENE_24;
    +
    +    protected QueryParser query_parser = null;
    +    protected QueryParser query_parser_no_stop_words = null;
    +    protected Searcher searcher = null;
    +    protected IndexReader reader = null;
    +
         public GS2LuceneQuery() {
    +        super();
     
         // Create one query parser with the standard set of stop words, and one with none
     
    -        query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
    -        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    +        query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
    +        query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
         }
     
     
         public boolean initialise() {
    +
    +        if (!super.initialise()) {
    +            return false;
    +        }
    +
     
             if (full_indexdir==null || full_indexdir.length()==-1){
    …
                return false;
             }
    +
             try {
    -            searcher = new IndexSearcher(full_indexdir);
    +            Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
    +            searcher = new IndexSearcher(full_indexdir_dir, true);
                 reader = ((IndexSearcher) searcher).getIndexReader();
     
    …
     
         }
    +
    +    public void setIndexDir(String full_indexdir) {
    +        this.full_indexdir = full_indexdir;
    +    }
    +
    +    public void setSortField(String sort_field) {
    +        super.setSortField(sort_field);
    +
    +        if (sort_field == null) {
    +            this.sorter = new Sort();
    +        } else {
    +            this.sorter = new Sort(new SortField(sort_field, SortField.STRING)); // **** can do better than this?!?
    +        }
    +    }
    +
    +    public void setFilterString(String filter_string) {
    +        super.setFilterString(filter_string);
    +        this.filter = parseFilterString(filter_string);
    +    }
    +
    +    public Filter getFilter() {
    +        return this.filter;
    +    }
    +
     
         public LuceneQueryResult runQuery(String query_string) {
    …
             if (end_results == Integer.MAX_VALUE) {
                 // Perform the query (filter and sorter may be null)
    -            Hits hits = searcher.search(query, filter, sorter);
    -            lucene_query_result.setTotalDocs(hits.length());
    +            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    +            lucene_query_result.setTotalDocs(hits.totalHits);
     
                 // Output the matching documents
                 lucene_query_result.setStartResults(start_results);
    -            lucene_query_result.setEndResults(hits.length());
    -
    -            for (int i = start_results; i <= hits.length(); i++) {
    -                int lucene_doc_num = hits.id(i - 1);
    -                Document doc = hits.doc(i - 1);
    +            lucene_query_result.setEndResults(hits.totalHits);
    +
    +            for (int i = start_results; i <= hits.totalHits; i++) {
    +                int lucene_doc_num = hits.scoreDocs[i - 1].doc;
    +                Document doc = reader.document(lucene_doc_num);
                     int doc_term_freq = 0;
                     Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
    …
                        doc_term_freq = doc_term_freq_object.intValue();
                    }
    -                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
    +                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
                 }
             }
    …
     
         public void setDefaultConjunctionOperator(String default_conjunction_operator) {
    -        this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
    +        super.setDefaultConjunctionOperator(default_conjunction_operator);
    +
             if (default_conjunction_operator.equals("AND")) {
                 query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
    …
             }
         }
    -
    -    public String getDefaultConjunctionOperator() {
    -        return this.default_conjunction_operator;
    -    }
    -
    -    public void setEndResults(int end_results) {
    -        this.end_results = end_results;
    -    }
    -    public int getEndResults() {
    -        return this.end_results;
    -    }
    -
    -    public void setFilterString(String filter_string) {
    -        this.filter_string = filter_string;
    -        this.filter = parseFilterString(filter_string);
    -    }
    -    public String getFilterString() {
    -        return this.filter_string ;
    -    }
    -
    -    public Filter getFilter() {
    -        return this.filter;
    -    }
    -
    -    public void setIndexDir(String full_indexdir) {
    -        this.full_indexdir = full_indexdir;
    -    }
    -
    -    public void setFuzziness(String fuzziness) {
    -        this.fuzziness = fuzziness;
    -    }
    -    public String getFuzziness() {
    -        return this.fuzziness;
    -    }
    -
    -    public void setSortField(String sort_field) {
    -        this.sort_field = sort_field;
    -        if (sort_field == null) {
    -            this.sorter = new Sort();
    -        } else {
    -            this.sorter = new Sort(sort_field);
    -        }
    -    }
    -    public String getSortField() {
    -        return this.sort_field;
    -    }
    -
    -    public void setStartResults(int start_results) {
    -        if (start_results < 1) {
    -            start_results = 1;
    -        }
    -        this.start_results = start_results;
    -    }
    -    public int getStartResults() {
    -        return this.start_results;
    -    }
    -
    +
    +
         public void cleanUp() {
    +        super.cleanUp();
             try {
                 if (searcher != null) {
    …
         }
     
    -    private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    +
    +    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
             throws java.io.IOException, org.apache.lucene.queryParser.ParseException
         {
    …
         }
     
    -    private Filter parseFilterString(String filter_string)
    +    protected Filter parseFilterString(String filter_string)
         {
             Filter result = null;
    …
             String upper_term = matcher.group(4);
             boolean include_upper = matcher.group(5).equals("]");
    -        result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
    +        result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
         }
         else {
    …
             return result;
         }
    -
    -
    -    protected void finalize() throws Throwable
    -    {
    -        try {
    -            utf8out.flush();
    -        } finally {
    -            super.finalize();
    -        }
    -    }
    -
     
    +
         /** command line program and auxiliary methods */
     
         // Fairly self-explanatory I should hope
    -    static private boolean query_result_caching_enabled = false;
    +    static protected boolean query_result_caching_enabled = false;
     
     
         static public void main (String args[])
         {
    -
    -
             if (args.length == 0) {
                 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");
    …
         }
     
    -    private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
    +    protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
             throws IOException
         {
    …
         }
     
    -    private static String fileSafe(String text)
    +    protected static String fileSafe(String text)
         {
             StringBuffer file_safe_text = new StringBuffer();
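
    The largest behavioural change in this file is the retirement of the lazy Hits class, which Lucene 3.0 removes: runQuery() now asks for an explicit number of results and walks TopFieldDocs.scoreDocs, loading documents through the IndexReader. The migration pattern reduced to its core (a sketch with placeholder variables, mirroring the diff above):

        // Old 2.x style: Hits hits = searcher.search(query, filter, sorter);
        // New style: cap the result count explicitly and page through scoreDocs.
        TopFieldDocs hits = searcher.search(query, filter, max_results, sorter);
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            int lucene_doc_num = hits.scoreDocs[i].doc;     // was hits.id(i)
            Document doc = reader.document(lucene_doc_num); // was hits.doc(i)
            float score = hits.scoreDocs[i].score;          // was hits.score(i)
        }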
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r16912 → r24725

     import java.util.Vector;
     
    -/** a QueryResult class for a lucene search
    +/** Opportunity to fine tune QueryResult for lucene search
      *
      */
    -public class LuceneQueryResult {
    +
    +public class LuceneQueryResult extends SharedSoleneQueryResult {
     
    -    public static final int NO_ERROR = 0;
    -    public static final int PARSE_ERROR = 1;
    -    public static final int TOO_MANY_CLAUSES_ERROR = 2;
    -    public static final int IO_ERROR = 3;
    -    public static final int OTHER_ERROR = 4;
    -
    -    /** the list of DocInfo */
    -    protected Vector docs_=null;
    -    /** the list of TermInfo */
    -    protected Vector terms_=null;
    -    /** the list of stopwords found in the query */
    -    protected Vector stopwords_ = null;
    -    /** the total number of docs found - not necessarily the size of docs_*/
    -    protected int total_num_docs_=0;
    -    /** the start result number if we are retrieving only a portion of the results */
    -    protected int start_results_ = 0;
    -    /** the end result number if we are retrieving only a portion of the results */
    -    protected int end_results_ = 0;
    -    /** whether an error has occurred and what kind it is*/
    -    protected int error_ = NO_ERROR;
    -
    +    // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class
         LuceneQueryResult() {
    -        docs_ = new Vector();
    -        terms_ = new Vector();
    -        stopwords_ = new Vector();
    -    }
    -
    -    /** clear the info from the last query - should be called before setting any new docs/terms */
    -    public void clear() {
    -        total_num_docs_=0;
    -        docs_.clear();
    -        terms_.clear();
    -        stopwords_.clear();
    -        error_ = NO_ERROR;
    -    }
    -
    -    /** returns the result as a String - useful for printing out results */
    -    public String toString() {
    -
    -        String result = "";
    -        result += "docs (ranks): ";
    -        for (int i=0; i<docs_.size(); i++) {
    -            result += ((DocInfo)docs_.elementAt(i)).toString()+", ";
    -        }
    -        result += "\nterms: ";
    -        for (int i=0; i<terms_.size(); i++) {
    -            result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
    -        }
    -        result += "\nactual number of docs found = "+total_num_docs_;
    -
    -        return result;
    -    }
    -    /** a shorter representation - just terms and total docs - not the
    -    individual docnums and ranks */
    -    public String toShortString() {
    -        String result = "";
    -        result += "\nterms: ";
    -        for (int i=0; i<terms_.size(); i++) {
    -            result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
    -        }
    -        result += "\nactual number of docs found = "+total_num_docs_;
    -        return result;
    -    }
    -
    -    public void setTotalDocs(int num) {
    -        total_num_docs_=num;
    -    }
    -
    -    public void setStartResults(int start) {
    -        start_results_ = start;
    -    }
    -
    -    public void setEndResults(int end) {
    -        end_results_ = end;
    -    }
    -
    -    public void addDoc(String id, float rank, int termfreq)
    -    {
    -        docs_.add(new DocInfo(id, rank, termfreq));
    -    }
    -
    -    public void addTerm(String term, String field, int match, int freq) {
    -        TermInfo ti = new TermInfo();
    -        ti.term_=term;
    -        ti.field_=field;
    -        ti.match_docs_=match;
    -        ti.term_freq_=freq;
    -        terms_.add(ti);
    -    }
    -    public void addStopWord(String stopword) {
    -        stopwords_.add(stopword);
    -    }
    -    public Vector getDocs() {
    -        return docs_;
    -    }
    -
    -    public int getError() {
    -        return error_;
    -    }
    -
    -    public String getErrorString() {
    -        if (error_ == PARSE_ERROR) {
    -            return "PARSE_EXCEPTION";
    -        }
    -        if (error_ == TOO_MANY_CLAUSES_ERROR) {
    -            return "TOO_MANY_CLAUSES";
    -        }
    -        if (error_ == IO_ERROR) {
    -            return "IO_ERROR";
    -        }
    -        if (error_ == NO_ERROR) {
    -            return "NO_ERROR";
    -        }
    -        return "UNKNOWN";
    -    }
    -
    -    public Vector getTerms() {
    -        return terms_;
    -    }
    -
    -    public Vector getStopWords() {
    -        return stopwords_;
    -    }
    -    public int getTotalDocs() {
    -        return total_num_docs_;
    -    }
    -
    -    public void setError(int error) {
    -        error_ = error;
    -    }
    -
    -    public String getXMLString() {
    -        StringBuffer buffer = new StringBuffer();
    -
    -        // terms
    -        buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n");
    -        for (int i=0; i<terms_.size(); i++) {
    -            buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n");
    -        }
    -
    -        // stopwords
    -        for (int i=0; i<stopwords_.size(); i++) {
    -            buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n");
    -        }
    -
    -        // results
    -        buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n");
    -        buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n");
    -        buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n");
    -
    -        for (int i=0; i< docs_.size(); i++) {
    -            buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n");
    -        }
    -
    -        return buffer.toString();
    -    }
    -
    -
    -    public class TermInfo {
    -
    -    /** the term itself */
    -    public String term_=null;
    -    /** the field for which this term was queried */
    -    public String field_=null;
    -    /** the number of documents containing this term */
    -    public int match_docs_=0;
    -    /** overall term freq for this term */
    -    public int term_freq_=0;
    -
    -    public TermInfo() {
    -    }
    -
    -    /** output the class as a string */
    -    public String toString() {
    -        String result="";
    -        result +="<"+field_+">\""+term_+" docs("+match_docs_;
    -        result +=")freq("+term_freq_+")";
    -        return result;
    -    }
    -
    -    /** output as an XML element */
    -    public String toXMLString() {
    -        return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />";
    -    }
    -    }
    -
    -
    -    public class DocInfo
    -    {
    -    public String id_ = "";
    -    public float rank_ = 0;
    -    public int termfreq_ = 0;
    -
    -    public DocInfo (String id, float rank, int termfreq)
    -    {
    -        id_ = id;
    -        rank_ = rank;
    -        termfreq_ = termfreq;
    -    }
    -
    -    public String toString()
    -    {
    -        return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")";
    -    }
    -
    -    public String toXMLString()
    -    {
    -        return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />";
    -    }
    -    }
    -
    -
    -    // where should this go???
    -    public static String xmlSafe(String text) {
    -        text = text.replaceAll("&","&amp;");
    -        text = text.replaceAll("<","&lt;");
    -        text = text.replaceAll(">","&gt;");
    -        text = text.replaceAll("'","&#039;");
    -        text = text.replaceAll("\\\"","&quot;");
    -        return text;
    -    }
    -
    +        super();
    +    }
     }