Ignore:
Timestamp:
2011-10-07T11:36:07+13:00 (13 years ago)
Author:
sjm84
Message:

Lucene 3.x version of code accidentally committed; rolling back to 2.x-compatible version

Location:
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java

    r24725 r24731  
    3333import org.apache.lucene.analysis.standard.*;
    3434
    35 import org.apache.lucene.analysis.ASCIIFoldingFilter;
    3635
    37 import org.apache.lucene.util.Version;
    38 
    39 
    40 class GS2Analyzer extends GS2StandardAnalyzer
     36class GS2Analyzer extends StandardAnalyzer
    4137{
    42    
    43     static Version matchVersion = Version.LUCENE_24;
    44 
    45 
    4638    public GS2Analyzer()
    4739    {
    48     super(matchVersion);
     40    super();
    4941    }
    50    
    5142
    5243    public GS2Analyzer(Set stopWords)
    5344    {
    54     super(matchVersion,stopWords);
     45    super(stopWords);
    5546    }
    5647
     
    5849    public GS2Analyzer(String [] stopwords)
    5950    {
    60     super(matchVersion,StopFilter.makeStopSet(stopwords));
     51    super(stopwords);
     52    }
     53   
     54    public TokenStream tokenStream(String fieldName, Reader reader)
     55    {
     56    TokenStream result = super.tokenStream(fieldName,reader);
     57    result = new ISOLatin1AccentFilter(result);
     58
     59    return result; 
    6160    }
    6261
    63   @Override
    64   protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    65     final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    66     src.setMaxTokenLength(maxTokenLength);
    67     src.setReplaceInvalidAcronym(replaceInvalidAcronym);
    68     TokenStream tok = new StandardFilter(matchVersion, src);
    69     tok = new LowerCaseFilter(matchVersion, tok);
    70     tok = new StopFilter(matchVersion, tok, stopwords);
    7162
    72     // top it up with accent folding
    73     tok = new ASCIIFoldingFilter(tok);
     63  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
     64      TokenStream result = super.reusableTokenStream(fieldName,reader);
     65     
     66      result = new ISOLatin1AccentFilter(result);
     67     
     68      return result;
     69  }
    7470
    75     return new TokenStreamComponents(src, tok) {
    76       @Override
    77       protected boolean reset(final Reader reader) throws IOException {
    78         src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
    79         return super.reset(reader);
    80       }
    81     };
    82   }
    8371
    8472}
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java

    r24725 r24731  
    3434import org.apache.lucene.analysis.Analyzer;
    3535import org.apache.lucene.document.Document;
     36import org.apache.lucene.index.IndexModifier;
    3637import org.apache.lucene.index.IndexReader;
    3738import org.apache.lucene.index.IndexWriter;
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java

    r24725 r24731  
    3333
    3434import java.io.IOException;
    35 import java.io.File;
    36 //import org.apache.lucene.analysis.standard.StandardAnalyzer;
     35import org.apache.lucene.analysis.standard.StandardAnalyzer;
    3736import org.apache.lucene.index.IndexWriter;
    3837import org.apache.lucene.index.Term;
    39 
    40 import org.apache.lucene.store.SimpleFSDirectory;
    41 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    4238
    4339
     
    132128        throws IOException
    133129    {
    134     SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
    135     index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
    136                        MaxFieldLength.UNLIMITED);
     130    index_writer = new IndexWriter(index_path, new StandardAnalyzer());
    137131    }
    138132
     
    169163    {
    170164        debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
    171         debug("- Initial number of documents in index: " + index_writer.numDocs());
     165        debug("- Initial number of documents in index: " + index_writer.docCount());
    172166    index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
    173         debug("- Final number of documents in index: " + index_writer.numDocs());
     167        debug("- Final number of documents in index: " + index_writer.docCount());
    174168    }
    175169}
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java

    r24725 r24731  
    3333
    3434import java.io.IOException;
    35 import java.io.File;
    3635import java.util.Arrays;
    3736import java.util.Enumeration;
     
    3938
    4039import org.apache.lucene.analysis.Analyzer;
    41 //import org.apache.lucene.analysis.standard.StandardAnalyzer;
     40import org.apache.lucene.analysis.standard.StandardAnalyzer;
    4241import org.apache.lucene.document.Document;
    4342import org.apache.lucene.document.Field;
    44 
    45 import org.apache.lucene.store.SimpleFSDirectory;
    46 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    4743
    4844
     
    150146        throws IOException
    151147    {
    152         Analyzer analyzer = new GS2Analyzer();
     148        Analyzer analyzer = new StandardAnalyzer();
    153149        // create an index in /tmp/index, overwriting an existing one:
    154150        index_modifier = new GS2IndexModifier(index_path, analyzer);
     
    192188    {
    193189        debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')");
    194         debug("- Initial number of documents in index: " + index_modifier.numDocs());
     190        debug("- Initial number of documents in index: " + index_modifier.docCount());
    195191        // Retrieve the document requested
    196192        int doc_num = index_modifier.getDocNumByNodeID(node_id);
     
    234230                // We also have to initialize the nodeId value
    235231        // changed to use docOID --kjdon
    236                 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED));
     232                document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED));
    237233
    238234                // Re-index document
     
    300296                for(int i = 0; i < values.size(); i++)
    301297                    {
    302                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
     298                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    303299                    }
    304300                values.clear();
     
    322318                for(int i = 0; i < values.size(); i++)
    323319                    {
    324                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
     320                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    325321                    }
    326322                values.clear();
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

    r24725 r24731  
    4545import org.apache.lucene.index.Term;
    4646import org.apache.lucene.analysis.Analyzer;
    47 
    48 import org.apache.lucene.store.SimpleFSDirectory;
    49 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    5047
    5148import java.util.Stack;
     
    193190    protected String file_id_ = null;
    194191
     192    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
     193
     194
    195195    /** pass in true if want to create a new index, false if want to use the existing one */
    196196    public Indexer (String doc_tag_level, File index_dir, boolean create)
     
    206206        reader.setFeature("http://xml.org/sax/features/validation", false);
    207207
    208         SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
    209 
    210         analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
    211 
    212         writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
    213                        
     208        analyzer_ = new GS2Analyzer(stop_words);
     209
     210        writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
    214211        // by default, will only index 10,000 words per document
    215212        // Can throw out_of_memory errors
     
    321318        //String node_id = atts.getValue("gs2:id");
    322319        //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
    323         //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
     320        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));
    324321       
    325322        current_doc_oid_ = atts.getValue("gs2:docOID");
    326323        print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
    327         current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
     324        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
    328325        }
    329326       
     
    362359        if (qName.equals(indexable_current_node_))
    363360            {
    364             current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
     361            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
    365362            // The byXX fields are used for sorting search results
    366363            // We don't want to do that for Text or AllFields fields
     
    368365            if (!qName.equals("TX") && !qName.equals("ZZ"))
    369366                {
    370                 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
     367                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
    371368                }
    372369           
     
    475472    {
    476473        debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
    477         debug("- Initial number of documents in index: " + writer_.numDocs());
     474        debug("- Initial number of documents in index: " + writer_.docCount());
    478475        writer_.deleteDocuments(new Term("docOID", doc_id));
    479         debug("- Final number of documents in index: " + writer_.numDocs());
     476        debug("- Final number of documents in index: " + writer_.docCount());
    480477    }
    481478
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r24725 r24731  
    4545import org.apache.lucene.search.IndexSearcher;
    4646import org.apache.lucene.search.Query;
    47 import org.apache.lucene.search.TermRangeFilter;
     47import org.apache.lucene.search.RangeFilter;
    4848import org.apache.lucene.search.Searcher;
    4949import org.apache.lucene.search.ScoreDoc;
    5050import org.apache.lucene.search.Sort;
    51 import org.apache.lucene.search.SortField;
    5251import org.apache.lucene.search.TopFieldDocs;
    5352
    54 import org.apache.lucene.store.Directory;
    55 import org.apache.lucene.store.FSDirectory;
    56 import org.apache.lucene.util.Version;
    57 
    58 public class GS2LuceneQuery extends SharedSoleneQuery
     53
     54public class GS2LuceneQuery
    5955{
    60     protected String full_indexdir="";
    61 
    62     protected Sort sorter=new Sort();
    63     protected Filter filter = null;
    64 
    65     protected static Version matchVersion = Version.LUCENE_24;
    66 
    67     protected QueryParser query_parser = null;
    68     protected QueryParser query_parser_no_stop_words = null;
    69     protected Searcher searcher = null;
    70     protected IndexReader reader = null;
    71 
     56
     57
     58    static private String TEXTFIELD = "TX";
     59
     60    // Use the standard set of English stop words by default
     61    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
     62
     63    private String full_indexdir="";
     64    private String default_conjunction_operator = "OR";
     65    private String fuzziness = null;
     66    private String sort_field = null;
     67    private Sort sorter=new Sort();
     68    private String filter_string = null;
     69    private Filter filter = null;
     70    private int start_results=1;
     71    private int end_results=Integer.MAX_VALUE;
     72
     73    private QueryParser query_parser = null;
     74    private QueryParser query_parser_no_stop_words = null;
     75    private Searcher searcher = null;
     76    private IndexReader reader = null;
     77
     78    static private PrintWriter utf8out = null;
     79
     80    static
     81    {
     82    try {
     83        OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8");
     84        utf8out = new PrintWriter(osw, true);
     85    }
     86        catch (UnsupportedEncodingException e) {
     87        System.out.println(e);
     88    }
     89    }
     90
     91   
    7292    public GS2LuceneQuery() {
    73     super();
    7493
    7594    // Create one query parser with the standard set of stop words, and one with none
    7695
    77     query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
    78         query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
     96    query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
     97        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    7998    }
    8099   
    81100   
    82101    public boolean initialise() {
    83 
    84     if (!super.initialise()) {
    85         return false;
    86     }
    87 
    88102
    89103        if (full_indexdir==null || full_indexdir.length()==-1){
     
    92106        return false;
    93107        }
    94 
    95108        try {
    96         Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
    97             searcher = new IndexSearcher(full_indexdir_dir,true);
     109            searcher = new IndexSearcher(full_indexdir);
    98110            reader = ((IndexSearcher) searcher).getIndexReader();
    99111       
     
    106118
    107119    }
    108 
    109     public void setIndexDir(String full_indexdir) {
    110     this.full_indexdir = full_indexdir;
    111     }
    112 
    113     public void setSortField(String sort_field) {
    114     super.setSortField(sort_field);
    115 
    116     if (sort_field == null) {
    117         this.sorter = new Sort();
    118     } else {
    119         this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!?
    120     }
    121     }
    122 
    123     public void setFilterString(String filter_string) {
    124     super.setFilterString(filter_string);
    125     this.filter = parseFilterString(filter_string);
    126     }
    127 
    128     public Filter getFilter() {
    129     return this.filter;
    130     }
    131 
    132120   
    133121    public LuceneQueryResult runQuery(String query_string) {
     
    206194        if (end_results == Integer.MAX_VALUE) {
    207195        // Perform the query (filter and sorter may be null)
    208         TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    209         lucene_query_result.setTotalDocs(hits.totalHits);
     196        Hits hits = searcher.search(query, filter, sorter);
     197        lucene_query_result.setTotalDocs(hits.length());
    210198
    211199        // Output the matching documents
    212200        lucene_query_result.setStartResults(start_results);
    213         lucene_query_result.setEndResults(hits.totalHits);
    214 
    215         for (int i = start_results; i <= hits.totalHits; i++) {
    216             int lucene_doc_num = hits.scoreDocs[i - 1].doc;
    217             Document doc = reader.document(lucene_doc_num);
     201        lucene_query_result.setEndResults(hits.length());
     202
     203        for (int i = start_results; i <= hits.length(); i++) {
     204            int lucene_doc_num = hits.id(i - 1);
     205            Document doc = hits.doc(i - 1);
    218206            int doc_term_freq = 0;
    219207            Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
     
    222210            doc_term_freq = doc_term_freq_object.intValue();
    223211            }
    224             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
     212            lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
    225213        }
    226214        }
     
    268256
    269257    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
    270     super.setDefaultConjunctionOperator(default_conjunction_operator);
    271 
     258    this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
    272259    if (default_conjunction_operator.equals("AND")) {
    273260        query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
     
    278265    }
    279266    }
    280      
    281        
     267   
     268    public String getDefaultConjunctionOperator() {
     269    return this.default_conjunction_operator;
     270    }
     271   
     272    public void setEndResults(int end_results) {
     273    this.end_results = end_results;
     274    }
     275    public int getEndResults() {
     276    return this.end_results;
     277    }
     278       
     279    public void setFilterString(String filter_string) {
     280    this.filter_string = filter_string;
     281    this.filter = parseFilterString(filter_string);
     282    }
     283    public String getFilterString() {
     284    return this.filter_string ;
     285    }
     286   
     287    public Filter getFilter() {
     288    return this.filter;
     289    }
     290
     291    public void setIndexDir(String full_indexdir) {
     292    this.full_indexdir = full_indexdir;
     293    }
     294   
     295    public void setFuzziness(String fuzziness) {
     296    this.fuzziness = fuzziness;
     297    }
     298    public String getFuzziness() {
     299    return this.fuzziness;
     300    }
     301   
     302    public void setSortField(String sort_field) {
     303    this.sort_field = sort_field;
     304    if (sort_field == null) {
     305        this.sorter = new Sort();
     306    } else {
     307        this.sorter = new Sort(sort_field);
     308    }
     309    }
     310    public String getSortField() {
     311    return this.sort_field;
     312    }
     313       
     314    public void setStartResults(int start_results) {
     315    if (start_results < 1) {
     316        start_results = 1;
     317    }
     318    this.start_results = start_results;
     319    }
     320    public int getStartResults() {
     321    return this.start_results;
     322    }
     323       
    282324    public void cleanUp() {
    283     super.cleanUp();
    284325    try {
    285326        if (searcher != null) {
     
    291332    }
    292333
    293 
    294     protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
     334    private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    295335    throws java.io.IOException, org.apache.lucene.queryParser.ParseException
    296336    {
     
    394434    }
    395435
    396     protected Filter parseFilterString(String filter_string)
     436    private Filter parseFilterString(String filter_string)
    397437    {
    398438    Filter result = null;
     
    405445        String upper_term = matcher.group(4);
    406446        boolean include_upper = matcher.group(5).equals("]");
    407         result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
     447        result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
    408448    }
    409449    else {
     
    412452    return result;
    413453    }
    414    
    415 
     454
     455
     456    protected void finalize() throws Throwable
     457    {
     458    try {
     459        utf8out.flush();
     460    } finally {
     461        super.finalize();
     462    }
     463    }
     464
     465   
    416466    /** command line program and auxiliary methods */
    417467
    418468    // Fairly self-explanatory I should hope
    419     static protected boolean query_result_caching_enabled = false;
     469    static private boolean query_result_caching_enabled = false;
    420470
    421471
    422472    static public void main (String args[])
    423473    {
     474
     475
    424476    if (args.length == 0) {
    425477        System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");
     
    514566    }
    515567
    516     protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
     568    private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
    517569    throws IOException
    518570    {
     
    602654    }
    603655   
    604     protected static String fileSafe(String text)
     656    private static String fileSafe(String text)
    605657    {
    606658    StringBuffer file_safe_text = new StringBuffer();
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r24725 r24731  
    2929import java.util.Vector;
    3030
    31 /** Opportunity to fine tune QueryResult for lucene search
     31/** a QueryResult class for a lucene search
    3232 *
    3333 */
    34 
    35 public class LuceneQueryResult extends SharedSoleneQueryResult {
    36    
    37     // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class
     34public class LuceneQueryResult {
     35   
     36    public static final int NO_ERROR = 0;
     37    public static final int PARSE_ERROR = 1;
     38    public static final int TOO_MANY_CLAUSES_ERROR = 2;
     39    public static final int IO_ERROR = 3;
     40    public static final int OTHER_ERROR = 4;
     41   
     42    /** the list of DocInfo */
     43    protected Vector docs_=null;
     44    /** the list of TermInfo */
     45    protected Vector terms_=null;
     46    /** the list of stopwords found in the query */
     47    protected Vector stopwords_ = null;
     48    /** the total number of docs found - not necessarily the size of docs_*/
     49    protected int total_num_docs_=0;
     50    /** the start result number if we are retrieving only a portion of the results */
     51    protected int start_results_ = 0;
     52    /** the end result number if we are retrieving only a portion of the results */
     53    protected int end_results_ = 0;
     54    /** whether an error has occurred and what kind it is*/
     55    protected int error_ = NO_ERROR;
     56
    3857    LuceneQueryResult() {
    39     super();
    40     }
     58    docs_ = new Vector();
     59    terms_ = new Vector();
     60    stopwords_ = new Vector();
     61    }
     62   
     63    /** clear the info from the last query - should be called before setting any new docs/terms */
     64    public void clear() {
     65    total_num_docs_=0;
     66    docs_.clear();
     67    terms_.clear();
     68    stopwords_.clear();
     69    error_ = NO_ERROR;
     70    }
     71
     72    /** returns the result as a String - useful for printing out results */
     73    public String toString() {
     74   
     75    String result = "";
     76    result += "docs (ranks): ";
     77    for (int i=0; i<docs_.size(); i++) {
     78        result += ((DocInfo)docs_.elementAt(i)).toString()+", ";
     79    }
     80    result += "\nterms: ";
     81    for (int i=0; i<terms_.size(); i++) {
     82        result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
     83    }
     84    result += "\nactual number of docs found = "+total_num_docs_;
     85   
     86    return result;
     87    }
     88    /** a shorter representation - just terms and total docs - not the
     89    individual docnums and ranks */
     90    public String toShortString() {
     91    String result = "";
     92    result += "\nterms: ";
     93    for (int i=0; i<terms_.size(); i++) {
     94        result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
     95    }
     96    result += "\nactual number of docs found = "+total_num_docs_;
     97    return result;
     98    }
     99   
     100    public void setTotalDocs(int num) {
     101    total_num_docs_=num;
     102    }
     103   
     104    public void setStartResults(int start) {
     105    start_results_ = start;
     106    }
     107
     108    public void setEndResults(int end) {
     109    end_results_ = end;
     110    }
     111
     112    public void addDoc(String id, float rank, int termfreq)
     113    {
     114    docs_.add(new DocInfo(id, rank, termfreq));
     115    }
     116   
     117    public void addTerm(String term, String field, int match, int freq) {
     118    TermInfo ti = new TermInfo();
     119    ti.term_=term;
     120    ti.field_=field;
     121    ti.match_docs_=match;
     122    ti.term_freq_=freq;
     123    terms_.add(ti);
     124    }
     125    public void addStopWord(String stopword) {
     126    stopwords_.add(stopword);
     127    }
     128    public Vector getDocs() {
     129    return docs_;
     130    }
     131   
     132    public int getError() {
     133    return error_;
     134    }
     135   
     136    public String getErrorString() {
     137    if (error_ == PARSE_ERROR) {
     138        return "PARSE_EXCEPTION";
     139    }
     140    if (error_ == TOO_MANY_CLAUSES_ERROR) {
     141        return "TOO_MANY_CLAUSES";
     142    }
     143    if (error_ == IO_ERROR) {
     144        return "IO_ERROR";
     145    }
     146    if (error_ == NO_ERROR) {
     147        return "NO_ERROR";
     148    }
     149    return "UNKNOWN";
     150    }
     151
     152    public Vector getTerms() {
     153    return terms_;
     154    }
     155   
     156    public Vector getStopWords() {
     157    return stopwords_;
     158    }
     159    public int getTotalDocs() {
     160    return total_num_docs_;
     161    }
     162   
     163    public void setError(int error) {
     164    error_ = error;
     165    }
     166   
     167    public String getXMLString() {
     168    StringBuffer buffer = new StringBuffer();
     169
     170    // terms
     171    buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n");
     172    for (int i=0; i<terms_.size(); i++) {
     173        buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n");
     174    }
     175
     176    // stopwords
     177    for (int i=0; i<stopwords_.size(); i++) {
     178        buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n");
     179    }
     180   
     181    // results
     182    buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n");
     183    buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n");
     184    buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n");
     185   
     186    for (int i=0; i< docs_.size(); i++) {
     187        buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n");
     188    }
     189
     190    return buffer.toString();
     191    }
     192
     193 
     194    public class TermInfo {
     195   
     196    /** the term itself */
     197    public String term_=null;
     198    /** the field for which this term was queried */
     199    public String field_=null;
     200    /** the number of documents containing this term */
     201    public int match_docs_=0;
     202    /** overall term freq for this term */
     203    public int term_freq_=0;
     204   
     205    public TermInfo() {
     206    }
     207   
     208    /** output the class as a string */
     209    public String toString() {
     210        String result="";
     211        result +="<"+field_+">\""+term_+" docs("+match_docs_;
     212        result +=")freq("+term_freq_+")";
     213        return result;
     214    }
     215
     216    /** output as an XML element */
     217    public String toXMLString() {
     218        return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />";
     219    }
     220    }
     221
     222
     223    public class DocInfo
     224    {
     225    public String id_ = "";
     226    public float rank_ = 0;
     227    public int termfreq_ = 0;
     228
     229    public DocInfo (String id, float rank, int termfreq)
     230    {
     231        id_ = id;
     232        rank_ = rank;
     233        termfreq_ = termfreq;
     234    }
     235
     236    public String toString()
     237    {
     238        return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")";
     239    }
     240
     241    public String toXMLString()
     242    {
     243        return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />";
     244    }
     245    }
     246
     247
     248    // where should this go???
     249    public static String xmlSafe(String text) {
     250    text = text.replaceAll("&","&amp;amp;");
     251    text = text.replaceAll("<","&amp;lt;");
     252    text = text.replaceAll(">","&amp;gt;");
     253    text = text.replaceAll("'","&amp;#039;");
     254    text = text.replaceAll("\\\"","&amp;quot;");
     255    return text;
     256    }
     257 
    41258}
Note: See TracChangeset for help on using the changeset viewer.