Ignore:
Timestamp:
2011-10-07T11:36:07+13:00 (13 years ago)
Author:
sjm84
Message:

Lucene 3.x version of code accidentally committed; rolling back to 2.x-compatible version

Location:
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2Analyzer.java

    r24725 r24731  
    3333import org.apache.lucene.analysis.standard.*;
    3434
    35 import org.apache.lucene.analysis.ASCIIFoldingFilter;
    3635
    37 import org.apache.lucene.util.Version;
    38 
    39 
    40 class GS2Analyzer extends GS2StandardAnalyzer
     36class GS2Analyzer extends StandardAnalyzer
    4137{
    42    
    43     static Version matchVersion = Version.LUCENE_24;
    44 
    45 
    4638    public GS2Analyzer()
    4739    {
    48     super(matchVersion);
     40    super();
    4941    }
    50    
    5142
    5243    public GS2Analyzer(Set stopWords)
    5344    {
    54     super(matchVersion,stopWords);
     45    super(stopWords);
    5546    }
    5647
     
    5849    public GS2Analyzer(String [] stopwords)
    5950    {
    60     super(matchVersion,StopFilter.makeStopSet(stopwords));
     51    super(stopwords);
     52    }
     53   
     54    public TokenStream tokenStream(String fieldName, Reader reader)
     55    {
     56    TokenStream result = super.tokenStream(fieldName,reader);
     57    result = new ISOLatin1AccentFilter(result);
     58
     59    return result; 
    6160    }
    6261
    63   @Override
    64   protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    65     final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    66     src.setMaxTokenLength(maxTokenLength);
    67     src.setReplaceInvalidAcronym(replaceInvalidAcronym);
    68     TokenStream tok = new StandardFilter(matchVersion, src);
    69     tok = new LowerCaseFilter(matchVersion, tok);
    70     tok = new StopFilter(matchVersion, tok, stopwords);
    7162
    72     // top it up with accent folding
    73     tok = new ASCIIFoldingFilter(tok);
     63  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
     64      TokenStream result = super.reusableTokenStream(fieldName,reader);
     65     
     66      result = new ISOLatin1AccentFilter(result);
     67     
     68      return result;
     69  }
    7470
    75     return new TokenStreamComponents(src, tok) {
    76       @Override
    77       protected boolean reset(final Reader reader) throws IOException {
    78         src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
    79         return super.reset(reader);
    80       }
    81     };
    82   }
    8371
    8472}
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2IndexModifier.java

    r24725 r24731  
    3434import org.apache.lucene.analysis.Analyzer;
    3535import org.apache.lucene.document.Document;
     36import org.apache.lucene.index.IndexModifier;
    3637import org.apache.lucene.index.IndexReader;
    3738import org.apache.lucene.index.IndexWriter;
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java

    r24725 r24731  
    3333
    3434import java.io.IOException;
    35 import java.io.File;
    36 //import org.apache.lucene.analysis.standard.StandardAnalyzer;
     35import org.apache.lucene.analysis.standard.StandardAnalyzer;
    3736import org.apache.lucene.index.IndexWriter;
    3837import org.apache.lucene.index.Term;
    39 
    40 import org.apache.lucene.store.SimpleFSDirectory;
    41 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    4238
    4339
     
    132128        throws IOException
    133129    {
    134     SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
    135     index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
    136                        MaxFieldLength.UNLIMITED);
     130    index_writer = new IndexWriter(index_path, new StandardAnalyzer());
    137131    }
    138132
     
    169163    {
    170164        debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
    171         debug("- Initial number of documents in index: " + index_writer.numDocs());
     165        debug("- Initial number of documents in index: " + index_writer.docCount());
    172166    index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
    173         debug("- Final number of documents in index: " + index_writer.numDocs());
     167        debug("- Final number of documents in index: " + index_writer.docCount());
    174168    }
    175169}
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneEditor.java

    r24725 r24731  
    3333
    3434import java.io.IOException;
    35 import java.io.File;
    3635import java.util.Arrays;
    3736import java.util.Enumeration;
     
    3938
    4039import org.apache.lucene.analysis.Analyzer;
    41 //import org.apache.lucene.analysis.standard.StandardAnalyzer;
     40import org.apache.lucene.analysis.standard.StandardAnalyzer;
    4241import org.apache.lucene.document.Document;
    4342import org.apache.lucene.document.Field;
    44 
    45 import org.apache.lucene.store.SimpleFSDirectory;
    46 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    4743
    4844
     
    150146        throws IOException
    151147    {
    152         Analyzer analyzer = new GS2Analyzer();
     148        Analyzer analyzer = new StandardAnalyzer();
    153149        // create an index in /tmp/index, overwriting an existing one:
    154150        index_modifier = new GS2IndexModifier(index_path, analyzer);
     
    192188    {
    193189        debug("GS2LuceneEditor.editIndex(" + node_id + ",'" + field + "','" + old_value + "','" + new_value + "')");
    194         debug("- Initial number of documents in index: " + index_modifier.numDocs());
     190        debug("- Initial number of documents in index: " + index_modifier.docCount());
    195191        // Retrieve the document requested
    196192        int doc_num = index_modifier.getDocNumByNodeID(node_id);
     
    234230                // We also have to initialize the nodeId value
    235231        // changed to use docOID --kjdon
    236                 document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.ANALYZED));
     232                document.add(new Field("docOID", String.valueOf(node_id), Field.Store.YES, Field.Index.TOKENIZED));
    237233
    238234                // Re-index document
     
    300296                for(int i = 0; i < values.size(); i++)
    301297                    {
    302                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
     298                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    303299                    }
    304300                values.clear();
     
    322318                for(int i = 0; i < values.size(); i++)
    323319                    {
    324                         document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.ANALYZED));
     320                        document.add(new Field(field, (String)values.get(i), Field.Store.YES, Field.Index.TOKENIZED));
    325321                    }
    326322                values.clear();
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

    r24725 r24731  
    4545import org.apache.lucene.index.Term;
    4646import org.apache.lucene.analysis.Analyzer;
    47 
    48 import org.apache.lucene.store.SimpleFSDirectory;
    49 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    5047
    5148import java.util.Stack;
     
    193190    protected String file_id_ = null;
    194191
     192    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
     193
     194
    195195    /** pass in true if want to create a new index, false if want to use the existing one */
    196196    public Indexer (String doc_tag_level, File index_dir, boolean create)
     
    206206        reader.setFeature("http://xml.org/sax/features/validation", false);
    207207
    208         SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
    209 
    210         analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
    211 
    212         writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
    213                        
     208        analyzer_ = new GS2Analyzer(stop_words);
     209
     210        writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
    214211        // by default, will only index 10,000 words per document
    215212        // Can throw out_of_memory errors
     
    321318        //String node_id = atts.getValue("gs2:id");
    322319        //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
    323         //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
     320        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));
    324321       
    325322        current_doc_oid_ = atts.getValue("gs2:docOID");
    326323        print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
    327         current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
     324        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
    328325        }
    329326       
     
    362359        if (qName.equals(indexable_current_node_))
    363360            {
    364             current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
     361            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
    365362            // The byXX fields are used for sorting search results
    366363            // We don't want to do that for Text or AllFields fields
     
    368365            if (!qName.equals("TX") && !qName.equals("ZZ"))
    369366                {
    370                 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
     367                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
    371368                }
    372369           
     
    475472    {
    476473        debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
    477         debug("- Initial number of documents in index: " + writer_.numDocs());
     474        debug("- Initial number of documents in index: " + writer_.docCount());
    478475        writer_.deleteDocuments(new Term("docOID", doc_id));
    479         debug("- Final number of documents in index: " + writer_.numDocs());
     476        debug("- Final number of documents in index: " + writer_.docCount());
    480477    }
    481478
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r24725 r24731  
    4545import org.apache.lucene.search.IndexSearcher;
    4646import org.apache.lucene.search.Query;
    47 import org.apache.lucene.search.TermRangeFilter;
     47import org.apache.lucene.search.RangeFilter;
    4848import org.apache.lucene.search.Searcher;
    4949import org.apache.lucene.search.ScoreDoc;
    5050import org.apache.lucene.search.Sort;
    51 import org.apache.lucene.search.SortField;
    5251import org.apache.lucene.search.TopFieldDocs;
    5352
    54 import org.apache.lucene.store.Directory;
    55 import org.apache.lucene.store.FSDirectory;
    56 import org.apache.lucene.util.Version;
    57 
    58 public class GS2LuceneQuery extends SharedSoleneQuery
     53
     54public class GS2LuceneQuery
    5955{
    60     protected String full_indexdir="";
    61 
    62     protected Sort sorter=new Sort();
    63     protected Filter filter = null;
    64 
    65     protected static Version matchVersion = Version.LUCENE_24;
    66 
    67     protected QueryParser query_parser = null;
    68     protected QueryParser query_parser_no_stop_words = null;
    69     protected Searcher searcher = null;
    70     protected IndexReader reader = null;
    71 
     56
     57
     58    static private String TEXTFIELD = "TX";
     59
     60    // Use the standard set of English stop words by default
     61    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
     62
     63    private String full_indexdir="";
     64    private String default_conjunction_operator = "OR";
     65    private String fuzziness = null;
     66    private String sort_field = null;
     67    private Sort sorter=new Sort();
     68    private String filter_string = null;
     69    private Filter filter = null;
     70    private int start_results=1;
     71    private int end_results=Integer.MAX_VALUE;
     72
     73    private QueryParser query_parser = null;
     74    private QueryParser query_parser_no_stop_words = null;
     75    private Searcher searcher = null;
     76    private IndexReader reader = null;
     77
     78    static private PrintWriter utf8out = null;
     79
     80    static
     81    {
     82    try {
     83        OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8");
     84        utf8out = new PrintWriter(osw, true);
     85    }
     86        catch (UnsupportedEncodingException e) {
     87        System.out.println(e);
     88    }
     89    }
     90
     91   
    7292    public GS2LuceneQuery() {
    73     super();
    7493
    7594    // Create one query parser with the standard set of stop words, and one with none
    7695
    77     query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
    78         query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
     96    query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
     97        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    7998    }
    8099   
    81100   
    82101    public boolean initialise() {
    83 
    84     if (!super.initialise()) {
    85         return false;
    86     }
    87 
    88102
    89103        if (full_indexdir==null || full_indexdir.length()==-1){
     
    92106        return false;
    93107        }
    94 
    95108        try {
    96         Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
    97             searcher = new IndexSearcher(full_indexdir_dir,true);
     109            searcher = new IndexSearcher(full_indexdir);
    98110            reader = ((IndexSearcher) searcher).getIndexReader();
    99111       
     
    106118
    107119    }
    108 
    109     public void setIndexDir(String full_indexdir) {
    110     this.full_indexdir = full_indexdir;
    111     }
    112 
    113     public void setSortField(String sort_field) {
    114     super.setSortField(sort_field);
    115 
    116     if (sort_field == null) {
    117         this.sorter = new Sort();
    118     } else {
    119         this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!?
    120     }
    121     }
    122 
    123     public void setFilterString(String filter_string) {
    124     super.setFilterString(filter_string);
    125     this.filter = parseFilterString(filter_string);
    126     }
    127 
    128     public Filter getFilter() {
    129     return this.filter;
    130     }
    131 
    132120   
    133121    public LuceneQueryResult runQuery(String query_string) {
     
    206194        if (end_results == Integer.MAX_VALUE) {
    207195        // Perform the query (filter and sorter may be null)
    208         TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    209         lucene_query_result.setTotalDocs(hits.totalHits);
     196        Hits hits = searcher.search(query, filter, sorter);
     197        lucene_query_result.setTotalDocs(hits.length());
    210198
    211199        // Output the matching documents
    212200        lucene_query_result.setStartResults(start_results);
    213         lucene_query_result.setEndResults(hits.totalHits);
    214 
    215         for (int i = start_results; i <= hits.totalHits; i++) {
    216             int lucene_doc_num = hits.scoreDocs[i - 1].doc;
    217             Document doc = reader.document(lucene_doc_num);
     201        lucene_query_result.setEndResults(hits.length());
     202
     203        for (int i = start_results; i <= hits.length(); i++) {
     204            int lucene_doc_num = hits.id(i - 1);
     205            Document doc = hits.doc(i - 1);
    218206            int doc_term_freq = 0;
    219207            Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
     
    222210            doc_term_freq = doc_term_freq_object.intValue();
    223211            }
    224             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
     212            lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
    225213        }
    226214        }
     
    268256
    269257    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
    270     super.setDefaultConjunctionOperator(default_conjunction_operator);
    271 
     258    this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
    272259    if (default_conjunction_operator.equals("AND")) {
    273260        query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
     
    278265    }
    279266    }
    280      
    281        
     267   
     268    public String getDefaultConjunctionOperator() {
     269    return this.default_conjunction_operator;
     270    }
     271   
     272    public void setEndResults(int end_results) {
     273    this.end_results = end_results;
     274    }
     275    public int getEndResults() {
     276    return this.end_results;
     277    }
     278       
     279    public void setFilterString(String filter_string) {
     280    this.filter_string = filter_string;
     281    this.filter = parseFilterString(filter_string);
     282    }
     283    public String getFilterString() {
     284    return this.filter_string ;
     285    }
     286   
     287    public Filter getFilter() {
     288    return this.filter;
     289    }
     290
     291    public void setIndexDir(String full_indexdir) {
     292    this.full_indexdir = full_indexdir;
     293    }
     294   
     295    public void setFuzziness(String fuzziness) {
     296    this.fuzziness = fuzziness;
     297    }
     298    public String getFuzziness() {
     299    return this.fuzziness;
     300    }
     301   
     302    public void setSortField(String sort_field) {
     303    this.sort_field = sort_field;
     304    if (sort_field == null) {
     305        this.sorter = new Sort();
     306    } else {
     307        this.sorter = new Sort(sort_field);
     308    }
     309    }
     310    public String getSortField() {
     311    return this.sort_field;
     312    }
     313       
     314    public void setStartResults(int start_results) {
     315    if (start_results < 1) {
     316        start_results = 1;
     317    }
     318    this.start_results = start_results;
     319    }
     320    public int getStartResults() {
     321    return this.start_results;
     322    }
     323       
    282324    public void cleanUp() {
    283     super.cleanUp();
    284325    try {
    285326        if (searcher != null) {
     
    291332    }
    292333
    293 
    294     protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
     334    private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    295335    throws java.io.IOException, org.apache.lucene.queryParser.ParseException
    296336    {
     
    394434    }
    395435
    396     protected Filter parseFilterString(String filter_string)
     436    private Filter parseFilterString(String filter_string)
    397437    {
    398438    Filter result = null;
     
    405445        String upper_term = matcher.group(4);
    406446        boolean include_upper = matcher.group(5).equals("]");
    407         result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
     447        result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
    408448    }
    409449    else {
     
    412452    return result;
    413453    }
    414    
    415 
     454
     455
     456    protected void finalize() throws Throwable
     457    {
     458    try {
     459        utf8out.flush();
     460    } finally {
     461        super.finalize();
     462    }
     463    }
     464
     465   
    416466    /** command line program and auxiliary methods */
    417467
    418468    // Fairly self-explanatory I should hope
    419     static protected boolean query_result_caching_enabled = false;
     469    static private boolean query_result_caching_enabled = false;
    420470
    421471
    422472    static public void main (String args[])
    423473    {
     474
     475
    424476    if (args.length == 0) {
    425477        System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");
     
    514566    }
    515567
    516     protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
     568    private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
    517569    throws IOException
    518570    {
     
    602654    }
    603655   
    604     protected static String fileSafe(String text)
     656    private static String fileSafe(String text)
    605657    {
    606658    StringBuffer file_safe_text = new StringBuffer();
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r24725 r24731  
    2929import java.util.Vector;
    3030
    31 /** Opportunity to fine tune QueryResult for lucene search
     31/** a QueryResult class for a lucene search
    3232 *
    3333 */
    34 
    35 public class LuceneQueryResult extends SharedSoleneQueryResult {
    36    
    37     // Currently no fine tuning -- rely on underlying shared Solr/Lucene base class
     34public class LuceneQueryResult {
     35   
     36    public static final int NO_ERROR = 0;
     37    public static final int PARSE_ERROR = 1;
     38    public static final int TOO_MANY_CLAUSES_ERROR = 2;
     39    public static final int IO_ERROR = 3;
     40    public static final int OTHER_ERROR = 4;
     41   
     42    /** the list of DocInfo */
     43    protected Vector docs_=null;
     44    /** the list of TermInfo */
     45    protected Vector terms_=null;
     46    /** the list of stopwords found in the query */
     47    protected Vector stopwords_ = null;
     48    /** the total number of docs found - not necessarily the size of docs_*/
     49    protected int total_num_docs_=0;
     50    /** the start result number if we are retrieving only a portion of the results */
     51    protected int start_results_ = 0;
     52    /** the end result number if we are retrieving only a portion of the results */
     53    protected int end_results_ = 0;
     54    /** whether an error has occurred and what kind it is*/
     55    protected int error_ = NO_ERROR;
     56
    3857    LuceneQueryResult() {
    39     super();
    40     }
     58    docs_ = new Vector();
     59    terms_ = new Vector();
     60    stopwords_ = new Vector();
     61    }
     62   
     63    /** clear the info from the last query - should be called before setting any new docs/terms */
     64    public void clear() {
     65    total_num_docs_=0;
     66    docs_.clear();
     67    terms_.clear();
     68    stopwords_.clear();
     69    error_ = NO_ERROR;
     70    }
     71
     72    /** returns the result as a String - useful for printing out results */
     73    public String toString() {
     74   
     75    String result = "";
     76    result += "docs (ranks): ";
     77    for (int i=0; i<docs_.size(); i++) {
     78        result += ((DocInfo)docs_.elementAt(i)).toString()+", ";
     79    }
     80    result += "\nterms: ";
     81    for (int i=0; i<terms_.size(); i++) {
     82        result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
     83    }
     84    result += "\nactual number of docs found = "+total_num_docs_;
     85   
     86    return result;
     87    }
     88    /** a shorter representation - just terms and total docs - not the
     89    individual docnums and ranks */
     90    public String toShortString() {
     91    String result = "";
     92    result += "\nterms: ";
     93    for (int i=0; i<terms_.size(); i++) {
     94        result += ((TermInfo)terms_.elementAt(i)).toString()+", ";
     95    }
     96    result += "\nactual number of docs found = "+total_num_docs_;
     97    return result;
     98    }
     99   
     100    public void setTotalDocs(int num) {
     101    total_num_docs_=num;
     102    }
     103   
     104    public void setStartResults(int start) {
     105    start_results_ = start;
     106    }
     107
     108    public void setEndResults(int end) {
     109    end_results_ = end;
     110    }
     111
     112    public void addDoc(String id, float rank, int termfreq)
     113    {
     114    docs_.add(new DocInfo(id, rank, termfreq));
     115    }
     116   
     117    public void addTerm(String term, String field, int match, int freq) {
     118    TermInfo ti = new TermInfo();
     119    ti.term_=term;
     120    ti.field_=field;
     121    ti.match_docs_=match;
     122    ti.term_freq_=freq;
     123    terms_.add(ti);
     124    }
     125    public void addStopWord(String stopword) {
     126    stopwords_.add(stopword);
     127    }
     128    public Vector getDocs() {
     129    return docs_;
     130    }
     131   
     132    public int getError() {
     133    return error_;
     134    }
     135   
     136    public String getErrorString() {
     137    if (error_ == PARSE_ERROR) {
     138        return "PARSE_EXCEPTION";
     139    }
     140    if (error_ == TOO_MANY_CLAUSES_ERROR) {
     141        return "TOO_MANY_CLAUSES";
     142    }
     143    if (error_ == IO_ERROR) {
     144        return "IO_ERROR";
     145    }
     146    if (error_ == NO_ERROR) {
     147        return "NO_ERROR";
     148    }
     149    return "UNKNOWN";
     150    }
     151
     152    public Vector getTerms() {
     153    return terms_;
     154    }
     155   
     156    public Vector getStopWords() {
     157    return stopwords_;
     158    }
     159    public int getTotalDocs() {
     160    return total_num_docs_;
     161    }
     162   
     163    public void setError(int error) {
     164    error_ = error;
     165    }
     166   
     167    public String getXMLString() {
     168    StringBuffer buffer = new StringBuffer();
     169
     170    // terms
     171    buffer.append("<QueryTermsInfo num=\"" + terms_.size() + "\"/>\n");
     172    for (int i=0; i<terms_.size(); i++) {
     173        buffer.append(((TermInfo)terms_.elementAt(i)).toXMLString()+"\n");
     174    }
     175
     176    // stopwords
     177    for (int i=0; i<stopwords_.size(); i++) {
     178        buffer.append("<StopWord value=\"" + (String)stopwords_.elementAt(i)+"\" />\n");
     179    }
     180   
     181    // results
     182    buffer.append("<MatchingDocsInfo num=\"" + total_num_docs_ + "\"/>\n");
     183    buffer.append("<StartResults num=\"" + start_results_ + "\"/>\n");
     184    buffer.append("<EndResults num=\"" + end_results_ + "\"/>\n");
     185   
     186    for (int i=0; i< docs_.size(); i++) {
     187        buffer.append(((DocInfo)docs_.elementAt(i)).toXMLString()+"\n");
     188    }
     189
     190    return buffer.toString();
     191    }
     192
     193 
     194    public class TermInfo {
     195   
     196    /** the term itself */
     197    public String term_=null;
     198    /** the field for which this term was queried */
     199    public String field_=null;
     200    /** the number of documents containing this term */
     201    public int match_docs_=0;
     202    /** overall term freq for this term */
     203    public int term_freq_=0;
     204   
     205    public TermInfo() {
     206    }
     207   
     208    /** output the class as a string */
     209    public String toString() {
     210        String result="";
     211        result +="<"+field_+">\""+term_+" docs("+match_docs_;
     212        result +=")freq("+term_freq_+")";
     213        return result;
     214    }
     215
     216    /** output as an XML element */
     217    public String toXMLString() {
     218        return "<Term value=\"" + xmlSafe(term_) + "\" field=\"" + field_ + "\" freq=\"" + term_freq_ + "\" />";
     219    }
     220    }
     221
     222
     223    public class DocInfo
     224    {
     225    public String id_ = "";
     226    public float rank_ = 0;
     227    public int termfreq_ = 0;
     228
     229    public DocInfo (String id, float rank, int termfreq)
     230    {
     231        id_ = id;
     232        rank_ = rank;
     233        termfreq_ = termfreq;
     234    }
     235
     236    public String toString()
     237    {
     238        return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")";
     239    }
     240
     241    public String toXMLString()
     242    {
     243        return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />";
     244    }
     245    }
     246
     247
     248    // where should this go???
     249    public static String xmlSafe(String text) {
     250    text = text.replaceAll("&","&amp;amp;");
     251    text = text.replaceAll("<","&amp;lt;");
     252    text = text.replaceAll(">","&amp;gt;");
     253    text = text.replaceAll("'","&amp;#039;");
     254    text = text.replaceAll("\\\"","&amp;quot;");
     255    return text;
     256    }
     257 
    41258}
Note: See TracChangeset for help on using the changeset viewer.