Ignore:
Timestamp:
2013-05-16T15:33:15+12:00 (11 years ago)
Author:
kjdon
Message:

Sort fields are now separate from index fields. Index fields will be like <TI index=1> and sort fields will be like <byTI index=1 tokenize=0>

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneIndexer.java

    r24726 r27359  
    188188    String current_doc_oid_ = "";
    189189    String indexable_current_node_ = "";
     190      boolean tokenize = true;
    190191    String current_contents_ = "";
    191192
     
    330331        if (isIndexable(atts)) {
    331332        indexable_current_node_ = qName;   
     333        if (isTokenized(atts)) {
     334          tokenize = true;
     335        } else {
     336          tokenize = false;
     337        }
    332338        }
    333339        else {
     
    335341        }
    336342    }
     343
     344      public static boolean isTokenized(Attributes atts) {
     345    boolean tokenize = true;
     346    String tok = atts.getValue("tokenize");
     347    if (tok!=null && tok.equals("0")) {
     348      tokenize = false;
     349    }
     350    return tokenize;
     351      }
    337352
    338353    public static boolean isIndexable(Attributes atts)
     
    362377        if (qName.equals(indexable_current_node_))
    363378            {
     379              if (tokenize) {
    364380            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    365             // The byXX fields are used for sorting search results
    366             // We don't want to do that for Text or AllFields fields
    367             // They need to be untokenised for sorting
    368             if (!qName.equals("TX") && !qName.equals("ZZ"))
    369                 {
    370                 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    371                 }
     381              } else {
     382            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
     383              }
     384            // // The byXX fields are used for sorting search results
     385            // // We don't want to do that for Text or AllFields fields
     386            // // They need to be untokenised for sorting
     387            // if (!qName.equals("TX") && !qName.equals("ZZ"))
     388            //     {
     389            //  current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
     390            //     }
    372391           
    373392            current_contents_ = "";
Note: See TracChangeset for help on using the changeset viewer.