Ignore:
Timestamp:
2015-07-21T05:35:34+12:00 (9 years ago)
Author:
Georgiy Litvinov
Message:

Solr repo modifications for Solr side highlighting and snippets

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/solr/trunk/src/conf/schema.xml.in

    r29932 r30050  
    478478    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    479479      <analyzer type="index">
     480    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    480481        <tokenizer class="solr.StandardTokenizerFactory"/>
    481482        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
     
    500501    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    501502      <analyzer type="index">
     503    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    502504        <tokenizer class="solr.StandardTokenizerFactory"/>
    503505        <!-- in this example, we will only use synonyms at query time
     
    548550    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
    549551      <analyzer type="index">
     552    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    550553        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    551554        <!-- in this example, we will only use synonyms at query time
     
    583586    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
    584587      <analyzer>
     588    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    585589        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    586590        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
     
    762766    <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
    763767      <analyzer>
     768    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    764769        <tokenizer class="solr.StandardTokenizerFactory"/>
    765770        <!-- for any non-arabic -->
     
    775780    <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
    776781      <analyzer>
     782    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    777783        <tokenizer class="solr.StandardTokenizerFactory"/>
    778784        <filter class="solr.LowerCaseFilterFactory"/>
     
    785791    <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
    786792      <analyzer>
     793    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    787794        <tokenizer class="solr.StandardTokenizerFactory"/>
    788795        <!-- removes l', etc -->
     
    797804    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
    798805      <analyzer>
     806    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    799807        <tokenizer class="solr.StandardTokenizerFactory"/>
    800808        <!-- normalize width before bigram, as e.g. half-width dakuten combine  -->
     
    809817    <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
    810818      <analyzer>
     819    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    811820        <tokenizer class="solr.StandardTokenizerFactory"/>
    812821        <filter class="solr.SoraniNormalizationFilterFactory"/>
     
    821830    <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
    822831      <analyzer>
     832    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    823833        <tokenizer class="solr.StandardTokenizerFactory"/>
    824834        <filter class="solr.LowerCaseFilterFactory"/>
     
    831841    <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
    832842      <analyzer>
     843    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    833844        <tokenizer class="solr.StandardTokenizerFactory"/>
    834845        <filter class="solr.LowerCaseFilterFactory"/>
     
    841852    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
    842853      <analyzer>
     854    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    843855        <tokenizer class="solr.StandardTokenizerFactory"/>
    844856        <filter class="solr.LowerCaseFilterFactory"/>
     
    854866    <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
    855867      <analyzer>
     868    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    856869        <tokenizer class="solr.StandardTokenizerFactory"/>
    857870        <!-- greek specific lowercase for sigma -->
     
    865878    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
    866879      <analyzer>
     880    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    867881        <tokenizer class="solr.StandardTokenizerFactory"/>
    868882        <filter class="solr.LowerCaseFilterFactory"/>
     
    876890    <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
    877891      <analyzer>
     892    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    878893        <tokenizer class="solr.StandardTokenizerFactory"/>
    879894        <filter class="solr.LowerCaseFilterFactory"/>
     
    887902      <analyzer>
    888903        <!-- for ZWNJ -->
     904    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    889905        <charFilter class="solr.PersianCharFilterFactory"/>
    890906        <tokenizer class="solr.StandardTokenizerFactory"/>
     
    899915    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
    900916      <analyzer>
     917    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    901918        <tokenizer class="solr.StandardTokenizerFactory"/>
    902919        <filter class="solr.LowerCaseFilterFactory"/>
     
    910927    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
    911928      <analyzer>
     929    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    912930        <tokenizer class="solr.StandardTokenizerFactory"/>
    913931        <!-- removes l', etc -->
     
    924942    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
    925943      <analyzer>
     944    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    926945        <tokenizer class="solr.StandardTokenizerFactory"/>
    927946        <!-- removes d', etc -->
     
    938957    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
    939958      <analyzer>
     959    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    940960        <tokenizer class="solr.StandardTokenizerFactory"/>
    941961        <filter class="solr.LowerCaseFilterFactory"/>
     
    949969    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
    950970      <analyzer>
     971    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    951972        <tokenizer class="solr.StandardTokenizerFactory"/>
    952973        <filter class="solr.LowerCaseFilterFactory"/>
     
    963984    <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
    964985      <analyzer>
     986    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    965987        <tokenizer class="solr.StandardTokenizerFactory"/>
    966988        <filter class="solr.LowerCaseFilterFactory"/>
     
    974996    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
    975997      <analyzer>
     998    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    976999        <tokenizer class="solr.StandardTokenizerFactory"/>
    9771000        <filter class="solr.LowerCaseFilterFactory"/>
     
    9841007    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
    9851008      <analyzer>
     1009    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    9861010        <tokenizer class="solr.StandardTokenizerFactory"/>
    9871011        <filter class="solr.LowerCaseFilterFactory"/>
     
    9951019    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
    9961020      <analyzer>
     1021    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    9971022        <tokenizer class="solr.StandardTokenizerFactory"/>
    9981023        <!-- removes l', etc -->
     
    10411066           See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
    10421067        -->
     1068    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10431069        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
    10441070        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
     
    10611087    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
    10621088      <analyzer>
     1089    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10631090        <tokenizer class="solr.StandardTokenizerFactory"/>
    10641091        <filter class="solr.LowerCaseFilterFactory"/>
     
    10711098    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
    10721099      <analyzer>
     1100    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10731101        <tokenizer class="solr.StandardTokenizerFactory"/>
    10741102        <filter class="solr.LowerCaseFilterFactory"/>
     
    10821110    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
    10831111      <analyzer>
     1112    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10841113        <tokenizer class="solr.StandardTokenizerFactory"/>
    10851114        <filter class="solr.LowerCaseFilterFactory"/>
     
    10951124    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
    10961125      <analyzer>
     1126    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10971127        <tokenizer class="solr.StandardTokenizerFactory"/>
    10981128        <filter class="solr.LowerCaseFilterFactory"/>
     
    11081138    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
    11091139      <analyzer>
     1140    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11101141        <tokenizer class="solr.StandardTokenizerFactory"/>
    11111142        <filter class="solr.LowerCaseFilterFactory"/>
     
    11181149    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
    11191150      <analyzer>
     1151    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11201152        <tokenizer class="solr.StandardTokenizerFactory"/>
    11211153        <filter class="solr.LowerCaseFilterFactory"/>
     
    11271159    <!-- Russian with morphology-->
    11281160    <fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100">
    1129           <analyzer>
    1130           <tokenizer class="solr.StandardTokenizerFactory"/>
    1131           <filter class="solr.LowerCaseFilterFactory"/>
    1132           <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
    1133           <filter class="org.apache.lucene.morphology.russian.RussianFilterFactory"/>
    1134           </analyzer>
     1161      <analyzer>
     1162    <charFilter class="solr.HTMLStripCharFilterFactory"/>
     1163        <tokenizer class="solr.StandardTokenizerFactory"/>
     1164        <filter class="solr.LowerCaseFilterFactory"/>
     1165        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
     1166        <filter class="org.apache.lucene.morphology.russian.RussianFilterFactory"/>
     1167      </analyzer>
    11351168    </fieldType>
    11361169 
     
    11381171    <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
    11391172      <analyzer>
     1173    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11401174        <tokenizer class="solr.StandardTokenizerFactory"/>
    11411175        <filter class="solr.LowerCaseFilterFactory"/>
     
    11491183    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
    11501184      <analyzer>
     1185    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11511186        <tokenizer class="solr.StandardTokenizerFactory"/>
    11521187        <filter class="solr.LowerCaseFilterFactory"/>
     
    11591194    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
    11601195      <analyzer>
     1196    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11611197        <tokenizer class="solr.StandardTokenizerFactory"/>
    11621198        <filter class="solr.TurkishLowerCaseFilterFactory"/>
Note: See TracChangeset for help on using the changeset viewer.