Ignore:
Timestamp:
2018-06-18T17:24:48+12:00 (6 years ago)
Author:
ak19
Message:

Rebuilt the GS3 Solr demo collection, as per Kathy's request, to add the oai-inf db to the prebuilt demo collection so that snapshots and releases will have the Solr demo working out of the box. The rebuild also updated schema.xml, which now includes attributes such as termVectors on <field> elements.

Location:
gs3-extensions/solr/trunk/src/collect/solr-jdbm-demo/etc
Files:
2 added
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/solr/trunk/src/collect/solr-jdbm-demo/etc/conf/schema.xml

    r32087 r32199  
    113113   <field name="docOID" type="string" indexed="true" stored="true" required="true" />
    114114
    115     <field name="ZZ" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
    116     <field name="TX" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
    117     <field name="TI" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
    118     <field name="SU" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
    119     <field name="ORG" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
     115    <field name="ZZ" type="text_en_splitting" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true" />
     116    <field name="TX" type="text_en_splitting" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true" />
     117    <field name="TI" type="text_en_splitting" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true" />
     118    <field name="SU" type="text_en_splitting" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true" />
     119    <field name="ORG" type="text_en_splitting" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true" />
    120120
    121121
     
    254254
    255255   <!-- dynamic field for sort/facet fields, which are strings by default. ie not tokenised -->
    256    <dynamicField name="by*" type="string" indexed="true" stored="false" multiValued="false" />
     256   <dynamicField name="by*" type="string" indexed="true" stored="false" multiValued="true" />
    257257
    258258   <!-- uncomment the following to ignore any fields that don't already match an existing
     
    482482    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    483483      <analyzer type="index">
     484    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    484485        <tokenizer class="solr.StandardTokenizerFactory"/>
    485486        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
     
    504505    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    505506      <analyzer type="index">
     507    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    506508        <tokenizer class="solr.StandardTokenizerFactory"/>
    507509        <!-- in this example, we will only use synonyms at query time
     
    552554    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
    553555      <analyzer type="index">
     556    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    554557        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    555558        <!-- in this example, we will only use synonyms at query time
     
    587590    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
    588591      <analyzer>
     592    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    589593        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    590594        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
     
    766770    <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
    767771      <analyzer>
     772    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    768773        <tokenizer class="solr.StandardTokenizerFactory"/>
    769774        <!-- for any non-arabic -->
     
    779784    <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
    780785      <analyzer>
     786    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    781787        <tokenizer class="solr.StandardTokenizerFactory"/>
    782788        <filter class="solr.LowerCaseFilterFactory"/>
     
    789795    <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
    790796      <analyzer>
     797    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    791798        <tokenizer class="solr.StandardTokenizerFactory"/>
    792799        <!-- removes l', etc -->
     
    801808    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
    802809      <analyzer>
     810    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    803811        <tokenizer class="solr.StandardTokenizerFactory"/>
    804812        <!-- normalize width before bigram, as e.g. half-width dakuten combine  -->
     
    813821    <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
    814822      <analyzer>
     823    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    815824        <tokenizer class="solr.StandardTokenizerFactory"/>
    816825        <filter class="solr.SoraniNormalizationFilterFactory"/>
     
    825834    <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
    826835      <analyzer>
     836    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    827837        <tokenizer class="solr.StandardTokenizerFactory"/>
    828838        <filter class="solr.LowerCaseFilterFactory"/>
     
    835845    <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
    836846      <analyzer>
     847    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    837848        <tokenizer class="solr.StandardTokenizerFactory"/>
    838849        <filter class="solr.LowerCaseFilterFactory"/>
     
    845856    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
    846857      <analyzer>
     858    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    847859        <tokenizer class="solr.StandardTokenizerFactory"/>
    848860        <filter class="solr.LowerCaseFilterFactory"/>
     
    858870    <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
    859871      <analyzer>
     872    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    860873        <tokenizer class="solr.StandardTokenizerFactory"/>
    861874        <!-- greek specific lowercase for sigma -->
     
    869882    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
    870883      <analyzer>
     884    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    871885        <tokenizer class="solr.StandardTokenizerFactory"/>
    872886        <filter class="solr.LowerCaseFilterFactory"/>
     
    880894    <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
    881895      <analyzer>
     896    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    882897        <tokenizer class="solr.StandardTokenizerFactory"/>
    883898        <filter class="solr.LowerCaseFilterFactory"/>
     
    891906      <analyzer>
    892907        <!-- for ZWNJ -->
     908    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    893909        <charFilter class="solr.PersianCharFilterFactory"/>
    894910        <tokenizer class="solr.StandardTokenizerFactory"/>
     
    903919    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
    904920      <analyzer>
     921    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    905922        <tokenizer class="solr.StandardTokenizerFactory"/>
    906923        <filter class="solr.LowerCaseFilterFactory"/>
     
    914931    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
    915932      <analyzer>
     933    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    916934        <tokenizer class="solr.StandardTokenizerFactory"/>
    917935        <!-- removes l', etc -->
     
    928946    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
    929947      <analyzer>
     948    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    930949        <tokenizer class="solr.StandardTokenizerFactory"/>
    931950        <!-- removes d', etc -->
     
    942961    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
    943962      <analyzer>
     963    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    944964        <tokenizer class="solr.StandardTokenizerFactory"/>
    945965        <filter class="solr.LowerCaseFilterFactory"/>
     
    953973    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
    954974      <analyzer>
     975    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    955976        <tokenizer class="solr.StandardTokenizerFactory"/>
    956977        <filter class="solr.LowerCaseFilterFactory"/>
     
    967988    <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
    968989      <analyzer>
     990    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    969991        <tokenizer class="solr.StandardTokenizerFactory"/>
    970992        <filter class="solr.LowerCaseFilterFactory"/>
     
    9781000    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
    9791001      <analyzer>
     1002    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    9801003        <tokenizer class="solr.StandardTokenizerFactory"/>
    9811004        <filter class="solr.LowerCaseFilterFactory"/>
     
    9881011    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
    9891012      <analyzer>
     1013    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    9901014        <tokenizer class="solr.StandardTokenizerFactory"/>
    9911015        <filter class="solr.LowerCaseFilterFactory"/>
     
    9991023    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
    10001024      <analyzer>
     1025    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10011026        <tokenizer class="solr.StandardTokenizerFactory"/>
    10021027        <!-- removes l', etc -->
     
    10451070           See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
    10461071        -->
     1072    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10471073        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
    10481074        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
     
    10651091    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
    10661092      <analyzer>
     1093    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10671094        <tokenizer class="solr.StandardTokenizerFactory"/>
    10681095        <filter class="solr.LowerCaseFilterFactory"/>
     
    10751102    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
    10761103      <analyzer>
     1104    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10771105        <tokenizer class="solr.StandardTokenizerFactory"/>
    10781106        <filter class="solr.LowerCaseFilterFactory"/>
     
    10861114    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
    10871115      <analyzer>
     1116    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    10881117        <tokenizer class="solr.StandardTokenizerFactory"/>
    10891118        <filter class="solr.LowerCaseFilterFactory"/>
     
    10991128    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
    11001129      <analyzer>
     1130    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11011131        <tokenizer class="solr.StandardTokenizerFactory"/>
    11021132        <filter class="solr.LowerCaseFilterFactory"/>
     
    11121142    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
    11131143      <analyzer>
     1144    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11141145        <tokenizer class="solr.StandardTokenizerFactory"/>
    11151146        <filter class="solr.LowerCaseFilterFactory"/>
     
    11221153    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
    11231154      <analyzer>
     1155    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11241156        <tokenizer class="solr.StandardTokenizerFactory"/>
    11251157        <filter class="solr.LowerCaseFilterFactory"/>
     
    11321164    <fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100">
    11331165      <analyzer>
    1134         <charFilter class="solr.HTMLStripCharFilterFactory"/>
     1166    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11351167        <tokenizer class="solr.StandardTokenizerFactory"/>
    11361168        <filter class="solr.LowerCaseFilterFactory"/>
     
    11391171      </analyzer>
    11401172    </fieldType>
     1173 
    11411174    <!-- Swedish -->
    11421175    <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
    11431176      <analyzer>
     1177    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11441178        <tokenizer class="solr.StandardTokenizerFactory"/>
    11451179        <filter class="solr.LowerCaseFilterFactory"/>
     
    11531187    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
    11541188      <analyzer>
     1189    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11551190        <tokenizer class="solr.StandardTokenizerFactory"/>
    11561191        <filter class="solr.LowerCaseFilterFactory"/>
     
    11631198    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
    11641199      <analyzer>
     1200    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    11651201        <tokenizer class="solr.StandardTokenizerFactory"/>
    11661202        <filter class="solr.TurkishLowerCaseFilterFactory"/>
Note: See TracChangeset for help on using the changeset viewer.