Changeset 30050 for gs3-extensions/solr/trunk/src/conf
- Timestamp: 2015-07-21T05:35:34+12:00 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/solr/trunk/src/conf/schema.xml.in
r29932 r30050 478 478 <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> 479 479 <analyzer type="index"> 480 <charFilter class="solr.HTMLStripCharFilterFactory"/> 480 481 <tokenizer class="solr.StandardTokenizerFactory"/> 481 482 <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> … … 500 501 <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> 501 502 <analyzer type="index"> 503 <charFilter class="solr.HTMLStripCharFilterFactory"/> 502 504 <tokenizer class="solr.StandardTokenizerFactory"/> 503 505 <!-- in this example, we will only use synonyms at query time … … 548 550 <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> 549 551 <analyzer type="index"> 552 <charFilter class="solr.HTMLStripCharFilterFactory"/> 550 553 <tokenizer class="solr.WhitespaceTokenizerFactory"/> 551 554 <!-- in this example, we will only use synonyms at query time … … 583 586 <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> 584 587 <analyzer> 588 <charFilter class="solr.HTMLStripCharFilterFactory"/> 585 589 <tokenizer class="solr.WhitespaceTokenizerFactory"/> 586 590 <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> … … 762 766 <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> 763 767 <analyzer> 768 <charFilter class="solr.HTMLStripCharFilterFactory"/> 764 769 <tokenizer class="solr.StandardTokenizerFactory"/> 765 770 <!-- for any non-arabic --> … … 775 780 <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> 776 781 <analyzer> 782 <charFilter class="solr.HTMLStripCharFilterFactory"/> 777 783 <tokenizer class="solr.StandardTokenizerFactory"/> 778 784 <filter class="solr.LowerCaseFilterFactory"/> … … 785 791 <fieldType name="text_ca" class="solr.TextField" 
positionIncrementGap="100"> 786 792 <analyzer> 793 <charFilter class="solr.HTMLStripCharFilterFactory"/> 787 794 <tokenizer class="solr.StandardTokenizerFactory"/> 788 795 <!-- removes l', etc --> … … 797 804 <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> 798 805 <analyzer> 806 <charFilter class="solr.HTMLStripCharFilterFactory"/> 799 807 <tokenizer class="solr.StandardTokenizerFactory"/> 800 808 <!-- normalize width before bigram, as e.g. half-width dakuten combine --> … … 809 817 <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> 810 818 <analyzer> 819 <charFilter class="solr.HTMLStripCharFilterFactory"/> 811 820 <tokenizer class="solr.StandardTokenizerFactory"/> 812 821 <filter class="solr.SoraniNormalizationFilterFactory"/> … … 821 830 <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> 822 831 <analyzer> 832 <charFilter class="solr.HTMLStripCharFilterFactory"/> 823 833 <tokenizer class="solr.StandardTokenizerFactory"/> 824 834 <filter class="solr.LowerCaseFilterFactory"/> … … 831 841 <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> 832 842 <analyzer> 843 <charFilter class="solr.HTMLStripCharFilterFactory"/> 833 844 <tokenizer class="solr.StandardTokenizerFactory"/> 834 845 <filter class="solr.LowerCaseFilterFactory"/> … … 841 852 <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> 842 853 <analyzer> 854 <charFilter class="solr.HTMLStripCharFilterFactory"/> 843 855 <tokenizer class="solr.StandardTokenizerFactory"/> 844 856 <filter class="solr.LowerCaseFilterFactory"/> … … 854 866 <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> 855 867 <analyzer> 868 <charFilter class="solr.HTMLStripCharFilterFactory"/> 856 869 <tokenizer class="solr.StandardTokenizerFactory"/> 857 870 <!-- greek specific lowercase for sigma --> … … 865 878 <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> 
866 879 <analyzer> 880 <charFilter class="solr.HTMLStripCharFilterFactory"/> 867 881 <tokenizer class="solr.StandardTokenizerFactory"/> 868 882 <filter class="solr.LowerCaseFilterFactory"/> … … 876 890 <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> 877 891 <analyzer> 892 <charFilter class="solr.HTMLStripCharFilterFactory"/> 878 893 <tokenizer class="solr.StandardTokenizerFactory"/> 879 894 <filter class="solr.LowerCaseFilterFactory"/> … … 887 902 <analyzer> 888 903 <!-- for ZWNJ --> 904 <charFilter class="solr.HTMLStripCharFilterFactory"/> 889 905 <charFilter class="solr.PersianCharFilterFactory"/> 890 906 <tokenizer class="solr.StandardTokenizerFactory"/> … … 899 915 <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> 900 916 <analyzer> 917 <charFilter class="solr.HTMLStripCharFilterFactory"/> 901 918 <tokenizer class="solr.StandardTokenizerFactory"/> 902 919 <filter class="solr.LowerCaseFilterFactory"/> … … 910 927 <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> 911 928 <analyzer> 929 <charFilter class="solr.HTMLStripCharFilterFactory"/> 912 930 <tokenizer class="solr.StandardTokenizerFactory"/> 913 931 <!-- removes l', etc --> … … 924 942 <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> 925 943 <analyzer> 944 <charFilter class="solr.HTMLStripCharFilterFactory"/> 926 945 <tokenizer class="solr.StandardTokenizerFactory"/> 927 946 <!-- removes d', etc --> … … 938 957 <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> 939 958 <analyzer> 959 <charFilter class="solr.HTMLStripCharFilterFactory"/> 940 960 <tokenizer class="solr.StandardTokenizerFactory"/> 941 961 <filter class="solr.LowerCaseFilterFactory"/> … … 949 969 <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> 950 970 <analyzer> 971 <charFilter class="solr.HTMLStripCharFilterFactory"/> 951 972 <tokenizer class="solr.StandardTokenizerFactory"/> 952 
973 <filter class="solr.LowerCaseFilterFactory"/> … … 963 984 <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> 964 985 <analyzer> 986 <charFilter class="solr.HTMLStripCharFilterFactory"/> 965 987 <tokenizer class="solr.StandardTokenizerFactory"/> 966 988 <filter class="solr.LowerCaseFilterFactory"/> … … 974 996 <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> 975 997 <analyzer> 998 <charFilter class="solr.HTMLStripCharFilterFactory"/> 976 999 <tokenizer class="solr.StandardTokenizerFactory"/> 977 1000 <filter class="solr.LowerCaseFilterFactory"/> … … 984 1007 <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> 985 1008 <analyzer> 1009 <charFilter class="solr.HTMLStripCharFilterFactory"/> 986 1010 <tokenizer class="solr.StandardTokenizerFactory"/> 987 1011 <filter class="solr.LowerCaseFilterFactory"/> … … 995 1019 <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> 996 1020 <analyzer> 1021 <charFilter class="solr.HTMLStripCharFilterFactory"/> 997 1022 <tokenizer class="solr.StandardTokenizerFactory"/> 998 1023 <!-- removes l', etc --> … … 1041 1066 See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. 
1042 1067 --> 1068 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1043 1069 <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> 1044 1070 <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> … … 1061 1087 <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> 1062 1088 <analyzer> 1089 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1063 1090 <tokenizer class="solr.StandardTokenizerFactory"/> 1064 1091 <filter class="solr.LowerCaseFilterFactory"/> … … 1071 1098 <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> 1072 1099 <analyzer> 1100 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1073 1101 <tokenizer class="solr.StandardTokenizerFactory"/> 1074 1102 <filter class="solr.LowerCaseFilterFactory"/> … … 1082 1110 <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> 1083 1111 <analyzer> 1112 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1084 1113 <tokenizer class="solr.StandardTokenizerFactory"/> 1085 1114 <filter class="solr.LowerCaseFilterFactory"/> … … 1095 1124 <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> 1096 1125 <analyzer> 1126 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1097 1127 <tokenizer class="solr.StandardTokenizerFactory"/> 1098 1128 <filter class="solr.LowerCaseFilterFactory"/> … … 1108 1138 <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> 1109 1139 <analyzer> 1140 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1110 1141 <tokenizer class="solr.StandardTokenizerFactory"/> 1111 1142 <filter class="solr.LowerCaseFilterFactory"/> … … 1118 1149 <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> 1119 1150 <analyzer> 1151 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1120 1152 <tokenizer class="solr.StandardTokenizerFactory"/> 1121 1153 <filter class="solr.LowerCaseFilterFactory"/> … … 
1127 1159 <!-- Russian with morphology--> 1128 1160 <fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100"> 1129 <analyzer> 1130 <tokenizer class="solr.StandardTokenizerFactory"/> 1131 <filter class="solr.LowerCaseFilterFactory"/> 1132 <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> 1133 <filter class="org.apache.lucene.morphology.russian.RussianFilterFactory"/> 1134 </analyzer> 1161 <analyzer> 1162 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1163 <tokenizer class="solr.StandardTokenizerFactory"/> 1164 <filter class="solr.LowerCaseFilterFactory"/> 1165 <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> 1166 <filter class="org.apache.lucene.morphology.russian.RussianFilterFactory"/> 1167 </analyzer> 1135 1168 </fieldType> 1136 1169 … … 1138 1171 <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> 1139 1172 <analyzer> 1173 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1140 1174 <tokenizer class="solr.StandardTokenizerFactory"/> 1141 1175 <filter class="solr.LowerCaseFilterFactory"/> … … 1149 1183 <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> 1150 1184 <analyzer> 1185 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1151 1186 <tokenizer class="solr.StandardTokenizerFactory"/> 1152 1187 <filter class="solr.LowerCaseFilterFactory"/> … … 1159 1194 <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> 1160 1195 <analyzer> 1196 <charFilter class="solr.HTMLStripCharFilterFactory"/> 1161 1197 <tokenizer class="solr.StandardTokenizerFactory"/> 1162 1198 <filter class="solr.TurkishLowerCaseFilterFactory"/>
Note: See TracChangeset for help on using the changeset viewer.