[24446] | 1 | <?xml version="1.0" encoding="UTF-8" ?>
|
---|
| 2 | <!--
|
---|
| 3 | Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
| 4 | contributor license agreements. See the NOTICE file distributed with
|
---|
| 5 | this work for additional information regarding copyright ownership.
|
---|
| 6 | The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
| 7 | (the "License"); you may not use this file except in compliance with
|
---|
| 8 | the License. You may obtain a copy of the License at
|
---|
| 9 |
|
---|
| 10 | http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 11 |
|
---|
| 12 | Unless required by applicable law or agreed to in writing, software
|
---|
| 13 | distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
| 15 | See the License for the specific language governing permissions and
|
---|
| 16 | limitations under the License.
|
---|
| 17 | -->
|
---|
| 18 |
|
---|
| 19 | <!--
|
---|
| 20 | This is the Solr schema file. This file should be named "schema.xml" and
|
---|
| 21 | should be in the conf directory under the solr home
|
---|
| 22 | (i.e. ./solr/conf/schema.xml by default)
|
---|
| 23 | or located where the classloader for the Solr webapp can find it.
|
---|
| 24 |
|
---|
| 25 | This example schema is the recommended starting point for users.
|
---|
| 26 | It should be kept correct and concise, usable out-of-the-box.
|
---|
| 27 |
|
---|
| 28 | For more information on how to customize this file, please see
|
---|
| 29 | http://wiki.apache.org/solr/SchemaXml
|
---|
| 30 |
|
---|
| 31 | PERFORMANCE NOTE: this schema includes many optional features and should not
|
---|
| 32 | be used for benchmarking. To improve performance one could
|
---|
| 33 | - set stored="false" for all fields possible (esp large fields) when you
|
---|
| 34 | only need to search on the field but don't need to return the original
|
---|
| 35 | value.
|
---|
| 36 | - set indexed="false" if you don't need to search on the field, but only
|
---|
| 37 | return the field as a result of searching on other indexed fields.
|
---|
| 38 | - remove all unneeded copyField statements
|
---|
| 39 | - for best index size and searching performance, set indexed="false"
|
---|
| 40 | for all general text fields, use copyField to copy them to the
|
---|
| 41 | catchall "text" field, and use that for searching.
|
---|
| 42 | - For maximum indexing performance, use the StreamingUpdateSolrServer
|
---|
| 43 | java client.
|
---|
| 44 | - Remember to run the JVM in server mode, and use a higher logging level
|
---|
| 45 | that avoids logging every request
|
---|
| 46 | -->
|
---|
| 47 |
|
---|
| 48 | <schema name="example" version="1.4">
|
---|
| 49 | <!-- attribute "name" is the name of this schema and is only used for display purposes.
|
---|
| 50 | Applications should change this to reflect the nature of the search collection.
|
---|
| 51 | version="1.4" is Solr's version number for the schema syntax and semantics. It should
|
---|
| 52 | not normally be changed by applications.
|
---|
| 53 | 1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
---|
| 54 | 1.1: multiValued attribute introduced, false by default
|
---|
| 55 | 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
|
---|
| 56 | 1.3: removed optional field compress feature
|
---|
| 57 | 1.4: default auto-phrase (QueryParser feature) to off
|
---|
| 58 | -->
|
---|
| 59 |
|
---|
| 60 | <types>
|
---|
| 61 | <!-- field type definitions. The "name" attribute is
|
---|
| 62 | just a label to be used by field definitions. The "class"
|
---|
| 63 | attribute and any other attributes determine the real
|
---|
| 64 | behavior of the fieldType.
|
---|
| 65 | Class names starting with "solr" refer to java classes in the
|
---|
| 66 | org.apache.solr packages (e.g. org.apache.solr.schema, org.apache.solr.analysis).
|
---|
| 67 | -->
|
---|
| 68 |
|
---|
| 69 | <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
---|
| 70 | <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 71 |
|
---|
| 72 | <!-- boolean type: "true" or "false" -->
|
---|
| 73 | <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 74 | <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
---|
| 75 | <fieldType name="binary" class="solr.BinaryField"/>
|
---|
| 76 |
|
---|
| 77 | <!-- The optional sortMissingLast and sortMissingFirst attributes are
|
---|
| 78 | currently supported on types that are sorted internally as strings.
|
---|
| 79 | This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
---|
| 80 | - If sortMissingLast="true", then a sort on this field will cause documents
|
---|
| 81 | without the field to come after documents with the field,
|
---|
| 82 | regardless of the requested sort order (asc or desc).
|
---|
| 83 | - If sortMissingFirst="true", then a sort on this field will cause documents
|
---|
| 84 | without the field to come before documents with the field,
|
---|
| 85 | regardless of the requested sort order.
|
---|
| 86 | - If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
---|
| 87 | then default lucene sorting will be used which places docs without the
|
---|
| 88 | field first in an ascending sort and last in a descending sort.
|
---|
| 89 | -->
|
---|
| 90 |
|
---|
| 91 | <!--
|
---|
| 92 | Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
---|
| 93 | -->
|
---|
| 94 | <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 95 | <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 96 | <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 97 | <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 98 |
|
---|
| 99 | <!--
|
---|
| 100 | Numeric field types that index each value at various levels of precision
|
---|
| 101 | to accelerate range queries when the number of values between the range
|
---|
| 102 | endpoints is large. See the javadoc for NumericRangeQuery for internal
|
---|
| 103 | implementation details.
|
---|
| 104 |
|
---|
| 105 | Smaller precisionStep values (specified in bits) will lead to more tokens
|
---|
| 106 | indexed per value, slightly larger index size, and faster range queries.
|
---|
| 107 | A precisionStep of 0 disables indexing at different precision levels.
|
---|
| 108 | -->
|
---|
| 109 | <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 110 | <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 111 | <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 112 | <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
| 113 |
|
---|
| 114 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
---|
| 115 | is a more restricted form of the canonical representation of dateTime
|
---|
| 116 | http://www.w3.org/TR/xmlschema-2/#dateTime
|
---|
| 117 | The trailing "Z" designates UTC time and is mandatory.
|
---|
| 118 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
---|
| 119 | All other components are mandatory.
|
---|
| 120 |
|
---|
| 121 | Expressions can also be used to denote calculations that should be
|
---|
| 122 | performed relative to "NOW" to determine the value, ie...
|
---|
| 123 |
|
---|
| 124 | NOW/HOUR
|
---|
| 125 | ... Round to the start of the current hour
|
---|
| 126 | NOW-1DAY
|
---|
| 127 | ... Exactly 1 day prior to now
|
---|
| 128 | NOW/DAY+6MONTHS+3DAYS
|
---|
| 129 | ... 6 months and 3 days in the future from the start of
|
---|
| 130 | the current day
|
---|
| 131 |
|
---|
| 132 | Consult the DateField javadocs for more information.
|
---|
| 133 |
|
---|
| 134 | Note: For faster range queries, consider the tdate type
|
---|
| 135 | -->
|
---|
| 136 | <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 137 |
|
---|
| 138 | <!-- A Trie based date field for faster date range queries and date faceting. -->
|
---|
| 139 | <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
---|
| 140 |
|
---|
| 141 |
|
---|
| 142 | <!--
|
---|
| 143 | Note:
|
---|
| 144 | These should only be used for compatibility with existing indexes (created with older Solr versions)
|
---|
| 145 | or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
|
---|
| 146 |
|
---|
| 147 | Plain numeric field types that store and index the text
|
---|
| 148 | value verbatim (and hence don't support range queries, since the
|
---|
| 149 | lexicographic ordering isn't equal to the numeric ordering)
|
---|
| 150 | -->
|
---|
| 151 | <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
|
---|
| 152 | <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
|
---|
| 153 | <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
|
---|
| 154 | <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
|
---|
| 155 | <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 156 |
|
---|
| 157 |
|
---|
| 158 | <!--
|
---|
| 159 | Note:
|
---|
| 160 | These should only be used for compatibility with existing indexes (created with older Solr versions)
|
---|
| 161 | or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
|
---|
| 162 |
|
---|
| 163 | Numeric field types that manipulate the value into
|
---|
| 164 | a string value that isn't human-readable in its internal form,
|
---|
| 165 | but with a lexicographic ordering the same as the numeric ordering,
|
---|
| 166 | so that range queries work correctly.
|
---|
| 167 | -->
|
---|
| 168 | <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 169 | <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 170 | <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 171 | <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
|
---|
| 172 |
|
---|
| 173 |
|
---|
| 174 | <!-- The "RandomSortField" is not used to store or search any
|
---|
| 175 | data. You can declare fields of this type in your schema
|
---|
| 176 | to generate pseudo-random orderings of your docs for sorting
|
---|
| 177 | purposes. The ordering is generated based on the field name
|
---|
| 178 | and the version of the index. As long as the index version
|
---|
| 179 | remains unchanged, and the same field name is reused,
|
---|
| 180 | the ordering of the docs will be consistent.
|
---|
| 181 | If you want different pseudo-random orderings of documents,
|
---|
| 182 | for the same version of the index, use a dynamicField and
|
---|
| 183 | change the name
|
---|
| 184 | -->
|
---|
| 185 | <fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
---|
| 186 |
|
---|
| 187 | <!-- solr.TextField allows the specification of custom text analyzers
|
---|
| 188 | specified as a tokenizer and a list of token filters. Different
|
---|
| 189 | analyzers may be specified for indexing and querying.
|
---|
| 190 |
|
---|
| 191 | The optional positionIncrementGap puts space between multiple fields of
|
---|
| 192 | this type on the same document, with the purpose of preventing false phrase
|
---|
| 193 | matching across fields.
|
---|
| 194 |
|
---|
| 195 | For more info on customizing your analyzer chain, please see
|
---|
| 196 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
---|
| 197 | -->
|
---|
| 198 |
|
---|
| 199 | <!-- One can also specify an existing Analyzer class that has a
|
---|
| 200 | default constructor via the class attribute on the analyzer element
|
---|
| 201 | <fieldType name="text_greek" class="solr.TextField">
|
---|
| 202 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
---|
| 203 | </fieldType>
|
---|
| 204 | -->
|
---|
| 205 |
|
---|
| 206 | <!-- A text field that only splits on whitespace for exact matching of words -->
|
---|
| 207 | <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
---|
| 208 | <analyzer>
|
---|
| 209 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 210 | </analyzer>
|
---|
| 211 | </fieldType>
|
---|
| 212 |
|
---|
| 213 | <!-- A general text field that has reasonable, generic
|
---|
| 214 | cross-language defaults: it tokenizes with StandardTokenizer,
|
---|
| 215 | removes stop words from case-insensitive "stopwords.txt"
|
---|
| 216 | (empty by default), and down cases. At query time only, it
|
---|
| 217 | also applies synonyms. -->
|
---|
| 218 | <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
---|
| 219 | <analyzer type="index">
|
---|
| 220 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 221 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
| 222 | <!-- in this example, we will only use synonyms at query time
|
---|
| 223 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 224 | -->
|
---|
| 225 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 226 | </analyzer>
|
---|
| 227 | <analyzer type="query">
|
---|
| 228 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 229 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
| 230 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 231 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 232 | </analyzer>
|
---|
| 233 | </fieldType>
|
---|
| 234 |
|
---|
| 235 | <!-- A text field with defaults appropriate for English: it
|
---|
| 236 | tokenizes with StandardTokenizer, removes English stop words
|
---|
| 237 | (stopwords_en.txt), down cases, applies EnglishPossessiveFilter, protects words from protwords.txt, and
|
---|
| 238 | finally applies Porter's stemming. The query time analyzer
|
---|
| 239 | also applies synonyms from synonyms.txt. -->
|
---|
| 240 | <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
---|
| 241 | <analyzer type="index">
|
---|
| 242 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 243 | <!-- in this example, we will only use synonyms at query time
|
---|
| 244 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 245 | -->
|
---|
| 246 | <!-- Case insensitive stop word removal.
|
---|
| 247 | add enablePositionIncrements=true in both the index and query
|
---|
| 248 | analyzers to leave a 'gap' for more accurate phrase queries.
|
---|
| 249 | -->
|
---|
| 250 | <filter class="solr.StopFilterFactory"
|
---|
| 251 | ignoreCase="true"
|
---|
| 252 | words="stopwords_en.txt"
|
---|
| 253 | enablePositionIncrements="true"
|
---|
| 254 | />
|
---|
| 255 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 256 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
| 257 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 258 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
| 259 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 260 | -->
|
---|
| 261 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 262 | </analyzer>
|
---|
| 263 | <analyzer type="query">
|
---|
| 264 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 265 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 266 | <filter class="solr.StopFilterFactory"
|
---|
| 267 | ignoreCase="true"
|
---|
| 268 | words="stopwords_en.txt"
|
---|
| 269 | enablePositionIncrements="true"
|
---|
| 270 | />
|
---|
| 271 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 272 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
| 273 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 274 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
| 275 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 276 | -->
|
---|
| 277 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 278 | </analyzer>
|
---|
| 279 | </fieldType>
|
---|
| 280 |
|
---|
| 281 | <!-- A text field with defaults appropriate for English, plus
|
---|
| 282 | aggressive word-splitting and autophrase features enabled.
|
---|
| 283 | This field is just like text_en, except it adds
|
---|
| 284 | WordDelimiterFilter to enable splitting and matching of
|
---|
| 285 | words on case-change, alpha numeric boundaries, and
|
---|
| 286 | non-alphanumeric chars. This means certain compound word
|
---|
| 287 | cases will work, for example query "wi fi" will match
|
---|
| 288 | document "WiFi" or "wi-fi". However, other cases will still
|
---|
| 289 | not match, for example if the query is "wifi" and the
|
---|
| 290 | document is "wi fi" or if the query is "wi-fi" and the
|
---|
| 291 | document is "wifi".
|
---|
| 292 | -->
|
---|
| 293 | <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
| 294 | <analyzer type="index">
|
---|
| 295 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 296 | <!-- in this example, we will only use synonyms at query time
|
---|
| 297 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 298 | -->
|
---|
| 299 | <!-- Case insensitive stop word removal.
|
---|
| 300 | add enablePositionIncrements=true in both the index and query
|
---|
| 301 | analyzers to leave a 'gap' for more accurate phrase queries.
|
---|
| 302 | -->
|
---|
| 303 | <filter class="solr.StopFilterFactory"
|
---|
| 304 | ignoreCase="true"
|
---|
| 305 | words="stopwords_en.txt"
|
---|
| 306 | enablePositionIncrements="true"
|
---|
| 307 | />
|
---|
| 308 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
---|
| 309 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 310 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 311 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 312 | </analyzer>
|
---|
| 313 | <analyzer type="query">
|
---|
| 314 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 315 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 316 | <filter class="solr.StopFilterFactory"
|
---|
| 317 | ignoreCase="true"
|
---|
| 318 | words="stopwords_en.txt"
|
---|
| 319 | enablePositionIncrements="true"
|
---|
| 320 | />
|
---|
| 321 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
---|
| 322 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 323 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 324 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 325 | </analyzer>
|
---|
| 326 | </fieldType>
|
---|
| 327 |
|
---|
| 328 | <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
---|
| 329 | but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
---|
| 330 | <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
| 331 | <analyzer>
|
---|
| 332 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 333 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 334 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
|
---|
| 335 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
---|
| 336 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 337 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 338 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 339 | <!-- This filter can remove any duplicate tokens that appear at the same position, which is sometimes
|
---|
| 340 | possible with WordDelimiterFilter in conjunction with stemming. -->
|
---|
| 341 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
---|
| 342 | </analyzer>
|
---|
| 343 | </fieldType>
|
---|
| 344 |
|
---|
| 345 | <!-- Just like text_general except it reverses the characters of
|
---|
| 346 | each token, to enable more efficient leading wildcard queries. -->
|
---|
| 347 | <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
---|
| 348 | <analyzer type="index">
|
---|
| 349 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 350 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
| 351 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 352 | <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
---|
| 353 | maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
---|
| 354 | </analyzer>
|
---|
| 355 | <analyzer type="query">
|
---|
| 356 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 357 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 358 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
| 359 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 360 | </analyzer>
|
---|
| 361 | </fieldType>
|
---|
| 362 |
|
---|
| 363 | <!-- charFilter + WhitespaceTokenizer -->
|
---|
| 364 | <!--
|
---|
| 365 | <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
|
---|
| 366 | <analyzer>
|
---|
| 367 | <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
---|
| 368 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 369 | </analyzer>
|
---|
| 370 | </fieldType>
|
---|
| 371 | -->
|
---|
| 372 |
|
---|
| 373 | <!-- This is an example of using the KeywordTokenizer along
|
---|
| 374 | with various TokenFilterFactories to produce a sortable field
|
---|
| 375 | that does not include some properties of the source text
|
---|
| 376 | -->
|
---|
| 377 | <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
---|
| 378 | <analyzer>
|
---|
| 379 | <!-- KeywordTokenizer does no actual tokenizing, so the entire
|
---|
| 380 | input string is preserved as a single token
|
---|
| 381 | -->
|
---|
| 382 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
| 383 | <!-- The LowerCase TokenFilter does what you expect, which can be
|
---|
| 384 | useful when you want your sorting to be case insensitive
|
---|
| 385 | -->
|
---|
| 386 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
| 387 | <!-- The TrimFilter removes any leading or trailing whitespace -->
|
---|
| 388 | <filter class="solr.TrimFilterFactory" />
|
---|
| 389 | <!-- The PatternReplaceFilter gives you the flexibility to use
|
---|
| 390 | Java regular expressions to replace any sequence of characters
|
---|
| 391 | matching a pattern with an arbitrary replacement string,
|
---|
| 392 | which may include back references to portions of the original
|
---|
| 393 | string matched by the pattern.
|
---|
| 394 | As configured below, every character other than a-z is replaced with the empty string (the token is already lowercased by this point).
|
---|
| 395 | See the Java Regular Expression documentation for more
|
---|
| 396 | information on pattern and replacement string syntax.
|
---|
| 397 |
|
---|
| 398 | http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
---|
| 399 | -->
|
---|
| 400 | <filter class="solr.PatternReplaceFilterFactory"
|
---|
| 401 | pattern="([^a-z])" replacement="" replace="all"
|
---|
| 402 | />
|
---|
| 403 | </analyzer>
|
---|
| 404 | </fieldType>
|
---|
| 405 | <!-- Phonetic matching: StandardTokenizer output is passed through DoubleMetaphoneFilterFactory. With inject="false" the phonetic codes presumably replace the original tokens instead of being added alongside them (confirm against the Solr filter documentation). Declared indexed but not stored. -->
|
---|
| 406 | <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
---|
| 407 | <analyzer>
|
---|
| 408 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 409 | <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
---|
| 410 | </analyzer>
|
---|
| 411 | </fieldType>
|
---|
| 412 | <!-- Whitespace-tokenized text whose tokens may carry a float payload after a delimiter, e.g. "foo|1.4". Declared indexed but not stored. -->
|
---|
| 413 | <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
---|
| 414 | <analyzer>
|
---|
| 415 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 416 | <!--
|
---|
| 417 | The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
---|
| 418 | a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
---|
| 419 | Attributes of the DelimitedPayloadTokenFilterFactory :
|
---|
| 420 | "delimiter" - a one character delimiter. Default is | (pipe)
|
---|
| 421 | "encoder" - how to encode the following value into a payload
|
---|
| 422 | float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
---|
| 423 | integer -> o.a.l.a.p.IntegerEncoder
|
---|
| 424 | identity -> o.a.l.a.p.IdentityEncoder
|
---|
| 425 | Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
---|
| 426 | -->
|
---|
| 427 | <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
---|
| 428 | </analyzer>
|
---|
| 429 | </fieldType>
|
---|
| 430 |
|
---|
| 431 | <!-- lowercases the entire field value, keeping it as a single token. -->
|
---|
| 432 | <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
---|
| 433 | <analyzer>
|
---|
| 434 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
| 435 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
| 436 | </analyzer>
|
---|
| 437 | </fieldType>
|
---|
| 438 | <!-- Text type for path-like values: the analyzer consists solely of PathHierarchyTokenizerFactory with no attributes set (presumably it emits one token per ancestor path; confirm against the tokenizer documentation). -->
|
---|
| 439 | <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
---|
| 440 | <analyzer>
|
---|
| 441 | <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
---|
| 442 | </analyzer>
|
---|
| 443 | </fieldType>
|
---|
| 444 |
|
---|
| 445 | <!-- Since fields of this type are by default not stored or indexed,
|
---|
| 446 | any data added to them will be ignored outright. -->
|
---|
| 447 | <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
---|
| 448 |
|
---|
| 449 | <!-- This point type indexes the coordinates as separate fields (subFields)
|
---|
| 450 | If subFieldType is defined, it references a type, and a dynamic field
|
---|
| 451 | definition is created matching *___<typename>. Alternately, if
|
---|
| 452 | subFieldSuffix is defined, that is used to create the subFields.
|
---|
| 453 | Example: if subFieldType="double", then the coordinates would be
|
---|
| 454 | indexed in fields myloc_0___double,myloc_1___double.
|
---|
| 455 | Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
---|
| 456 | in fields myloc_0_d,myloc_1_d
|
---|
| 457 | The subFields are an implementation detail of the fieldType, and end
|
---|
| 458 | users normally should not need to know about them.
|
---|
| 459 | -->
|
---|
| 460 | <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
---|
| 461 |
|
---|
| 462 | <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
---|
| 463 | <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
---|
| 464 |
|
---|
| 465 | <!--
|
---|
| 466 | A Geohash is a compact representation of a latitude/longitude pair in a single field.
|
---|
| 467 | See http://wiki.apache.org/solr/SpatialSearch
|
---|
| 468 | -->
|
---|
| 469 | <fieldType name="geohash" class="solr.GeoHashField"/>
|
---|
| 470 | </types>
|
---|
| 471 |
|
---|
| 472 |
|
---|
| 473 | <fields>
|
---|
| 474 | <!-- Valid attributes for fields:
|
---|
| 475 | name: mandatory - the name for the field
|
---|
| 476 | type: mandatory - the name of a previously defined type from the
|
---|
| 477 | <types> section
|
---|
| 478 | indexed: true if this field should be indexed (searchable or sortable)
|
---|
| 479 | stored: true if this field should be retrievable
|
---|
| 480 | multiValued: true if this field may contain multiple values per document
|
---|
| 481 | omitNorms: (expert) set to true to omit the norms associated with
|
---|
| 482 | this field (this disables length normalization and index-time
|
---|
| 483 | boosting for the field, and saves some memory). Only full-text
|
---|
| 484 | fields or fields that need an index-time boost need norms.
|
---|
| 485 | termVectors: [false] set to true to store the term vector for a
|
---|
| 486 | given field.
|
---|
| 487 | When using MoreLikeThis, fields used for similarity should be
|
---|
| 488 | stored for best performance.
|
---|
| 489 | termPositions: Store position information with the term vector.
|
---|
| 490 | This will increase storage costs.
|
---|
| 491 | termOffsets: Store offset information with the term vector. This
|
---|
| 492 | will increase storage costs.
|
---|
| 493 | default: a value that should be used if no value is specified
|
---|
| 494 | when adding a document.
|
---|
| 495 | -->
|
---|
| 496 |
|
---|
| 497 | <field name="docOID" type="string" indexed="true" stored="true" required="true" />
|
---|
| 498 |
|
---|
| 499 | <!-- ##GREENSTONE-FIELDS## -->
|
---|
| 500 |
|
---|
| 501 | <!--
|
---|
| 502 | <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
|
---|
| 503 | <field name="name" type="text_general" indexed="true" stored="true"/>
|
---|
| 504 | <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
---|
| 505 | <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
|
---|
| 506 | <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 507 | <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 508 | <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
---|
| 509 |
|
---|
| 510 | <field name="weight" type="float" indexed="true" stored="true"/>
|
---|
| 511 | <field name="price" type="float" indexed="true" stored="true"/>
|
---|
| 512 | <field name="popularity" type="int" indexed="true" stored="true" />
|
---|
| 513 | <field name="inStock" type="boolean" indexed="true" stored="true" />
|
---|
| 514 | -->
|
---|
| 515 |
|
---|
| 516 | <!--
|
---|
| 517 | The following store examples are used to demonstrate the various ways one might _CHOOSE_ to
|
---|
| 518 | implement spatial. It is highly unlikely that you would ever have ALL of these fields defined.
|
---|
| 519 | -->
|
---|
| 520 | <field name="store" type="location" indexed="true" stored="true"/>
|
---|
| 521 |
|
---|
| 522 | <!-- Common metadata fields, named specifically to match up with
|
---|
| 523 | SolrCell metadata when parsing rich documents such as Word, PDF.
|
---|
| 524 | Some fields are multiValued only because Tika currently may return
|
---|
| 525 | multiple values for them.
|
---|
| 526 | -->
|
---|
| 527 | <!--
|
---|
| 528 | <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 529 | <field name="subject" type="text_general" indexed="true" stored="true"/>
|
---|
| 530 | <field name="description" type="text_general" indexed="true" stored="true"/>
|
---|
| 531 | <field name="comments" type="text_general" indexed="true" stored="true"/>
|
---|
| 532 | <field name="author" type="text_general" indexed="true" stored="true"/>
|
---|
| 533 | <field name="keywords" type="text_general" indexed="true" stored="true"/>
|
---|
| 534 | <field name="category" type="text_general" indexed="true" stored="true"/>
|
---|
| 535 | <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 536 | <field name="last_modified" type="date" indexed="true" stored="true"/>
|
---|
| 537 | <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 538 | -->
|
---|
| 539 |
|
---|
| 540 |
|
---|
| 541 | <!-- catchall field, containing all other searchable text fields (implemented
|
---|
| 542 | via copyField further on in this schema) -->
|
---|
| 543 | <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
|
---|
| 544 |
|
---|
| 545 | <!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
---|
| 546 | leading wildcard queries. -->
|
---|
| 547 | <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
|
---|
| 548 |
|
---|
| 549 | <!-- non-tokenized version of manufacturer to make it easier to sort or group
|
---|
| 550 | results by manufacturer. Copied from "manu" via copyField (the example "manu" field and its copyField are commented out in this file) -->
|
---|
| 551 | <field name="manu_exact" type="string" indexed="true" stored="false"/>
|
---|
| 552 |
|
---|
| 553 | <field name="payloads" type="payloads" indexed="true" stored="true"/>
|
---|
| 554 |
|
---|
| 555 | <!-- Uncommenting the following will create a "timestamp" field using
|
---|
| 556 | a default value of "NOW" to indicate when each document was indexed.
|
---|
| 557 | -->
|
---|
| 558 | <!--
|
---|
| 559 | <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
---|
| 560 | -->
|
---|
| 561 |
|
---|
| 562 |
|
---|
| 563 | <!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
---|
| 564 | will be used if the name matches any of the patterns.
|
---|
| 565 | RESTRICTION: the glob-like pattern in the name attribute must have
|
---|
| 566 | a "*" only at the start or the end.
|
---|
| 567 | EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
---|
| 568 | Longer patterns will be matched first. If equal size patterns
|
---|
| 569 | both match, the first appearing in the schema will be used. -->
|
---|
| 570 | <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
---|
| 571 | <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
---|
| 572 | <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
---|
| 573 | <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
|
---|
| 574 | <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 575 | <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
---|
| 576 | <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
---|
| 577 | <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
---|
| 578 |
|
---|
| 579 | <!-- Type used to index the lat and lon components for the "location" FieldType -->
|
---|
| 580 | <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
---|
| 581 |
|
---|
| 582 | <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
---|
| 583 | <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
|
---|
| 584 |
|
---|
| 585 | <!-- some trie-coded dynamic fields for faster range queries -->
|
---|
| 586 | <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
---|
| 587 | <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
---|
| 588 | <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
---|
| 589 | <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
---|
| 590 | <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
---|
| 591 |
|
---|
| 592 | <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
|
---|
| 593 |
|
---|
| 594 | <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
---|
| 595 | <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 596 |
|
---|
| 597 | <dynamicField name="random_*" type="random" />
|
---|
[27801] | 598 | <!-- dynamic field for sort/facet fields, which are strings by default, i.e. not tokenised -->
|
---|
| 599 | <dynamicField name="by*" type="string" indexed="true" stored="false" multiValued="true" />
|
---|
[24446] | 600 | <!-- uncomment the following to ignore any fields that don't already match an existing
|
---|
| 601 | field name or dynamic field, rather than reporting them as an error.
|
---|
| 602 | Alternatively, change the type="ignored" to some other type (e.g. "text") if you want
|
---|
| 603 | unknown fields indexed and/or stored by default -->
|
---|
| 604 | <!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
---|
| 605 |
|
---|
| 606 | </fields>
|
---|
| 607 |
|
---|
| 608 | <!-- Field to use to determine and enforce document uniqueness.
|
---|
| 609 | Unless this field is marked with required="false", it will be a required field
|
---|
| 610 | -->
|
---|
| 611 | <uniqueKey>docOID</uniqueKey>
|
---|
| 612 |
|
---|
| 613 | <!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
---|
| 614 | <defaultSearchField>text</defaultSearchField>
|
---|
| 615 |
|
---|
| 616 | <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
---|
| 617 | <solrQueryParser defaultOperator="OR"/>
|
---|
| 618 |
|
---|
| 619 | <!-- copyField commands copy one field to another at the time a document
|
---|
| 620 | is added to the index. It's used either to index the same field differently,
|
---|
| 621 | or to add multiple fields to the same field for easier/faster searching. -->
|
---|
| 622 |
|
---|
| 623 | <!--
|
---|
| 624 | <copyField source="cat" dest="text"/>
|
---|
| 625 | <copyField source="name" dest="text"/>
|
---|
| 626 | <copyField source="manu" dest="text"/>
|
---|
| 627 | <copyField source="features" dest="text"/>
|
---|
| 628 | <copyField source="includes" dest="text"/>
|
---|
| 629 | <copyField source="manu" dest="manu_exact"/>
|
---|
| 630 | -->
|
---|
| 631 |
|
---|
| 632 | <!-- In the (commented-out) example above, multiple source fields are copied to the [text] field.
|
---|
| 633 | Another way to map multiple source fields to the same
|
---|
| 634 | destination field is to use the dynamic field syntax.
|
---|
| 635 | copyField also supports a maxChars to copy setting. -->
|
---|
| 636 |
|
---|
| 637 | <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
---|
| 638 |
|
---|
| 639 | <!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
---|
| 640 | <!-- <copyField source="name" dest="alphaNameSort"/> -->
|
---|
| 641 |
|
---|
| 642 |
|
---|
| 643 | <!-- Similarity is the scoring routine for each document vs. a query.
|
---|
| 644 | A custom similarity may be specified here, but the default is fine
|
---|
| 645 | for most applications. -->
|
---|
| 646 | <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
---|
| 647 | <!-- ... OR ...
|
---|
| 648 | Specify a SimilarityFactory class name implementation
|
---|
| 649 | allowing parameters to be used.
|
---|
| 650 | -->
|
---|
| 651 | <!--
|
---|
| 652 | <similarity class="com.example.solr.CustomSimilarityFactory">
|
---|
| 653 | <str name="paramkey">param value</str>
|
---|
| 654 | </similarity>
|
---|
| 655 | -->
|
---|
| 656 |
|
---|
| 657 |
|
---|
| 658 | </schema>
|
---|