1 | <?xml version="1.0" encoding="UTF-8" ?>
|
---|
2 | <!--
|
---|
3 | Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
4 | contributor license agreements. See the NOTICE file distributed with
|
---|
5 | this work for additional information regarding copyright ownership.
|
---|
6 | The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
7 | (the "License"); you may not use this file except in compliance with
|
---|
8 | the License. You may obtain a copy of the License at
|
---|
9 |
|
---|
10 | http://www.apache.org/licenses/LICENSE-2.0
|
---|
11 |
|
---|
12 | Unless required by applicable law or agreed to in writing, software
|
---|
13 | distributed under the License is distributed on an "AS IS" BASIS,
|
---|
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
15 | See the License for the specific language governing permissions and
|
---|
16 | limitations under the License.
|
---|
17 | -->
|
---|
18 |
|
---|
19 | <!--
|
---|
20 | This is the Solr schema file. This file should be named "schema.xml" and
|
---|
21 | should be in the conf directory under the solr home
|
---|
22 | (i.e. ./solr/conf/schema.xml by default)
|
---|
23 | or located where the classloader for the Solr webapp can find it.
|
---|
24 |
|
---|
25 | This example schema is the recommended starting point for users.
|
---|
26 | It should be kept correct and concise, usable out-of-the-box.
|
---|
27 |
|
---|
28 | For more information on how to customize this file, please see
|
---|
29 | http://wiki.apache.org/solr/SchemaXml
|
---|
30 |
|
---|
31 | PERFORMANCE NOTE: this schema includes many optional features and should not
|
---|
32 | be used for benchmarking. To improve performance one could
|
---|
33 | - set stored="false" for all fields possible (esp large fields) when you
|
---|
34 | only need to search on the field but don't need to return the original
|
---|
35 | value.
|
---|
36 | - set indexed="false" if you don't need to search on the field, but only
|
---|
37 | return the field as a result of searching on other indexed fields.
|
---|
38 | - remove all unneeded copyField statements
|
---|
39 | - for best index size and searching performance, set "indexed" to false
|
---|
40 | for all general text fields, use copyField to copy them to the
|
---|
41 | catchall "text" field, and use that for searching.
|
---|
42 | - For maximum indexing performance, use the StreamingUpdateSolrServer
|
---|
43 | java client.
|
---|
44 | - Remember to run the JVM in server mode, and use a higher logging level
|
---|
45 | that avoids logging every request
|
---|
46 | -->
|
---|
47 |
|
---|
48 | <schema name="example" version="1.4">
|
---|
49 | <!-- attribute "name" is the name of this schema and is only used for display purposes.
|
---|
50 | Applications should change this to reflect the nature of the search collection.
|
---|
51 | version="1.4" is Solr's version number for the schema syntax and semantics. It should
|
---|
52 | not normally be changed by applications.
|
---|
53 | 1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
---|
54 | 1.1: multiValued attribute introduced, false by default
|
---|
55 | 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
|
---|
56 | 1.3: removed optional field compress feature
|
---|
57 | 1.4: default auto-phrase (QueryParser feature) to off
|
---|
58 | -->
|
---|
59 |
|
---|
60 | <types>
|
---|
61 | <!-- field type definitions. The "name" attribute is
|
---|
62 | just a label to be used by field definitions. The "class"
|
---|
63 | attribute and any other attributes determine the real
|
---|
64 | behavior of the fieldType.
|
---|
65 | Class names starting with "solr" refer to java classes in the
|
---|
66 | org.apache.solr.analysis package.
|
---|
67 | -->
|
---|
68 |
|
---|
69 | <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
---|
70 | <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
---|
71 |
|
---|
72 | <!-- boolean type: "true" or "false" -->
|
---|
73 | <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
---|
74 | <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings. -->
|
---|
75 | <fieldType name="binary" class="solr.BinaryField"/>
|
---|
76 |
|
---|
77 | <!-- The optional sortMissingLast and sortMissingFirst attributes are
|
---|
78 | currently supported on types that are sorted internally as strings.
|
---|
79 | This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
---|
80 | - If sortMissingLast="true", then a sort on this field will cause documents
|
---|
81 | without the field to come after documents with the field,
|
---|
82 | regardless of the requested sort order (asc or desc).
|
---|
83 | - If sortMissingFirst="true", then a sort on this field will cause documents
|
---|
84 | without the field to come before documents with the field,
|
---|
85 | regardless of the requested sort order.
|
---|
86 | - If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
---|
87 | then default lucene sorting will be used which places docs without the
|
---|
88 | field first in an ascending sort and last in a descending sort.
|
---|
89 | -->
|
---|
90 |
|
---|
91 | <!--
|
---|
92 | Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
---|
93 | -->
|
---|
94 | <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
95 | <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
96 | <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
97 | <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
---|
98 |
|
---|
99 | <!--
|
---|
100 | Numeric field types that index each value at various levels of precision
|
---|
101 | to accelerate range queries when the number of values between the range
|
---|
102 | endpoints is large. See the javadoc for NumericRangeQuery for internal
|
---|
103 | implementation details.
|
---|
104 |
|
---|
105 | Smaller precisionStep values (specified in bits) will lead to more tokens
|
---|
106 | indexed per value, slightly larger index size, and faster range queries.
|
---|
107 | A precisionStep of 0 disables indexing at different precision levels.
|
---|
108 | -->
|
---|
109 | <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
110 | <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
111 | <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
112 | <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
---|
113 |
|
---|
114 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
---|
115 | is a more restricted form of the canonical representation of dateTime
|
---|
116 | http://www.w3.org/TR/xmlschema-2/#dateTime
|
---|
117 | The trailing "Z" designates UTC time and is mandatory.
|
---|
118 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
---|
119 | All other components are mandatory.
|
---|
120 |
|
---|
121 | Expressions can also be used to denote calculations that should be
|
---|
122 | performed relative to "NOW" to determine the value, ie...
|
---|
123 |
|
---|
124 | NOW/HOUR
|
---|
125 | ... Round to the start of the current hour
|
---|
126 | NOW-1DAY
|
---|
127 | ... Exactly 1 day prior to now
|
---|
128 | NOW/DAY+6MONTHS+3DAYS
|
---|
129 | ... 6 months and 3 days in the future from the start of
|
---|
130 | the current day
|
---|
131 |
|
---|
132 | Consult the DateField javadocs for more information.
|
---|
133 |
|
---|
134 | Note: For faster range queries, consider the tdate type
|
---|
135 | -->
|
---|
136 | <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
---|
137 |
|
---|
138 | <!-- A Trie based date field for faster date range queries and date faceting. -->
|
---|
139 | <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
---|
140 |
|
---|
141 |
|
---|
142 | <!--
|
---|
143 | Note:
|
---|
144 | These should only be used for compatibility with existing indexes (created with older Solr versions)
|
---|
145 | or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
|
---|
146 |
|
---|
147 | Plain numeric field types that store and index the text
|
---|
148 | value verbatim (and hence don't support range queries, since the
|
---|
149 | lexicographic ordering isn't equal to the numeric ordering)
|
---|
150 | -->
|
---|
151 | <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
|
---|
152 | <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
|
---|
153 | <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
|
---|
154 | <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
|
---|
155 | <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
|
---|
156 |
|
---|
157 |
|
---|
158 | <!--
|
---|
159 | Note:
|
---|
160 | These should only be used for compatibility with existing indexes (created with older Solr versions)
|
---|
161 | or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
|
---|
162 |
|
---|
163 | Numeric field types that manipulate the value into
|
---|
164 | a string value that isn't human-readable in its internal form,
|
---|
165 | but with a lexicographic ordering the same as the numeric ordering,
|
---|
166 | so that range queries work correctly.
|
---|
167 | -->
|
---|
168 | <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
|
---|
169 | <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
|
---|
170 | <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
|
---|
171 | <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
|
---|
172 |
|
---|
173 |
|
---|
174 | <!-- The "RandomSortField" is not used to store or search any
|
---|
175 | data. You can declare fields of this type in your schema
|
---|
176 | to generate pseudo-random orderings of your docs for sorting
|
---|
177 | purposes. The ordering is generated based on the field name
|
---|
178 | and the version of the index. As long as the index version
|
---|
179 | remains unchanged, and the same field name is reused,
|
---|
180 | the ordering of the docs will be consistent.
|
---|
181 | If you want different pseudo-random orderings of documents,
|
---|
182 | for the same version of the index, use a dynamicField and
|
---|
183 | change the name
|
---|
184 | -->
|
---|
185 | <fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
---|
186 |
|
---|
187 | <!-- solr.TextField allows the specification of custom text analyzers
|
---|
188 | specified as a tokenizer and a list of token filters. Different
|
---|
189 | analyzers may be specified for indexing and querying.
|
---|
190 |
|
---|
191 | The optional positionIncrementGap puts space between multiple fields of
|
---|
192 | this type on the same document, with the purpose of preventing false phrase
|
---|
193 | matching across fields.
|
---|
194 |
|
---|
195 | For more info on customizing your analyzer chain, please see
|
---|
196 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
---|
197 | -->
|
---|
198 |
|
---|
199 | <!-- One can also specify an existing Analyzer class that has a
|
---|
200 | default constructor via the class attribute on the analyzer element
|
---|
201 | <fieldType name="text_greek" class="solr.TextField">
|
---|
202 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
---|
203 | </fieldType>
|
---|
204 | -->
|
---|
205 |
|
---|
206 | <!-- A text field that only splits on whitespace for exact matching of words -->
|
---|
207 | <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
---|
208 | <analyzer>
|
---|
209 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
210 | </analyzer>
|
---|
211 | </fieldType>
|
---|
212 |
|
---|
213 | <!-- A general text field that has reasonable, generic
|
---|
214 | cross-language defaults: it tokenizes with StandardTokenizer,
|
---|
215 | removes stop words from case-insensitive "stopwords.txt"
|
---|
216 | (empty by default), and down cases. At query time only, it
|
---|
217 | also applies synonyms. -->
|
---|
218 | <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
---|
219 | <analyzer type="index">
|
---|
220 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
221 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
222 | <!-- in this example, we will only use synonyms at query time
|
---|
223 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
224 | -->
|
---|
225 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
226 | </analyzer>
|
---|
227 | <analyzer type="query">
|
---|
228 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
229 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
230 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
231 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
232 | </analyzer>
|
---|
233 | </fieldType>
|
---|
234 |
|
---|
235 | <!-- A text field with defaults appropriate for English: it
|
---|
236 | tokenizes with StandardTokenizer, removes English stop words
|
---|
237 | (stopwords_en.txt), down cases, protects words from protwords.txt, and
|
---|
238 | finally applies Porter's stemming. The query time analyzer
|
---|
239 | also applies synonyms from synonyms.txt. -->
|
---|
240 | <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
---|
241 | <analyzer type="index">
|
---|
242 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
243 | <!-- in this example, we will only use synonyms at query time
|
---|
244 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
245 | -->
|
---|
246 | <!-- Case insensitive stop word removal.
|
---|
247 | add enablePositionIncrements=true in both the index and query
|
---|
248 | analyzers to leave a 'gap' for more accurate phrase queries.
|
---|
249 | -->
|
---|
250 | <filter class="solr.StopFilterFactory"
|
---|
251 | ignoreCase="true"
|
---|
252 | words="stopwords_en.txt"
|
---|
253 | enablePositionIncrements="true"
|
---|
254 | />
|
---|
255 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
256 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
257 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
258 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
259 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
260 | -->
|
---|
261 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
262 | </analyzer>
|
---|
263 | <analyzer type="query">
|
---|
264 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
265 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
266 | <filter class="solr.StopFilterFactory"
|
---|
267 | ignoreCase="true"
|
---|
268 | words="stopwords_en.txt"
|
---|
269 | enablePositionIncrements="true"
|
---|
270 | />
|
---|
271 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
272 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
273 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
274 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
275 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
276 | -->
|
---|
277 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
278 | </analyzer>
|
---|
279 | </fieldType>
|
---|
280 |
|
---|
281 | <!-- A text field with defaults appropriate for English, plus
|
---|
282 | aggressive word-splitting and autophrase features enabled.
|
---|
283 | This field is just like text_en, except it adds
|
---|
284 | WordDelimiterFilter to enable splitting and matching of
|
---|
285 | words on case-change, alpha numeric boundaries, and
|
---|
286 | non-alphanumeric chars. This means certain compound word
|
---|
287 | cases will work, for example query "wi fi" will match
|
---|
288 | document "WiFi" or "wi-fi". However, other cases will still
|
---|
289 | not match, for example if the query is "wifi" and the
|
---|
290 | document is "wi fi" or if the query is "wi-fi" and the
|
---|
291 | document is "wifi".
|
---|
292 | -->
|
---|
293 | <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
294 | <analyzer type="index">
|
---|
295 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
296 | <!-- in this example, we will only use synonyms at query time
|
---|
297 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
298 | -->
|
---|
299 | <!-- Case insensitive stop word removal.
|
---|
300 | add enablePositionIncrements=true in both the index and query
|
---|
301 | analyzers to leave a 'gap' for more accurate phrase queries.
|
---|
302 | -->
|
---|
303 | <filter class="solr.StopFilterFactory"
|
---|
304 | ignoreCase="true"
|
---|
305 | words="stopwords_en.txt"
|
---|
306 | enablePositionIncrements="true"
|
---|
307 | />
|
---|
308 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
---|
309 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
310 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
311 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
312 | </analyzer>
|
---|
313 | <analyzer type="query">
|
---|
314 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
315 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
316 | <filter class="solr.StopFilterFactory"
|
---|
317 | ignoreCase="true"
|
---|
318 | words="stopwords_en.txt"
|
---|
319 | enablePositionIncrements="true"
|
---|
320 | />
|
---|
321 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
---|
322 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
323 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
324 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
325 | </analyzer>
|
---|
326 | </fieldType>
|
---|
327 |
|
---|
328 | <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
---|
329 | but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
---|
330 | <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
331 | <analyzer>
|
---|
332 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
333 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
334 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
|
---|
335 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
---|
336 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
337 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
338 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
339 | <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
---|
340 | possible with WordDelimiterFilter in conjunction with stemming. -->
|
---|
341 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
---|
342 | </analyzer>
|
---|
343 | </fieldType>
|
---|
344 |
|
---|
345 | <!-- Just like text_general except it reverses the characters of
|
---|
346 | each token, to enable more efficient leading wildcard queries. -->
|
---|
347 | <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
---|
348 | <analyzer type="index">
|
---|
349 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
350 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
351 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
352 | <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
---|
353 | maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
---|
354 | </analyzer>
|
---|
355 | <analyzer type="query">
|
---|
356 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
357 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
358 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
---|
359 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
360 | </analyzer>
|
---|
361 | </fieldType>
|
---|
362 |
|
---|
363 | <!-- charFilter + WhitespaceTokenizer -->
|
---|
364 | <!--
|
---|
365 | <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
|
---|
366 | <analyzer>
|
---|
367 | <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
---|
368 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
369 | </analyzer>
|
---|
370 | </fieldType>
|
---|
371 | -->
|
---|
372 |
|
---|
373 | <!-- This is an example of using the KeywordTokenizer along
|
---|
374 | with various TokenFilterFactories to produce a sortable field
|
---|
375 | that does not include some properties of the source text
|
---|
376 | -->
|
---|
377 | <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
---|
378 | <analyzer>
|
---|
379 | <!-- KeywordTokenizer does no actual tokenizing, so the entire
|
---|
380 | input string is preserved as a single token
|
---|
381 | -->
|
---|
382 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
383 | <!-- The LowerCase TokenFilter does what you expect, which can be useful
|
---|
384 | when you want your sorting to be case insensitive
|
---|
385 | -->
|
---|
386 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
387 | <!-- The TrimFilter removes any leading or trailing whitespace -->
|
---|
388 | <filter class="solr.TrimFilterFactory" />
|
---|
389 | <!-- The PatternReplaceFilter gives you the flexibility to use
|
---|
390 | Java Regular expression to replace any sequence of characters
|
---|
391 | matching a pattern with an arbitrary replacement string,
|
---|
392 | which may include back references to portions of the original
|
---|
393 | string matched by the pattern.
|
---|
394 |
|
---|
395 | See the Java Regular Expression documentation for more
|
---|
396 | information on pattern and replacement string syntax.
|
---|
397 |
|
---|
398 | http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
---|
399 | -->
|
---|
400 | <filter class="solr.PatternReplaceFilterFactory"
|
---|
401 | pattern="([^a-z])" replacement="" replace="all"
|
---|
402 | />
|
---|
403 | </analyzer>
|
---|
404 | </fieldType>
|
---|
405 | <!-- Phonetic matching: tokens are replaced by their Double Metaphone codes (inject="false"), so the original terms are not indexed alongside the codes. -->
|
---|
406 | <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
---|
407 | <analyzer>
|
---|
408 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
409 | <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
---|
410 | </analyzer>
|
---|
411 | </fieldType>
|
---|
412 | <!-- Whitespace-tokenized text whose tokens may carry a float payload after the delimiter, e.g. "foo|1.4". -->
|
---|
413 | <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
---|
414 | <analyzer>
|
---|
415 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
416 | <!--
|
---|
417 | The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
---|
418 | a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
---|
419 | Attributes of the DelimitedPayloadTokenFilterFactory :
|
---|
420 | "delimiter" - a one character delimiter. Default is | (pipe)
|
---|
421 | "encoder" - how to encode the following value into a payload
|
---|
422 | float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
---|
423 | integer -> o.a.l.a.p.IntegerEncoder
|
---|
424 | identity -> o.a.l.a.p.IdentityEncoder
|
---|
425 | Alternatively, a fully qualified class name implementing PayloadEncoder; the encoder must have a no-arg constructor.
|
---|
426 | -->
|
---|
427 | <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
---|
428 | </analyzer>
|
---|
429 | </fieldType>
|
---|
430 |
|
---|
431 | <!-- lowercases the entire field value, keeping it as a single token. -->
|
---|
432 | <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
---|
433 | <analyzer>
|
---|
434 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
435 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
436 | </analyzer>
|
---|
437 | </fieldType>
|
---|
438 |
|
---|
439 | <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
---|
440 | <analyzer>
|
---|
441 | <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
---|
442 | </analyzer>
|
---|
443 | </fieldType>
|
---|
444 |
|
---|
445 | <!-- Since fields of this type are by default not stored or indexed,
|
---|
446 | any data added to them will be ignored outright. -->
|
---|
447 | <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
---|
448 |
|
---|
449 | <!-- This point type indexes the coordinates as separate fields (subFields)
|
---|
450 | If subFieldType is defined, it references a type, and a dynamic field
|
---|
451 | definition is created matching *___<typename>. Alternately, if
|
---|
452 | subFieldSuffix is defined, that is used to create the subFields.
|
---|
453 | Example: if subFieldType="double", then the coordinates would be
|
---|
454 | indexed in fields myloc_0___double,myloc_1___double.
|
---|
455 | Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
---|
456 | in fields myloc_0_d,myloc_1_d
|
---|
457 | The subFields are an implementation detail of the fieldType, and end
|
---|
458 | users normally should not need to know about them.
|
---|
459 | -->
|
---|
460 | <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
---|
461 |
|
---|
462 | <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
---|
463 | <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
---|
464 |
|
---|
465 | <!--
|
---|
466 | A Geohash is a compact representation of a latitude/longitude pair in a single field.
|
---|
467 | See http://wiki.apache.org/solr/SpatialSearch
|
---|
468 | -->
|
---|
469 | <fieldType name="geohash" class="solr.GeoHashField"/>
|
---|
470 | </types>
|
---|
471 |
|
---|
472 |
|
---|
473 | <fields>
|
---|
474 | <!-- Valid attributes for fields:
|
---|
475 | name: mandatory - the name for the field
|
---|
476 | type: mandatory - the name of a previously defined type from the
|
---|
477 | <types> section
|
---|
478 | indexed: true if this field should be indexed (searchable or sortable)
|
---|
479 | stored: true if this field should be retrievable
|
---|
480 | multiValued: true if this field may contain multiple values per document
|
---|
481 | omitNorms: (expert) set to true to omit the norms associated with
|
---|
482 | this field (this disables length normalization and index-time
|
---|
483 | boosting for the field, and saves some memory). Only full-text
|
---|
484 | fields or fields that need an index-time boost need norms.
|
---|
485 | termVectors: [false] set to true to store the term vector for a
|
---|
486 | given field.
|
---|
487 | When using MoreLikeThis, fields used for similarity should be
|
---|
488 | stored for best performance.
|
---|
489 | termPositions: Store position information with the term vector.
|
---|
490 | This will increase storage costs.
|
---|
491 | termOffsets: Store offset information with the term vector. This
|
---|
492 | will increase storage costs.
|
---|
493 | default: a value that should be used if no value is specified
|
---|
494 | when adding a document.
|
---|
495 | -->
|
---|
496 |
|
---|
497 | <field name="id" type="string" indexed="true" stored="true" required="true" />
|
---|
498 | <field name="docOID" type="string" indexed="true" stored="true" required="true" />
|
---|
499 | <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
|
---|
500 | <field name="name" type="text_general" indexed="true" stored="true"/>
|
---|
501 | <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
---|
502 | <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
|
---|
503 | <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
504 | <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
505 | <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
---|
506 |
|
---|
507 | <field name="weight" type="float" indexed="true" stored="true"/>
|
---|
508 | <field name="price" type="float" indexed="true" stored="true"/>
|
---|
509 | <field name="popularity" type="int" indexed="true" stored="true" />
|
---|
510 | <field name="inStock" type="boolean" indexed="true" stored="true" />
|
---|
511 |
|
---|
512 | <!--
|
---|
513 | The following store examples are used to demonstrate the various ways one might _CHOOSE_ to
|
---|
514 | implement spatial. It is highly unlikely that you would ever have ALL of these fields defined.
|
---|
515 | -->
|
---|
516 | <field name="store" type="location" indexed="true" stored="true"/>
|
---|
517 |
|
---|
518 | <!-- Common metadata fields, named specifically to match up with
|
---|
519 | SolrCell metadata when parsing rich documents such as Word, PDF.
|
---|
520 | Some fields are multiValued only because Tika currently may return
|
---|
521 | multiple values for them.
|
---|
522 | -->
|
---|
523 | <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> <!-- multiValued, unlike the other text_general metadata fields below -->
|
---|
524 | <field name="subject" type="text_general" indexed="true" stored="true"/>
|
---|
525 | <field name="description" type="text_general" indexed="true" stored="true"/>
|
---|
526 | <field name="comments" type="text_general" indexed="true" stored="true"/>
|
---|
527 | <field name="author" type="text_general" indexed="true" stored="true"/>
|
---|
528 | <field name="keywords" type="text_general" indexed="true" stored="true"/>
|
---|
529 | <field name="category" type="text_general" indexed="true" stored="true"/>
|
---|
530 | <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> <!-- string type, multiValued -->
|
---|
531 | <field name="last_modified" type="date" indexed="true" stored="true"/>
|
---|
532 | <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> <!-- NOTE(review): no copyField visible in this file copies any of these metadata fields into "text"; confirm they are meant to be searchable only by explicit field name -->
|
---|
533 |
|
---|
534 |
|
---|
535 | <!-- catchall field, containing all other searchable text fields (implemented
|
---|
536 | via copyField further on in this schema) -->
|
---|
537 | <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> <!-- indexed but not stored; receives the values of cat, name, manu, features and includes through the copyField rules below, and is named as the defaultSearchField -->
|
---|
538 |
|
---|
539 | <!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
---|
540 | leading wildcard queries. -->
|
---|
541 | <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> <!-- NOTE(review): no copyField in the visible part of this schema has dest="text_rev"; confirm this field is populated some other way, otherwise it stays empty -->
|
---|
542 |
|
---|
543 | <!-- non-tokenized version of manufacturer to make it easier to sort or group
|
---|
544 | results by manufacturer. copied from "manu" via copyField -->
|
---|
545 | <field name="manu_exact" type="string" indexed="true" stored="false"/> <!-- indexed but not stored; filled by the copyField rule with source="manu" and dest="manu_exact" below -->
|
---|
546 |
|
---|
547 | <field name="payloads" type="payloads" indexed="true" stored="true"/> <!-- field and field type share the name "payloads"; the type definition lies outside this section (TODO confirm it is declared) -->
|
---|
548 |
|
---|
549 | <!-- Uncommenting the following will create a "timestamp" field using
|
---|
550 | a default value of "NOW" to indicate when each document was indexed.
|
---|
551 | -->
|
---|
552 | <!--
|
---|
553 | <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
---|
554 | -->
|
---|
555 |
|
---|
556 |
|
---|
557 | <!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
---|
558 | will be used if the name matches any of the patterns.
|
---|
559 | RESTRICTION: the glob-like pattern in the name attribute must have
|
---|
560 | a "*" only at the start or the end.
|
---|
561 | EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
---|
562 | Longer patterns will be matched first. If equal size patterns
|
---|
563 | both match, the first appearing in the schema will be used. -->
|
---|
564 | <dynamicField name="*_i" type="int" indexed="true" stored="true"/> <!-- suffix patterns in this group map a field-name ending to a type: _i int, _s string, _l long, _t/_txt text_general, _b boolean, _f float, _d double -->
|
---|
565 | <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
---|
566 | <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
---|
567 | <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
|
---|
568 | <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> <!-- same type as *_t; the only pattern in this group that sets multiValued -->
|
---|
569 | <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
---|
570 | <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
---|
571 | <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
---|
572 |
|
---|
573 | <!-- Type used to index the lat and lon components for the "location" FieldType -->
|
---|
574 | <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/> <!-- indexed only (stored="false"); the "store" field and the "*_p" pattern both use the "location" type this pattern supports -->
|
---|
575 |
|
---|
576 | <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> <!-- names ending in _dt use the "date" type -->
|
---|
577 | <dynamicField name="*_p" type="location" indexed="true" stored="true"/> <!-- names ending in _p use the "location" type, the same type as the "store" field -->
|
---|
578 |
|
---|
579 | <!-- some trie-coded dynamic fields for faster range queries -->
|
---|
580 | <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> <!-- each trie pattern uses a "t"-prefixed suffix and type: _ti tint, _tl tlong, _tf tfloat, _td tdouble, _tdt tdate -->
|
---|
581 | <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
---|
582 | <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
---|
583 | <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
---|
584 | <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
---|
585 |
|
---|
586 | <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> <!-- names ending in _pi use the "pint" type; its definition lies outside this section -->
|
---|
587 |
|
---|
588 | <dynamicField name="ignored_*" type="ignored" multiValued="true"/> <!-- prefix pattern; sets no indexed/stored attributes here, so those presumably come from the "ignored" type (TODO confirm against its definition) -->
|
---|
589 | <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> <!-- prefix pattern: any field name starting with attr_ becomes a multi-valued text_general field -->
|
---|
590 |
|
---|
591 | <dynamicField name="random_*" type="random" /> <!-- prefix pattern; only name and type are given, everything else is left to the "random" type defined outside this section -->
|
---|
592 |
|
---|
593 | <!-- uncomment the following to ignore any fields that don't already match an existing
|
---|
594 | field name or dynamic field, rather than reporting them as an error.
|
---|
595 | Alternatively, change the type="ignored" to some other type e.g. "text_general" if you want
|
---|
596 | unknown fields indexed and/or stored by default -->
|
---|
597 | <!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
---|
598 |
|
---|
599 | </fields>
|
---|
600 |
|
---|
601 | <!-- Field to use to determine and enforce document uniqueness.
|
---|
602 | Unless this field is marked with required="false", it will be a required field
|
---|
603 | -->
|
---|
604 | <uniqueKey>docOID</uniqueKey> <!-- NOTE(review): no field named "docOID" is declared in the visible part of this schema; confirm a matching field definition exists earlier in the file -->
|
---|
605 |
|
---|
606 | <!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
---|
607 | <defaultSearchField>text</defaultSearchField> <!-- names the catchall "text" field declared above, which is filled by the copyField rules below -->
|
---|
608 |
|
---|
609 | <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
---|
610 | <solrQueryParser defaultOperator="OR"/> <!-- selects OR out of the two documented values; presumably applied when a query gives no explicit operator (TODO confirm) -->
|
---|
611 |
|
---|
612 | <!-- copyField commands copy one field to another at the time a document
|
---|
613 | is added to the index. It's used either to index the same field differently,
|
---|
614 | or to add multiple fields to the same field for easier/faster searching. -->
|
---|
615 |
|
---|
616 | <copyField source="cat" dest="text"/> <!-- the first five rules funnel cat, name, manu, features and includes into the catchall "text" field; cat, name and manu are declared outside this section -->
|
---|
617 | <copyField source="name" dest="text"/>
|
---|
618 | <copyField source="manu" dest="text"/>
|
---|
619 | <copyField source="features" dest="text"/>
|
---|
620 | <copyField source="includes" dest="text"/>
|
---|
621 | <copyField source="manu" dest="manu_exact"/> <!-- manu is copied a second time, into the string-typed manu_exact field -->
|
---|
622 |
|
---|
623 | <!-- Above, multiple source fields are copied to the [text] field.
|
---|
624 | Another way to map multiple source fields to the same
|
---|
625 | destination field is to use the dynamic field syntax.
|
---|
626 | copyField also supports a maxChars to copy setting. -->
|
---|
627 |
|
---|
628 | <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
---|
629 |
|
---|
630 | <!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
---|
631 | <!-- <copyField source="name" dest="alphaNameSort"/> -->
|
---|
632 |
|
---|
633 |
|
---|
634 | <!-- Similarity is the scoring routine for each document vs. a query.
|
---|
635 | A custom similarity may be specified here, but the default is fine
|
---|
636 | for most applications. -->
|
---|
637 | <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
---|
638 | <!-- ... OR ...
|
---|
639 | Specify a SimilarityFactory class name implementation
|
---|
640 | allowing parameters to be used.
|
---|
641 | -->
|
---|
642 | <!--
|
---|
643 | <similarity class="com.example.solr.CustomSimilarityFactory">
|
---|
644 | <str name="paramkey">param value</str>
|
---|
645 | </similarity>
|
---|
646 | -->
|
---|
647 |
|
---|
648 |
|
---|
649 | </schema>
|
---|