1 | <?xml version="1.0" encoding="UTF-8" ?>
|
---|
2 | <!--
|
---|
3 | Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
4 | contributor license agreements. See the NOTICE file distributed with
|
---|
5 | this work for additional information regarding copyright ownership.
|
---|
6 | The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
7 | (the "License"); you may not use this file except in compliance with
|
---|
8 | the License. You may obtain a copy of the License at
|
---|
9 |
|
---|
10 | http://www.apache.org/licenses/LICENSE-2.0
|
---|
11 |
|
---|
12 | Unless required by applicable law or agreed to in writing, software
|
---|
13 | distributed under the License is distributed on an "AS IS" BASIS,
|
---|
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
15 | See the License for the specific language governing permissions and
|
---|
16 | limitations under the License.
|
---|
17 | -->
|
---|
18 |
|
---|
19 | <!--
|
---|
20 | This is the Solr schema file. This file should be named "schema.xml" and
|
---|
21 | should be in the conf directory under the solr home
|
---|
22 | (i.e. ./solr/conf/schema.xml by default)
|
---|
23 | or located where the classloader for the Solr webapp can find it.
|
---|
24 |
|
---|
25 | This example schema is the recommended starting point for users.
|
---|
26 | It should be kept correct and concise, usable out-of-the-box.
|
---|
27 |
|
---|
28 | For more information on how to customize this file, please see
|
---|
29 | http://wiki.apache.org/solr/SchemaXml
|
---|
30 |
|
---|
31 | PERFORMANCE NOTE: this schema includes many optional features and should not
|
---|
32 | be used for benchmarking. To improve performance one could
|
---|
33 | - set stored="false" for all fields possible (esp large fields) when you
|
---|
34 | only need to search on the field but don't need to return the original
|
---|
35 | value.
|
---|
36 | - set indexed="false" if you don't need to search on the field, but only
|
---|
37 | return the field as a result of searching on other indexed fields.
|
---|
38 | - remove all unneeded copyField statements
|
---|
39 | - for best index size and searching performance, set indexed="false"
|
---|
40 | for all general text fields, use copyField to copy them to the
|
---|
41 | catchall "text" field, and use that for searching.
|
---|
42 | - For maximum indexing performance, use the StreamingUpdateSolrServer
|
---|
43 | java client.
|
---|
44 | - Remember to run the JVM in server mode, and use a higher logging level
|
---|
45 | that avoids logging every request
|
---|
46 | -->
|
---|
47 |
|
---|
48 | <schema name="example" version="1.5">
|
---|
49 | <!-- attribute "name" is the name of this schema and is only used for display purposes.
|
---|
50 | version="x.y" is Solr's version number for the schema syntax and
|
---|
51 | semantics. It should not normally be changed by applications.
|
---|
52 |
|
---|
53 | 1.0: multiValued attribute did not exist, all fields are multiValued
|
---|
54 | by nature
|
---|
55 | 1.1: multiValued attribute introduced, false by default
|
---|
56 | 1.2: omitTermFreqAndPositions attribute introduced, true by default
|
---|
57 | except for text fields.
|
---|
58 | 1.3: removed optional field compress feature
|
---|
59 | 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
|
---|
60 | behavior when a single string produces multiple tokens. Defaults
|
---|
61 | to off for version >= 1.4
|
---|
62 | 1.5: omitNorms defaults to true for primitive field types
|
---|
63 | (int, float, boolean, string...)
|
---|
64 | -->
|
---|
65 |
|
---|
66 | <fields>
|
---|
67 | <!-- Valid attributes for fields:
|
---|
68 | name: mandatory - the name for the field
|
---|
69 | type: mandatory - the name of a field type from the
|
---|
70 | <types> fieldType section
|
---|
71 | indexed: true if this field should be indexed (searchable or sortable)
|
---|
72 | stored: true if this field should be retrievable
|
---|
73 | docValues: true if this field should have doc values. Doc values are
|
---|
74 | useful for faceting, grouping, sorting and function queries. Although not
|
---|
75 | required, doc values will make the index faster to load, more
|
---|
76 | NRT-friendly and more memory-efficient. They however come with some
|
---|
77 | limitations: they are currently only supported by StrField, UUIDField
|
---|
78 | and all Trie*Fields, and depending on the field type, they might
|
---|
79 | require the field to be single-valued, be required or have a default
|
---|
80 | value (check the documentation of the field type you're interested in
|
---|
81 | for more information)
|
---|
82 | multiValued: true if this field may contain multiple values per document
|
---|
83 | omitNorms: (expert) set to true to omit the norms associated with
|
---|
84 | this field (this disables length normalization and index-time
|
---|
85 | boosting for the field, and saves some memory). Only full-text
|
---|
86 | fields or fields that need an index-time boost need norms.
|
---|
87 | Norms are omitted for primitive (non-analyzed) types by default.
|
---|
88 | termVectors: [false] set to true to store the term vector for a
|
---|
89 | given field.
|
---|
90 | When using MoreLikeThis, fields used for similarity should be
|
---|
91 | stored for best performance.
|
---|
92 | termPositions: Store position information with the term vector.
|
---|
93 | This will increase storage costs.
|
---|
94 | termOffsets: Store offset information with the term vector. This
|
---|
95 | will increase storage costs.
|
---|
96 | required: The field is required. It will throw an error if the
|
---|
97 | value does not exist
|
---|
98 | default: a value that should be used if no value is specified
|
---|
99 | when adding a document.
|
---|
100 | -->
|
---|
101 |
|
---|
102 | <!-- field names should consist of alphanumeric or underscore characters only and
|
---|
103 | not start with a digit. This is not currently strictly enforced,
|
---|
104 | but other field names will not have first class support from all components
|
---|
105 | and back compatibility is not guaranteed. Names with both leading and
|
---|
106 | trailing underscores (e.g. _version_) are reserved.
|
---|
107 | -->
|
---|
108 |
|
---|
109 | <!-- If you remove the _version_ field, you must _also_ disable the update log in solrconfig.xml
|
---|
110 | or Solr won't start. _version_ and update log are required for SolrCloud
|
---|
111 | -->
|
---|
112 |
|
---|
<!-- Document identifier: a required string field that is both indexed and stored. -->
<field name="docOID"
       type="string"
       indexed="true"
       stored="true"
       required="true"/>
|
---|
114 |
|
---|
115 | <!-- ##GREENSTONE-FIELDS## -->
|
---|
116 |
|
---|
117 |
|
---|
<!-- Reserved _version_ field (type long), indexed and stored. -->
<field name="_version_"
       type="long"
       indexed="true"
       stored="true"/>
|
---|
119 |
|
---|
120 | <!-- points to the root document of a block of nested documents. Required for nested
|
---|
121 | document support, may be removed otherwise
|
---|
122 | -->
|
---|
<!-- Reserved _root_ field (type string): indexed but not stored. -->
<field name="_root_"
       type="string"
       indexed="true"
       stored="false"/>
|
---|
124 |
|
---|
125 | <!-- Only remove the "id" field if you have a very good reason to. While not strictly
|
---|
126 | required, it is highly recommended. A <uniqueKey> is present in almost all Solr
|
---|
127 | installations. NOTE: in this schema the <uniqueKey> declared below is set to "docOID", not "id".
|
---|
128 | -->
|
---|
129 | <!--
|
---|
130 | <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
|
---|
131 | -->
|
---|
132 |
|
---|
133 | <!--
|
---|
134 | <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
|
---|
135 | <field name="name" type="text_general" indexed="true" stored="true"/>
|
---|
136 | <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
|
---|
137 | <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
138 | <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
139 | <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
---|
140 |
|
---|
141 | <field name="weight" type="float" indexed="true" stored="true"/>
|
---|
142 | <field name="price" type="float" indexed="true" stored="true"/>
|
---|
143 | <field name="popularity" type="int" indexed="true" stored="true" />
|
---|
144 | <field name="inStock" type="boolean" indexed="true" stored="true" />
|
---|
145 | -->
|
---|
<!-- "store" field of type "location", indexed and stored. -->
<field name="store"
       type="location"
       indexed="true"
       stored="true"/>
|
---|
147 |
|
---|
148 | <!-- Common metadata fields, named specifically to match up with
|
---|
149 | SolrCell metadata when parsing rich documents such as Word, PDF.
|
---|
150 | Some fields are multiValued only because Tika currently may return
|
---|
151 | multiple values for them. Some metadata is parsed from the documents,
|
---|
152 | but there are some which come from the client context:
|
---|
153 | "content_type": From the HTTP headers of incoming stream
|
---|
154 | "resourcename": From SolrCell request param resource.name
|
---|
155 | -->
|
---|
156 |
|
---|
157 | <!--
|
---|
158 | <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
159 | <field name="subject" type="text_general" indexed="true" stored="true"/>
|
---|
160 | <field name="description" type="text_general" indexed="true" stored="true"/>
|
---|
161 | <field name="comments" type="text_general" indexed="true" stored="true"/>
|
---|
162 | <field name="author" type="text_general" indexed="true" stored="true"/>
|
---|
163 | <field name="keywords" type="text_general" indexed="true" stored="true"/>
|
---|
164 | <field name="category" type="text_general" indexed="true" stored="true"/>
|
---|
165 | <field name="resourcename" type="text_general" indexed="true" stored="true"/>
|
---|
166 | <field name="url" type="text_general" indexed="true" stored="true"/>
|
---|
167 | <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
168 | <field name="last_modified" type="date" indexed="true" stored="true"/>
|
---|
169 | <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
170 | -->
|
---|
171 |
|
---|
172 | <!-- Main body of document extracted by SolrCell.
|
---|
173 | NOTE: This field is not indexed by default, since it is also copied to "text"
|
---|
174 | using copyField below. This is to save space. Use this field for returning and
|
---|
175 | highlighting document content. Use the "text" field to search the content. -->
|
---|
<!-- Stored-only, multi-valued text field: values are retrievable but the
     field itself is not indexed. -->
<field name="content"
       type="text_general"
       indexed="false"
       stored="true"
       multiValued="true"/>
|
---|
177 |
|
---|
178 |
|
---|
179 | <!-- catchall field, containing all other searchable text fields (implemented
|
---|
180 | via copyField further on in this schema) -->
|
---|
<!-- Indexed-only, multi-valued text field; values are not stored. -->
<field name="text"
       type="text_general"
       indexed="true"
       stored="false"
       multiValued="true"/>
|
---|
182 |
|
---|
183 | <!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
---|
184 | leading wildcard queries. -->
|
---|
<!-- Indexed-only, multi-valued field using the "text_general_rev" type. -->
<field name="text_rev"
       type="text_general_rev"
       indexed="true"
       stored="false"
       multiValued="true"/>
|
---|
186 |
|
---|
187 | <!-- non-tokenized version of manufacturer to make it easier to sort or group
|
---|
188 | results by manufacturer. copied from "manu" via copyField -->
|
---|
<!-- String field, indexed but not stored. -->
<field name="manu_exact"
       type="string"
       indexed="true"
       stored="false"/>
|
---|
190 |
|
---|
<!-- "payloads" field of the "payloads" type, indexed and stored. -->
<field name="payloads"
       type="payloads"
       indexed="true"
       stored="true"/>
|
---|
192 |
|
---|
193 |
|
---|
194 | <!--
|
---|
195 | Some fields such as popularity and manu_exact could be modified to
|
---|
196 | leverage doc values:
|
---|
197 | <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
|
---|
198 | <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
|
---|
199 | <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
|
---|
200 |
|
---|
201 |
|
---|
202 | Although it would make indexing slightly slower and the index bigger, it
|
---|
203 | would also make the index faster to load, more memory-efficient and more
|
---|
204 | NRT-friendly.
|
---|
205 | -->
|
---|
206 |
|
---|
207 | <!-- Dynamic field definitions allow using convention over configuration
|
---|
208 | for fields via the specification of patterns to match field names.
|
---|
209 | EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
---|
210 | RESTRICTION: the glob-like pattern in the name attribute must have
|
---|
211 | a "*" only at the start or the end. -->
|
---|
212 |
|
---|
<!-- Dynamic fields for the basic types, selected by field-name suffix.
     All are indexed and stored; multiValued is set only where shown. -->
<dynamicField name="*_i"   type="int"          indexed="true" stored="true"/>
<dynamicField name="*_is"  type="int"          indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s"   type="string"       indexed="true" stored="true"/>
<dynamicField name="*_ss"  type="string"       indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l"   type="long"         indexed="true" stored="true"/>
<dynamicField name="*_ls"  type="long"         indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_t"   type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_en"  type="text_en"      indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_b"   type="boolean"      indexed="true" stored="true"/>
<dynamicField name="*_bs"  type="boolean"      indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_f"   type="float"        indexed="true" stored="true"/>
<dynamicField name="*_fs"  type="float"        indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_d"   type="double"       indexed="true" stored="true"/>
<dynamicField name="*_ds"  type="double"       indexed="true" stored="true" multiValued="true"/>
|
---|
228 |
|
---|
229 | <!-- Type used to index the lat and lon components for the "location" FieldType -->
|
---|
<!-- "*_coordinate" sub-fields: tdouble values, indexed but not stored. -->
<dynamicField name="*_coordinate"
              type="tdouble"
              indexed="true"
              stored="false"/>
|
---|
231 |
|
---|
<!-- Date ("*_dt", multi-valued "*_dts") and location ("*_p") dynamic fields. -->
<dynamicField name="*_dt"  type="date"     indexed="true" stored="true"/>
<dynamicField name="*_dts" type="date"     indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_p"   type="location" indexed="true" stored="true"/>
|
---|
235 |
|
---|
236 | <!-- some trie-coded dynamic fields for faster range queries -->
|
---|
<!-- Dynamic fields bound to the precision-stepped t* field types. -->
<dynamicField name="*_ti"  type="tint"    indexed="true" stored="true"/>
<dynamicField name="*_tl"  type="tlong"   indexed="true" stored="true"/>
<dynamicField name="*_tf"  type="tfloat"  indexed="true" stored="true"/>
<dynamicField name="*_td"  type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate"   indexed="true" stored="true"/>
|
---|
242 |
|
---|
<!-- Plain-int ("*_pi") and currency ("*_c") dynamic fields. -->
<dynamicField name="*_pi" type="pint"     indexed="true" stored="true"/>
<dynamicField name="*_c"  type="currency" indexed="true" stored="true"/>
|
---|
245 |
|
---|
<!-- Prefix-based dynamic fields: "ignored_*" uses the "ignored" type,
     "attr_*" is indexed and stored as general text. -->
<dynamicField name="ignored_*"
              type="ignored"
              multiValued="true"/>
<dynamicField name="attr_*"
              type="text_general"
              indexed="true"
              stored="true"
              multiValued="true"/>
|
---|
248 |
|
---|
<!-- Any field named "random_*" uses the "random" field type. -->
<dynamicField name="random_*"
              type="random"/>
|
---|
250 |
|
---|
251 | <!-- dynamic field for sort/facet fields, which are strings by default. ie not tokenised -->
|
---|
<!-- "by*" fields: multi-valued strings, indexed but not stored. -->
<dynamicField name="by*"
              type="string"
              indexed="true"
              stored="false"
              multiValued="true"/>
|
---|
253 |
|
---|
254 | <!-- uncomment the following to ignore any fields that don't already match an existing
|
---|
255 | field name or dynamic field, rather than reporting them as an error.
|
---|
256 | alternately, change the type="ignored" to some other type e.g. "text" if you want
|
---|
257 | unknown fields indexed and/or stored by default -->
|
---|
258 | <!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
---|
259 |
|
---|
260 | </fields>
|
---|
261 |
|
---|
262 |
|
---|
263 | <!-- Field to use to determine and enforce document uniqueness.
|
---|
264 | Unless this field is marked with required="false", it will be a required field
|
---|
265 | -->
|
---|
<!-- The unique key is "docOID", which is declared in <fields> as a required,
     indexed and stored string field. -->
266 | <uniqueKey>docOID</uniqueKey>
|
---|
267 |
|
---|
268 | <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
|
---|
269 | parsing a query string that isn't explicit about the field. Machine (non-user)
|
---|
270 | generated queries are best made explicit, or they can use the "df" request parameter
|
---|
271 | which takes precedence over this.
|
---|
272 | Note: Un-commenting defaultSearchField will be insufficient if your request handler
|
---|
273 | in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
|
---|
274 | <defaultSearchField>text</defaultSearchField> -->
|
---|
275 |
|
---|
276 | <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
|
---|
277 | when parsing a query string to determine if a clause of the query should be marked as
|
---|
278 | required or optional, assuming the clause isn't already marked by some operator.
|
---|
279 | The default is OR, which is generally assumed so it is not a good idea to change it
|
---|
280 | globally here. The "q.op" request parameter takes precedence over this.
|
---|
281 | <solrQueryParser defaultOperator="OR"/> -->
|
---|
282 |
|
---|
283 | <!-- copyField commands copy one field to another at the time a document
|
---|
284 | is added to the index. It's used either to index the same field differently,
|
---|
285 | or to add multiple fields to the same field for easier/faster searching. -->
|
---|
286 | <!--
|
---|
287 | <copyField source="cat" dest="text"/>
|
---|
288 | <copyField source="name" dest="text"/>
|
---|
289 | <copyField source="manu" dest="text"/>
|
---|
290 | <copyField source="features" dest="text"/>
|
---|
291 | <copyField source="includes" dest="text"/>
|
---|
292 | <copyField source="manu" dest="manu_exact"/>
|
---|
293 | -->
|
---|
294 |
|
---|
295 | <!-- Copy the price into a currency enabled field (default USD) -->
|
---|
296 | <!--
|
---|
297 | <copyField source="price" dest="price_c"/>
|
---|
298 | -->
|
---|
299 |
|
---|
300 | <!-- Text fields from SolrCell to search by default in our catch-all field -->
|
---|
301 | <!--
|
---|
302 | <copyField source="title" dest="text"/>
|
---|
303 | <copyField source="author" dest="text"/>
|
---|
304 | <copyField source="description" dest="text"/>
|
---|
305 | <copyField source="keywords" dest="text"/>
|
---|
306 | <copyField source="content" dest="text"/>
|
---|
307 | <copyField source="content_type" dest="text"/>
|
---|
308 | <copyField source="resourcename" dest="text"/>
|
---|
309 | <copyField source="url" dest="text"/>
|
---|
310 | -->
|
---|
311 |
|
---|
312 | <!-- Create a string version of author for faceting -->
|
---|
313 | <!--
|
---|
314 | <copyField source="author" dest="author_s"/>
|
---|
315 | -->
|
---|
316 |
|
---|
317 | <!-- Above, multiple source fields are copied to the [text] field.
|
---|
318 | Another way to map multiple source fields to the same
|
---|
319 | destination field is to use the dynamic field syntax.
|
---|
320 | copyField also supports a maxChars to copy setting. -->
|
---|
321 |
|
---|
322 | <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
---|
323 |
|
---|
324 | <!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
---|
325 | <!-- <copyField source="name" dest="alphaNameSort"/> -->
|
---|
326 |
|
---|
327 | <types>
|
---|
328 | <!-- field type definitions. The "name" attribute is
|
---|
329 | just a label to be used by field definitions. The "class"
|
---|
330 | attribute and any other attributes determine the real
|
---|
331 | behavior of the fieldType.
|
---|
332 | Class names starting with "solr" refer to java classes in a
|
---|
333 | standard package such as org.apache.solr.analysis
|
---|
334 | -->
|
---|
335 |
|
---|
336 | <!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
---|
337 | It supports doc values but in that case the field needs to be
|
---|
338 | single-valued and either required or have a default value.
|
---|
339 | -->
|
---|
<!-- "string": solr.StrField with sortMissingLast enabled. -->
<fieldType name="string"
           class="solr.StrField"
           sortMissingLast="true"/>
|
---|
341 |
|
---|
342 | <!-- boolean type: "true" or "false" -->
|
---|
<!-- "boolean": solr.BoolField with sortMissingLast enabled. -->
<fieldType name="boolean"
           class="solr.BoolField"
           sortMissingLast="true"/>
|
---|
344 |
|
---|
345 | <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
|
---|
346 | currently supported on types that are sorted internally as strings
|
---|
347 | and on numeric types.
|
---|
348 | This includes "string","boolean", and, as of 3.5 (and 4.x),
|
---|
349 | int, float, long, date, double, including the "Trie" variants.
|
---|
350 | - If sortMissingLast="true", then a sort on this field will cause documents
|
---|
351 | without the field to come after documents with the field,
|
---|
352 | regardless of the requested sort order (asc or desc).
|
---|
353 | - If sortMissingFirst="true", then a sort on this field will cause documents
|
---|
354 | without the field to come before documents with the field,
|
---|
355 | regardless of the requested sort order.
|
---|
356 | - If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
---|
357 | then default lucene sorting will be used which places docs without the
|
---|
358 | field first in an ascending sort and last in a descending sort.
|
---|
359 | -->
|
---|
360 |
|
---|
361 | <!--
|
---|
362 | Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
---|
363 |
|
---|
364 | These fields support doc values, but they require the field to be
|
---|
365 | single-valued and either be required or have a default value.
|
---|
366 | -->
|
---|
<!-- Default numeric types: Trie fields with precisionStep="0". -->
<fieldType name="int"    class="solr.TrieIntField"    precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float"  class="solr.TrieFloatField"  precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long"   class="solr.TrieLongField"   precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
|
---|
371 |
|
---|
372 | <!--
|
---|
373 | Numeric field types that index each value at various levels of precision
|
---|
374 | to accelerate range queries when the number of values between the range
|
---|
375 | endpoints is large. See the javadoc for NumericRangeQuery for internal
|
---|
376 | implementation details.
|
---|
377 |
|
---|
378 | Smaller precisionStep values (specified in bits) will lead to more tokens
|
---|
379 | indexed per value, slightly larger index size, and faster range queries.
|
---|
380 | A precisionStep of 0 disables indexing at different precision levels.
|
---|
381 | -->
|
---|
<!-- Same Trie classes as the default numeric types, but with precisionStep="8". -->
<fieldType name="tint"    class="solr.TrieIntField"    precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat"  class="solr.TrieFloatField"  precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong"   class="solr.TrieLongField"   precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
---|
386 |
|
---|
387 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
---|
388 | is a more restricted form of the canonical representation of dateTime
|
---|
389 | http://www.w3.org/TR/xmlschema-2/#dateTime
|
---|
390 | The trailing "Z" designates UTC time and is mandatory.
|
---|
391 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
---|
392 | All other components are mandatory.
|
---|
393 |
|
---|
394 | Expressions can also be used to denote calculations that should be
|
---|
395 | performed relative to "NOW" to determine the value, ie...
|
---|
396 |
|
---|
397 | NOW/HOUR
|
---|
398 | ... Round to the start of the current hour
|
---|
399 | NOW-1DAY
|
---|
400 | ... Exactly 1 day prior to now
|
---|
401 | NOW/DAY+6MONTHS+3DAYS
|
---|
402 | ... 6 months and 3 days in the future from the start of
|
---|
403 | the current day
|
---|
404 |
|
---|
405 | Consult the DateField javadocs for more information.
|
---|
406 |
|
---|
407 | Note: For faster range queries, consider the tdate type
|
---|
408 | -->
|
---|
<!-- "date": solr.TrieDateField with precisionStep="0". -->
<fieldType name="date"
           class="solr.TrieDateField"
           precisionStep="0"
           positionIncrementGap="0"/>
|
---|
410 |
|
---|
411 | <!-- A Trie based date field for faster date range queries and date faceting. -->
|
---|
<!-- "tdate": solr.TrieDateField with precisionStep="6". -->
<fieldType name="tdate"
           class="solr.TrieDateField"
           precisionStep="6"
           positionIncrementGap="0"/>
|
---|
413 |
|
---|
414 |
|
---|
415 | <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
---|
<!-- "binary": solr.BinaryField. The element name is spelled "fieldType" to
     match every other type declaration in this file (XML names are
     case-sensitive). -->
<fieldType name="binary" class="solr.BinaryField"/>
|
---|
417 |
|
---|
418 | <!--
|
---|
419 | Note:
|
---|
420 | These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
|
---|
421 | Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
|
---|
422 |
|
---|
423 | Plain numeric field types that store and index the text
|
---|
424 | value verbatim (and hence don't correctly support range queries, since the
|
---|
425 | lexicographic ordering isn't equal to the numeric ordering)
|
---|
426 | -->
|
---|
<!-- Plain (non-Trie) numeric and date types; only "pdate" sets sortMissingLast. -->
<fieldType name="pint"    class="solr.IntField"/>
<fieldType name="plong"   class="solr.LongField"/>
<fieldType name="pfloat"  class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate"   class="solr.DateField" sortMissingLast="true"/>
|
---|
432 |
|
---|
433 | <!-- The "RandomSortField" is not used to store or search any
|
---|
434 | data. You can declare fields of this type in your schema
|
---|
435 | to generate pseudo-random orderings of your docs for sorting
|
---|
436 | or function purposes. The ordering is generated based on the field
|
---|
437 | name and the version of the index. As long as the index version
|
---|
438 | remains unchanged, and the same field name is reused,
|
---|
439 | the ordering of the docs will be consistent.
|
---|
440 | If you want different pseudo-random orderings of documents,
|
---|
441 | for the same version of the index, use a dynamicField and
|
---|
442 | change the field name in the request.
|
---|
443 | -->
|
---|
<!-- "random": solr.RandomSortField, declared with indexed="true". -->
<fieldType name="random"
           class="solr.RandomSortField"
           indexed="true"/>
|
---|
445 |
|
---|
446 | <!-- solr.TextField allows the specification of custom text analyzers
|
---|
447 | specified as a tokenizer and a list of token filters. Different
|
---|
448 | analyzers may be specified for indexing and querying.
|
---|
449 |
|
---|
450 | The optional positionIncrementGap puts space between multiple fields of
|
---|
451 | this type on the same document, with the purpose of preventing false phrase
|
---|
452 | matching across fields.
|
---|
453 |
|
---|
454 | For more info on customizing your analyzer chain, please see
|
---|
455 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
---|
456 | -->
|
---|
457 |
|
---|
458 | <!-- One can also specify an existing Analyzer class that has a
|
---|
459 | default constructor via the class attribute on the analyzer element.
|
---|
460 | Example:
|
---|
461 | <fieldType name="text_greek" class="solr.TextField">
|
---|
462 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
---|
463 | </fieldType>
|
---|
464 | -->
|
---|
465 |
|
---|
466 | <!-- A text field that only splits on whitespace for exact matching of words -->
|
---|
<!-- "text_ws": the analyzer consists of a whitespace tokenizer only, with no filters. -->
<fieldType name="text_ws"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
|
---|
472 |
|
---|
473 | <!-- A general text field that has reasonable, generic
|
---|
474 | cross-language defaults: it tokenizes with StandardTokenizer,
|
---|
475 | removes stop words from case-insensitive "stopwords.txt"
|
---|
476 | (empty by default), and down cases. At query time only, it
|
---|
477 | also applies synonyms. -->
|
---|
<fieldType name="text_general"
           class="solr.TextField"
           positionIncrementGap="100">

  <!-- Index-time chain: HTML-strip char filter, standard tokenizer,
       stop word filter, lower-case filter. -->
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <!-- Synonyms are applied at query time only. To apply them at index time as well, enable:
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: standard tokenizer, stop word filter,
       synonym expansion, lower-case filter. No char filter is applied. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
---|
495 |
|
---|
496 | <!-- A text field with defaults appropriate for English: it
|
---|
497 | tokenizes with StandardTokenizer, removes English stop words
|
---|
498 | (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
|
---|
499 | finally applies minimal English stemming (EnglishMinimalStemFilterFactory). The query time analyzer
|
---|
500 | also applies synonyms from synonyms.txt. -->
|
---|
<fieldType name="text_en"
           class="solr.TextField"
           positionIncrementGap="100">

  <!-- Index-time chain: HTML-strip char filter, standard tokenizer, English
       stop words, lower-casing, possessive removal, protected-word marking,
       then the minimal English stemmer. -->
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Synonyms are applied at query time only. To apply them at index time as well, enable:
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
    -->
    <!-- Case-insensitive stop word removal. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- The less aggressive EnglishMinimalStemFilterFactory is the active stemmer.
         The Porter stemmer is kept here, disabled, as the alternative:
    <filter class="solr.PorterStemFilterFactory"/>
    -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: standard tokenizer, synonym expansion, then the same
       stop word, lower-case, possessive, protected-word and stemming filters
       as at index time. No char filter is applied. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <!-- The less aggressive EnglishMinimalStemFilterFactory is the active stemmer.
         The Porter stemmer is kept here, disabled, as the alternative:
    <filter class="solr.PorterStemFilterFactory"/>
    -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
  </analyzer>
</fieldType>
|
---|
540 |
|
---|
541 | <!-- A text field with defaults appropriate for English, plus
|
---|
542 | aggressive word-splitting and autophrase features enabled.
|
---|
543 | This field is just like text_en, except it adds
|
---|
544 | WordDelimiterFilter to enable splitting and matching of
|
---|
545 | words on case-change, alpha numeric boundaries, and
|
---|
546 | non-alphanumeric chars. This means certain compound word
|
---|
547 | cases will work, for example query "wi fi" will match
|
---|
548 | document "WiFi" or "wi-fi".
|
---|
549 | -->
|
---|
550 | <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
551 | <analyzer type="index">
|
---|
552 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
553 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
554 | <!-- in this example, we will only use synonyms at query time
|
---|
555 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
556 | -->
|
---|
557 | <!-- Case insensitive stop word removal.
|
---|
558 | -->
|
---|
559 | <filter class="solr.StopFilterFactory"
|
---|
560 | ignoreCase="true"
|
---|
561 | words="lang/stopwords_en.txt"
|
---|
562 | />
|
---|
563 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
---|
564 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
565 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
566 | <!--<filter class="solr.PorterStemFilterFactory"/>-->
|
---|
567 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
568 | </analyzer>
|
---|
569 | <analyzer type="query">
|
---|
570 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
571 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
572 | <filter class="solr.StopFilterFactory"
|
---|
573 | ignoreCase="true"
|
---|
574 | words="lang/stopwords_en.txt"
|
---|
575 | />
|
---|
576 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
---|
577 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
578 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
579 | <!--<filter class="solr.PorterStemFilterFactory"/>-->
|
---|
580 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
581 | </analyzer>
|
---|
582 | </fieldType>
|
---|
583 |
|
---|
584 | <!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
---|
585 | but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
---|
586 | <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
587 | <analyzer>
|
---|
588 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
589 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
590 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
591 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
---|
592 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
---|
593 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
594 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
595 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
596 | <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
---|
597 | possible with WordDelimiterFilter in conjunction with stemming. -->
|
---|
598 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
---|
599 | </analyzer>
|
---|
600 | </fieldType>
|
---|
601 |
|
---|
602 | <!-- Just like text_general except it reverses the characters of
|
---|
603 | each token, to enable more efficient leading wildcard queries. -->
|
---|
604 | <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
---|
605 | <analyzer type="index">
|
---|
606 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
607 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
608 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
609 | <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
---|
610 | maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
---|
611 | </analyzer>
|
---|
612 | <analyzer type="query">
|
---|
613 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
614 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
615 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
616 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
617 | </analyzer>
|
---|
618 | </fieldType>
|
---|
619 |
|
---|
620 | <!-- charFilter + WhitespaceTokenizer -->
|
---|
621 | <!--
|
---|
622 | <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
|
---|
623 | <analyzer>
|
---|
624 | <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
---|
625 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
626 | </analyzer>
|
---|
627 | </fieldType>
|
---|
628 | -->
|
---|
629 |
|
---|
630 | <!-- This is an example of using the KeywordTokenizer along
|
---|
631 | With various TokenFilterFactories to produce a sortable field
|
---|
632 | that does not include some properties of the source text
|
---|
633 | -->
|
---|
634 | <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
---|
635 | <analyzer>
|
---|
636 | <!-- KeywordTokenizer does no actual tokenizing, so the entire
|
---|
637 | input string is preserved as a single token
|
---|
638 | -->
|
---|
639 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
640 | <!-- The LowerCase TokenFilter does what you expect, which can be
|
---|
641 | useful when you want your sorting to be case insensitive
|
---|
642 | -->
|
---|
643 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
644 | <!-- The TrimFilter removes any leading or trailing whitespace -->
|
---|
645 | <filter class="solr.TrimFilterFactory" />
|
---|
646 | <!-- The PatternReplaceFilter gives you the flexibility to use
|
---|
647 | Java Regular expression to replace any sequence of characters
|
---|
648 | matching a pattern with an arbitrary replacement string,
|
---|
649 | which may include back references to portions of the original
|
---|
650 | string matched by the pattern.
|
---|
651 |
|
---|
652 | See the Java Regular Expression documentation for more
|
---|
653 | information on pattern and replacement string syntax.
|
---|
654 |
|
---|
655 | https://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
|
---|
656 | -->
|
---|
657 | <filter class="solr.PatternReplaceFilterFactory"
|
---|
658 | pattern="([^a-z])" replacement="" replace="all"
|
---|
659 | />
|
---|
660 | </analyzer>
|
---|
661 | </fieldType>
|
---|
662 | <!-- Phonetic matching: StandardTokenizer output is encoded by DoubleMetaphoneFilterFactory. inject="false" is documented to replace each token with its phonetic code rather than adding the code alongside the original. -->
|
---|
663 | <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
---|
664 | <analyzer>
|
---|
665 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
666 | <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
---|
667 | </analyzer>
|
---|
668 | </fieldType>
|
---|
669 | <!-- Whitespace-tokenized text whose tokens may carry a payload after a delimiter; this type encodes the payload as a float (see the attribute notes inside the analyzer). -->
|
---|
670 | <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
---|
671 | <analyzer>
|
---|
672 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
673 | <!--
|
---|
674 | The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
---|
675 | a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
---|
676 | Attributes of the DelimitedPayloadTokenFilterFactory :
|
---|
677 | "delimiter" - a one character delimiter. Default is | (pipe)
|
---|
678 | "encoder" - how to encode the following value into a payload
|
---|
679 | float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
---|
680 | integer -> o.a.l.a.p.IntegerEncoder
|
---|
681 | identity -> o.a.l.a.p.IdentityEncoder
|
---|
682 | Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
---|
683 | -->
|
---|
684 | <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
---|
685 | </analyzer>
|
---|
686 | </fieldType>
|
---|
687 |
|
---|
688 | <!-- lowercases the entire field value, keeping it as a single token. -->
|
---|
689 | <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
---|
690 | <analyzer>
|
---|
691 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
692 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
693 | </analyzer>
|
---|
694 | </fieldType>
|
---|
695 |
|
---|
696 | <!--
|
---|
697 | Example of using PathHierarchyTokenizerFactory at index time, so
|
---|
698 | queries for paths match documents at that path, or in descendent paths
|
---|
699 | -->
|
---|
700 | <fieldType name="descendent_path" class="solr.TextField">
|
---|
701 | <analyzer type="index">
|
---|
702 | <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
---|
703 | </analyzer>
|
---|
704 | <analyzer type="query">
|
---|
705 | <tokenizer class="solr.KeywordTokenizerFactory" />
|
---|
706 | </analyzer>
|
---|
707 | </fieldType>
|
---|
708 | <!--
|
---|
709 | Example of using PathHierarchyTokenizerFactory at query time, so
|
---|
710 | queries for paths match documents at that path, or in ancestor paths
|
---|
711 | -->
|
---|
712 | <fieldType name="ancestor_path" class="solr.TextField">
|
---|
713 | <analyzer type="index">
|
---|
714 | <tokenizer class="solr.KeywordTokenizerFactory" />
|
---|
715 | </analyzer>
|
---|
716 | <analyzer type="query">
|
---|
717 | <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
---|
718 | </analyzer>
|
---|
719 | </fieldType>
|
---|
720 |
|
---|
721 | <!-- Fields of this type are declared neither stored nor indexed (and multiValued),
|
---|
722 | so any data added to them is ignored outright. -->
|
---|
723 | <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
---|
724 |
|
---|
725 | <!-- This point type indexes the coordinates as separate fields (subFields)
|
---|
726 | If subFieldType is defined, it references a type, and a dynamic field
|
---|
727 | definition is created matching *___<typename>. Alternately, if
|
---|
728 | subFieldSuffix is defined, that is used to create the subFields.
|
---|
729 | Example: if subFieldType="double", then the coordinates would be
|
---|
730 | indexed in fields myloc_0___double,myloc_1___double.
|
---|
731 | Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
---|
732 | in fields myloc_0_d,myloc_1_d
|
---|
733 | The subFields are an implementation detail of the fieldType, and end
|
---|
734 | users normally should not need to know about them.
|
---|
735 | -->
|
---|
736 | <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
---|
737 |
|
---|
738 | <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
---|
739 | <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
---|
740 |
|
---|
741 | <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
|
---|
742 | For more information about this and other Spatial fields new to Solr 4, see:
|
---|
743 | http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
---|
744 | -->
|
---|
745 | <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
---|
746 | geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
|
---|
747 |
|
---|
748 | <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
|
---|
749 | Parameters:
|
---|
750 | defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
|
---|
751 | precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
|
---|
752 | providerClass: Lets you plug in other exchange provider backend:
|
---|
753 | solr.FileExchangeRateProvider is the default and takes one parameter:
|
---|
754 | currencyConfig: name of an xml file holding exchange rates
|
---|
755 | solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
|
---|
756 | ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
|
---|
757 | refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
|
---|
758 | -->
|
---|
759 | <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
|
---|
760 |
|
---|
761 |
|
---|
762 |
|
---|
763 | <!-- some examples for different languages (generally ordered by ISO code) -->
|
---|
764 |
|
---|
765 | <!-- Arabic -->
|
---|
766 | <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
|
---|
767 | <analyzer>
|
---|
768 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
769 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
770 | <!-- for any non-arabic -->
|
---|
771 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
772 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
|
---|
773 | <!-- normalizes ﻯ to ﻱ, etc -->
|
---|
774 | <filter class="solr.ArabicNormalizationFilterFactory"/>
|
---|
775 | <filter class="solr.ArabicStemFilterFactory"/>
|
---|
776 | </analyzer>
|
---|
777 | </fieldType>
|
---|
778 |
|
---|
779 | <!-- Bulgarian -->
|
---|
780 | <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
|
---|
781 | <analyzer>
|
---|
782 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
783 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
784 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
785 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
|
---|
786 | <filter class="solr.BulgarianStemFilterFactory"/>
|
---|
787 | </analyzer>
|
---|
788 | </fieldType>
|
---|
789 |
|
---|
790 | <!-- Catalan -->
|
---|
791 | <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
|
---|
792 | <analyzer>
|
---|
793 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
794 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
795 | <!-- removes l', etc -->
|
---|
796 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
|
---|
797 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
798 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
|
---|
799 | <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
|
---|
800 | </analyzer>
|
---|
801 | </fieldType>
|
---|
802 |
|
---|
803 | <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
|
---|
804 | <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
|
---|
805 | <analyzer>
|
---|
806 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
807 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
808 | <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
|
---|
809 | <filter class="solr.CJKWidthFilterFactory"/>
|
---|
810 | <!-- for any non-CJK -->
|
---|
811 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
812 | <filter class="solr.CJKBigramFilterFactory"/>
|
---|
813 | </analyzer>
|
---|
814 | </fieldType>
|
---|
815 |
|
---|
816 | <!-- Kurdish -->
|
---|
817 | <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
|
---|
818 | <analyzer>
|
---|
819 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
820 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
821 | <filter class="solr.SoraniNormalizationFilterFactory"/>
|
---|
822 | <!-- for any latin text -->
|
---|
823 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
824 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
|
---|
825 | <filter class="solr.SoraniStemFilterFactory"/>
|
---|
826 | </analyzer>
|
---|
827 | </fieldType>
|
---|
828 |
|
---|
829 | <!-- Czech -->
|
---|
830 | <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
|
---|
831 | <analyzer>
|
---|
832 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
833 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
834 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
835 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
|
---|
836 | <filter class="solr.CzechStemFilterFactory"/>
|
---|
837 | </analyzer>
|
---|
838 | </fieldType>
|
---|
839 |
|
---|
840 | <!-- Danish -->
|
---|
841 | <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
|
---|
842 | <analyzer>
|
---|
843 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
844 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
845 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
846 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
|
---|
847 | <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
|
---|
848 | </analyzer>
|
---|
849 | </fieldType>
|
---|
850 |
|
---|
851 | <!-- German -->
|
---|
852 | <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
|
---|
853 | <analyzer>
|
---|
854 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
855 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
856 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
857 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
|
---|
858 | <filter class="solr.GermanNormalizationFilterFactory"/>
|
---|
859 | <filter class="solr.GermanLightStemFilterFactory"/>
|
---|
860 | <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
|
---|
861 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
|
---|
862 | </analyzer>
|
---|
863 | </fieldType>
|
---|
864 |
|
---|
865 | <!-- Greek -->
|
---|
866 | <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
|
---|
867 | <analyzer>
|
---|
868 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
869 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
870 | <!-- greek specific lowercase for sigma -->
|
---|
871 | <filter class="solr.GreekLowerCaseFilterFactory"/>
|
---|
872 | <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
|
---|
873 | <filter class="solr.GreekStemFilterFactory"/>
|
---|
874 | </analyzer>
|
---|
875 | </fieldType>
|
---|
876 |
|
---|
877 | <!-- Spanish -->
|
---|
878 | <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
|
---|
879 | <analyzer>
|
---|
880 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
881 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
882 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
883 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
|
---|
884 | <filter class="solr.SpanishLightStemFilterFactory"/>
|
---|
885 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
|
---|
886 | </analyzer>
|
---|
887 | </fieldType>
|
---|
888 |
|
---|
889 | <!-- Basque -->
|
---|
890 | <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
|
---|
891 | <analyzer>
|
---|
892 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
893 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
894 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
895 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
|
---|
896 | <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
|
---|
897 | </analyzer>
|
---|
898 | </fieldType>
|
---|
899 |
|
---|
900 | <!-- Persian -->
|
---|
901 | <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
|
---|
902 | <analyzer>
|
---|
903 | <!-- HTML is stripped first; the PersianCharFilterFactory that follows is for ZWNJ -->
|
---|
904 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
905 | <charFilter class="solr.PersianCharFilterFactory"/>
|
---|
906 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
907 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
908 | <filter class="solr.ArabicNormalizationFilterFactory"/>
|
---|
909 | <filter class="solr.PersianNormalizationFilterFactory"/>
|
---|
910 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
|
---|
911 | </analyzer>
|
---|
912 | </fieldType>
|
---|
913 |
|
---|
914 | <!-- Finnish -->
|
---|
915 | <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
|
---|
916 | <analyzer>
|
---|
917 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
918 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
919 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
920 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
|
---|
921 | <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
|
---|
922 | <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
|
---|
923 | </analyzer>
|
---|
924 | </fieldType>
|
---|
925 |
|
---|
926 | <!-- French -->
|
---|
927 | <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
|
---|
928 | <analyzer>
|
---|
929 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
930 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
931 | <!-- removes l', etc -->
|
---|
932 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
|
---|
933 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
934 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
|
---|
935 | <filter class="solr.FrenchLightStemFilterFactory"/>
|
---|
936 | <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
|
---|
937 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
|
---|
938 | </analyzer>
|
---|
939 | </fieldType>
|
---|
940 |
|
---|
941 | <!-- Irish -->
|
---|
942 | <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
|
---|
943 | <analyzer>
|
---|
944 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
945 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
946 | <!-- removes d', etc -->
|
---|
947 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
|
---|
948 | <!-- removes n-, etc. NOTE(review): position increments were meant to be disabled for this filter, but no such attribute is set below; confirm intent -->
|
---|
949 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
|
---|
950 | <filter class="solr.IrishLowerCaseFilterFactory"/>
|
---|
951 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
|
---|
952 | <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
|
---|
953 | </analyzer>
|
---|
954 | </fieldType>
|
---|
955 |
|
---|
956 | <!-- Galician -->
|
---|
957 | <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
|
---|
958 | <analyzer>
|
---|
959 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
960 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
961 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
962 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
|
---|
963 | <filter class="solr.GalicianStemFilterFactory"/>
|
---|
964 | <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
|
---|
965 | </analyzer>
|
---|
966 | </fieldType>
|
---|
967 |
|
---|
968 | <!-- Hindi -->
|
---|
969 | <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
|
---|
970 | <analyzer>
|
---|
971 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
972 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
973 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
974 | <!-- normalizes unicode representation -->
|
---|
975 | <filter class="solr.IndicNormalizationFilterFactory"/>
|
---|
976 | <!-- normalizes variation in spelling -->
|
---|
977 | <filter class="solr.HindiNormalizationFilterFactory"/>
|
---|
978 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
|
---|
979 | <filter class="solr.HindiStemFilterFactory"/>
|
---|
980 | </analyzer>
|
---|
981 | </fieldType>
|
---|
982 |
|
---|
983 | <!-- Hungarian -->
|
---|
984 | <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
|
---|
985 | <analyzer>
|
---|
986 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
987 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
988 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
989 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
|
---|
990 | <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
|
---|
991 | <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
|
---|
992 | </analyzer>
|
---|
993 | </fieldType>
|
---|
994 |
|
---|
995 | <!-- Armenian -->
|
---|
996 | <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
|
---|
997 | <analyzer>
|
---|
998 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
999 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1000 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1001 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
|
---|
1002 | <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
|
---|
1003 | </analyzer>
|
---|
1004 | </fieldType>
|
---|
1005 |
|
---|
1006 | <!-- Indonesian -->
|
---|
1007 | <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
|
---|
1008 | <analyzer>
|
---|
1009 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1010 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1011 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1012 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
|
---|
1013 | <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
|
---|
1014 | <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
|
---|
1015 | </analyzer>
|
---|
1016 | </fieldType>
|
---|
1017 |
|
---|
1018 | <!-- Italian -->
|
---|
1019 | <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
|
---|
1020 | <analyzer>
|
---|
1021 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1022 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1023 | <!-- removes l', etc -->
|
---|
1024 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
|
---|
1025 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1026 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
|
---|
1027 | <filter class="solr.ItalianLightStemFilterFactory"/>
|
---|
1028 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
|
---|
1029 | </analyzer>
|
---|
1030 | </fieldType>
|
---|
1031 |
|
---|
1032 | <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
|
---|
1033 |
|
---|
1034 | NOTE: If you want to optimize search for precision, use default operator AND in your query
|
---|
1035 | parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
|
---|
1036 | OR if you would like to optimize for recall (default).
|
---|
1037 | -->
|
---|
1038 | <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
|
---|
1039 | <analyzer>
|
---|
1040 | <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
|
---|
1041 |
|
---|
1042 | Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
|
---|
1043 | is used to segment compounds into its parts and the compound itself is kept as synonym.
|
---|
1044 |
|
---|
1045 | Valid values for attribute mode are:
|
---|
1046 | normal: regular segmentation
|
---|
1047 | search: segmentation useful for search with synonyms compounds (default)
|
---|
1048 | extended: same as search mode, but unigrams unknown words (experimental)
|
---|
1049 |
|
---|
1050 | For some applications it might be good to use search mode for indexing and normal mode for
|
---|
1051 | queries to reduce recall and prevent parts of compounds from being matched and highlighted.
|
---|
1052 | Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
|
---|
1053 |
|
---|
1054 | Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
|
---|
1055 | model with your own entries for segmentation, part-of-speech tags and readings without a need
|
---|
1056 | to specify weights. Notice that user dictionaries have not been subject to extensive testing.
|
---|
1057 |
|
---|
1058 | User dictionary attributes are:
|
---|
1059 | userDictionary: user dictionary filename
|
---|
1060 | userDictionaryEncoding: user dictionary encoding (default is UTF-8)
|
---|
1061 |
|
---|
1062 | See lang/userdict_ja.txt for a sample user dictionary file.
|
---|
1063 |
|
---|
1064 | Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
|
---|
1065 |
|
---|
1066 | See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
|
---|
1067 | -->
|
---|
1068 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1069 | <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
|
---|
1070 | <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
|
---|
1071 | <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
|
---|
1072 | <filter class="solr.JapaneseBaseFormFilterFactory"/>
|
---|
1073 | <!-- Removes tokens with certain part-of-speech tags -->
|
---|
1074 | <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
|
---|
1075 | <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
|
---|
1076 | <filter class="solr.CJKWidthFilterFactory"/>
|
---|
1077 | <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
|
---|
1078 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
|
---|
1079 | <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
|
---|
1080 | <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
|
---|
1081 | <!-- Lower-cases romaji characters -->
|
---|
1082 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1083 | </analyzer>
|
---|
1084 | </fieldType>
|
---|
1085 |
|
---|
1086 | <!-- Latvian (text_lv). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_lv.txt, then the Latvian stem filter. Only one analyzer element is declared for this type. -->
|
---|
1087 | <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
|
---|
1088 | <analyzer>
|
---|
1089 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1090 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1091 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1092 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
|
---|
1093 | <filter class="solr.LatvianStemFilterFactory"/>
|
---|
1094 | </analyzer>
|
---|
1095 | </fieldType>
|
---|
1096 |
|
---|
1097 | <!-- Dutch (text_nl). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_nl.txt (snowball format), the stemmer override dictionary lang/stemdict_nl.txt (matched case-sensitively, and placed ahead of the stemmer), then the Snowball "Dutch" stemmer. -->
|
---|
1098 | <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
|
---|
1099 | <analyzer>
|
---|
1100 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1101 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1102 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1103 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
|
---|
1104 | <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
|
---|
1105 | <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
|
---|
1106 | </analyzer>
|
---|
1107 | </fieldType>
|
---|
1108 |
|
---|
1109 | <!-- Norwegian (text_no). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_no.txt (snowball format), then the Snowball "Norwegian" stemmer. The light and minimal stemmers are kept as commented-out alternatives inside the analyzer. -->
|
---|
1110 | <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
|
---|
1111 | <analyzer>
|
---|
1112 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1113 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1114 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1115 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
|
---|
1116 | <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
|
---|
1117 | <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
|
---|
1118 | <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
|
---|
1119 | <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
|
---|
1120 | </analyzer>
|
---|
1121 | </fieldType>
|
---|
1122 |
|
---|
1123 | <!-- Portuguese (text_pt). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_pt.txt (snowball format), then the Portuguese light stemmer. Three alternative stemmers (minimal, Snowball, full) are kept as commented-out options inside the analyzer; only one stemmer should be active at a time. -->
|
---|
1124 | <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
|
---|
1125 | <analyzer>
|
---|
1126 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1127 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1128 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1129 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
|
---|
1130 | <filter class="solr.PortugueseLightStemFilterFactory"/>
|
---|
1131 | <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
|
---|
1132 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
|
---|
1133 | <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
|
---|
1134 | </analyzer>
|
---|
1135 | </fieldType>
|
---|
1136 |
|
---|
1137 | <!-- Romanian (text_ro). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_ro.txt, then the Snowball "Romanian" stemmer. -->
|
---|
1138 | <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
|
---|
1139 | <analyzer>
|
---|
1140 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1141 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1142 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1143 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
|
---|
1144 | <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
|
---|
1145 | </analyzer>
|
---|
1146 | </fieldType>
|
---|
1147 |
|
---|
1148 | <!-- Russian (text_ru). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_ru.txt (snowball format), then the Snowball "Russian" stemmer. The Russian light stemmer is kept as a commented-out alternative inside the analyzer. -->
|
---|
1149 | <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
|
---|
1150 | <analyzer>
|
---|
1151 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1152 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1153 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1154 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
|
---|
1155 | <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
|
---|
1156 | <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
|
---|
1157 | </analyzer>
|
---|
1158 | </fieldType>
|
---|
1159 | <!-- Russian with morphology (text_ru_morph). Same char filter, tokenizer, lower-casing and stop word list (lang/stopwords_ru.txt, snowball format) as text_ru, but the Snowball stemmer is replaced by the Jmorphy2 stem filter, configured with dict="lang/pymorphy2_dicts". NOTE(review): company.evo.jmorphy2 is not a solr.* class, so presumably its jar must be available on the Solr classpath; confirm against the deployment. -->
|
---|
1160 | <fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100">
|
---|
1161 | <analyzer>
|
---|
1162 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1163 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1164 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1165 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
|
---|
1166 | <filter class="company.evo.jmorphy2.lucene.Jmorphy2StemFilterFactory" dict="lang/pymorphy2_dicts"/>
|
---|
1167 | </analyzer>
|
---|
1168 | </fieldType>
|
---|
1169 |
|
---|
1170 | <!-- Swedish (text_sv). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, case-insensitive stop words from lang/stopwords_sv.txt (snowball format), then the Snowball "Swedish" stemmer. The Swedish light stemmer is kept as a commented-out alternative inside the analyzer. -->
|
---|
1171 | <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
|
---|
1172 | <analyzer>
|
---|
1173 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1174 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1175 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1176 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
|
---|
1177 | <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
|
---|
1178 | <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
|
---|
1179 | </analyzer>
|
---|
1180 | </fieldType>
|
---|
1181 |
|
---|
1182 | <!-- Thai (text_th). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, lower-casing, Thai word filter, then case-insensitive stop words from lang/stopwords_th.txt. No stemmer is configured, and the stop filter runs after the Thai word filter. NOTE(review): ThaiWordFilterFactory was deprecated in later Lucene/Solr 4.x releases in favor of ThaiTokenizerFactory; confirm against the deployed Solr version before upgrading. -->
|
---|
1183 | <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
|
---|
1184 | <analyzer>
|
---|
1185 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1186 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1187 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
1188 | <filter class="solr.ThaiWordFilterFactory"/>
|
---|
1189 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
|
---|
1190 | </analyzer>
|
---|
1191 | </fieldType>
|
---|
1192 |
|
---|
1193 | <!-- Turkish (text_tr). Analysis chain, in order: HTML-stripping char filter, standard tokenizer, the Turkish-specific lower-case filter (used here instead of the generic LowerCaseFilterFactory), stop words from lang/stopwords_tr.txt matched case-sensitively (ignoreCase="false"), then the Snowball "Turkish" stemmer. -->
|
---|
1194 | <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
|
---|
1195 | <analyzer>
|
---|
1196 | <charFilter class="solr.HTMLStripCharFilterFactory"/>
|
---|
1197 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
1198 | <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
---|
1199 | <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
|
---|
1200 | <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
|
---|
1201 | </analyzer>
|
---|
1202 | </fieldType>
|
---|
1203 |
|
---|
1204 | </types>
|
---|
1205 |
|
---|
1206 | <!-- Similarity is the scoring routine for each document vs. a query.
|
---|
1207 | A custom Similarity or SimilarityFactory may be specified here, but
|
---|
1208 | the default is fine for most applications.
|
---|
1209 | For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
|
---|
1210 | -->
|
---|
1211 | <!--
|
---|
1212 | <similarity class="com.example.solr.CustomSimilarityFactory">
|
---|
1213 | <str name="paramkey">param value</str>
|
---|
1214 | </similarity>
|
---|
1215 | -->
|
---|
1216 |
|
---|
1217 | </schema>
|
---|