[29135] | 1 | <?xml version="1.0" encoding="UTF-8" ?>
|
---|
| 2 | <!--
|
---|
| 3 | Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
| 4 | contributor license agreements. See the NOTICE file distributed with
|
---|
| 5 | this work for additional information regarding copyright ownership.
|
---|
| 6 | The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
| 7 | (the "License"); you may not use this file except in compliance with
|
---|
| 8 | the License. You may obtain a copy of the License at
|
---|
| 9 |
|
---|
| 10 | http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 11 |
|
---|
| 12 | Unless required by applicable law or agreed to in writing, software
|
---|
| 13 | distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
| 15 | See the License for the specific language governing permissions and
|
---|
| 16 | limitations under the License.
|
---|
| 17 | -->
|
---|
| 18 |
|
---|
| 19 | <!--
|
---|
| 20 | This is the Solr schema file. This file should be named "schema.xml" and
|
---|
| 21 | should be in the conf directory under the solr home
|
---|
| 22 | (i.e. ./solr/conf/schema.xml by default)
|
---|
| 23 | or located where the classloader for the Solr webapp can find it.
|
---|
| 24 |
|
---|
| 25 | This example schema is the recommended starting point for users.
|
---|
| 26 | It should be kept correct and concise, usable out-of-the-box.
|
---|
| 27 |
|
---|
| 28 | For more information on how to customize this file, please see
|
---|
| 29 | http://wiki.apache.org/solr/SchemaXml
|
---|
| 30 |
|
---|
| 31 | PERFORMANCE NOTE: this schema includes many optional features and should not
|
---|
| 32 | be used for benchmarking. To improve performance one could
|
---|
| 33 | - set stored="false" for all fields possible (esp large fields) when you
|
---|
| 34 | only need to search on the field but don't need to return the original
|
---|
| 35 | value.
|
---|
| 36 | - set indexed="false" if you don't need to search on the field, but only
|
---|
| 37 | return the field as a result of searching on other indexed fields.
|
---|
| 38 | - remove all unneeded copyField statements
|
---|
| 39 | - for best index size and searching performance, set "indexed" to false
|
---|
| 40 | for all general text fields, use copyField to copy them to the
|
---|
| 41 | catchall "text" field, and use that for searching.
|
---|
| 42 | - For maximum indexing performance, use the StreamingUpdateSolrServer
|
---|
| 43 | java client.
|
---|
| 44 | - Remember to run the JVM in server mode, and use a higher logging level
|
---|
| 45 | that avoids logging every request
|
---|
| 46 | -->
|
---|
| 47 |
|
---|
| 48 | <schema name="example" version="1.5">
|
---|
| 49 | <!-- attribute "name" is the name of this schema and is only used for display purposes.
|
---|
| 50 | version="x.y" is Solr's version number for the schema syntax and
|
---|
| 51 | semantics. It should not normally be changed by applications.
|
---|
| 52 |
|
---|
| 53 | 1.0: multiValued attribute did not exist, all fields are multiValued
|
---|
| 54 | by nature
|
---|
| 55 | 1.1: multiValued attribute introduced, false by default
|
---|
| 56 | 1.2: omitTermFreqAndPositions attribute introduced, true by default
|
---|
| 57 | except for text fields.
|
---|
| 58 | 1.3: removed optional field compress feature
|
---|
| 59 | 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
|
---|
| 60 | behavior when a single string produces multiple tokens. Defaults
|
---|
| 61 | to off for version >= 1.4
|
---|
| 62 | 1.5: omitNorms defaults to true for primitive field types
|
---|
| 63 | (int, float, boolean, string...)
|
---|
| 64 | -->
|
---|
| 65 |
|
---|
| 66 | <fields>
|
---|
| 67 | <!-- Valid attributes for fields:
|
---|
| 68 | name: mandatory - the name for the field
|
---|
| 69 | type: mandatory - the name of a field type from the
|
---|
| 70 | <types> fieldType section
|
---|
| 71 | indexed: true if this field should be indexed (searchable or sortable)
|
---|
| 72 | stored: true if this field should be retrievable
|
---|
| 73 | docValues: true if this field should have doc values. Doc values are
|
---|
| 74 | useful for faceting, grouping, sorting and function queries. Although not
|
---|
| 75 | required, doc values will make the index faster to load, more
|
---|
| 76 | NRT-friendly and more memory-efficient. They however come with some
|
---|
| 77 | limitations: they are currently only supported by StrField, UUIDField
|
---|
| 78 | and all Trie*Fields, and depending on the field type, they might
|
---|
| 79 | require the field to be single-valued, be required or have a default
|
---|
| 80 | value (check the documentation of the field type you're interested in
|
---|
| 81 | for more information)
|
---|
| 82 | multiValued: true if this field may contain multiple values per document
|
---|
| 83 | omitNorms: (expert) set to true to omit the norms associated with
|
---|
| 84 | this field (this disables length normalization and index-time
|
---|
| 85 | boosting for the field, and saves some memory). Only full-text
|
---|
| 86 | fields or fields that need an index-time boost need norms.
|
---|
| 87 | Norms are omitted for primitive (non-analyzed) types by default.
|
---|
| 88 | termVectors: [false] set to true to store the term vector for a
|
---|
| 89 | given field.
|
---|
| 90 | When using MoreLikeThis, fields used for similarity should be
|
---|
| 91 | stored for best performance.
|
---|
| 92 | termPositions: Store position information with the term vector.
|
---|
| 93 | This will increase storage costs.
|
---|
| 94 | termOffsets: Store offset information with the term vector. This
|
---|
| 95 | will increase storage costs.
|
---|
| 96 | required: The field is required. It will throw an error if the
|
---|
| 97 | value does not exist
|
---|
| 98 | default: a value that should be used if no value is specified
|
---|
| 99 | when adding a document.
|
---|
| 100 | -->
|
---|
| 101 |
|
---|
| 102 | <!-- field names should consist of alphanumeric or underscore characters only and
|
---|
| 103 | not start with a digit. This is not currently strictly enforced,
|
---|
| 104 | but other field names will not have first class support from all components
|
---|
| 105 | and back compatibility is not guaranteed. Names with both leading and
|
---|
| 106 | trailing underscores (e.g. _version_) are reserved.
|
---|
| 107 | -->
|
---|
| 108 |
|
---|
| 109 | <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
|
---|
| 110 | or Solr won't start. _version_ and update log are required for SolrCloud
|
---|
| 111 | -->
|
---|
| 112 | <field name="_version_" type="long" indexed="true" stored="true"/>
|
---|
| 113 |
|
---|
| 114 | <!-- points to the root document of a block of nested documents. Required for nested
|
---|
| 115 | document support, may be removed otherwise
|
---|
| 116 | -->
|
---|
| 117 | <field name="_root_" type="string" indexed="true" stored="false"/>
|
---|
| 118 |
|
---|
| 119 | <!-- Only remove the "id" field if you have a very good reason to. While not strictly
|
---|
| 120 | required, it is highly recommended. A <uniqueKey> is present in almost all Solr
|
---|
| 121 | installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
|
---|
| 122 | -->
|
---|
| 123 | <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
|
---|
| 124 |
|
---|
| 125 | <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
|
---|
| 126 | <field name="name" type="text_general" indexed="true" stored="true"/>
|
---|
| 127 | <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
|
---|
| 128 | <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 129 | <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 130 | <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
---|
| 131 |
|
---|
| 132 | <field name="weight" type="float" indexed="true" stored="true"/>
|
---|
| 133 | <field name="price" type="float" indexed="true" stored="true"/>
|
---|
| 134 | <field name="popularity" type="int" indexed="true" stored="true" />
|
---|
| 135 | <field name="inStock" type="boolean" indexed="true" stored="true" />
|
---|
| 136 |
|
---|
| 137 | <field name="store" type="location" indexed="true" stored="true"/>
|
---|
| 138 |
|
---|
| 139 | <!-- Common metadata fields, named specifically to match up with
|
---|
| 140 | SolrCell metadata when parsing rich documents such as Word, PDF.
|
---|
| 141 | Some fields are multiValued only because Tika currently may return
|
---|
| 142 | multiple values for them. Some metadata is parsed from the documents,
|
---|
| 143 | but there are some which come from the client context:
|
---|
| 144 | "content_type": From the HTTP headers of incoming stream
|
---|
| 145 | "resourcename": From SolrCell request param resource.name
|
---|
| 146 | -->
|
---|
| 147 | <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 148 | <field name="subject" type="text_general" indexed="true" stored="true"/>
|
---|
| 149 | <field name="description" type="text_general" indexed="true" stored="true"/>
|
---|
| 150 | <field name="comments" type="text_general" indexed="true" stored="true"/>
|
---|
| 151 | <field name="author" type="text_general" indexed="true" stored="true"/>
|
---|
| 152 | <field name="keywords" type="text_general" indexed="true" stored="true"/>
|
---|
| 153 | <field name="category" type="text_general" indexed="true" stored="true"/>
|
---|
| 154 | <field name="resourcename" type="text_general" indexed="true" stored="true"/>
|
---|
| 155 | <field name="url" type="text_general" indexed="true" stored="true"/>
|
---|
| 156 | <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 157 | <field name="last_modified" type="date" indexed="true" stored="true"/>
|
---|
| 158 | <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 159 |
|
---|
| 160 | <!-- Main body of document extracted by SolrCell.
|
---|
| 161 | NOTE: This field is not indexed by default, since it is also copied to "text"
|
---|
| 162 | using copyField below. This is to save space. Use this field for returning and
|
---|
| 163 | highlighting document content. Use the "text" field to search the content. -->
|
---|
| 164 | <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
|
---|
| 165 |
|
---|
| 166 |
|
---|
| 167 | <!-- catchall field, containing all other searchable text fields (implemented
|
---|
| 168 | via copyField further on in this schema) -->
|
---|
| 169 | <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
|
---|
| 170 |
|
---|
| 171 | <!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
---|
| 172 | leading wildcard queries. -->
|
---|
| 173 | <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
|
---|
| 174 |
|
---|
| 175 | <!-- non-tokenized version of manufacturer to make it easier to sort or group
|
---|
| 176 | results by manufacturer. copied from "manu" via copyField -->
|
---|
| 177 | <field name="manu_exact" type="string" indexed="true" stored="false"/>
|
---|
| 178 |
|
---|
| 179 | <field name="payloads" type="payloads" indexed="true" stored="true"/>
|
---|
| 180 |
|
---|
| 181 |
|
---|
| 182 | <!--
|
---|
| 183 | Some fields such as popularity and manu_exact could be modified to
|
---|
| 184 | leverage doc values:
|
---|
| 185 | <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
|
---|
| 186 | <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
|
---|
| 187 | <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
|
---|
| 188 |
|
---|
| 189 |
|
---|
| 190 | Although it would make indexing slightly slower and the index bigger, it
|
---|
| 191 | would also make the index faster to load, more memory-efficient and more
|
---|
| 192 | NRT-friendly.
|
---|
| 193 | -->
|
---|
| 194 |
|
---|
| 195 | <!-- Dynamic field definitions allow using convention over configuration
|
---|
| 196 | for fields via the specification of patterns to match field names.
|
---|
| 197 | EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
---|
| 198 | RESTRICTION: the glob-like pattern in the name attribute must have
|
---|
| 199 | a "*" only at the start or the end. -->
|
---|
| 200 |
|
---|
| 201 | <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
---|
| 202 | <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
|
---|
| 203 | <dynamicField name="*_s" type="string" indexed="true" stored="true" />
|
---|
| 204 | <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
|
---|
| 205 | <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
---|
| 206 | <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
|
---|
| 207 | <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
|
---|
| 208 | <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 209 | <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
|
---|
| 210 | <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
---|
| 211 | <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
|
---|
| 212 | <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
---|
| 213 | <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
|
---|
| 214 | <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
---|
| 215 | <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
|
---|
| 216 |
|
---|
| 217 | <!-- Type used to index the lat and lon components for the "location" FieldType -->
|
---|
| 218 | <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
---|
| 219 |
|
---|
| 220 | <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
---|
| 221 | <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
|
---|
| 222 | <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
|
---|
| 223 |
|
---|
| 224 | <!-- some trie-coded dynamic fields for faster range queries -->
|
---|
| 225 | <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
---|
| 226 | <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
---|
| 227 | <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
---|
| 228 | <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
---|
| 229 | <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
---|
| 230 |
|
---|
| 231 | <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
|
---|
| 232 | <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
|
---|
| 233 |
|
---|
| 234 | <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
---|
| 235 | <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
---|
| 236 |
|
---|
| 237 | <dynamicField name="random_*" type="random" />
|
---|
| 238 |
|
---|
| 239 | <!-- uncomment the following to ignore any fields that don't already match an existing
|
---|
| 240 | field name or dynamic field, rather than reporting them as an error.
|
---|
| 241 | alternately, change the type="ignored" to some other type e.g. "text" if you want
|
---|
| 242 | unknown fields indexed and/or stored by default -->
|
---|
| 243 | <!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
---|
| 244 |
|
---|
| 245 | </fields>
|
---|
| 246 |
|
---|
| 247 |
|
---|
| 248 | <!-- Field to use to determine and enforce document uniqueness.
|
---|
| 249 | Unless this field is marked with required="false", it will be a required field
|
---|
| 250 | -->
|
---|
| 251 | <uniqueKey>id</uniqueKey>
|
---|
| 252 |
|
---|
| 253 | <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
|
---|
| 254 | parsing a query string that isn't explicit about the field. Machine (non-user)
|
---|
| 255 | generated queries are best made explicit, or they can use the "df" request parameter
|
---|
| 256 | which takes precedence over this.
|
---|
| 257 | Note: Un-commenting defaultSearchField will be insufficient if your request handler
|
---|
| 258 | in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
|
---|
| 259 | <defaultSearchField>text</defaultSearchField> -->
|
---|
| 260 |
|
---|
| 261 | <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
|
---|
| 262 | when parsing a query string to determine if a clause of the query should be marked as
|
---|
| 263 | required or optional, assuming the clause isn't already marked by some operator.
|
---|
| 264 | The default is OR, which is generally assumed so it is not a good idea to change it
|
---|
| 265 | globally here. The "q.op" request parameter takes precedence over this.
|
---|
| 266 | <solrQueryParser defaultOperator="OR"/> -->
|
---|
| 267 |
|
---|
| 268 | <!-- copyField commands copy one field to another at the time a document
|
---|
| 269 | is added to the index. It's used either to index the same field differently,
|
---|
| 270 | or to add multiple fields to the same field for easier/faster searching. -->
|
---|
| 271 |
|
---|
| 272 | <copyField source="cat" dest="text"/>
|
---|
| 273 | <copyField source="name" dest="text"/>
|
---|
| 274 | <copyField source="manu" dest="text"/>
|
---|
| 275 | <copyField source="features" dest="text"/>
|
---|
| 276 | <copyField source="includes" dest="text"/>
|
---|
| 277 | <copyField source="manu" dest="manu_exact"/>
|
---|
| 278 |
|
---|
| 279 | <!-- Copy the price into a currency enabled field (default USD) -->
|
---|
| 280 | <copyField source="price" dest="price_c"/>
|
---|
| 281 |
|
---|
| 282 | <!-- Text fields from SolrCell to search by default in our catch-all field -->
|
---|
| 283 | <copyField source="title" dest="text"/>
|
---|
| 284 | <copyField source="author" dest="text"/>
|
---|
| 285 | <copyField source="description" dest="text"/>
|
---|
| 286 | <copyField source="keywords" dest="text"/>
|
---|
| 287 | <copyField source="content" dest="text"/>
|
---|
| 288 | <copyField source="content_type" dest="text"/>
|
---|
| 289 | <copyField source="resourcename" dest="text"/>
|
---|
| 290 | <copyField source="url" dest="text"/>
|
---|
| 291 |
|
---|
| 292 | <!-- Create a string version of author for faceting -->
|
---|
| 293 | <copyField source="author" dest="author_s"/>
|
---|
| 294 |
|
---|
| 295 | <!-- Above, multiple source fields are copied to the [text] field.
|
---|
| 296 | Another way to map multiple source fields to the same
|
---|
| 297 | destination field is to use the dynamic field syntax.
|
---|
| 298 | copyField also supports a maxChars to copy setting. -->
|
---|
| 299 |
|
---|
| 300 | <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
---|
| 301 |
|
---|
| 302 | <!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
---|
| 303 | <!-- <copyField source="name" dest="alphaNameSort"/> -->
|
---|
| 304 |
|
---|
| 305 | <types>
|
---|
| 306 | <!-- field type definitions. The "name" attribute is
|
---|
| 307 | just a label to be used by field definitions. The "class"
|
---|
| 308 | attribute and any other attributes determine the real
|
---|
| 309 | behavior of the fieldType.
|
---|
| 310 | Class names starting with "solr" refer to java classes in a
|
---|
| 311 | standard package such as org.apache.solr.analysis
|
---|
| 312 | -->
|
---|
| 313 |
|
---|
| 314 | <!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
---|
| 315 | It supports doc values but in that case the field needs to be
|
---|
| 316 | single-valued and either required or have a default value.
|
---|
| 317 | -->
|
---|
| 318 | <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
|
---|
| 319 |
|
---|
| 320 | <!-- boolean type: "true" or "false" -->
|
---|
| 321 | <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
---|
| 322 |
|
---|
| 323 | <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
|
---|
| 324 | currently supported on types that are sorted internally as strings
|
---|
| 325 | and on numeric types.
|
---|
| 326 | This includes "string","boolean", and, as of 3.5 (and 4.x),
|
---|
| 327 | int, float, long, date, double, including the "Trie" variants.
|
---|
| 328 | - If sortMissingLast="true", then a sort on this field will cause documents
|
---|
| 329 | without the field to come after documents with the field,
|
---|
| 330 | regardless of the requested sort order (asc or desc).
|
---|
| 331 | - If sortMissingFirst="true", then a sort on this field will cause documents
|
---|
| 332 | without the field to come before documents with the field,
|
---|
| 333 | regardless of the requested sort order.
|
---|
| 334 | - If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
---|
| 335 | then default lucene sorting will be used which places docs without the
|
---|
| 336 | field first in an ascending sort and last in a descending sort.
|
---|
| 337 | -->
|
---|
| 338 |
|
---|
| 339 | <!--
|
---|
| 340 | Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
---|
| 341 |
|
---|
| 342 | These fields support doc values, but they require the field to be
|
---|
| 343 | single-valued and either be required or have a default value.
|
---|
| 344 | -->
|
---|
| 345 | <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 346 | <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 347 | <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 348 | <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 349 |
|
---|
| 350 | <!--
|
---|
| 351 | Numeric field types that index each value at various levels of precision
|
---|
| 352 | to accelerate range queries when the number of values between the range
|
---|
| 353 | endpoints is large. See the javadoc for NumericRangeQuery for internal
|
---|
| 354 | implementation details.
|
---|
| 355 |
|
---|
| 356 | Smaller precisionStep values (specified in bits) will lead to more tokens
|
---|
| 357 | indexed per value, slightly larger index size, and faster range queries.
|
---|
| 358 | A precisionStep of 0 disables indexing at different precision levels.
|
---|
| 359 | -->
|
---|
| 360 | <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
|
---|
| 361 | <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
|
---|
| 362 | <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
---|
| 363 | <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
---|
| 364 |
|
---|
| 365 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
---|
| 366 | is a more restricted form of the canonical representation of dateTime
|
---|
| 367 | http://www.w3.org/TR/xmlschema-2/#dateTime
|
---|
| 368 | The trailing "Z" designates UTC time and is mandatory.
|
---|
| 369 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
---|
| 370 | All other components are mandatory.
|
---|
| 371 |
|
---|
| 372 | Expressions can also be used to denote calculations that should be
|
---|
| 373 | performed relative to "NOW" to determine the value, e.g.:
|
---|
| 374 |
|
---|
| 375 | NOW/HOUR
|
---|
| 376 | ... Round to the start of the current hour
|
---|
| 377 | NOW-1DAY
|
---|
| 378 | ... Exactly 1 day prior to now
|
---|
| 379 | NOW/DAY+6MONTHS+3DAYS
|
---|
| 380 | ... 6 months and 3 days in the future from the start of
|
---|
| 381 | the current day
|
---|
| 382 |
|
---|
| 383 | Consult the DateField javadocs for more information.
|
---|
| 384 |
|
---|
| 385 | Note: For faster range queries, consider the tdate type
|
---|
| 386 | -->
|
---|
| 387 | <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
|
---|
| 388 |
|
---|
| 389 | <!-- A Trie based date field for faster date range queries and date faceting. -->
|
---|
| 390 | <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
|
---|
| 391 |
|
---|
| 392 |
|
---|
| 393 | <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
---|
| 394 | <fieldType name="binary" class="solr.BinaryField"/>
|
---|
| 395 |
|
---|
| 396 | <!--
|
---|
| 397 | Note:
|
---|
| 398 | These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
|
---|
| 399 | Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
|
---|
| 400 |
|
---|
| 401 | Plain numeric field types that store and index the text
|
---|
| 402 | value verbatim (and hence don't correctly support range queries, since the
|
---|
| 403 | lexicographic ordering isn't equal to the numeric ordering)
|
---|
| 404 | -->
|
---|
| 405 | <fieldType name="pint" class="solr.IntField"/>
|
---|
| 406 | <fieldType name="plong" class="solr.LongField"/>
|
---|
| 407 | <fieldType name="pfloat" class="solr.FloatField"/>
|
---|
| 408 | <fieldType name="pdouble" class="solr.DoubleField"/>
|
---|
| 409 | <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
|
---|
| 410 |
|
---|
| 411 | <!-- The "RandomSortField" is not used to store or search any
|
---|
| 412 | data. You can declare fields of this type in your schema
|
---|
| 413 | to generate pseudo-random orderings of your docs for sorting
|
---|
| 414 | or function purposes. The ordering is generated based on the field
|
---|
| 415 | name and the version of the index. As long as the index version
|
---|
| 416 | remains unchanged, and the same field name is reused,
|
---|
| 417 | the ordering of the docs will be consistent.
|
---|
| 418 | If you want different pseudo-random orderings of documents,
|
---|
| 419 | for the same version of the index, use a dynamicField and
|
---|
| 420 | change the field name in the request.
|
---|
| 421 | -->
|
---|
| 422 | <fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
---|
| 423 |
|
---|
| 424 | <!-- solr.TextField allows the specification of custom text analyzers
|
---|
| 425 | specified as a tokenizer and a list of token filters. Different
|
---|
| 426 | analyzers may be specified for indexing and querying.
|
---|
| 427 |
|
---|
| 428 | The optional positionIncrementGap puts space between multiple fields of
|
---|
| 429 | this type on the same document, with the purpose of preventing false phrase
|
---|
| 430 | matching across fields.
|
---|
| 431 |
|
---|
| 432 | For more info on customizing your analyzer chain, please see
|
---|
| 433 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
---|
| 434 | -->
|
---|
| 435 |
|
---|
| 436 | <!-- One can also specify an existing Analyzer class that has a
|
---|
| 437 | default constructor via the class attribute on the analyzer element.
|
---|
| 438 | Example:
|
---|
| 439 | <fieldType name="text_greek" class="solr.TextField">
|
---|
| 440 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
---|
| 441 | </fieldType>
|
---|
| 442 | -->
|
---|
| 443 |
|
---|
| 444 | <!-- A text field that only splits on whitespace for exact matching of words -->
|
---|
| 445 | <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
---|
| 446 | <analyzer>
|
---|
| 447 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 448 | </analyzer>
|
---|
| 449 | </fieldType>
|
---|
| 450 |
|
---|
| 451 | <!-- A general text field that has reasonable, generic
|
---|
| 452 | cross-language defaults: it tokenizes with StandardTokenizer,
|
---|
| 453 | removes stop words from case-insensitive "stopwords.txt"
|
---|
| 454 | (empty by default), and down cases. At query time only, it
|
---|
| 455 | also applies synonyms. -->
|
---|
| 456 | <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
---|
| 457 | <analyzer type="index">
|
---|
| 458 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 459 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
| 460 | <!-- in this example, we will only use synonyms at query time
|
---|
| 461 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 462 | -->
|
---|
| 463 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 464 | </analyzer>
|
---|
| 465 | <analyzer type="query">
|
---|
| 466 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 467 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
| 468 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 469 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 470 | </analyzer>
|
---|
| 471 | </fieldType>
|
---|
| 472 |
|
---|
| 473 | <!-- A text field with defaults appropriate for English: it
|
---|
| 474 | tokenizes with StandardTokenizer, removes English stop words
|
---|
| 475 | (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
|
---|
| 476 | finally applies Porter's stemming. The query time analyzer
|
---|
| 477 | also applies synonyms from synonyms.txt. -->
|
---|
| 478 | <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
---|
| 479 | <analyzer type="index">
|
---|
| 480 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 481 | <!-- in this example, we will only use synonyms at query time
|
---|
| 482 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 483 | -->
|
---|
| 484 | <!-- Case insensitive stop word removal.
|
---|
| 485 | -->
|
---|
| 486 | <filter class="solr.StopFilterFactory"
|
---|
| 487 | ignoreCase="true"
|
---|
| 488 | words="lang/stopwords_en.txt"
|
---|
| 489 | />
|
---|
| 490 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 491 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
| 492 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 493 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
| 494 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 495 | -->
|
---|
| 496 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 497 | </analyzer>
|
---|
| 498 | <analyzer type="query">
|
---|
| 499 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 500 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 501 | <filter class="solr.StopFilterFactory"
|
---|
| 502 | ignoreCase="true"
|
---|
| 503 | words="lang/stopwords_en.txt"
|
---|
| 504 | />
|
---|
| 505 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 506 | <filter class="solr.EnglishPossessiveFilterFactory"/>
|
---|
| 507 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 508 | <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
---|
| 509 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 510 | -->
|
---|
| 511 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 512 | </analyzer>
|
---|
| 513 | </fieldType>
|
---|
| 514 |
|
---|
| 515 | <!-- A text field with defaults appropriate for English, plus
|
---|
| 516 | aggressive word-splitting and autophrase features enabled.
|
---|
| 517 | This field is just like text_en, except it adds
|
---|
| 518 | WordDelimiterFilter to enable splitting and matching of
|
---|
| 519 | words on case-change, alpha numeric boundaries, and
|
---|
| 520 | non-alphanumeric chars. This means certain compound word
|
---|
| 521 | cases will work, for example query "wi fi" will match
|
---|
| 522 | document "WiFi" or "wi-fi".
|
---|
| 523 | -->
|
---|
| 524 | <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
| 525 | <analyzer type="index">
|
---|
| 526 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 527 | <!-- in this example, we will only use synonyms at query time
|
---|
| 528 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 529 | -->
|
---|
| 530 | <!-- Case insensitive stop word removal.
|
---|
| 531 | -->
|
---|
| 532 | <filter class="solr.StopFilterFactory"
|
---|
| 533 | ignoreCase="true"
|
---|
| 534 | words="lang/stopwords_en.txt"
|
---|
| 535 | />
|
---|
| 536 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
---|
| 537 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 538 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 539 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 540 | </analyzer>
|
---|
| 541 | <analyzer type="query">
|
---|
| 542 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 543 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 544 | <filter class="solr.StopFilterFactory"
|
---|
| 545 | ignoreCase="true"
|
---|
| 546 | words="lang/stopwords_en.txt"
|
---|
| 547 | />
|
---|
| 548 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
---|
| 549 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 550 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 551 | <filter class="solr.PorterStemFilterFactory"/>
|
---|
| 552 | </analyzer>
|
---|
| 553 | </fieldType>
|
---|
| 554 |
|
---|
| 555 | <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
---|
| 556 | but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
---|
| 557 | <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
---|
| 558 | <analyzer>
|
---|
| 559 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 560 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
---|
| 561 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
---|
| 562 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
---|
| 563 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 564 | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
---|
| 565 | <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
---|
| 566 | <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
---|
| 567 | possible with WordDelimiterFilter in conjunction with stemming. -->
|
---|
| 568 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
---|
| 569 | </analyzer>
|
---|
| 570 | </fieldType>
|
---|
| 571 |
|
---|
| 572 | <!-- Just like text_general except it reverses the characters of
|
---|
| 573 | each token, to enable more efficient leading wildcard queries. -->
|
---|
| 574 | <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
---|
| 575 | <analyzer type="index">
|
---|
| 576 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 577 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
| 578 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 579 | <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
---|
| 580 | maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
---|
| 581 | </analyzer>
|
---|
| 582 | <analyzer type="query">
|
---|
| 583 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 584 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
---|
| 585 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
---|
| 586 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 587 | </analyzer>
|
---|
| 588 | </fieldType>
|
---|
| 589 |
|
---|
| 590 | <!-- charFilter + WhitespaceTokenizer -->
|
---|
| 591 | <!--
|
---|
| 592 | <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
|
---|
| 593 | <analyzer>
|
---|
| 594 | <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
---|
| 595 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 596 | </analyzer>
|
---|
| 597 | </fieldType>
|
---|
| 598 | -->
|
---|
| 599 |
|
---|
| 600 | <!-- This is an example of using the KeywordTokenizer along
|
---|
| 601 | with various TokenFilterFactories to produce a sortable field
|
---|
| 602 | that does not include some properties of the source text
|
---|
| 603 | -->
|
---|
| 604 | <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
---|
| 605 | <analyzer>
|
---|
| 606 | <!-- KeywordTokenizer does no actual tokenizing, so the entire
|
---|
| 607 | input string is preserved as a single token
|
---|
| 608 | -->
|
---|
| 609 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
| 610 | <!-- The LowerCase TokenFilter does what you expect, which can be
|
---|
| 611 | useful when you want your sorting to be case insensitive
|
---|
| 612 | -->
|
---|
| 613 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
| 614 | <!-- The TrimFilter removes any leading or trailing whitespace -->
|
---|
| 615 | <filter class="solr.TrimFilterFactory" />
|
---|
| 616 | <!-- The PatternReplaceFilter gives you the flexibility to use
|
---|
| 617 | Java Regular expression to replace any sequence of characters
|
---|
| 618 | matching a pattern with an arbitrary replacement string,
|
---|
| 619 | which may include back references to portions of the original
|
---|
| 620 | string matched by the pattern.
|
---|
| 621 |
|
---|
| 622 | See the Java Regular Expression documentation for more
|
---|
| 623 | information on pattern and replacement string syntax.
|
---|
| 624 |
|
---|
| 625 | http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
|
---|
| 626 | -->
|
---|
| 627 | <filter class="solr.PatternReplaceFilterFactory"
|
---|
| 628 | pattern="([^a-z])" replacement="" replace="all"
|
---|
| 629 | />
|
---|
| 630 | </analyzer>
|
---|
| 631 | </fieldType>
|
---|
| 632 |
|
---|
| 633 | <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
---|
| 634 | <analyzer>
|
---|
| 635 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 636 | <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <!-- inject="false": each token is replaced by its Double Metaphone code rather than the code being added alongside the original token -->
|
---|
| 637 | </analyzer>
|
---|
| 638 | </fieldType>
|
---|
| 639 |
|
---|
| 640 | <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
---|
| 641 | <analyzer>
|
---|
| 642 | <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
---|
| 643 | <!--
|
---|
| 644 | The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
---|
| 645 | a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
---|
| 646 | Attributes of the DelimitedPayloadTokenFilterFactory :
|
---|
| 647 | "delimiter" - a one character delimiter. Default is | (pipe)
|
---|
| 648 | "encoder" - how to encode the following value into a payload
|
---|
| 649 | float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
---|
| 650 | integer -> o.a.l.a.p.IntegerEncoder
|
---|
| 651 | identity -> o.a.l.a.p.IdentityEncoder
|
---|
| 652 | Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
---|
| 653 | -->
|
---|
| 654 | <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
---|
| 655 | </analyzer>
|
---|
| 656 | </fieldType>
|
---|
| 657 |
|
---|
| 658 | <!-- lowercases the entire field value, keeping it as a single token. -->
|
---|
| 659 | <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
---|
| 660 | <analyzer>
|
---|
| 661 | <tokenizer class="solr.KeywordTokenizerFactory"/>
|
---|
| 662 | <filter class="solr.LowerCaseFilterFactory" />
|
---|
| 663 | </analyzer>
|
---|
| 664 | </fieldType>
|
---|
| 665 |
|
---|
| 666 | <!--
|
---|
| 667 | Example of using PathHierarchyTokenizerFactory at index time, so
|
---|
| 668 | queries for paths match documents at that path, or in descendent paths
|
---|
| 669 | -->
|
---|
| 670 | <fieldType name="descendent_path" class="solr.TextField">
|
---|
| 671 | <analyzer type="index">
|
---|
| 672 | <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
---|
| 673 | </analyzer>
|
---|
| 674 | <analyzer type="query">
|
---|
| 675 | <tokenizer class="solr.KeywordTokenizerFactory" />
|
---|
| 676 | </analyzer>
|
---|
| 677 | </fieldType>
|
---|
| 678 | <!--
|
---|
| 679 | Example of using PathHierarchyTokenizerFactory at query time, so
|
---|
| 680 | queries for paths match documents at that path, or in ancestor paths
|
---|
| 681 | -->
|
---|
| 682 | <fieldType name="ancestor_path" class="solr.TextField">
|
---|
| 683 | <analyzer type="index">
|
---|
| 684 | <tokenizer class="solr.KeywordTokenizerFactory" />
|
---|
| 685 | </analyzer>
|
---|
| 686 | <analyzer type="query">
|
---|
| 687 | <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
---|
| 688 | </analyzer>
|
---|
| 689 | </fieldType>
|
---|
| 690 |
|
---|
| 691 | <!-- since fields of this type are by default not stored or indexed,
|
---|
| 692 | any data added to them will be ignored outright. -->
|
---|
| 693 | <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
---|
| 694 |
|
---|
| 695 | <!-- This point type indexes the coordinates as separate fields (subFields)
|
---|
| 696 | If subFieldType is defined, it references a type, and a dynamic field
|
---|
| 697 | definition is created matching *___<typename>. Alternately, if
|
---|
| 698 | subFieldSuffix is defined, that is used to create the subFields.
|
---|
| 699 | Example: if subFieldType="double", then the coordinates would be
|
---|
| 700 | indexed in fields myloc_0___double,myloc_1___double.
|
---|
| 701 | Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
---|
| 702 | in fields myloc_0_d,myloc_1_d
|
---|
| 703 | The subFields are an implementation detail of the fieldType, and end
|
---|
| 704 | users normally should not need to know about them.
|
---|
| 705 | -->
|
---|
| 706 | <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
---|
| 707 |
|
---|
| 708 | <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
---|
| 709 | <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
---|
| 710 |
|
---|
| 711 | <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
|
---|
| 712 | For more information about this and other Spatial fields new to Solr 4, see:
|
---|
| 713 | http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
---|
| 714 | -->
|
---|
| 715 | <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
---|
| 716 | geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
|
---|
| 717 |
|
---|
| 718 | <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
|
---|
| 719 | Parameters:
|
---|
| 720 | defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
|
---|
| 721 | precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
|
---|
| 722 | providerClass: Lets you plug in other exchange provider backend:
|
---|
| 723 | solr.FileExchangeRateProvider is the default and takes one parameter:
|
---|
| 724 | currencyConfig: name of an xml file holding exchange rates
|
---|
| 725 | solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
|
---|
| 726 | ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
|
---|
| 727 | refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
|
---|
| 728 | -->
|
---|
| 729 | <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
|
---|
| 730 |
|
---|
| 731 |
|
---|
| 732 |
|
---|
| 733 | <!-- some examples for different languages (generally ordered by ISO code) -->
|
---|
| 734 |
|
---|
| 735 | <!-- Arabic -->
|
---|
| 736 | <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
|
---|
| 737 | <analyzer>
|
---|
| 738 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 739 | <!-- for any non-arabic -->
|
---|
| 740 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 741 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
|
---|
| 742 | <!-- normalizes ﻯ to ﻱ, etc -->
|
---|
| 743 | <filter class="solr.ArabicNormalizationFilterFactory"/>
|
---|
| 744 | <filter class="solr.ArabicStemFilterFactory"/>
|
---|
| 745 | </analyzer>
|
---|
| 746 | </fieldType>
|
---|
| 747 |
|
---|
| 748 | <!-- Bulgarian -->
|
---|
| 749 | <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
|
---|
| 750 | <analyzer>
|
---|
| 751 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 752 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 753 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
|
---|
| 754 | <filter class="solr.BulgarianStemFilterFactory"/>
|
---|
| 755 | </analyzer>
|
---|
| 756 | </fieldType>
|
---|
| 757 |
|
---|
| 758 | <!-- Catalan -->
|
---|
| 759 | <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
|
---|
| 760 | <analyzer>
|
---|
| 761 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 762 | <!-- removes l', etc -->
|
---|
| 763 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
|
---|
| 764 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 765 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
|
---|
| 766 | <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
|
---|
| 767 | </analyzer>
|
---|
| 768 | </fieldType>
|
---|
| 769 |
|
---|
| 770 | <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
|
---|
| 771 | <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
|
---|
| 772 | <analyzer>
|
---|
| 773 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 774 | <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
|
---|
| 775 | <filter class="solr.CJKWidthFilterFactory"/>
|
---|
| 776 | <!-- for any non-CJK -->
|
---|
| 777 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 778 | <filter class="solr.CJKBigramFilterFactory"/>
|
---|
| 779 | </analyzer>
|
---|
| 780 | </fieldType>
|
---|
| 781 |
|
---|
| 782 | <!-- Kurdish -->
|
---|
| 783 | <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
|
---|
| 784 | <analyzer>
|
---|
| 785 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 786 | <filter class="solr.SoraniNormalizationFilterFactory"/>
|
---|
| 787 | <!-- for any latin text -->
|
---|
| 788 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 789 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
|
---|
| 790 | <filter class="solr.SoraniStemFilterFactory"/>
|
---|
| 791 | </analyzer>
|
---|
| 792 | </fieldType>
|
---|
| 793 |
|
---|
| 794 | <!-- Czech -->
|
---|
| 795 | <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
|
---|
| 796 | <analyzer>
|
---|
| 797 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 798 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 799 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
|
---|
| 800 | <filter class="solr.CzechStemFilterFactory"/>
|
---|
| 801 | </analyzer>
|
---|
| 802 | </fieldType>
|
---|
| 803 |
|
---|
| 804 | <!-- Danish -->
|
---|
| 805 | <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
|
---|
| 806 | <analyzer>
|
---|
| 807 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 808 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 809 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
|
---|
| 810 | <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
|
---|
| 811 | </analyzer>
|
---|
| 812 | </fieldType>
|
---|
| 813 |
|
---|
| 814 | <!-- German -->
|
---|
| 815 | <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
|
---|
| 816 | <analyzer>
|
---|
| 817 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 818 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 819 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
|
---|
| 820 | <filter class="solr.GermanNormalizationFilterFactory"/>
|
---|
| 821 | <filter class="solr.GermanLightStemFilterFactory"/>
|
---|
| 822 | <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
|
---|
| 823 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
|
---|
| 824 | </analyzer>
|
---|
| 825 | </fieldType>
|
---|
| 826 |
|
---|
| 827 | <!-- Greek -->
|
---|
| 828 | <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
|
---|
| 829 | <analyzer>
|
---|
| 830 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 831 | <!-- greek specific lowercase for sigma -->
|
---|
| 832 | <filter class="solr.GreekLowerCaseFilterFactory"/>
|
---|
| 833 | <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
|
---|
| 834 | <filter class="solr.GreekStemFilterFactory"/>
|
---|
| 835 | </analyzer>
|
---|
| 836 | </fieldType>
|
---|
| 837 |
|
---|
| 838 | <!-- Spanish -->
|
---|
| 839 | <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
|
---|
| 840 | <analyzer>
|
---|
| 841 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 842 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 843 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
|
---|
| 844 | <filter class="solr.SpanishLightStemFilterFactory"/>
|
---|
| 845 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
|
---|
| 846 | </analyzer>
|
---|
| 847 | </fieldType>
|
---|
| 848 |
|
---|
| 849 | <!-- Basque -->
|
---|
| 850 | <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
|
---|
| 851 | <analyzer>
|
---|
| 852 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 853 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 854 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
|
---|
| 855 | <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
|
---|
| 856 | </analyzer>
|
---|
| 857 | </fieldType>
|
---|
| 858 |
|
---|
| 859 | <!-- Persian -->
|
---|
| 860 | <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
|
---|
| 861 | <analyzer>
|
---|
| 862 | <!-- for ZWNJ -->
|
---|
| 863 | <charFilter class="solr.PersianCharFilterFactory"/>
|
---|
| 864 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 865 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 866 | <filter class="solr.ArabicNormalizationFilterFactory"/>
|
---|
| 867 | <filter class="solr.PersianNormalizationFilterFactory"/>
|
---|
| 868 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
|
---|
| 869 | </analyzer>
|
---|
| 870 | </fieldType>
|
---|
| 871 |
|
---|
| 872 | <!-- Finnish -->
|
---|
| 873 | <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
|
---|
| 874 | <analyzer>
|
---|
| 875 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 876 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 877 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
|
---|
| 878 | <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
|
---|
| 879 | <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
|
---|
| 880 | </analyzer>
|
---|
| 881 | </fieldType>
|
---|
| 882 |
|
---|
| 883 | <!-- French -->
|
---|
| 884 | <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
|
---|
| 885 | <analyzer>
|
---|
| 886 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 887 | <!-- removes l', etc -->
|
---|
| 888 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
|
---|
| 889 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 890 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
|
---|
| 891 | <filter class="solr.FrenchLightStemFilterFactory"/>
|
---|
| 892 | <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
|
---|
| 893 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
|
---|
| 894 | </analyzer>
|
---|
| 895 | </fieldType>
|
---|
| 896 |
|
---|
| 897 | <!-- Irish -->
|
---|
| 898 | <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
|
---|
| 899 | <analyzer>
|
---|
| 900 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 901 | <!-- removes d', etc -->
|
---|
| 902 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
|
---|
| 903 | <!-- removes n-, etc. position increments is intentionally false! -->
|
---|
| 904 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
|
---|
| 905 | <filter class="solr.IrishLowerCaseFilterFactory"/>
|
---|
| 906 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
|
---|
| 907 | <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
|
---|
| 908 | </analyzer>
|
---|
| 909 | </fieldType>
|
---|
| 910 |
|
---|
| 911 | <!-- Galician -->
|
---|
| 912 | <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
|
---|
| 913 | <analyzer>
|
---|
| 914 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 915 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 916 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
|
---|
| 917 | <filter class="solr.GalicianStemFilterFactory"/>
|
---|
| 918 | <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
|
---|
| 919 | </analyzer>
|
---|
| 920 | </fieldType>
|
---|
| 921 |
|
---|
| 922 | <!-- Hindi -->
|
---|
| 923 | <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
|
---|
| 924 | <analyzer>
|
---|
| 925 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 926 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 927 | <!-- normalizes unicode representation -->
|
---|
| 928 | <filter class="solr.IndicNormalizationFilterFactory"/>
|
---|
| 929 | <!-- normalizes variation in spelling -->
|
---|
| 930 | <filter class="solr.HindiNormalizationFilterFactory"/>
|
---|
| 931 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
|
---|
| 932 | <filter class="solr.HindiStemFilterFactory"/>
|
---|
| 933 | </analyzer>
|
---|
| 934 | </fieldType>
|
---|
| 935 |
|
---|
| 936 | <!-- Hungarian -->
|
---|
| 937 | <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
|
---|
| 938 | <analyzer>
|
---|
| 939 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 940 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 941 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
|
---|
| 942 | <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
|
---|
| 943 | <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
|
---|
| 944 | </analyzer>
|
---|
| 945 | </fieldType>
|
---|
| 946 |
|
---|
| 947 | <!-- Armenian -->
|
---|
| 948 | <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
|
---|
| 949 | <analyzer>
|
---|
| 950 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 951 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 952 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
|
---|
| 953 | <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
|
---|
| 954 | </analyzer>
|
---|
| 955 | </fieldType>
|
---|
| 956 |
|
---|
| 957 | <!-- Indonesian -->
|
---|
| 958 | <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
|
---|
| 959 | <analyzer>
|
---|
| 960 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 961 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 962 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
|
---|
| 963 | <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
|
---|
| 964 | <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
|
---|
| 965 | </analyzer>
|
---|
| 966 | </fieldType>
|
---|
| 967 |
|
---|
| 968 | <!-- Italian -->
|
---|
| 969 | <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
|
---|
| 970 | <analyzer>
|
---|
| 971 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 972 | <!-- removes l', etc -->
|
---|
| 973 | <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
|
---|
| 974 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 975 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
|
---|
| 976 | <filter class="solr.ItalianLightStemFilterFactory"/>
|
---|
| 977 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
|
---|
| 978 | </analyzer>
|
---|
| 979 | </fieldType>
|
---|
| 980 |
|
---|
| 981 | <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
|
---|
| 982 |
|
---|
| 983 | NOTE: If you want to optimize search for precision, use default operator AND in your query
|
---|
| 984 | parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
|
---|
| 985 | OR if you would like to optimize for recall (default).
|
---|
| 986 | -->
|
---|
| 987 | <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
|
---|
| 988 | <analyzer>
|
---|
| 989 | <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
|
---|
| 990 |
|
---|
| 991 | Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
|
---|
| 992 | is used to segment compounds into their parts and the compound itself is kept as a synonym.
|
---|
| 993 |
|
---|
| 994 | Valid values for attribute mode are:
|
---|
| 995 | normal: regular segmentation
|
---|
| 996 | search: segmentation useful for search, with compounds kept as synonyms (default)
|
---|
| 997 | extended: same as search mode, but unigrams unknown words (experimental)
|
---|
| 998 |
|
---|
| 999 | For some applications it might be good to use search mode for indexing and normal mode for
|
---|
| 1000 | queries to reduce recall and prevent parts of compounds from being matched and highlighted.
|
---|
| 1001 | Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
|
---|
| 1002 |
|
---|
| 1003 | Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
|
---|
| 1004 | model with your own entries for segmentation, part-of-speech tags and readings without a need
|
---|
| 1005 | to specify weights. Notice that user dictionaries have not been subject to extensive testing.
|
---|
| 1006 |
|
---|
| 1007 | User dictionary attributes are:
|
---|
| 1008 | userDictionary: user dictionary filename
|
---|
| 1009 | userDictionaryEncoding: user dictionary encoding (default is UTF-8)
|
---|
| 1010 |
|
---|
| 1011 | See lang/userdict_ja.txt for a sample user dictionary file.
|
---|
| 1012 |
|
---|
| 1013 | Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
|
---|
| 1014 |
|
---|
| 1015 | See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
|
---|
| 1016 | -->
|
---|
| 1017 | <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
|
---|
| 1018 | <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
|
---|
| 1019 | <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
|
---|
| 1020 | <filter class="solr.JapaneseBaseFormFilterFactory"/>
|
---|
| 1021 | <!-- Removes tokens with certain part-of-speech tags -->
|
---|
| 1022 | <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
|
---|
| 1023 | <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
|
---|
| 1024 | <filter class="solr.CJKWidthFilterFactory"/>
|
---|
| 1025 | <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
|
---|
| 1026 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
|
---|
| 1027 | <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
|
---|
| 1028 | <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
|
---|
| 1029 | <!-- Lower-cases romaji characters -->
|
---|
| 1030 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1031 | </analyzer>
|
---|
| 1032 | </fieldType>
|
---|
| 1033 |
|
---|
| 1034 | <!-- Latvian: standard tokenizer, lower-casing, stop-word removal (lang/stopwords_lv.txt), then LatvianStemFilterFactory -->
|
---|
| 1035 | <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1036 | <analyzer>
|
---|
| 1037 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1038 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1039 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
|
---|
| 1040 | <filter class="solr.LatvianStemFilterFactory"/>
|
---|
| 1041 | </analyzer>
|
---|
| 1042 | </fieldType>
|
---|
| 1043 |
|
---|
| 1044 | <!-- Dutch: standard tokenizer, lower-casing, Snowball-format stop words (lang/stopwords_nl.txt), then the stemmer-override dictionary (lang/stemdict_nl.txt, case-sensitive) listed ahead of the Snowball Dutch stemmer -->
|
---|
| 1045 | <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1046 | <analyzer>
|
---|
| 1047 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1048 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1049 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
|
---|
| 1050 | <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
|
---|
| 1051 | <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
|
---|
| 1052 | </analyzer>
|
---|
| 1053 | </fieldType>
|
---|
| 1054 |
|
---|
| 1055 | <!-- Norwegian: standard tokenizer, lower-casing, Snowball-format stop words (lang/stopwords_no.txt), then the Snowball Norwegian stemmer; alternative stemmers are listed as commented-out filters inside the analyzer -->
|
---|
| 1056 | <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1057 | <analyzer>
|
---|
| 1058 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1059 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1060 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
|
---|
| 1061 | <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
|
---|
| 1062 | <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
|
---|
| 1063 | <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
|
---|
| 1064 | <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
|
---|
| 1065 | </analyzer>
|
---|
| 1066 | </fieldType>
|
---|
| 1067 |
|
---|
| 1068 | <!-- Portuguese: standard tokenizer, lower-casing, Snowball-format stop words (lang/stopwords_pt.txt), then PortugueseLightStemFilterFactory; less and more aggressive stemmers are listed as commented-out filters inside the analyzer -->
|
---|
| 1069 | <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1070 | <analyzer>
|
---|
| 1071 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1072 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1073 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
|
---|
| 1074 | <filter class="solr.PortugueseLightStemFilterFactory"/>
|
---|
| 1075 | <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
|
---|
| 1076 | <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
|
---|
| 1077 | <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
|
---|
| 1078 | </analyzer>
|
---|
| 1079 | </fieldType>
|
---|
| 1080 |
|
---|
| 1081 | <!-- Romanian: standard tokenizer, lower-casing, stop-word removal (lang/stopwords_ro.txt), then the Snowball Romanian stemmer -->
|
---|
| 1082 | <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1083 | <analyzer>
|
---|
| 1084 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1085 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1086 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
|
---|
| 1087 | <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
|
---|
| 1088 | </analyzer>
|
---|
| 1089 | </fieldType>
|
---|
| 1090 |
|
---|
| 1091 | <!-- Russian: standard tokenizer, lower-casing, Snowball-format stop words (lang/stopwords_ru.txt), then the Snowball Russian stemmer; a lighter stemmer is listed as a commented-out filter inside the analyzer -->
|
---|
| 1092 | <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1093 | <analyzer>
|
---|
| 1094 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1095 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1096 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
|
---|
| 1097 | <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
|
---|
| 1098 | <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
|
---|
| 1099 | </analyzer>
|
---|
| 1100 | </fieldType>
|
---|
[29985] | 1101 | <!-- Russian with morphology: standard tokenizer, lower-casing and Snowball-format stop words (lang/stopwords_ru.txt), followed by org.apache.lucene.morphology.russian.RussianFilterFactory. NOTE(review): this filter is not a solr.* class, so it presumably needs an additional jar on the Solr classpath; confirm. -->
|
---|
| 1102 | <fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1103 | <analyzer>
|
---|
| 1104 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1105 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1106 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
|
---|
| 1107 | <filter class="org.apache.lucene.morphology.russian.RussianFilterFactory"/>
|
---|
| 1108 | </analyzer>
|
---|
| 1109 | </fieldType>
|
---|
| 1110 |
|
---|
[29135] | 1111 | <!-- Swedish: standard tokenizer, lower-casing, Snowball-format stop words (lang/stopwords_sv.txt), then the Snowball Swedish stemmer; a lighter stemmer is listed as a commented-out filter inside the analyzer -->
|
---|
| 1112 | <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1113 | <analyzer>
|
---|
| 1114 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1115 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1116 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
|
---|
| 1117 | <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
|
---|
| 1118 | <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
|
---|
| 1119 | </analyzer>
|
---|
| 1120 | </fieldType>
|
---|
| 1121 |
|
---|
| 1122 | <!-- Thai: standard tokenizer and lower-casing, then ThaiWordFilterFactory, then stop-word removal (lang/stopwords_th.txt). NOTE(review): ThaiWordFilterFactory was deprecated in later Solr releases in favour of ThaiTokenizerFactory; confirm against the deployed Solr version before upgrading. -->
|
---|
| 1123 | <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1124 | <analyzer>
|
---|
| 1125 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1126 | <filter class="solr.LowerCaseFilterFactory"/>
|
---|
| 1127 | <filter class="solr.ThaiWordFilterFactory"/>
|
---|
| 1128 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
|
---|
| 1129 | </analyzer>
|
---|
| 1130 | </fieldType>
|
---|
| 1131 |
|
---|
| 1132 | <!-- Turkish: standard tokenizer, TurkishLowerCaseFilterFactory in place of the generic lower-case filter, stop-word removal (lang/stopwords_tr.txt) with ignoreCase="false" (presumably because tokens are already lower-cased by the preceding filter), then the Snowball Turkish stemmer -->
|
---|
| 1133 | <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
|
---|
| 1134 | <analyzer>
|
---|
| 1135 | <tokenizer class="solr.StandardTokenizerFactory"/>
|
---|
| 1136 | <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
---|
| 1137 | <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
|
---|
| 1138 | <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
|
---|
| 1139 | </analyzer>
|
---|
| 1140 | </fieldType>
|
---|
| 1141 |
|
---|
| 1142 | </types>
|
---|
| 1143 |
|
---|
| 1144 | <!-- Similarity is the scoring routine for each document vs. a query.
|
---|
| 1145 | A custom Similarity or SimilarityFactory may be specified here, but
|
---|
| 1146 | the default is fine for most applications.
|
---|
| 1147 | For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
|
---|
| 1148 | -->
|
---|
| 1149 | <!--
|
---|
| 1150 | <similarity class="com.example.solr.CustomSimilarityFactory">
|
---|
| 1151 | <str name="paramkey">param value</str>
|
---|
| 1152 | </similarity>
|
---|
| 1153 | -->
|
---|
| 1154 |
|
---|
| 1155 | </schema>
|
---|