root/gs3-extensions/solr/trunk/src/collect/solr-jdbm-demo/etc/conf/schema.xml @ 27850

Revision 27850, 33.9 KB (checked in by davidb, 6 years ago)

config files used by solr

Line 
1<?xml version="1.0" encoding="UTF-8" ?>
2<!--
3 Licensed to the Apache Software Foundation (ASF) under one or more
4 contributor license agreements.  See the NOTICE file distributed with
5 this work for additional information regarding copyright ownership.
6 The ASF licenses this file to You under the Apache License, Version 2.0
7 (the "License"); you may not use this file except in compliance with
8 the License.  You may obtain a copy of the License at
9
10     http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17-->
18
19<!-- 
20 This is the Solr schema file. This file should be named "schema.xml" and
21 should be in the conf directory under the solr home
22 (i.e. ./solr/conf/schema.xml by default)
23 or located where the classloader for the Solr webapp can find it.
24
25 This example schema is the recommended starting point for users.
26 It should be kept correct and concise, usable out-of-the-box.
27
28 For more information, on how to customize this file, please see
29 http://wiki.apache.org/solr/SchemaXml
30
31 PERFORMANCE NOTE: this schema includes many optional features and should not
32 be used for benchmarking.  To improve performance one could
33  - set stored="false" for all fields possible (esp large fields) when you
34    only need to search on the field but don't need to return the original
35    value.
36  - set indexed="false" if you don't need to search on the field, but only
37    return the field as a result of searching on other indexed fields.
38  - remove all unneeded copyField statements
39  - for best index size and searching performance, set "indexed" to false
40    for all general text fields, use copyField to copy them to the
41    catchall "text" field, and use that for searching.
42  - For maximum indexing performance, use the StreamingUpdateSolrServer
43    java client.
44  - Remember to run the JVM in server mode, and use a higher logging level
45    that avoids logging every request
46-->
47
48<schema name="example" version="1.4">
49  <!-- attribute "name" is the name of this schema and is only used for display purposes.
50       Applications should change this to reflect the nature of the search collection.
51       version="1.4" is Solr's version number for the schema syntax and semantics.  It should
52       not normally be changed by applications.
53       1.0: multiValued attribute did not exist, all fields are multiValued by nature
54       1.1: multiValued attribute introduced, false by default
55       1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56       1.3: removed optional field compress feature
57       1.4: default auto-phrase (QueryParser feature) to off
58     -->
59
60  <types>
61    <!-- field type definitions. The "name" attribute is
62       just a label to be used by field definitions.  The "class"
63       attribute and any other attributes determine the real
64       behavior of the fieldType.
65         Class names starting with "solr" are shorthand for java classes in the
66       standard org.apache.solr packages (e.g. the schema and analysis packages).
67    -->
68
69    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
70    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
71
72    <!-- boolean type: "true" or "false" -->
73    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
74    <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
75    <fieldType name="binary" class="solr.BinaryField"/>
76
77    <!-- The optional sortMissingLast and sortMissingFirst attributes are
78         currently supported on types that are sorted internally as strings.
79           This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
80       - If sortMissingLast="true", then a sort on this field will cause documents
81         without the field to come after documents with the field,
82         regardless of the requested sort order (asc or desc).
83       - If sortMissingFirst="true", then a sort on this field will cause documents
84         without the field to come before documents with the field,
85         regardless of the requested sort order.
86       - If sortMissingLast="false" and sortMissingFirst="false" (the default),
87         then default lucene sorting will be used which places docs without the
88         field first in an ascending sort and last in a descending sort.
89    -->   
90
91    <!--
92      Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
93    -->
94    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
95    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
96    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
97    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
98
99    <!--
100     Numeric field types that index each value at various levels of precision
101     to accelerate range queries when the number of values between the range
102     endpoints is large. See the javadoc for NumericRangeQuery for internal
103     implementation details.
104
105     Smaller precisionStep values (specified in bits) will lead to more tokens
106     indexed per value, slightly larger index size, and faster range queries.
107     A precisionStep of 0 disables indexing at different precision levels.
108    -->
109    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
110    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
111    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
112    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
113
114    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
115         is a more restricted form of the canonical representation of dateTime
116         http://www.w3.org/TR/xmlschema-2/#dateTime   
117         The trailing "Z" designates UTC time and is mandatory.
118         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
119         All other components are mandatory.
120
121         Expressions can also be used to denote calculations that should be
122         performed relative to "NOW" to determine the value, ie...
123
124               NOW/HOUR
125                  ... Round to the start of the current hour
126               NOW-1DAY
127                  ... Exactly 1 day prior to now
128               NOW/DAY+6MONTHS+3DAYS
129                  ... 6 months and 3 days in the future from the start of
130                      the current day
131                     
132         Consult the DateField javadocs for more information.
133
134         Note: For faster range queries, consider the tdate type
135      -->
136    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
137
138    <!-- A Trie based date field for faster date range queries and date faceting. -->
139    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
140
141
142    <!--
143      Note:
144      These should only be used for compatibility with existing indexes (created with older Solr versions)
145      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
146
147      Plain numeric field types that store and index the text
148      value verbatim (and hence don't support range queries, since the
149      lexicographic ordering isn't equal to the numeric ordering)
150    -->
151    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
152    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
153    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
154    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
155    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
156
157
158    <!--
159      Note:
160      These should only be used for compatibility with existing indexes (created with older Solr versions)
161      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
162
163      Numeric field types that manipulate the value into
164      a string value that isn't human-readable in its internal form,
165      but with a lexicographic ordering the same as the numeric ordering,
166      so that range queries work correctly.
167    -->
168    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
169    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
170    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
171    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
172
173
174    <!-- The "RandomSortField" is not used to store or search any
175         data.  You can declare fields of this type in your schema
176         to generate pseudo-random orderings of your docs for sorting
177         purposes.  The ordering is generated based on the field name
178         and the version of the index.  As long as the index version
179         remains unchanged, and the same field name is reused,
180         the ordering of the docs will be consistent.
181         If you want different pseudo-random orderings of documents,
182         for the same version of the index, use a dynamicField and
183         change the name
184     -->
185    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
186
187    <!-- solr.TextField allows the specification of custom text analyzers
188         specified as a tokenizer and a list of token filters. Different
189         analyzers may be specified for indexing and querying.
190
191         The optional positionIncrementGap puts space between multiple fields of
192         this type on the same document, with the purpose of preventing false phrase
193         matching across fields.
194
195         For more info on customizing your analyzer chain, please see
196         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
197     -->
198
199    <!-- One can also specify an existing Analyzer class that has a
200         default constructor via the class attribute on the analyzer element
201    <fieldType name="text_greek" class="solr.TextField">
202      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
203    </fieldType>
204    -->
205
206    <!-- A text field that only splits on whitespace for exact matching of words -->
207    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
208      <analyzer>
209        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
210      </analyzer>
211    </fieldType>
212
213    <!-- A general text field that has reasonable, generic
214         cross-language defaults: it tokenizes with StandardTokenizer,
215     removes stop words from case-insensitive "stopwords.txt"
216     (empty by default), and down cases.  At query time only, it
217     also applies synonyms. -->
218    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
219      <analyzer type="index">
220        <tokenizer class="solr.StandardTokenizerFactory"/>
221        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
222        <!-- in this example, we will only use synonyms at query time
223        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
224        -->
225        <filter class="solr.LowerCaseFilterFactory"/>
226      </analyzer>
227      <analyzer type="query">
228        <tokenizer class="solr.StandardTokenizerFactory"/>
229        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
230        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
231        <filter class="solr.LowerCaseFilterFactory"/>
232      </analyzer>
233    </fieldType>
234
235    <!-- A text field with defaults appropriate for English: it tokenizes
236         with StandardTokenizer, removes English stop words (stopwords_en.txt),
237         down cases, applies EnglishPossessiveFilter, protects words from
238         protwords.txt, and finally applies Porter's stemming.  The query
239         time analyzer also applies synonyms from synonyms.txt. -->
240    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
241      <analyzer type="index">
242        <tokenizer class="solr.StandardTokenizerFactory"/>
243        <!-- in this example, we will only use synonyms at query time
244        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
245        -->
246        <!-- Case insensitive stop word removal.
247          add enablePositionIncrements=true in both the index and query
248          analyzers to leave a 'gap' for more accurate phrase queries.
249        -->
250        <filter class="solr.StopFilterFactory"
251                ignoreCase="true"
252                words="stopwords_en.txt"
253                enablePositionIncrements="true"
254                />
255        <filter class="solr.LowerCaseFilterFactory"/>
256        <filter class="solr.EnglishPossessiveFilterFactory"/>
257        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
258        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
259        <filter class="solr.EnglishMinimalStemFilterFactory"/>
260        -->
261        <filter class="solr.PorterStemFilterFactory"/>
262      </analyzer>
263      <analyzer type="query">
264        <tokenizer class="solr.StandardTokenizerFactory"/>
265        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
266        <filter class="solr.StopFilterFactory"
267                ignoreCase="true"
268                words="stopwords_en.txt"
269                enablePositionIncrements="true"
270                />
271        <filter class="solr.LowerCaseFilterFactory"/>
272        <filter class="solr.EnglishPossessiveFilterFactory"/>
273        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
274        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
275        <filter class="solr.EnglishMinimalStemFilterFactory"/>
276        -->
277        <filter class="solr.PorterStemFilterFactory"/>
278      </analyzer>
279    </fieldType>
280
281    <!-- A text field with defaults appropriate for English, plus
282     aggressive word-splitting and autophrase features enabled.
283     This field is just like text_en, except it adds
284     WordDelimiterFilter to enable splitting and matching of
285     words on case-change, alpha numeric boundaries, and
286     non-alphanumeric chars.  This means certain compound word
287     cases will work, for example query "wi fi" will match
288     document "WiFi" or "wi-fi".  However, other cases will still
289     not match, for example if the query is "wifi" and the
290     document is "wi fi" or if the query is "wi-fi" and the
291     document is "wifi".
292        -->
293    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
294      <analyzer type="index">
295        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
296        <!-- in this example, we will only use synonyms at query time
297        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
298        -->
299        <!-- Case insensitive stop word removal.
300          add enablePositionIncrements=true in both the index and query
301          analyzers to leave a 'gap' for more accurate phrase queries.
302        -->
303        <filter class="solr.StopFilterFactory"
304                ignoreCase="true"
305                words="stopwords_en.txt"
306                enablePositionIncrements="true"
307                />
308        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
309        <filter class="solr.LowerCaseFilterFactory"/>
310        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
311        <filter class="solr.PorterStemFilterFactory"/>
312      </analyzer>
313      <analyzer type="query">
314        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
315        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
316        <filter class="solr.StopFilterFactory"
317                ignoreCase="true"
318                words="stopwords_en.txt"
319                enablePositionIncrements="true"
320                />
321        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
322        <filter class="solr.LowerCaseFilterFactory"/>
323        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
324        <filter class="solr.PorterStemFilterFactory"/>
325      </analyzer>
326    </fieldType>
327
328    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
329         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
330    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
331      <analyzer>
332        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
333        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
334        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
335        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
336        <filter class="solr.LowerCaseFilterFactory"/>
337        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
338        <filter class="solr.EnglishMinimalStemFilterFactory"/>
339        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
340             possible with WordDelimiterFilter in conjunction with stemming. -->
341        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
342      </analyzer>
343    </fieldType>
344
345    <!-- Just like text_general except it reverses the characters of
346     each token, to enable more efficient leading wildcard queries. -->
347    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
348      <analyzer type="index">
349        <tokenizer class="solr.StandardTokenizerFactory"/>
350        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
351        <filter class="solr.LowerCaseFilterFactory"/>
352        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
353           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
354      </analyzer>
355      <analyzer type="query">
356        <tokenizer class="solr.StandardTokenizerFactory"/>
357        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
358        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
359        <filter class="solr.LowerCaseFilterFactory"/>
360      </analyzer>
361    </fieldType>
362
363    <!-- charFilter + WhitespaceTokenizer  -->
364    <!--
365    <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
366      <analyzer>
367        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
368        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
369      </analyzer>
370    </fieldType>
371    -->
372
373    <!-- This is an example of using the KeywordTokenizer along
374         with various TokenFilterFactories to produce a sortable field
375         that does not include some properties of the source text
376      -->
377    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
378      <analyzer>
379        <!-- KeywordTokenizer does no actual tokenizing, so the entire
380             input string is preserved as a single token
381          -->
382        <tokenizer class="solr.KeywordTokenizerFactory"/>
383        <!-- The LowerCase TokenFilter does what you expect, which can be
384             useful when you want your sorting to be case insensitive
385          -->
386        <filter class="solr.LowerCaseFilterFactory" />
387        <!-- The TrimFilter removes any leading or trailing whitespace -->
388        <filter class="solr.TrimFilterFactory" />
389        <!-- The PatternReplaceFilter gives you the flexibility to use
390             Java Regular expression to replace any sequence of characters
391             matching a pattern with an arbitrary replacement string,
392             which may include back references to portions of the original
393             string matched by the pattern.
394             
395             See the Java Regular Expression documentation for more
396             information on pattern and replacement string syntax.
397             
398             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
399          -->
400        <filter class="solr.PatternReplaceFilterFactory"
401                pattern="([^a-z])" replacement="" replace="all"
402        />
403      </analyzer>
404    </fieldType>
405   
406    <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
407      <analyzer>
408        <tokenizer class="solr.StandardTokenizerFactory"/>
409        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
410      </analyzer>
411    </fieldType>
412
413    <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
414      <analyzer>
415        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
416        <!--
417        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
418        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
419        Attributes of the DelimitedPayloadTokenFilterFactory :
420         "delimiter" - a one character delimiter. Default is | (pipe)
421         "encoder" - how to encode the following value into a payload
422            float -> org.apache.lucene.analysis.payloads.FloatEncoder,
423            integer -> o.a.l.a.p.IntegerEncoder
424            identity -> o.a.l.a.p.IdentityEncoder
425            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
426         -->
427        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
428      </analyzer>
429    </fieldType>
430
431    <!-- lowercases the entire field value, keeping it as a single token.  -->
432    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
433      <analyzer>
434        <tokenizer class="solr.KeywordTokenizerFactory"/>
435        <filter class="solr.LowerCaseFilterFactory" />
436      </analyzer>
437    </fieldType>
438
439    <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
440      <analyzer>
441        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
442      </analyzer>
443    </fieldType>
444
445    <!-- since fields of this type are by default not stored or indexed,
446         any data added to them will be ignored outright.  -->
447    <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
448
449    <!-- This point type indexes the coordinates as separate fields (subFields)
450      If subFieldType is defined, it references a type, and a dynamic field
451      definition is created matching *___<typename>.  Alternately, if
452      subFieldSuffix is defined, that is used to create the subFields.
453      Example: if subFieldType="double", then the coordinates would be
454        indexed in fields myloc_0___double,myloc_1___double.
455      Example: if subFieldSuffix="_d" then the coordinates would be indexed
456        in fields myloc_0_d,myloc_1_d
457      The subFields are an implementation detail of the fieldType, and end
458      users normally should not need to know about them.
459     -->
460    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
461
462    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
463    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
464
465   <!--
466    A Geohash is a compact representation of a latitude longitude pair in a single field.
467    See http://wiki.apache.org/solr/SpatialSearch
468   -->
469    <fieldType name="geohash" class="solr.GeoHashField"/>
470 </types>
471
472
473 <fields>
474   <!-- Valid attributes for fields:
475     name: mandatory - the name for the field
476     type: mandatory - the name of a previously defined type from the
477       <types> section
478     indexed: true if this field should be indexed (searchable or sortable)
479     stored: true if this field should be retrievable
480     multiValued: true if this field may contain multiple values per document
481     omitNorms: (expert) set to true to omit the norms associated with
482       this field (this disables length normalization and index-time
483       boosting for the field, and saves some memory).  Only full-text
484       fields or fields that need an index-time boost need norms.
485     termVectors: [false] set to true to store the term vector for a
486       given field.
487       When using MoreLikeThis, fields used for similarity should be
488       stored for best performance.
489     termPositions: Store position information with the term vector. 
490       This will increase storage costs.
491     termOffsets: Store offset information with the term vector. This
492       will increase storage costs.
493     default: a value that should be used if no value is specified
494       when adding a document.
495   -->
496
497   <field name="docOID" type="string" indexed="true" stored="true" required="true" />
498
499    <field name="ZZ" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
500    <field name="TX" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
501    <field name="TI" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
502    <field name="SU" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
503    <field name="ORG" type="text_en_splitting" indexed="true" stored="false" multiValued="true" />
504
505<!--
506   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
507   <field name="name" type="text_general" indexed="true" stored="true"/>
508   <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
509   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
510   <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
511   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
512   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
513
514   <field name="weight" type="float" indexed="true" stored="true"/>
515   <field name="price"  type="float" indexed="true" stored="true"/>
516   <field name="popularity" type="int" indexed="true" stored="true" />
517   <field name="inStock" type="boolean" indexed="true" stored="true" />
518-->
519
520   <!--
521   The following store examples are used to demonstrate the various ways one might _CHOOSE_ to
522    implement spatial.  It is highly unlikely that you would ever have ALL of these fields defined.
523    -->
524   <field name="store" type="location" indexed="true" stored="true"/>
525
526   <!-- Common metadata fields, named specifically to match up with
527     SolrCell metadata when parsing rich documents such as Word, PDF.
528     Some fields are multiValued only because Tika currently may return
529     multiple values for them.
530   -->
531<!--
532   <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
533   <field name="subject" type="text_general" indexed="true" stored="true"/>
534   <field name="description" type="text_general" indexed="true" stored="true"/>
535   <field name="comments" type="text_general" indexed="true" stored="true"/>
536   <field name="author" type="text_general" indexed="true" stored="true"/>
537   <field name="keywords" type="text_general" indexed="true" stored="true"/>
538   <field name="category" type="text_general" indexed="true" stored="true"/>
539   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
540   <field name="last_modified" type="date" indexed="true" stored="true"/>
541   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
542-->
543
544
545   <!-- catchall field, containing all other searchable text fields (implemented
546        via copyField further on in this schema)  -->
547   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
548
549   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
550        leading wildcard queries. -->
551   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
552
553   <!-- non-tokenized version of manufacturer to make it easier to sort or group
554        results by manufacturer.  copied from "manu" via copyField -->
555   <field name="manu_exact" type="string" indexed="true" stored="false"/>
556
557   <field name="payloads" type="payloads" indexed="true" stored="true"/>
558
559   <!-- Uncommenting the following will create a "timestamp" field using
560        a default value of "NOW" to indicate when each document was indexed.
561     -->
562   <!--
563   <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
564     -->
565   
566
567   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
568        will be used if the name matches any of the patterns.
569        RESTRICTION: the glob-like pattern in the name attribute must have
570        a "*" only at the start or the end.
571        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
572        Longer patterns will be matched first.  If equal-size patterns
573        both match, the first appearing in the schema will be used.  -->
574   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
575   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
576   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
577   <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/>
578   <dynamicField name="*_txt" type="text_general"    indexed="true"  stored="true" multiValued="true"/>
579   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
580   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
581   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
582
583   <!-- Type used to index the lat and lon components for the "location" FieldType -->
584   <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false"/>
585
586   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
587   <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>
588
589   <!-- some trie-coded dynamic fields for faster range queries -->
590   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
591   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
592   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
593   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
594   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
595
596   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
597
598   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
599   <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
600
601   <dynamicField name="random_*" type="random" />
602<!-- dynamic field for sort/facet fields, which are strings by default, i.e. not tokenised -->
603    <dynamicField name="by*" type="string" indexed="true" stored="false" multiValued="true" />
604   <!-- Uncomment the following to ignore any fields that don't already match an existing
605        field name or dynamic field, rather than reporting them as an error.
606        Alternatively, change the type="ignored" to some other type, e.g. "text", if you want
607        unknown fields indexed and/or stored by default. -->
608   <!--dynamicField name="*" type="ignored" multiValued="true" /-->
609   
610 </fields>
611
612 <!-- Field to use to determine and enforce document uniqueness.
613      Unless this field is marked with required="false", it will be a required field
614   -->
615 <uniqueKey>docOID</uniqueKey>
616
617 <!-- field for the QueryParser to use when an explicit fieldname is absent -->
618 <defaultSearchField>text</defaultSearchField>
619
620 <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
621 <solrQueryParser defaultOperator="OR"/>
622
623  <!-- copyField commands copy one field to another at the time a document
624        is added to the index.  It's used either to index the same field differently,
625        or to add multiple fields to the same field for easier/faster searching.  -->
626
627<!--
628   <copyField source="cat" dest="text"/>
629   <copyField source="name" dest="text"/>
630   <copyField source="manu" dest="text"/>
631   <copyField source="features" dest="text"/>
632   <copyField source="includes" dest="text"/>
633   <copyField source="manu" dest="manu_exact"/>
634-->
635   
636   <!-- In the (commented-out) examples above, multiple source fields are copied to the [text] field.
637      Another way to map multiple source fields to the same
638      destination field is to use the dynamic field syntax.
639      copyField also supports a maxChars setting that limits how many characters are copied.  -->
640       
641   <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
642
643   <!-- copy name to alphaNameSort, a field designed for sorting by name -->
644   <!-- <copyField source="name" dest="alphaNameSort"/> -->
645 
646
647 <!-- Similarity is the scoring routine for each document vs. a query.
648      A custom similarity may be specified here, but the default is fine
649      for most applications.  -->
650 <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
651 <!-- ... OR ...
652      Specify a SimilarityFactory class name implementation
653      allowing parameters to be used.
654 -->
655 <!--
656 <similarity class="com.example.solr.CustomSimilarityFactory">
657   <str name="paramkey">param value</str>
658 </similarity>
659 -->
660
661
662</schema>
Note: See TracBrowser for help on using the browser.