1 | package org.greenstone.LuceneWrapper;
|
---|
2 | /**
|
---|
3 | * Licensed to the Apache Software Foundation (ASF) under one or more
|
---|
4 | * contributor license agreements. See the NOTICE file distributed with
|
---|
5 | * this work for additional information regarding copyright ownership.
|
---|
6 | * The ASF licenses this file to You under the Apache License, Version 2.0
|
---|
7 | * (the "License"); you may not use this file except in compliance with
|
---|
8 | * the License. You may obtain a copy of the License at
|
---|
9 | *
|
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
11 | *
|
---|
12 | * Unless required by applicable law or agreed to in writing, software
|
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
15 | * See the License for the specific language governing permissions and
|
---|
16 | * limitations under the License.
|
---|
17 | */
|
---|
18 |
|
---|
19 | import org.apache.lucene.analysis.Analyzer;
|
---|
20 | import org.apache.lucene.document.Document;
|
---|
21 | import org.apache.lucene.store.Directory;
|
---|
22 | import org.apache.lucene.store.FSDirectory;
|
---|
23 | import org.apache.lucene.store.LockObtainFailedException;
|
---|
24 |
|
---|
25 | import org.apache.lucene.analysis.Analyzer;
|
---|
26 | import org.apache.lucene.document.Document;
|
---|
27 | import org.apache.lucene.index.IndexReader;
|
---|
28 | import org.apache.lucene.index.IndexWriter;
|
---|
29 | import org.apache.lucene.index.Term;
|
---|
30 | import org.apache.lucene.index.TermPositions;
|
---|
31 | import org.apache.lucene.search.Searcher;
|
---|
32 | import org.apache.lucene.search.IndexSearcher;
|
---|
33 |
|
---|
34 | import org.apache.lucene.index.LogMergePolicy;
|
---|
35 | import org.apache.lucene.index.CorruptIndexException;
|
---|
36 | import org.apache.lucene.index.StaleReaderException;
|
---|
37 | import org.apache.lucene.index.SerialMergeScheduler;
|
---|
38 |
|
---|
39 | import java.io.File;
|
---|
40 | import java.io.IOException;
|
---|
41 | import java.io.PrintStream;
|
---|
42 |
|
---|
43 | /**
|
---|
44 | * <p>[Note that as of <b>2.1</b>, all but one of the
|
---|
45 | * methods in this class are available via {@link
|
---|
46 | * IndexWriter}. The one method that is not available is
|
---|
47 | * {@link #deleteDocument(int)}.]</p>
|
---|
48 | *
|
---|
49 | * A class to modify an index, i.e. to delete and add documents. This
|
---|
50 | * class hides {@link IndexReader} and {@link IndexWriter} so that you
|
---|
51 | * do not need to care about implementation details such as that adding
|
---|
52 | * documents is done via IndexWriter and deletion is done via IndexReader.
|
---|
53 | *
|
---|
54 | * <p>Note that you cannot create more than one <code>IndexModifier</code> object
|
---|
55 | * on the same directory at the same time.
|
---|
56 | *
|
---|
57 | * <p>Example usage:
|
---|
58 | *
|
---|
59 | <!-- ======================================================== -->
|
---|
60 | <!-- = Java Sourcecode to HTML automatically converted code = -->
|
---|
61 | <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard [email protected] = -->
|
---|
62 | <!-- = Further information: http://www.java2html.de = -->
|
---|
63 | <div align="left" class="java">
|
---|
64 | <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
|
---|
65 | <tr>
|
---|
66 | <!-- start source code -->
|
---|
67 | <td nowrap="nowrap" valign="top" align="left">
|
---|
68 | <code>
|
---|
69 | <font color="#ffffff"> </font><font color="#000000">Analyzer analyzer = </font><font color="#7f0055"><b>new </b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
---|
70 | <font color="#ffffff"> </font><font color="#3f7f5f">// create an index in /tmp/index, overwriting an existing one:</font><br/>
|
---|
71 | <font color="#ffffff"> </font><font color="#000000">IndexModifier indexModifier = </font><font color="#7f0055"><b>new </b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">"/tmp/index"</font><font color="#000000">, analyzer, </font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
---|
72 | <font color="#ffffff"> </font><font color="#000000">Document doc = </font><font color="#7f0055"><b>new </b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
---|
73 | <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">, Field.Store.YES, Field.Index.NOT_ANALYZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
---|
74 | <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"body"</font><font color="#000000">, </font><font color="#2a00ff">"a simple test"</font><font color="#000000">, Field.Store.YES, Field.Index.ANALYZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
---|
75 | <font color="#ffffff"> </font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
---|
76 | <font color="#ffffff"> </font><font color="#7f0055"><b>int </b></font><font color="#000000">deleted = indexModifier.deleteDocuments</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
---|
77 | <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">"Deleted " </font><font color="#000000">+ deleted + </font><font color="#2a00ff">" document"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
---|
78 | <font color="#ffffff"> </font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
---|
79 | <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.numDocs</font><font color="#000000">() </font><font color="#000000">+ </font><font color="#2a00ff">" docs in index"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
---|
80 | <font color="#ffffff"> </font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
|
---|
81 |
|
---|
82 | </td>
|
---|
83 | <!-- end source code -->
|
---|
84 | </tr>
|
---|
85 | </table>
|
---|
86 | </div>
|
---|
87 | <!-- = END of automatically generated HTML code = -->
|
---|
88 | <!-- ======================================================== -->
|
---|
89 | *
|
---|
90 | * <p>Not all methods of IndexReader and IndexWriter are offered by this
|
---|
91 | * class. If you need access to additional methods, either use those classes
|
---|
92 | * directly or implement your own class that extends <code>IndexModifier</code>.
|
---|
93 | *
|
---|
94 | * <p>Although an instance of this class can be used from more than one
|
---|
95 | * thread, you will not get the best performance. You might want to use
|
---|
96 | * IndexReader and IndexWriter directly for that (but you will need to
|
---|
97 | * care about synchronization yourself then).
|
---|
98 | *
|
---|
99 | * <p>While you can freely mix calls to add() and delete() using this class,
|
---|
100 | * you should batch your calls for best performance. For example, if you
|
---|
101 | * want to update 20 documents, you should first delete all those documents,
|
---|
102 | * then add all the new documents.
|
---|
103 | *
|
---|
104 | * @deprecated Please use {@link IndexWriter} instead.
|
---|
105 | */
|
---|
106 | public class IndexModifier {
|
---|
107 |
|
---|
108 | protected IndexWriter indexWriter = null;
|
---|
109 | protected IndexReader indexReader = null;
|
---|
110 |
|
---|
111 | protected Directory directory = null;
|
---|
112 | protected Analyzer analyzer = null;
|
---|
113 | protected boolean open = false, closeDir = false;
|
---|
114 |
|
---|
115 | // Lucene defaults:
|
---|
116 | protected PrintStream infoStream = null;
|
---|
117 | protected boolean useCompoundFile = true;
|
---|
118 |
|
---|
119 | protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
|
---|
120 | protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
|
---|
121 | protected int mergeFactor = LogMergePolicy.DEFAULT_MERGE_FACTOR;
|
---|
122 |
|
---|
/**
 * Opens an index for modification on a caller-supplied directory.
 * This constructor leaves <code>closeDir</code> unset, so {@link #close()}
 * does not close the directory.
 *
 * @param directory the index directory
 * @param analyzer the analyzer to use for adding new documents
 * @param create <code>true</code> to create the index or overwrite the existing one;
 *        <code>false</code> to append to the existing index
 * @throws CorruptIndexException if the index is corrupt
 * @throws LockObtainFailedException if another writer has this index open
 *         (<code>write.lock</code> could not be obtained)
 * @throws IOException if there is a low-level IO error
 */
public IndexModifier(Directory directory, Analyzer analyzer, boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    this.init(directory, analyzer, create);
}
|
---|
139 |
|
---|
140 | /**
|
---|
141 | * Open an index with write access.
|
---|
142 | *
|
---|
143 | * @param dirName the index directory
|
---|
144 | * @param analyzer the analyzer to use for adding new documents
|
---|
145 | * @param create <code>true</code> to create the index or overwrite the existing one;
|
---|
146 | * <code>false</code> to append to the existing index
|
---|
147 | * @throws CorruptIndexException if the index is corrupt
|
---|
148 | * @throws LockObtainFailedException if another writer
|
---|
149 | * has this index open (<code>write.lock</code> could not
|
---|
150 | * be obtained)
|
---|
151 | * @throws IOException if there is a low-level IO error
|
---|
152 | */
|
---|
153 | public IndexModifier(String dirName, Analyzer analyzer, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
154 | Directory dir = FSDirectory.open(new File(dirName));
|
---|
155 | this.closeDir = true;
|
---|
156 | init(dir, analyzer, create);
|
---|
157 | }
|
---|
158 |
|
---|
159 | /**
|
---|
160 | * Open an index with write access.
|
---|
161 | *
|
---|
162 | * @param file the index directory
|
---|
163 | * @param analyzer the analyzer to use for adding new documents
|
---|
164 | * @param create <code>true</code> to create the index or overwrite the existing one;
|
---|
165 | * <code>false</code> to append to the existing index
|
---|
166 | * @throws CorruptIndexException if the index is corrupt
|
---|
167 | * @throws LockObtainFailedException if another writer
|
---|
168 | * has this index open (<code>write.lock</code> could not
|
---|
169 | * be obtained)
|
---|
170 | * @throws IOException if there is a low-level IO error
|
---|
171 | */
|
---|
172 | public IndexModifier(File file, Analyzer analyzer, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
173 | Directory dir = FSDirectory.open(file);
|
---|
174 | this.closeDir = true;
|
---|
175 | init(dir, analyzer, create);
|
---|
176 | }
|
---|
177 |
|
---|
178 | /**
|
---|
179 | * Initialize an IndexWriter.
|
---|
180 | * @throws CorruptIndexException if the index is corrupt
|
---|
181 | * @throws LockObtainFailedException if another writer
|
---|
182 | * has this index open (<code>write.lock</code> could not
|
---|
183 | * be obtained)
|
---|
184 | * @throws IOException if there is a low-level IO error
|
---|
185 | */
|
---|
186 | protected void init(Directory directory, Analyzer analyzer, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
187 | this.directory = directory;
|
---|
188 | synchronized(this.directory) {
|
---|
189 | this.analyzer = analyzer;
|
---|
190 | indexWriter = new IndexWriter(directory, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);
|
---|
191 | open = true;
|
---|
192 | }
|
---|
193 | }
|
---|
194 |
|
---|
195 | /**
|
---|
196 | * Throw an IllegalStateException if the index is closed.
|
---|
197 | * @throws IllegalStateException
|
---|
198 | */
|
---|
199 | protected void assureOpen() {
|
---|
200 | if (!open) {
|
---|
201 | throw new IllegalStateException("Index is closed");
|
---|
202 | }
|
---|
203 | }
|
---|
204 |
|
---|
205 | /**
|
---|
206 | * Close the IndexReader and open an IndexWriter.
|
---|
207 | * @throws CorruptIndexException if the index is corrupt
|
---|
208 | * @throws LockObtainFailedException if another writer
|
---|
209 | * has this index open (<code>write.lock</code> could not
|
---|
210 | * be obtained)
|
---|
211 | * @throws IOException if there is a low-level IO error
|
---|
212 | */
|
---|
213 | protected void createIndexWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
214 | if (indexWriter == null) {
|
---|
215 | if (indexReader != null) {
|
---|
216 | indexReader.close();
|
---|
217 | indexReader = null;
|
---|
218 | }
|
---|
219 | indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength));
|
---|
220 | // IndexModifier cannot use ConcurrentMergeScheduler
|
---|
221 | // because it synchronizes on the directory which can
|
---|
222 | // cause deadlock
|
---|
223 | indexWriter.setMergeScheduler(new SerialMergeScheduler());
|
---|
224 | indexWriter.setInfoStream(infoStream);
|
---|
225 | indexWriter.setUseCompoundFile(useCompoundFile);
|
---|
226 | if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
|
---|
227 | indexWriter.setMaxBufferedDocs(maxBufferedDocs);
|
---|
228 | indexWriter.setMergeFactor(mergeFactor);
|
---|
229 | }
|
---|
230 | }
|
---|
231 |
|
---|
232 | /**
|
---|
233 | * Close the IndexWriter and open an IndexReader.
|
---|
234 | * @throws CorruptIndexException if the index is corrupt
|
---|
235 | * @throws IOException if there is a low-level IO error
|
---|
236 | */
|
---|
237 | protected void createIndexReader() throws CorruptIndexException, IOException {
|
---|
238 | if (indexReader == null) {
|
---|
239 | if (indexWriter != null) {
|
---|
240 | indexWriter.close();
|
---|
241 | indexWriter = null;
|
---|
242 | }
|
---|
243 | indexReader = IndexReader.open(directory);
|
---|
244 | }
|
---|
245 | }
|
---|
246 |
|
---|
247 | /**
|
---|
248 | * Make sure all changes are written to disk.
|
---|
249 | * @throws CorruptIndexException if the index is corrupt
|
---|
250 | * @throws LockObtainFailedException if another writer
|
---|
251 | * has this index open (<code>write.lock</code> could not
|
---|
252 | * be obtained)
|
---|
253 | * @throws IOException if there is a low-level IO error
|
---|
254 | */
|
---|
255 | public void flush() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
256 | synchronized(directory) {
|
---|
257 | assureOpen();
|
---|
258 | if (indexWriter != null) {
|
---|
259 | indexWriter.close();
|
---|
260 | indexWriter = null;
|
---|
261 | createIndexWriter();
|
---|
262 | } else {
|
---|
263 | indexReader.close();
|
---|
264 | indexReader = null;
|
---|
265 | createIndexReader();
|
---|
266 | }
|
---|
267 | }
|
---|
268 | }
|
---|
269 |
|
---|
270 | /**
|
---|
271 | * Adds a document to this index, using the provided analyzer instead of the
|
---|
272 | * one specific in the constructor. If the document contains more than
|
---|
273 | * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
---|
274 | * discarded.
|
---|
275 | * @see IndexWriter#addDocument(Document, Analyzer)
|
---|
276 | * @throws IllegalStateException if the index is closed
|
---|
277 | * @throws CorruptIndexException if the index is corrupt
|
---|
278 | * @throws LockObtainFailedException if another writer
|
---|
279 | * has this index open (<code>write.lock</code> could not
|
---|
280 | * be obtained)
|
---|
281 | * @throws IOException if there is a low-level IO error
|
---|
282 | */
|
---|
283 | public void addDocument(Document doc, Analyzer docAnalyzer) throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
284 | synchronized(directory) {
|
---|
285 | assureOpen();
|
---|
286 | createIndexWriter();
|
---|
287 | if (docAnalyzer != null)
|
---|
288 | indexWriter.addDocument(doc, docAnalyzer);
|
---|
289 | else
|
---|
290 | indexWriter.addDocument(doc);
|
---|
291 | }
|
---|
292 | }
|
---|
293 |
|
---|
294 | /**
|
---|
295 | * Adds a document to this index. If the document contains more than
|
---|
296 | * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
---|
297 | * discarded.
|
---|
298 | * @see IndexWriter#addDocument(Document)
|
---|
299 | * @throws IllegalStateException if the index is closed
|
---|
300 | * @throws CorruptIndexException if the index is corrupt
|
---|
301 | * @throws LockObtainFailedException if another writer
|
---|
302 | * has this index open (<code>write.lock</code> could not
|
---|
303 | * be obtained)
|
---|
304 | * @throws IOException if there is a low-level IO error
|
---|
305 | */
|
---|
306 | public void addDocument(Document doc) throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
307 | addDocument(doc, null);
|
---|
308 | }
|
---|
309 |
|
---|
310 | /**
|
---|
311 | * Deletes all documents containing <code>term</code>.
|
---|
312 | * This is useful if one uses a document field to hold a unique ID string for
|
---|
313 | * the document. Then to delete such a document, one merely constructs a
|
---|
314 | * term with the appropriate field and the unique ID string as its text and
|
---|
315 | * passes it to this method. Returns the number of documents deleted.
|
---|
316 | * @return the number of documents deleted
|
---|
317 | * @see IndexReader#deleteDocuments(Term)
|
---|
318 | * @throws IllegalStateException if the index is closed
|
---|
319 | * @throws StaleReaderException if the index has changed
|
---|
320 | * since this reader was opened
|
---|
321 | * @throws CorruptIndexException if the index is corrupt
|
---|
322 | * @throws LockObtainFailedException if another writer
|
---|
323 | * has this index open (<code>write.lock</code> could not
|
---|
324 | * be obtained)
|
---|
325 | * @throws IOException if there is a low-level IO error
|
---|
326 | */
|
---|
327 | public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
|
---|
328 | synchronized(directory) {
|
---|
329 | assureOpen();
|
---|
330 | createIndexReader();
|
---|
331 | return indexReader.deleteDocuments(term);
|
---|
332 | }
|
---|
333 | }
|
---|
334 |
|
---|
335 | /**
|
---|
336 | * Deletes the document numbered <code>docNum</code>.
|
---|
337 | * @see IndexReader#deleteDocument(int)
|
---|
338 | * @throws StaleReaderException if the index has changed
|
---|
339 | * since this reader was opened
|
---|
340 | * @throws CorruptIndexException if the index is corrupt
|
---|
341 | * @throws LockObtainFailedException if another writer
|
---|
342 | * has this index open (<code>write.lock</code> could not
|
---|
343 | * be obtained)
|
---|
344 | * @throws IllegalStateException if the index is closed
|
---|
345 | */
|
---|
346 | public void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
|
---|
347 | synchronized(directory) {
|
---|
348 | assureOpen();
|
---|
349 | createIndexReader();
|
---|
350 | indexReader.deleteDocument(docNum);
|
---|
351 | }
|
---|
352 | }
|
---|
353 |
|
---|
354 |
|
---|
355 | /**
|
---|
356 | * Returns the number of documents currently in this
|
---|
357 | * index. If the writer is currently open, this returns
|
---|
358 | * {@link IndexWriter#docCount()}, else {@link
|
---|
359 | * IndexReader#numDocs()}. But, note that {@link
|
---|
360 | * IndexWriter#docCount()} does not take deletions into
|
---|
361 | * account, unlike {@link IndexReader#numDocs}.
|
---|
362 | * @throws IllegalStateException if the index is closed
|
---|
363 | */
|
---|
364 | public int numDocs() throws java.io.IOException {
|
---|
365 | synchronized(directory) {
|
---|
366 | assureOpen();
|
---|
367 | if (indexWriter != null) {
|
---|
368 | return indexWriter.numDocs();
|
---|
369 | } else {
|
---|
370 | return indexReader.numDocs();
|
---|
371 | }
|
---|
372 | }
|
---|
373 | }
|
---|
374 |
|
---|
375 | /**
|
---|
376 | * Merges all segments together into a single segment, optimizing an index
|
---|
377 | * for search.
|
---|
378 | * @see IndexWriter#optimize()
|
---|
379 | * @throws IllegalStateException if the index is closed
|
---|
380 | * @throws CorruptIndexException if the index is corrupt
|
---|
381 | * @throws LockObtainFailedException if another writer
|
---|
382 | * has this index open (<code>write.lock</code> could not
|
---|
383 | * be obtained)
|
---|
384 | * @throws IOException if there is a low-level IO error
|
---|
385 | */
|
---|
386 | public void optimize() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
387 | synchronized(directory) {
|
---|
388 | assureOpen();
|
---|
389 | createIndexWriter();
|
---|
390 | indexWriter.optimize();
|
---|
391 | }
|
---|
392 | }
|
---|
393 |
|
---|
394 | /**
|
---|
395 | * If non-null, information about merges and a message when
|
---|
396 | * {@link #getMaxFieldLength()} is reached will be printed to this.
|
---|
397 | * <p>Example: <tt>index.setInfoStream(System.err);</tt>
|
---|
398 | * @see IndexWriter#setInfoStream(PrintStream)
|
---|
399 | * @throws IllegalStateException if the index is closed
|
---|
400 | */
|
---|
401 | public void setInfoStream(PrintStream infoStream) throws java.io.IOException {
|
---|
402 | synchronized(directory) {
|
---|
403 | assureOpen();
|
---|
404 | if (indexWriter != null) {
|
---|
405 | indexWriter.setInfoStream(infoStream);
|
---|
406 | }
|
---|
407 | this.infoStream = infoStream;
|
---|
408 | }
|
---|
409 | }
|
---|
410 |
|
---|
411 | /**
|
---|
412 | * @see IndexModifier#setInfoStream(PrintStream)
|
---|
413 | * @throws CorruptIndexException if the index is corrupt
|
---|
414 | * @throws LockObtainFailedException if another writer
|
---|
415 | * has this index open (<code>write.lock</code> could not
|
---|
416 | * be obtained)
|
---|
417 | * @throws IOException if there is a low-level IO error
|
---|
418 | */
|
---|
419 | public PrintStream getInfoStream() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
420 | synchronized(directory) {
|
---|
421 | assureOpen();
|
---|
422 | createIndexWriter();
|
---|
423 | return indexWriter.getInfoStream();
|
---|
424 | }
|
---|
425 | }
|
---|
426 |
|
---|
427 | /**
|
---|
428 | * Setting to turn on usage of a compound file. When on, multiple files
|
---|
429 | * for each segment are merged into a single file once the segment creation
|
---|
430 | * is finished. This is done regardless of what directory is in use.
|
---|
431 | * @see IndexWriter#setUseCompoundFile(boolean)
|
---|
432 | * @throws IllegalStateException if the index is closed
|
---|
433 | */
|
---|
434 | public void setUseCompoundFile(boolean useCompoundFile) {
|
---|
435 | synchronized(directory) {
|
---|
436 | assureOpen();
|
---|
437 | if (indexWriter != null) {
|
---|
438 | indexWriter.setUseCompoundFile(useCompoundFile);
|
---|
439 | }
|
---|
440 | this.useCompoundFile = useCompoundFile;
|
---|
441 | }
|
---|
442 | }
|
---|
443 |
|
---|
444 | /**
|
---|
445 | * @see IndexModifier#setUseCompoundFile(boolean)
|
---|
446 | * @throws CorruptIndexException if the index is corrupt
|
---|
447 | * @throws LockObtainFailedException if another writer
|
---|
448 | * has this index open (<code>write.lock</code> could not
|
---|
449 | * be obtained)
|
---|
450 | * @throws IOException if there is a low-level IO error
|
---|
451 | */
|
---|
452 | public boolean getUseCompoundFile() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
453 | synchronized(directory) {
|
---|
454 | assureOpen();
|
---|
455 | createIndexWriter();
|
---|
456 | return indexWriter.getUseCompoundFile();
|
---|
457 | }
|
---|
458 | }
|
---|
459 |
|
---|
460 | /**
|
---|
461 | * The maximum number of terms that will be indexed for a single field in a
|
---|
462 | * document. This limits the amount of memory required for indexing, so that
|
---|
463 | * collections with very large files will not crash the indexing process by
|
---|
464 | * running out of memory.<p/>
|
---|
465 | * Note that this effectively truncates large documents, excluding from the
|
---|
466 | * index terms that occur further in the document. If you know your source
|
---|
467 | * documents are large, be sure to set this value high enough to accommodate
|
---|
468 | * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
|
---|
469 | * is your memory, but you should anticipate an OutOfMemoryError.<p/>
|
---|
470 | * By default, no more than 10,000 terms will be indexed for a field.
|
---|
471 | * @see IndexWriter#setMaxFieldLength(int)
|
---|
472 | * @throws IllegalStateException if the index is closed
|
---|
473 | */
|
---|
474 | public void setMaxFieldLength(int maxFieldLength) {
|
---|
475 | synchronized(directory) {
|
---|
476 | assureOpen();
|
---|
477 | if (indexWriter != null) {
|
---|
478 | indexWriter.setMaxFieldLength(maxFieldLength);
|
---|
479 | }
|
---|
480 | this.maxFieldLength = maxFieldLength;
|
---|
481 | }
|
---|
482 | }
|
---|
483 |
|
---|
484 | /**
|
---|
485 | * @see IndexModifier#setMaxFieldLength(int)
|
---|
486 | * @throws CorruptIndexException if the index is corrupt
|
---|
487 | * @throws LockObtainFailedException if another writer
|
---|
488 | * has this index open (<code>write.lock</code> could not
|
---|
489 | * be obtained)
|
---|
490 | * @throws IOException if there is a low-level IO error
|
---|
491 | */
|
---|
492 | public int getMaxFieldLength() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
493 | synchronized(directory) {
|
---|
494 | assureOpen();
|
---|
495 | createIndexWriter();
|
---|
496 | return indexWriter.getMaxFieldLength();
|
---|
497 | }
|
---|
498 | }
|
---|
499 |
|
---|
500 | /**
|
---|
501 | * Determines the minimal number of documents required before the buffered
|
---|
502 | * in-memory documents are merging and a new Segment is created.
|
---|
503 | * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
|
---|
504 | * large value gives faster indexing. At the same time, mergeFactor limits
|
---|
505 | * the number of files open in a FSDirectory.
|
---|
506 | *
|
---|
507 | * <p>The default value is 10.
|
---|
508 | *
|
---|
509 | * @see IndexWriter#setMaxBufferedDocs(int)
|
---|
510 | * @throws IllegalStateException if the index is closed
|
---|
511 | * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
|
---|
512 | */
|
---|
513 | public void setMaxBufferedDocs(int maxBufferedDocs) {
|
---|
514 | synchronized(directory) {
|
---|
515 | assureOpen();
|
---|
516 | if (indexWriter != null) {
|
---|
517 | indexWriter.setMaxBufferedDocs(maxBufferedDocs);
|
---|
518 | }
|
---|
519 | this.maxBufferedDocs = maxBufferedDocs;
|
---|
520 | }
|
---|
521 | }
|
---|
522 |
|
---|
523 | /**
|
---|
524 | * @see IndexModifier#setMaxBufferedDocs(int)
|
---|
525 | * @throws CorruptIndexException if the index is corrupt
|
---|
526 | * @throws LockObtainFailedException if another writer
|
---|
527 | * has this index open (<code>write.lock</code> could not
|
---|
528 | * be obtained)
|
---|
529 | * @throws IOException if there is a low-level IO error
|
---|
530 | */
|
---|
531 | public int getMaxBufferedDocs() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
532 | synchronized(directory) {
|
---|
533 | assureOpen();
|
---|
534 | createIndexWriter();
|
---|
535 | return indexWriter.getMaxBufferedDocs();
|
---|
536 | }
|
---|
537 | }
|
---|
538 |
|
---|
539 | /**
|
---|
540 | * Determines how often segment indices are merged by addDocument(). With
|
---|
541 | * smaller values, less RAM is used while indexing, and searches on
|
---|
542 | * unoptimized indices are faster, but indexing speed is slower. With larger
|
---|
543 | * values, more RAM is used during indexing, and while searches on unoptimized
|
---|
544 | * indices are slower, indexing is faster. Thus larger values (> 10) are best
|
---|
545 | * for batch index creation, and smaller values (< 10) for indices that are
|
---|
546 | * interactively maintained.
|
---|
547 | * <p>This must never be less than 2. The default value is 10.
|
---|
548 | *
|
---|
549 | * @see IndexWriter#setMergeFactor(int)
|
---|
550 | * @throws IllegalStateException if the index is closed
|
---|
551 | */
|
---|
552 | public void setMergeFactor(int mergeFactor) {
|
---|
553 | synchronized(directory) {
|
---|
554 | assureOpen();
|
---|
555 | if (indexWriter != null) {
|
---|
556 | indexWriter.setMergeFactor(mergeFactor);
|
---|
557 | }
|
---|
558 | this.mergeFactor = mergeFactor;
|
---|
559 | }
|
---|
560 | }
|
---|
561 |
|
---|
562 | /**
|
---|
563 | * @see IndexModifier#setMergeFactor(int)
|
---|
564 | * @throws CorruptIndexException if the index is corrupt
|
---|
565 | * @throws LockObtainFailedException if another writer
|
---|
566 | * has this index open (<code>write.lock</code> could not
|
---|
567 | * be obtained)
|
---|
568 | * @throws IOException if there is a low-level IO error
|
---|
569 | */
|
---|
570 | public int getMergeFactor() throws CorruptIndexException, LockObtainFailedException, IOException {
|
---|
571 | synchronized(directory) {
|
---|
572 | assureOpen();
|
---|
573 | createIndexWriter();
|
---|
574 | return indexWriter.getMergeFactor();
|
---|
575 | }
|
---|
576 | }
|
---|
577 |
|
---|
578 | /**
|
---|
579 | * Close this index, writing all pending changes to disk.
|
---|
580 | *
|
---|
581 | * @throws IllegalStateException if the index has been closed before already
|
---|
582 | * @throws CorruptIndexException if the index is corrupt
|
---|
583 | * @throws IOException if there is a low-level IO error
|
---|
584 | */
|
---|
585 | public void close() throws CorruptIndexException, IOException {
|
---|
586 | synchronized(directory) {
|
---|
587 | if (!open)
|
---|
588 | throw new IllegalStateException("Index is closed already");
|
---|
589 | if (indexWriter != null) {
|
---|
590 | indexWriter.close();
|
---|
591 | indexWriter = null;
|
---|
592 | } else if (indexReader != null) {
|
---|
593 | indexReader.close();
|
---|
594 | indexReader = null;
|
---|
595 | }
|
---|
596 | open = false;
|
---|
597 | if (closeDir) {
|
---|
598 | directory.close();
|
---|
599 | }
|
---|
600 | closeDir = false;
|
---|
601 | }
|
---|
602 | }
|
---|
603 |
|
---|
604 | public String toString() {
|
---|
605 | return "Index@" + directory;
|
---|
606 | }
|
---|
607 |
|
---|
/*
// used as an example in the javadoc:
public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    // create an index in /tmp/index, overwriting an existing one:
    IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.ANALYZED));
    indexModifier.addDocument(doc);
    int deleted = indexModifier.deleteDocuments(new Term("id", "1"));
    System.out.println("Deleted " + deleted + " document");
    indexModifier.flush();
    System.out.println(indexModifier.numDocs() + " docs in index");
    indexModifier.close();
}*/
|
---|
624 |
|
---|
625 | }
|
---|