source: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java@ 24725

Last change on this file since 24725 was 24725, checked in by davidb, 13 years ago

Restructuring of Lucene version 2.x and 3.x to make it easier to control which one is used

  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1/** @file GS2LuceneDelete.java
2 *
3 * Provides a wrapper to the document deleting features of Lucene.
4 *
5 * This Java application makes use of the existing Lucene class IndexWriter
6 * to access and make changes to the information stored about documents in a
7 * Lucene database. This is an essential component of the IncrementalBuilder
8 * PERL module, and endeavours to make editing the text and metadata of
9 * documents without having to rebuild the entire collection a reality (in
10 * other words, true incremental/dynamic building).
11 *
12 * A component of the Greenstone digital library software from the New Zealand
13 * Digital Library Project at the University of Waikato, New Zealand.
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * This program is distributed in the hope that it will be useful, but WITHOUT
21 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
23 * more details.
24 *
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, write to the Free Software Foundation, Inc., 675
27 * Mass Ave, Cambridge, MA 02139, USA.
28 *
29 * Copyright (c) 2006 DL Consulting Ltd., New Zealand
30 */
31
32package org.greenstone.LuceneWrapper;
33
34import java.io.IOException;
35import java.io.File;
36//import org.apache.lucene.analysis.standard.StandardAnalyzer;
37import org.apache.lucene.index.IndexWriter;
38import org.apache.lucene.index.Term;
39
40import org.apache.lucene.store.SimpleFSDirectory;
41import org.apache.lucene.index.IndexWriter.MaxFieldLength;
42
43
44/** Contains methods for deleting a document that has previously been indexed
45 * into a Lucene database.
46 * @author John Thompson, DL Consulting Ltd. (unless stated otherwise)
47 */
48public class GS2LuceneDelete
49{
50 /** This is the main entry point to the deletor and is responsible for
51 * parsing the arguments and creating an instance of the deletor class.
52 *
53 * @param args The arguments passed into the application as a string
54 * array
55 * @return An integer describing the exit state of the application
56 * @throws Exception on any fatal error state
57 */
58 static public void main (String args[])
59 throws Exception
60 {
61 // Parse arguments
62 String index_path = "";
63 int node_id = -1;
64
65 for (int i = 0; i < args.length; i += 2)
66 {
67 if (args[i].equals("--index"))
68 {
69 index_path = args[i + 1];
70 }
71 else if (args[i].equals("--nodeid"))
72 {
73 node_id = Integer.parseInt(args[i + 1]);
74 }
75 else
76 {
77 System.out.println("Error! Unknown argument: " + args[i]);
78 GS2LuceneDelete.printUsage();
79 System.exit(0);
80 }
81 }
82
83 // Check arguments
84 if (index_path.equals(""))
85 {
86 System.out.println("Error! Missing index path");
87 GS2LuceneDelete.printUsage();
88 System.exit(0);
89 }
90 if (node_id == -1)
91 {
92 System.out.println("Error! Missing or invalid Node ID");
93 GS2LuceneDelete.printUsage();
94 System.exit(0);
95 }
96
97 // Instantiate deletor, and perform the delete
98 GS2LuceneDelete deletor = new GS2LuceneDelete(index_path);
99 deletor.deleteDocument(node_id);
100 deletor.destroy();
101 deletor = null;
102 }
103
104
105 /** Display program usage message.
106 */
107 static public void printUsage()
108 {
109 System.out.println("usage: GS2LuceneDelete --index <path> --nodeid <int>");
110 System.out.println("");
111 System.out.println("where:");
112 System.out.println(" index - is the full path to the directory containing the directory");
113 System.out.println(" to edit, including the level (ie didx, sidx)");
114 System.out.println(" nodeid - the unique identifier of the document to delete. This is the");
115 System.out.println(" same as the docnum in the GDBM");
116 System.out.println("");
117 }
118
119
120 /** **/
121 private boolean debug = true;
122
123 /** **/
124 private IndexWriter index_writer = null;
125
126
127 /** Constructor which takes the path to the Lucene index to be edited.
128 *
129 * @param index_path The full path to the index directory as a String
130 */
131 public GS2LuceneDelete(String index_path)
132 throws IOException
133 {
134 SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
135 index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
136 MaxFieldLength.UNLIMITED);
137 }
138
139
140 /** When called prints a debug message but only if debugging is enabled.
141 */
142 public void debug(String message)
143 {
144 if (debug)
145 {
146 System.err.println(message);
147 }
148 }
149
150
151 /** Destructor which unallocates connection to Lucene.
152 */
153 public void destroy()
154 throws IOException
155 {
156 index_writer.close();
157 index_writer = null;
158 }
159
160
161 /** Delete the indicated document from the Lucene index. This process is
162 * very similar to the initial step of index editing.
163 *
164 * @param node_id The unique identifier of a Lucene document as an
165 * integer
166 */
167 public void deleteDocument(int node_id)
168 throws IOException
169 {
170 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
171 debug("- Initial number of documents in index: " + index_writer.numDocs());
172 index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
173 debug("- Final number of documents in index: " + index_writer.numDocs());
174 }
175}
Note: See TracBrowser for help on using the repository browser.