source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java@ 13686

Last change on this file since 13686 was 13686, checked in by kjdon, 17 years ago

package has changed to org.greenstone.LuceneWrapper to be consistent with other indexer packages

  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
Line 
1/** @file GS2LuceneDelete.java
2 *
3 * Provides a wrapper to the document deleting features of Lucene.
4 *
5 * This java application makes use of the existing Lucene class IndexModifier
6 * to access and make changes to the information stored about documents in a
7 * Lucene database. This is an essential component of the IncrementalBuilder
8 * PERL module, and endeavours to make editing the text and metadata of
9 * documents without having to rebuild the entire collection a reality (in
10 * other words, true incremental/dynamic building).
11 *
12 * A component of the Greenstone digital library software from the New Zealand
13 * Digital Library Project at the University of Waikato, New Zealand.
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * This program is distributed in the hope that it will be useful, but WITHOUT
21 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
23 * more details.
24 *
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, write to the Free Software Foundation, Inc., 675
27 * Mass Ave, Cambridge, MA 02139, USA.
28 *
29 * Copyright (c) 2006 DL Consulting Ltd., New Zealand
30 */
31
32package org.greenstone.LuceneWrapper;
33
34import java.io.IOException;
35import java.util.Arrays;
36import java.util.Enumeration;
37import java.util.Vector;
38
39import org.apache.lucene.analysis.Analyzer;
40import org.apache.lucene.analysis.standard.StandardAnalyzer;
41import org.apache.lucene.document.Document;
42import org.apache.lucene.document.Field;
43
44
45/** Contains methods for deleting a document that has previously been indexed
46 * into a Lucene database.
47 * @author John Thompson, DL Consulting Ltd. (unless stated otherwise)
48 */
49public class GS2LuceneDelete
50{
51 /** This is the main entry point to the deletor and is responsible for
52 * parsing the arguments and creating an instance of the deletor class.
53 *
54 * @param args The arguments passed into the application as a string
55 * array
56 * @return An integer describing the exit state of the application
57 * @throws Exception on any fatal error state
58 */
59 static public void main (String args[])
60 throws Exception
61 {
62 // Parse arguments
63 int node_id = -1;
64 String index_path = "";
65
66 for (int i = 0; i < args.length; i += 2)
67 {
68 if (args[i].equals("--index"))
69 {
70 index_path = args[i + 1];
71 }
72 else if (args[i].equals("--nodeid"))
73 {
74 String temp = args[i + 1];
75 node_id = Integer.parseInt(temp);
76 temp = null; // Off to the gc with you!
77 }
78 else
79 {
80 System.out.println("Error! Unknown argument: " + args[i]);
81 GS2LuceneDelete.printUsage();
82 }
83 }
84
85 // Check arguments
86 if(index_path.equals(""))
87 {
88 System.out.println("Error! Missing index path");
89 GS2LuceneDelete.printUsage();
90 }
91 if(node_id == -1)
92 {
93 System.out.println("Error! Missing or invalid Node ID");
94 GS2LuceneDelete.printUsage();
95 }
96
97
98 // Instantiate deletor, and perform the delete
99 GS2LuceneDelete deletor = new GS2LuceneDelete(index_path);
100 deletor.deleteDocument(node_id);
101 deletor.destroy();
102 deletor = null;
103 }
104 /** main() **/
105
106 /** **/
107 private boolean debug = true;
108
109 /** **/
110 private GS2IndexModifier index_modifier;
111
112 /** Constructor which takes the path to the Lucene index to be edited.
113 *
114 * @param index_path The full path to the index directory as a String
115 */
116 public GS2LuceneDelete(String index_path)
117 throws IOException
118 {
119 Analyzer analyzer = new StandardAnalyzer();
120 index_modifier = new GS2IndexModifier(index_path, analyzer);
121 }
122 /** GS2LuceneDelete **/
123
124 /** When called prints a debug message but only if debugging is enabled.
125 */
126 public void debug(String message)
127 {
128 if(debug)
129 {
130 System.err.println(message);
131 }
132 }
133 /** debug() **/
134
135 /** Destructor which unallocates connection to Lucene.
136 */
137 public void destroy()
138 throws IOException
139 {
140 index_modifier.close();
141 index_modifier = null;
142 }
143
144 /** Delete the indicated document from the Lucene index. This process is
145 * very similar to the initial step of index editing.
146 *
147 * @param node_id The unique identifier of a Lucene document as an
148 * integer
149 */
150 public void deleteDocument(int node_id)
151 throws IOException
152 {
153 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
154 debug("- Initial number of documents in index: " + index_modifier.docCount());
155 // Retrieve the document requested
156 int doc_num = index_modifier.getDocNumByNodeID(node_id);
157 if (doc_num != -1)
158 {
159 debug("* Found document #" + doc_num);
160 // Retrieve the actual document
161 Document document = index_modifier.document(doc_num);
162 // Remove the document from the index before modifying
163 index_modifier.deleteDocument(doc_num);
164 debug("* Removed document from index");
165 }
166 else
167 {
168 debug("- No such document!");
169 }
170 debug("- Final number of documents in index: " + index_modifier.docCount());
171 }
172 /** editIndex() **/
173
174 /** Display program usage message.
175 */
176 static public void printUsage()
177 {
178 System.out.println("usage: GS2LuceneDelete --index <path> --nodeid <int>");
179 System.out.println("");
180 System.out.println("where:");
181 System.out.println(" index - is the full path to the directory containing the directory");
182 System.out.println(" to edit, including the level (ie didx, sidx)");
183 System.out.println(" nodeid - the unique identifier of the document to delete. This is the");
184 System.out.println(" same as the docnum in the GDBM");
185 System.out.println("");
186 System.exit(0);
187 }
188 /** printUsage() **/
189
190}
191/** class GS2LuceneDelete **/
Note: See TracBrowser for help on using the repository browser.